From 75ff53055138e4672aaf58892b087a055fc011da Mon Sep 17 00:00:00 2001
From: Patrick von Platen <patrick.v.platen@gmail.com>
Date: Mon, 4 Jan 2021 17:27:29 +0100
Subject: [PATCH] correct docs (#9378)

---
 docs/source/custom_datasets.rst | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/docs/source/custom_datasets.rst b/docs/source/custom_datasets.rst
index 6c796e07e0..924435dd61 100644
--- a/docs/source/custom_datasets.rst
+++ b/docs/source/custom_datasets.rst
@@ -558,12 +558,15 @@ we can use the built in :func:`~transformers.BatchEncoding.char_to_token` method
         end_positions = []
         for i in range(len(answers)):
             start_positions.append(encodings.char_to_token(i, answers[i]['answer_start']))
-            end_positions.append(encodings.char_to_token(i, answers[i]['answer_end'] - 1))
-            # if None, the answer passage has been truncated
+            end_positions.append(encodings.char_to_token(i, answers[i]['answer_end']))
+
+            # if start position is None, the answer passage has been truncated
             if start_positions[-1] is None:
                 start_positions[-1] = tokenizer.model_max_length
+
+            # if end position is None, the character index passed to 'char_to_token' points to the space before the correct token -> retry with index + 1
             if end_positions[-1] is None:
-                end_positions[-1] = tokenizer.model_max_length
+                end_positions[-1] = encodings.char_to_token(i, answers[i]['answer_end'] + 1)
         encodings.update({'start_positions': start_positions, 'end_positions': end_positions})
 
     add_token_positions(train_encodings, train_answers)