From 3c39c07f1160beb6190f635e2dab6a27e6d67596 Mon Sep 17 00:00:00 2001
From: SaulLu <55560583+SaulLu@users.noreply.github.com>
Date: Fri, 25 Nov 2022 20:28:00 +0100
Subject: [PATCH] fix `word_to_tokens` docstring format (#20450)

* fix docstring

* fix 2

* add details
---
 src/transformers/tokenization_utils_base.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/src/transformers/tokenization_utils_base.py b/src/transformers/tokenization_utils_base.py
index b58ffc69c2..e3721f8131 100644
--- a/src/transformers/tokenization_utils_base.py
+++ b/src/transformers/tokenization_utils_base.py
@@ -475,8 +475,10 @@ class BatchEncoding(UserDict):
                 or 1) the provided word index belongs to.
 
         Returns:
-            Optional [`~tokenization_utils_base.TokenSpan`] Span of tokens in the encoded sequence. Returns `None` if
-            no tokens correspond to the word.
+            ([`~tokenization_utils_base.TokenSpan`], *optional*): Span of tokens in the encoded sequence. Returns
+            `None` if no tokens correspond to the word. This can happen especially when the token is a special token
+            that has been used to format the tokenization. For example when we add a class token at the very beginning
+            of the tokenization.
         """
 
         if not self._encodings: