fix word_to_tokens docstring format (#20450)

* fix docstring * fix 2 * add details
2022-11-25 20:28:00 +01:00
parent a547d5bda5
commit 3c39c07f11
1 changed files with 4 additions and 2 deletions
--- a/src/transformers/tokenization_utils_base.py
+++ b/src/transformers/tokenization_utils_base.py
@@ -475,8 +475,10 @@ class BatchEncoding(UserDict):
                or 1) the provided word index belongs to.

        Returns:
-            Optional [`~tokenization_utils_base.TokenSpan`] Span of tokens in the encoded sequence. Returns `None` if
-            no tokens correspond to the word.
+            ([`~tokenization_utils_base.TokenSpan`], *optional*): Span of tokens in the encoded sequence. Returns
+            `None` if no tokens correspond to the word. This can happen especially when the token is a special token
+            that has been used to format the tokenization, for example when a class token is added at the very
+            beginning of the tokenization.
        """

        if not self._encodings: