change message (#17836)

This commit is contained in:
SaulLu
2022-06-23 14:39:48 +02:00
committed by GitHub
parent d37a68e685
commit b2fdbaccdd

View File

@@ -291,7 +291,10 @@ class BatchEncoding(UserDict):
`List[str]`: The list of tokens at that index. `List[str]`: The list of tokens at that index.
""" """
if not self._encodings: if not self._encodings:
raise ValueError("tokens() is not available when using Python-based tokenizers") raise ValueError(
"tokens() is not available when using non-fast tokenizers (e.g. instance of a `XxxTokenizerFast`"
" class)."
)
return self._encodings[batch_index].tokens return self._encodings[batch_index].tokens
def sequence_ids(self, batch_index: int = 0) -> List[Optional[int]]: def sequence_ids(self, batch_index: int = 0) -> List[Optional[int]]:
@@ -312,7 +315,10 @@ class BatchEncoding(UserDict):
sequence. sequence.
""" """
if not self._encodings: if not self._encodings:
raise ValueError("sequence_ids() is not available when using Python-based tokenizers") raise ValueError(
"sequence_ids() is not available when using non-fast tokenizers (e.g. instance of a `XxxTokenizerFast`"
" class)."
)
return self._encodings[batch_index].sequence_ids return self._encodings[batch_index].sequence_ids
def words(self, batch_index: int = 0) -> List[Optional[int]]: def words(self, batch_index: int = 0) -> List[Optional[int]]:
@@ -328,7 +334,10 @@ class BatchEncoding(UserDict):
(several tokens will be mapped to the same word index if they are parts of that word). (several tokens will be mapped to the same word index if they are parts of that word).
""" """
if not self._encodings: if not self._encodings:
raise ValueError("words() is not available when using Python-based tokenizers") raise ValueError(
"words() is not available when using non-fast tokenizers (e.g. instance of a `XxxTokenizerFast`"
" class)."
)
warnings.warn( warnings.warn(
"`BatchEncoding.words()` property is deprecated and should be replaced with the identical, " "`BatchEncoding.words()` property is deprecated and should be replaced with the identical, "
"but more self-explanatory `BatchEncoding.word_ids()` property.", "but more self-explanatory `BatchEncoding.word_ids()` property.",
@@ -349,7 +358,10 @@ class BatchEncoding(UserDict):
(several tokens will be mapped to the same word index if they are parts of that word). (several tokens will be mapped to the same word index if they are parts of that word).
""" """
if not self._encodings: if not self._encodings:
raise ValueError("word_ids() is not available when using Python-based tokenizers") raise ValueError(
"word_ids() is not available when using non-fast tokenizers (e.g. instance of a `XxxTokenizerFast`"
" class)."
)
return self._encodings[batch_index].word_ids return self._encodings[batch_index].word_ids
def token_to_sequence(self, batch_or_token_index: int, token_index: Optional[int] = None) -> int: def token_to_sequence(self, batch_or_token_index: int, token_index: Optional[int] = None) -> int: