Make get_special_tokens_mask consider all tokens (#11163)

This commit is contained in:
Sylvain Gugger
2021-04-09 11:57:44 -04:00
committed by GitHub
parent 6060746570
commit 45fc8c7951
36 changed files with 90 additions and 385 deletions

View File

@@ -225,12 +225,9 @@ class {{cookiecutter.camelcase_modelname}}Tokenizer(PreTrainedTokenizer):
:obj:`List[int]`: A list of integers in the range [0, 1]: 1 for a special token, 0 for a sequence token.
"""
if already_has_special_tokens:
if token_ids_1 is not None:
raise ValueError(
"You should not supply a second sequence if the provided sequence of "
"ids is already formatted with special tokens for the model."
)
return list(map(lambda x: 1 if x in [self.sep_token_id, self.cls_token_id] else 0, token_ids_0))
return super().get_special_tokens_mask(
token_ids_0=token_ids_0, token_ids_1=token_ids_1, already_has_special_tokens=True
)
if token_ids_1 is None:
return [1] + ([0] * len(token_ids_0)) + [1]

View File

@@ -46,8 +46,7 @@ Tips:
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: transformers.{{cookiecutter.camelcase_modelname}}TokenizerFast
:members: build_inputs_with_special_tokens, get_special_tokens_mask,
create_token_type_ids_from_sequences, save_vocabulary
:members:
{% if "PyTorch" in cookiecutter.generate_tensorflow_and_pytorch -%}