Fix properties of unset special tokens in non verbose mode (#17797)

Co-authored-by: SaulLu <55560583+SaulLu@users.noreply.github.com>
This commit is contained in:
Guillaume Klein
2022-06-23 14:40:13 +02:00
committed by GitHub
parent b2fdbaccdd
commit 3eed5530ec
6 changed files with 56 additions and 24 deletions

View File

@@ -31,6 +31,7 @@ from pathlib import Path
from typing import TYPE_CHECKING, Any, Dict, List, Tuple, Union
from huggingface_hub import HfFolder, Repository, delete_repo, set_access_token
from parameterized import parameterized
from requests.exceptions import HTTPError
from transformers import (
AlbertTokenizer,
@@ -578,6 +579,25 @@ class TokenizerTesterMixin:
self.assertListEqual(getattr(tokenizer, "additional_special_tokens"), [token_to_test_setters])
self.assertListEqual(getattr(tokenizer, "additional_special_tokens_ids"), [token_id_to_test_setters])
@parameterized.expand([(True,), (False,)])
def test_tokenizers_special_tokens_properties_unset(self, verbose):
    # Parameterized over verbose=True and verbose=False: after a special-token
    # property is explicitly set to None, reading it back must yield None.
    special_token_properties = (
        "bos_token",
        "eos_token",
        "unk_token",
        "sep_token",
        "pad_token",
        "cls_token",
        "mask_token",
        "additional_special_tokens",
    )
    for tokenizer in self.get_tokenizers(verbose=verbose):
        # One sub-test per tokenizer class so a failure names the offender.
        with self.subTest(f"{tokenizer.__class__.__name__}"):
            for property_name in special_token_properties:
                # Unset the property, then check that the getter reports it as unset.
                setattr(tokenizer, property_name, None)
                self.assertIsNone(getattr(tokenizer, property_name))
def test_save_and_load_tokenizer(self):
# safety check on max_len default value so we are sure the test works
tokenizers = self.get_tokenizers()