Fix properties of unset special tokens in non-verbose mode (#17797)
Co-authored-by: SaulLu <55560583+SaulLu@users.noreply.github.com>
This commit is contained in:
@@ -31,6 +31,7 @@ from pathlib import Path
|
||||
from typing import TYPE_CHECKING, Any, Dict, List, Tuple, Union
|
||||
|
||||
from huggingface_hub import HfFolder, Repository, delete_repo, set_access_token
|
||||
from parameterized import parameterized
|
||||
from requests.exceptions import HTTPError
|
||||
from transformers import (
|
||||
AlbertTokenizer,
|
||||
@@ -578,6 +579,25 @@ class TokenizerTesterMixin:
|
||||
self.assertListEqual(getattr(tokenizer, "additional_special_tokens"), [token_to_test_setters])
|
||||
self.assertListEqual(getattr(tokenizer, "additional_special_tokens_ids"), [token_id_to_test_setters])
|
||||
|
||||
@parameterized.expand([(True,), (False,)])
def test_tokenizers_special_tokens_properties_unset(self, verbose):
    """Check that special-token properties read back as ``None`` once set to ``None``.

    The test is parameterized over ``verbose`` (``True`` and ``False``); the flag is
    forwarded to ``self.get_tokenizers``. Each tokenizer is checked in its own
    sub-test, labelled with the tokenizer's class name.
    """
    # Properties that are unset and then read back, in this order.
    special_token_attrs = (
        "bos_token",
        "eos_token",
        "unk_token",
        "sep_token",
        "pad_token",
        "cls_token",
        "mask_token",
        "additional_special_tokens",
    )

    for tokenizer in self.get_tokenizers(verbose=verbose):
        with self.subTest(f"{tokenizer.__class__.__name__}"):
            for attr_name in special_token_attrs:
                setattr(tokenizer, attr_name, None)
                self.assertIsNone(getattr(tokenizer, attr_name))
|
||||
|
||||
def test_save_and_load_tokenizer(self):
|
||||
# safety check on max_len default value so we are sure the test works
|
||||
tokenizers = self.get_tokenizers()
|
||||
|
||||
Reference in New Issue
Block a user