Preserve list type of `additional_special_tokens` in `special_tokens_map` (#12759)
* preserve type of `additional_special_tokens` in `special_tokens_map`
* format
* Update src/transformers/tokenization_utils_base.py

Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com>
Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com>
This commit is contained in:
@@ -1192,7 +1192,11 @@ class SpecialTokensMixin:
|
|||||||
for attr in self.SPECIAL_TOKENS_ATTRIBUTES:
|
for attr in self.SPECIAL_TOKENS_ATTRIBUTES:
|
||||||
attr_value = getattr(self, "_" + attr)
|
attr_value = getattr(self, "_" + attr)
|
||||||
if attr_value:
|
if attr_value:
|
||||||
set_attr[attr] = str(attr_value)
|
set_attr[attr] = (
|
||||||
|
type(attr_value)(str(attr_value_sub) for attr_value_sub in attr_value)
|
||||||
|
if isinstance(attr_value, (list, tuple))
|
||||||
|
else str(attr_value)
|
||||||
|
)
|
||||||
return set_attr
|
return set_attr
|
||||||
|
|
||||||
@property
|
@property
|
||||||
|
|||||||
@@ -2462,6 +2462,10 @@ class TokenizerTesterMixin:
|
|||||||
self.assertEqual(
|
self.assertEqual(
|
||||||
tokenizer_r.add_special_tokens({"additional_special_tokens": ["<testtoken3>", "<testtoken4>"]}), 2
|
tokenizer_r.add_special_tokens({"additional_special_tokens": ["<testtoken3>", "<testtoken4>"]}), 2
|
||||||
)
|
)
|
||||||
|
self.assertIn("<testtoken3>", tokenizer_r.special_tokens_map["additional_special_tokens"])
|
||||||
|
self.assertIsInstance(tokenizer_r.special_tokens_map["additional_special_tokens"], list)
|
||||||
|
self.assertGreaterEqual(len(tokenizer_r.special_tokens_map["additional_special_tokens"]), 2)
|
||||||
|
|
||||||
self.assertEqual(len(tokenizer_r), vocab_size + 8)
|
self.assertEqual(len(tokenizer_r), vocab_size + 8)
|
||||||
|
|
||||||
def test_offsets_mapping(self):
|
def test_offsets_mapping(self):
|
||||||
|
|||||||
Reference in New Issue
Block a user