fixing tokenization of extra_id symbols in T5Tokenizer. Related to issue 4021 (#4353)
This commit is contained in:
@@ -503,6 +503,7 @@ class SpecialTokensMixin:
 if key in self.SPECIAL_TOKENS_ATTRIBUTES:
     if key == "additional_special_tokens":
         assert isinstance(value, (list, tuple)) and all(isinstance(t, str) for t in value)
+        setattr(self, key, value)
     elif isinstance(value, AddedTokenFast):
         setattr(self, key, str(value))
     elif isinstance(value, str):
Reference in New Issue
Block a user