From 3dea40b858c7d311dfd3f55e42823092309df2a2 Mon Sep 17 00:00:00 2001 From: Elman Mansimov Date: Mon, 25 May 2020 16:04:30 -0400 Subject: [PATCH] fixing tokenization of extra_id symbols in T5Tokenizer. Related to issue 4021 (#4353) --- src/transformers/tokenization_utils.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/transformers/tokenization_utils.py b/src/transformers/tokenization_utils.py index ef9079540a..b8ed4b5b8c 100644 --- a/src/transformers/tokenization_utils.py +++ b/src/transformers/tokenization_utils.py @@ -503,6 +503,7 @@ class SpecialTokensMixin: if key in self.SPECIAL_TOKENS_ATTRIBUTES: if key == "additional_special_tokens": assert isinstance(value, (list, tuple)) and all(isinstance(t, str) for t in value) + setattr(self, key, value) elif isinstance(value, AddedTokenFast): setattr(self, key, str(value)) elif isinstance(value, str):