From 83a2347952a789ce97dc3eed1384b195b7ef16fb Mon Sep 17 00:00:00 2001 From: vitaliyradchenko Date: Wed, 25 Dec 2019 16:42:26 +0200 Subject: [PATCH] fixed lack of added and special tokens --- src/transformers/tokenization_utils.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/transformers/tokenization_utils.py b/src/transformers/tokenization_utils.py index 73075521fe..5b3e795448 100644 --- a/src/transformers/tokenization_utils.py +++ b/src/transformers/tokenization_utils.py @@ -476,6 +476,7 @@ class PreTrainedTokenizer(object): added_tok_decoder = {v: k for k, v in added_tok_encoder.items()} tokenizer.added_tokens_encoder.update(added_tok_encoder) tokenizer.added_tokens_decoder.update(added_tok_decoder) + tokenizer.unique_added_tokens_encoder.update(set(tokenizer.added_tokens_encoder.keys()).union(set(tokenizer.all_special_tokens))) return tokenizer