From ae152cec09b496101841dcbc59613cc7a3d133a4 Mon Sep 17 00:00:00 2001 From: Joel Grus Date: Wed, 24 Jul 2019 16:54:48 -0700 Subject: [PATCH] make save_pretrained work with added tokens Right now it's dumping the *decoder* (`added_tokens_decoder`) when it should be dumping the *encoder* (`added_tokens_encoder`). This fixes that. --- pytorch_transformers/tokenization_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pytorch_transformers/tokenization_utils.py b/pytorch_transformers/tokenization_utils.py index f603a29d74..858edc7c50 100644 --- a/pytorch_transformers/tokenization_utils.py +++ b/pytorch_transformers/tokenization_utils.py @@ -266,7 +266,7 @@ class PreTrainedTokenizer(object): with open(added_tokens_file, 'w', encoding='utf-8') as f: if self.added_tokens_encoder: - out_str = json.dumps(self.added_tokens_decoder, ensure_ascii=False) + out_str = json.dumps(self.added_tokens_encoder, ensure_ascii=False) else: out_str = u"{}" f.write(out_str)