From ac27548b25ffef5966bd11c90419230cbeafe06e Mon Sep 17 00:00:00 2001 From: thomwolf Date: Sat, 27 Jul 2019 11:50:47 +0200 Subject: [PATCH] fix unk_token test --- pytorch_transformers/tokenization_gpt2.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pytorch_transformers/tokenization_gpt2.py b/pytorch_transformers/tokenization_gpt2.py index afcdf1e64e..29a9ae7660 100644 --- a/pytorch_transformers/tokenization_gpt2.py +++ b/pytorch_transformers/tokenization_gpt2.py @@ -104,7 +104,7 @@ class GPT2Tokenizer(PreTrainedTokenizer): def __init__(self, vocab_file, merges_file, errors='replace', unk_token="<|endoftext|>", bos_token="<|endoftext|>", eos_token="<|endoftext|>", **kwargs): - super(GPT2Tokenizer, self).__init__(bos_token=bos_token, eos_token=eos_token, **kwargs) + super(GPT2Tokenizer, self).__init__(bos_token=bos_token, eos_token=eos_token, unk_token=unk_token, **kwargs) self.encoder = json.load(open(vocab_file)) self.decoder = {v:k for k,v in self.encoder.items()}