From c6acbdd50af6d1b915916001c01c6d2760fe316d Mon Sep 17 00:00:00 2001 From: Julien Chaumond Date: Fri, 27 Sep 2019 17:02:29 -0400 Subject: [PATCH] Close #1304 --- transformers/tokenization_roberta.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/transformers/tokenization_roberta.py b/transformers/tokenization_roberta.py index 32e92be211..7adeea689e 100644 --- a/transformers/tokenization_roberta.py +++ b/transformers/tokenization_roberta.py @@ -81,6 +81,8 @@ class RobertaTokenizer(GPT2Tokenizer): bos_token=bos_token, eos_token=eos_token, unk_token=unk_token, sep_token=sep_token, cls_token=cls_token, pad_token=pad_token, mask_token=mask_token, **kwargs) + self.max_len_single_sentence = self.max_len - 2 # take into account special tokens + self.max_len_sentences_pair = self.max_len - 4 # take into account special tokens def add_special_tokens_single_sequence(self, token_ids): """