Adding max_lengths for single sentences and sentence pairs

This commit is contained in:
thomwolf
2019-08-23 17:31:11 +02:00
parent 90dcd8c05d
commit 47d6853439
5 changed files with 40 additions and 0 deletions

View File

@@ -215,6 +215,14 @@ class XLMTokenizer(PreTrainedTokenizer):
out_string = ''.join(tokens).replace('</w>', ' ').strip()
return out_string
@property
def max_len_single_sentence(self):
    """Return the token budget left for a single sentence.

    The budget is ``self.max_len`` minus the positions set aside for the
    special tokens that accompany a single-sentence input.
    """
    # Two positions are reserved for special tokens.
    reserved_for_special_tokens = 2
    return self.max_len - reserved_for_special_tokens
@property
def max_len_sentences_pair(self):
    """Return the token budget left for a pair of sentences.

    The budget is ``self.max_len`` minus the positions set aside for the
    special tokens that accompany a sentence-pair input.
    """
    # Three positions are reserved for special tokens.
    reserved_for_special_tokens = 3
    return self.max_len - reserved_for_special_tokens
def add_special_tokens_single_sentence(self, token_ids):
"""
Adds special tokens to a sequence for sequence classification tasks.