Make max_len_single_sentence and max_len_sentences_pair plain attributes so they can be modified

This commit is contained in:
thomwolf
2019-08-23 22:07:26 +02:00
parent ab7bd5ef98
commit 3bcbebd440
8 changed files with 26 additions and 40 deletions

View File

@@ -122,6 +122,10 @@ class XLMTokenizer(PreTrainedTokenizer):
cls_token=cls_token, mask_token=mask_token,
additional_special_tokens=additional_special_tokens,
**kwargs)
self.max_len_single_sentence = self.max_len - 2 # take into account special tokens
self.max_len_sentences_pair = self.max_len - 3 # take into account special tokens
try:
import ftfy
from spacy.lang.en import English
@@ -215,14 +219,6 @@ class XLMTokenizer(PreTrainedTokenizer):
out_string = ''.join(tokens).replace('</w>', ' ').strip()
return out_string
@property
def max_len_single_sentence(self):
    """Return the maximum length available for a single sentence.

    Computed on each access as ``self.max_len`` minus 2, the two positions
    being reserved for special tokens.
    """
    return self.max_len - 2  # take into account special tokens
@property
def max_len_sentences_pair(self):
    """Return the maximum length available for a pair of sentences.

    Computed on each access as ``self.max_len`` minus 3, the three positions
    being reserved for special tokens.
    """
    return self.max_len - 3  # take into account special tokens
def add_special_tokens_single_sentence(self, token_ids):
"""
Adds special tokens to a sequence for sequence classification tasks.