Merge branch 'master' into improved_testing

This commit is contained in:
Thomas Wolf
2019-08-30 13:40:35 +02:00
committed by GitHub
43 changed files with 3103 additions and 56 deletions

View File

@@ -122,6 +122,10 @@ class XLMTokenizer(PreTrainedTokenizer):
cls_token=cls_token, mask_token=mask_token,
additional_special_tokens=additional_special_tokens,
**kwargs)
self.max_len_single_sentence = self.max_len - 2 # take into account special tokens
self.max_len_sentences_pair = self.max_len - 3 # take into account special tokens
try:
import ftfy
from spacy.lang.en import English