Update tokenization_xlm.py

This commit is contained in:
Guillem García Subies
2019-08-20 14:06:17 +02:00
committed by GitHub
parent 933841d903
commit bfd75056b0

View File

@@ -124,8 +124,9 @@ class XLMTokenizer(PreTrainedTokenizer):
**kwargs)
try:
import ftfy
import spacy
self.nlp = spacy.load('en', disable=['parser', 'tagger', 'ner', 'textcat'])
from spacy.lang.en import English
_nlp = English()
self.nlp = nlp.Defaults.create_tokenizer(_nlp)
self.fix_text = ftfy.fix_text
except ImportError:
logger.warning("ftfy or spacy is not installed using BERT BasicTokenizer instead of SpaCy & ftfy.")