Update tokenization_xlm.py
This commit is contained in:
committed by
GitHub
parent
933841d903
commit
bfd75056b0
@@ -124,8 +124,9 @@ class XLMTokenizer(PreTrainedTokenizer):
|
|||||||
**kwargs)
|
**kwargs)
|
||||||
try:
|
try:
|
||||||
import ftfy
|
import ftfy
|
||||||
import spacy
|
from spacy.lang.en import English
|
||||||
self.nlp = spacy.load('en', disable=['parser', 'tagger', 'ner', 'textcat'])
|
_nlp = English()
|
||||||
|
self.nlp = _nlp.Defaults.create_tokenizer(_nlp)
|
||||||
self.fix_text = ftfy.fix_text
|
self.fix_text = ftfy.fix_text
|
||||||
except ImportError:
|
except ImportError:
|
||||||
logger.warning("ftfy or spacy is not installed using BERT BasicTokenizer instead of SpaCy & ftfy.")
|
logger.warning("ftfy or spacy is not installed using BERT BasicTokenizer instead of SpaCy & ftfy.")
|
||||||
|
|||||||
Reference in New Issue
Block a user