From bfd75056b0a080addafb7f3d7c9336d27b883a0e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Guillem=20Garc=C3=ADa=20Subies?= <37592763+GuillemGSubies@users.noreply.github.com>
Date: Tue, 20 Aug 2019 14:06:17 +0200
Subject: [PATCH] Update tokenization_xlm.py

---
Review note: the hunk replaces spacy.load('en', ...) with a tokenizer built
from a bare English() instance. The tokenizer must be created from that same
instance (`_nlp`); the bare name `nlp` is not defined in this hunk and would
raise NameError, which the `except ImportError` below does not catch.

 pytorch_transformers/tokenization_xlm.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/pytorch_transformers/tokenization_xlm.py b/pytorch_transformers/tokenization_xlm.py
index b690a3a945..8e7c2954f2 100644
--- a/pytorch_transformers/tokenization_xlm.py
+++ b/pytorch_transformers/tokenization_xlm.py
@@ -124,8 +124,9 @@ class XLMTokenizer(PreTrainedTokenizer):
                                            **kwargs)
         try:
             import ftfy
-            import spacy
-            self.nlp = spacy.load('en', disable=['parser', 'tagger', 'ner', 'textcat'])
+            from spacy.lang.en import English
+            _nlp = English()
+            self.nlp = _nlp.Defaults.create_tokenizer(_nlp)
             self.fix_text = ftfy.fix_text
         except ImportError:
             logger.warning("ftfy or spacy is not installed using BERT BasicTokenizer instead of SpaCy & ftfy.")