Update tokenization_openai.py

This commit is contained in:
Guillem García Subies
2019-08-20 14:07:40 +02:00
committed by GitHub
parent bfd75056b0
commit bb04446285

View File

@@ -89,9 +89,9 @@ class OpenAIGPTTokenizer(PreTrainedTokenizer):
try: try:
import ftfy import ftfy
import spacy from spacy.lang.en import English
self.nlp = spacy.load('en', disable=['parser', 'tagger', 'ner', 'textcat']) _nlp = English()
self.fix_text = ftfy.fix_text self.nlp = nlp.Defaults.create_tokenizer(_nlp)
except ImportError: except ImportError:
logger.warning("ftfy or spacy is not installed using BERT BasicTokenizer instead of SpaCy & ftfy.") logger.warning("ftfy or spacy is not installed using BERT BasicTokenizer instead of SpaCy & ftfy.")
self.nlp = BasicTokenizer(do_lower_case=True) self.nlp = BasicTokenizer(do_lower_case=True)