Update tokenizers to 0.7.0-rc5 (#3705)

This commit is contained in:
Anthony MOI
2020-04-10 14:23:49 -04:00
committed by GitHub
parent 551b450527
commit b7cf9f43d2
4 changed files with 6 additions and 8 deletions

View File

@@ -265,12 +265,10 @@ class _OpenAIGPTCharBPETokenizer(BaseTokenizer):
):
if vocab_file is not None and merges_file is not None:
tokenizer = Tokenizer(
-BPE.from_files(
-vocab_file, merges_file, dropout=dropout, unk_token=unk_token, end_of_word_suffix=suffix
-)
+BPE(vocab_file, merges_file, dropout=dropout, unk_token=unk_token, end_of_word_suffix=suffix)
)
else:
-tokenizer = Tokenizer(BPE.empty())
+tokenizer = Tokenizer(BPE())
# Check for Unicode normalization first (before everything else)
normalizers = []

View File

@@ -362,7 +362,7 @@ class _TransfoXLDelimiterLookupTokenizer(BaseTokenizer):
):
try:
-tokenizer = WordLevel.from_files(vocab_file, unk_token=unk_token)
+tokenizer = WordLevel(vocab_file, unk_token=unk_token)
tokenizer = Tokenizer(tokenizer)
except Exception:
raise ValueError(