From 8581a670e3c3cc8c3ba8d67bd98bb206057641f2 Mon Sep 17 00:00:00 2001
From: Mehrad Moradshahi
Date: Mon, 18 May 2020 05:54:04 -0700
Subject: [PATCH] [MbartTokenizer] save to sentencepiece.bpe.model (#4335)

---
 src/transformers/tokenization_bart.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/src/transformers/tokenization_bart.py b/src/transformers/tokenization_bart.py
index de39815879..a1c8c1972e 100644
--- a/src/transformers/tokenization_bart.py
+++ b/src/transformers/tokenization_bart.py
@@ -27,8 +27,6 @@ vocab_url = "https://s3.amazonaws.com/models.huggingface.co/bert/roberta-large-v
 merges_url = "https://s3.amazonaws.com/models.huggingface.co/bert/roberta-large-merges.txt"
 _all_bart_models = ["bart-large", "bart-large-mnli", "bart-large-cnn", "bart-large-xsum"]
 
-VOCAB_FILES_NAMES = {"vocab_file": "sentence.bpe.model"}
-
 
 class BartTokenizer(RobertaTokenizer):
     # merges and vocab same as Roberta
@@ -44,6 +42,6 @@ SPM_URL = "https://s3.amazonaws.com/models.huggingface.co/bert/facebook/mbart-la
 
 
 class MBartTokenizer(XLMRobertaTokenizer):
-    vocab_files_names = VOCAB_FILES_NAMES
+    vocab_files_names = {"vocab_file": "sentencepiece.bpe.model"}
     max_model_input_sizes = {m: 1024 for m in _all_mbart_models}
     pretrained_vocab_files_map = {"vocab_file": {m: SPM_URL for m in _all_mbart_models}}