From 55e8d0cea25be18b044523b30f4bef58fec63289 Mon Sep 17 00:00:00 2001 From: Julien Chaumond Date: Tue, 10 Nov 2020 14:03:29 +0100 Subject: [PATCH] Update links from s3 to huggingface.co --- .../seq2seq/bertabs/configuration_bertabs.py | 2 +- src/transformers/configuration_albert.py | 16 ++--- src/transformers/configuration_bart.py | 14 ++-- src/transformers/configuration_bert.py | 44 ++++++------ src/transformers/configuration_camembert.py | 6 +- src/transformers/configuration_ctrl.py | 2 +- src/transformers/configuration_deberta.py | 4 +- src/transformers/configuration_distilbert.py | 14 ++-- src/transformers/configuration_dpr.py | 12 ++-- src/transformers/configuration_electra.py | 12 ++-- src/transformers/configuration_flaubert.py | 8 +-- src/transformers/configuration_funnel.py | 20 +++--- src/transformers/configuration_gpt2.py | 10 +-- src/transformers/configuration_layoutlm.py | 4 +- src/transformers/configuration_longformer.py | 10 +-- src/transformers/configuration_marian.py | 2 +- src/transformers/configuration_mbart.py | 4 +- src/transformers/configuration_mobilebert.py | 2 +- src/transformers/configuration_openai.py | 2 +- src/transformers/configuration_prophetnet.py | 2 +- src/transformers/configuration_retribert.py | 2 +- src/transformers/configuration_roberta.py | 12 ++-- src/transformers/configuration_squeezebert.py | 6 +- src/transformers/configuration_t5.py | 10 +-- src/transformers/configuration_transfo_xl.py | 2 +- src/transformers/configuration_xlm.py | 20 +++--- .../configuration_xlm_prophetnet.py | 2 +- src/transformers/configuration_xlm_roberta.py | 12 ++-- src/transformers/configuration_xlnet.py | 4 +- src/transformers/tokenization_albert.py | 16 ++--- src/transformers/tokenization_albert_fast.py | 32 ++++----- src/transformers/tokenization_bart.py | 4 +- src/transformers/tokenization_bart_fast.py | 6 +- src/transformers/tokenization_bert.py | 34 +++++----- src/transformers/tokenization_bert_fast.py | 68 +++++++++---------- .../tokenization_bert_generation.py | 2 +- .../tokenization_bert_japanese.py | 8 +-- src/transformers/tokenization_bertweet.py | 4 +- src/transformers/tokenization_camembert.py | 2 +- .../tokenization_camembert_fast.py | 4 +- src/transformers/tokenization_deberta.py | 4 +- src/transformers/tokenization_distilbert.py | 12 ++-- .../tokenization_distilbert_fast.py | 24 +++---- src/transformers/tokenization_dpr.py | 24 +++---- src/transformers/tokenization_dpr_fast.py | 24 +++---- src/transformers/tokenization_electra.py | 12 ++-- src/transformers/tokenization_electra_fast.py | 24 +++---- src/transformers/tokenization_flaubert.py | 16 ++--- src/transformers/tokenization_funnel.py | 20 +++--- src/transformers/tokenization_funnel_fast.py | 40 +++++------ src/transformers/tokenization_gpt2.py | 20 +++--- src/transformers/tokenization_gpt2_fast.py | 30 ++++---- src/transformers/tokenization_layoutlm.py | 4 +- .../tokenization_layoutlm_fast.py | 8 +-- src/transformers/tokenization_longformer.py | 4 +- .../tokenization_longformer_fast.py | 6 +- src/transformers/tokenization_lxmert.py | 2 +- src/transformers/tokenization_lxmert_fast.py | 4 +- src/transformers/tokenization_marian.py | 2 +- src/transformers/tokenization_mbart.py | 2 +- src/transformers/tokenization_mbart_fast.py | 4 +- src/transformers/tokenization_mobilebert.py | 2 +- .../tokenization_mobilebert_fast.py | 4 +- src/transformers/tokenization_openai.py | 4 +- src/transformers/tokenization_openai_fast.py | 6 +- src/transformers/tokenization_phobert.py | 8 +-- src/transformers/tokenization_prophetnet.py | 2 +- src/transformers/tokenization_retribert.py | 2 +- .../tokenization_retribert_fast.py | 4 +- src/transformers/tokenization_roberta.py | 24 +++---- src/transformers/tokenization_roberta_fast.py | 36 +++++----- src/transformers/tokenization_squeezebert.py | 6 +- .../tokenization_squeezebert_fast.py | 12 ++-- src/transformers/tokenization_t5.py | 10 +-- src/transformers/tokenization_t5_fast.py | 20 +++--- src/transformers/tokenization_transfo_xl.py | 4 +- src/transformers/tokenization_xlm.py | 40 +++++------ src/transformers/tokenization_xlm_roberta.py | 12 ++-- .../tokenization_xlm_roberta_fast.py | 24 +++---- src/transformers/tokenization_xlnet.py | 4 +- src/transformers/tokenization_xlnet_fast.py | 8 +-- 81 files changed, 479 insertions(+), 479 deletions(-) diff --git a/examples/seq2seq/bertabs/configuration_bertabs.py b/examples/seq2seq/bertabs/configuration_bertabs.py index 68dc0df814..02b8f27cb3 100644 --- a/examples/seq2seq/bertabs/configuration_bertabs.py +++ b/examples/seq2seq/bertabs/configuration_bertabs.py @@ -23,7 +23,7 @@ logger = logging.getLogger(__name__) BERTABS_FINETUNED_CONFIG_MAP = { - "bertabs-finetuned-cnndm": "https://s3.amazonaws.com/models.huggingface.co/bert/remi/bertabs-finetuned-cnndm-extractive-abstractive-summarization/config.json", + "bertabs-finetuned-cnndm": "https://huggingface.co/remi/bertabs-finetuned-cnndm-extractive-abstractive-summarization/resolve/main/config.json", } diff --git a/src/transformers/configuration_albert.py b/src/transformers/configuration_albert.py index 78bde71570..a4a0595a8f 100644 --- a/src/transformers/configuration_albert.py +++ b/src/transformers/configuration_albert.py @@ -19,14 +19,14 @@ from .configuration_utils import PretrainedConfig ALBERT_PRETRAINED_CONFIG_ARCHIVE_MAP = { - "albert-base-v1": "https://s3.amazonaws.com/models.huggingface.co/bert/albert-base-v1-config.json", - "albert-large-v1": "https://s3.amazonaws.com/models.huggingface.co/bert/albert-large-v1-config.json", - "albert-xlarge-v1": "https://s3.amazonaws.com/models.huggingface.co/bert/albert-xlarge-v1-config.json", - "albert-xxlarge-v1": "https://s3.amazonaws.com/models.huggingface.co/bert/albert-xxlarge-v1-config.json", - "albert-base-v2": "https://s3.amazonaws.com/models.huggingface.co/bert/albert-base-v2-config.json", - "albert-large-v2": "https://s3.amazonaws.com/models.huggingface.co/bert/albert-large-v2-config.json", - "albert-xlarge-v2": "https://s3.amazonaws.com/models.huggingface.co/bert/albert-xlarge-v2-config.json", - "albert-xxlarge-v2": "https://s3.amazonaws.com/models.huggingface.co/bert/albert-xxlarge-v2-config.json", + "albert-base-v1": "https://huggingface.co/albert-base-v1/resolve/main/config.json", + "albert-large-v1": "https://huggingface.co/albert-large-v1/resolve/main/config.json", + "albert-xlarge-v1": "https://huggingface.co/albert-xlarge-v1/resolve/main/config.json", + "albert-xxlarge-v1": "https://huggingface.co/albert-xxlarge-v1/resolve/main/config.json", + "albert-base-v2": "https://huggingface.co/albert-base-v2/resolve/main/config.json", + "albert-large-v2": "https://huggingface.co/albert-large-v2/resolve/main/config.json", + "albert-xlarge-v2": "https://huggingface.co/albert-xlarge-v2/resolve/main/config.json", + "albert-xxlarge-v2": "https://huggingface.co/albert-xxlarge-v2/resolve/main/config.json", } diff --git a/src/transformers/configuration_bart.py b/src/transformers/configuration_bart.py index 1bc06624a0..0947d03584 100644 --- a/src/transformers/configuration_bart.py +++ b/src/transformers/configuration_bart.py @@ -21,13 +21,13 @@ from .utils import logging logger = logging.get_logger(__name__) BART_PRETRAINED_CONFIG_ARCHIVE_MAP = { - "facebook/bart-base": "https://s3.amazonaws.com/models.huggingface.co/bert/facebook/bart-base/config.json", - "facebook/bart-large": "https://s3.amazonaws.com/models.huggingface.co/bert/facebook/bart-large/config.json", - "facebook/bart-large-mnli": "https://s3.amazonaws.com/models.huggingface.co/bert/facebook/bart-large-mnli/config.json", - "facebook/bart-large-cnn": "https://s3.amazonaws.com/models.huggingface.co/bert/facebook/bart-large-cnn/config.json", - "facebook/bart-large-xsum": "https://s3.amazonaws.com/models.huggingface.co/bert/facebook/bart-large-xsum/config.json", - "facebook/mbart-large-en-ro": "https://s3.amazonaws.com/models.huggingface.co/bert/facebook/mbart-large-en-ro/config.json", - "yjernite/bart_eli5": "https://s3.amazonaws.com/models.huggingface.co/bert/yjernite/bart_eli5/config.json", + "facebook/bart-base": "https://huggingface.co/facebook/bart-base/resolve/main/config.json", + "facebook/bart-large": "https://huggingface.co/facebook/bart-large/resolve/main/config.json", + "facebook/bart-large-mnli": "https://huggingface.co/facebook/bart-large-mnli/resolve/main/config.json", + "facebook/bart-large-cnn": "https://huggingface.co/facebook/bart-large-cnn/resolve/main/config.json", + "facebook/bart-large-xsum": "https://huggingface.co/facebook/bart-large-xsum/resolve/main/config.json", + "facebook/mbart-large-en-ro": "https://huggingface.co/facebook/mbart-large-en-ro/resolve/main/config.json", + "yjernite/bart_eli5": "https://huggingface.co/yjernite/bart_eli5/resolve/main/config.json", } diff --git a/src/transformers/configuration_bert.py b/src/transformers/configuration_bert.py index 5cb86168d0..19a7c8ed27 100644 --- a/src/transformers/configuration_bert.py +++ b/src/transformers/configuration_bert.py @@ -22,28 +22,28 @@ from .utils import logging logger = logging.get_logger(__name__) BERT_PRETRAINED_CONFIG_ARCHIVE_MAP = { - "bert-base-uncased": "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased-config.json", - "bert-large-uncased": "https://s3.amazonaws.com/models.huggingface.co/bert/bert-large-uncased-config.json", - "bert-base-cased": "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-cased-config.json", - "bert-large-cased": "https://s3.amazonaws.com/models.huggingface.co/bert/bert-large-cased-config.json", - "bert-base-multilingual-uncased": "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-multilingual-uncased-config.json", - "bert-base-multilingual-cased": "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-multilingual-cased-config.json", - "bert-base-chinese": "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-chinese-config.json", - "bert-base-german-cased": "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-german-cased-config.json", - "bert-large-uncased-whole-word-masking": "https://s3.amazonaws.com/models.huggingface.co/bert/bert-large-uncased-whole-word-masking-config.json", - "bert-large-cased-whole-word-masking": "https://s3.amazonaws.com/models.huggingface.co/bert/bert-large-cased-whole-word-masking-config.json", - "bert-large-uncased-whole-word-masking-finetuned-squad": "https://s3.amazonaws.com/models.huggingface.co/bert/bert-large-uncased-whole-word-masking-finetuned-squad-config.json", - "bert-large-cased-whole-word-masking-finetuned-squad": "https://s3.amazonaws.com/models.huggingface.co/bert/bert-large-cased-whole-word-masking-finetuned-squad-config.json", - "bert-base-cased-finetuned-mrpc": "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-cased-finetuned-mrpc-config.json", - "bert-base-german-dbmdz-cased": "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-german-dbmdz-cased-config.json", - "bert-base-german-dbmdz-uncased": "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-german-dbmdz-uncased-config.json", - "cl-tohoku/bert-base-japanese": "https://s3.amazonaws.com/models.huggingface.co/bert/cl-tohoku/bert-base-japanese/config.json", - "cl-tohoku/bert-base-japanese-whole-word-masking": "https://s3.amazonaws.com/models.huggingface.co/bert/cl-tohoku/bert-base-japanese-whole-word-masking/config.json", - "cl-tohoku/bert-base-japanese-char": "https://s3.amazonaws.com/models.huggingface.co/bert/cl-tohoku/bert-base-japanese-char/config.json", - "cl-tohoku/bert-base-japanese-char-whole-word-masking": "https://s3.amazonaws.com/models.huggingface.co/bert/cl-tohoku/bert-base-japanese-char-whole-word-masking/config.json", - "TurkuNLP/bert-base-finnish-cased-v1": "https://s3.amazonaws.com/models.huggingface.co/bert/TurkuNLP/bert-base-finnish-cased-v1/config.json", - "TurkuNLP/bert-base-finnish-uncased-v1": "https://s3.amazonaws.com/models.huggingface.co/bert/TurkuNLP/bert-base-finnish-uncased-v1/config.json", - "wietsedv/bert-base-dutch-cased": "https://s3.amazonaws.com/models.huggingface.co/bert/wietsedv/bert-base-dutch-cased/config.json", + "bert-base-uncased": "https://huggingface.co/bert-base-uncased/resolve/main/config.json", + "bert-large-uncased": "https://huggingface.co/bert-large-uncased/resolve/main/config.json", + "bert-base-cased": "https://huggingface.co/bert-base-cased/resolve/main/config.json", + "bert-large-cased": "https://huggingface.co/bert-large-cased/resolve/main/config.json", + "bert-base-multilingual-uncased": "https://huggingface.co/bert-base-multilingual-uncased/resolve/main/config.json", + "bert-base-multilingual-cased": "https://huggingface.co/bert-base-multilingual-cased/resolve/main/config.json", + "bert-base-chinese": "https://huggingface.co/bert-base-chinese/resolve/main/config.json", + "bert-base-german-cased": "https://huggingface.co/bert-base-german-cased/resolve/main/config.json", + "bert-large-uncased-whole-word-masking": "https://huggingface.co/bert-large-uncased-whole-word-masking/resolve/main/config.json", + "bert-large-cased-whole-word-masking": "https://huggingface.co/bert-large-cased-whole-word-masking/resolve/main/config.json", + "bert-large-uncased-whole-word-masking-finetuned-squad": "https://huggingface.co/bert-large-uncased-whole-word-masking-finetuned-squad/resolve/main/config.json", + "bert-large-cased-whole-word-masking-finetuned-squad": "https://huggingface.co/bert-large-cased-whole-word-masking-finetuned-squad/resolve/main/config.json", + "bert-base-cased-finetuned-mrpc": "https://huggingface.co/bert-base-cased-finetuned-mrpc/resolve/main/config.json", + "bert-base-german-dbmdz-cased": "https://huggingface.co/bert-base-german-dbmdz-cased/resolve/main/config.json", + "bert-base-german-dbmdz-uncased": "https://huggingface.co/bert-base-german-dbmdz-uncased/resolve/main/config.json", + "cl-tohoku/bert-base-japanese": "https://huggingface.co/cl-tohoku/bert-base-japanese/resolve/main/config.json", + "cl-tohoku/bert-base-japanese-whole-word-masking": "https://huggingface.co/cl-tohoku/bert-base-japanese-whole-word-masking/resolve/main/config.json", + "cl-tohoku/bert-base-japanese-char": "https://huggingface.co/cl-tohoku/bert-base-japanese-char/resolve/main/config.json", + "cl-tohoku/bert-base-japanese-char-whole-word-masking": "https://huggingface.co/cl-tohoku/bert-base-japanese-char-whole-word-masking/resolve/main/config.json", + "TurkuNLP/bert-base-finnish-cased-v1": "https://huggingface.co/TurkuNLP/bert-base-finnish-cased-v1/resolve/main/config.json", + "TurkuNLP/bert-base-finnish-uncased-v1": "https://huggingface.co/TurkuNLP/bert-base-finnish-uncased-v1/resolve/main/config.json", + "wietsedv/bert-base-dutch-cased": "https://huggingface.co/wietsedv/bert-base-dutch-cased/resolve/main/config.json", # See all BERT models at https://huggingface.co/models?filter=bert } diff --git a/src/transformers/configuration_camembert.py b/src/transformers/configuration_camembert.py index c319c59915..64e64e9f62 100644 --- a/src/transformers/configuration_camembert.py +++ b/src/transformers/configuration_camembert.py @@ -22,9 +22,9 @@ from .utils import logging logger = logging.get_logger(__name__) CAMEMBERT_PRETRAINED_CONFIG_ARCHIVE_MAP = { - "camembert-base": "https://s3.amazonaws.com/models.huggingface.co/bert/camembert-base-config.json", - "umberto-commoncrawl-cased-v1": "https://s3.amazonaws.com/models.huggingface.co/bert/Musixmatch/umberto-commoncrawl-cased-v1/config.json", - "umberto-wikipedia-uncased-v1": "https://s3.amazonaws.com/models.huggingface.co/bert/Musixmatch/umberto-wikipedia-uncased-v1/config.json", + "camembert-base": "https://huggingface.co/camembert-base/resolve/main/config.json", + "umberto-commoncrawl-cased-v1": "https://huggingface.co/Musixmatch/umberto-commoncrawl-cased-v1/resolve/main/config.json", + "umberto-wikipedia-uncased-v1": "https://huggingface.co/Musixmatch/umberto-wikipedia-uncased-v1/resolve/main/config.json", } diff --git a/src/transformers/configuration_ctrl.py b/src/transformers/configuration_ctrl.py index 4549a84ecc..f5e876697b 100644 --- a/src/transformers/configuration_ctrl.py +++ b/src/transformers/configuration_ctrl.py @@ -20,7 +20,7 @@ from .utils import logging logger = logging.get_logger(__name__) -CTRL_PRETRAINED_CONFIG_ARCHIVE_MAP = {"ctrl": "https://s3.amazonaws.com/models.huggingface.co/bert/ctrl-config.json"} +CTRL_PRETRAINED_CONFIG_ARCHIVE_MAP = {"ctrl": "https://huggingface.co/ctrl/resolve/main/config.json"} class CTRLConfig(PretrainedConfig): diff --git a/src/transformers/configuration_deberta.py b/src/transformers/configuration_deberta.py index ffc236df41..637dddd987 100644 --- a/src/transformers/configuration_deberta.py +++ b/src/transformers/configuration_deberta.py @@ -21,8 +21,8 @@ from .utils import logging logger = logging.get_logger(__name__) DEBERTA_PRETRAINED_CONFIG_ARCHIVE_MAP = { - "microsoft/deberta-base": "https://s3.amazonaws.com/models.huggingface.co/bert/microsoft/deberta-base/config.json", - "microsoft/deberta-large": "https://s3.amazonaws.com/models.huggingface.co/bert/microsoft/deberta-large/config.json", + "microsoft/deberta-base": "https://huggingface.co/microsoft/deberta-base/resolve/main/config.json", + "microsoft/deberta-large": "https://huggingface.co/microsoft/deberta-large/resolve/main/config.json", } diff --git a/src/transformers/configuration_distilbert.py b/src/transformers/configuration_distilbert.py index fce9563af2..269a9cbac6 100644 --- a/src/transformers/configuration_distilbert.py +++ b/src/transformers/configuration_distilbert.py @@ -21,13 +21,13 @@ from .utils import logging logger = logging.get_logger(__name__) DISTILBERT_PRETRAINED_CONFIG_ARCHIVE_MAP = { - "distilbert-base-uncased": "https://s3.amazonaws.com/models.huggingface.co/bert/distilbert-base-uncased-config.json", - "distilbert-base-uncased-distilled-squad": "https://s3.amazonaws.com/models.huggingface.co/bert/distilbert-base-uncased-distilled-squad-config.json", - "distilbert-base-cased": "https://s3.amazonaws.com/models.huggingface.co/bert/distilbert-base-cased-config.json", - "distilbert-base-cased-distilled-squad": "https://s3.amazonaws.com/models.huggingface.co/bert/distilbert-base-cased-distilled-squad-config.json", - "distilbert-base-german-cased": "https://s3.amazonaws.com/models.huggingface.co/bert/distilbert-base-german-cased-config.json", - "distilbert-base-multilingual-cased": "https://s3.amazonaws.com/models.huggingface.co/bert/distilbert-base-multilingual-cased-config.json", - "distilbert-base-uncased-finetuned-sst-2-english": "https://s3.amazonaws.com/models.huggingface.co/bert/distilbert-base-uncased-finetuned-sst-2-english-config.json", + "distilbert-base-uncased": "https://huggingface.co/distilbert-base-uncased/resolve/main/config.json", + "distilbert-base-uncased-distilled-squad": "https://huggingface.co/distilbert-base-uncased-distilled-squad/resolve/main/config.json", + "distilbert-base-cased": "https://huggingface.co/distilbert-base-cased/resolve/main/config.json", + "distilbert-base-cased-distilled-squad": "https://huggingface.co/distilbert-base-cased-distilled-squad/resolve/main/config.json", + "distilbert-base-german-cased": "https://huggingface.co/distilbert-base-german-cased/resolve/main/config.json", + "distilbert-base-multilingual-cased": "https://huggingface.co/distilbert-base-multilingual-cased/resolve/main/config.json", + "distilbert-base-uncased-finetuned-sst-2-english": "https://huggingface.co/distilbert-base-uncased-finetuned-sst-2-english/resolve/main/config.json", } diff --git a/src/transformers/configuration_dpr.py b/src/transformers/configuration_dpr.py index 506a2c4f5b..3ea5967947 100644 --- a/src/transformers/configuration_dpr.py +++ b/src/transformers/configuration_dpr.py @@ -21,12 +21,12 @@ from .utils import logging logger = logging.get_logger(__name__) DPR_PRETRAINED_CONFIG_ARCHIVE_MAP = { - "facebook/dpr-ctx_encoder-single-nq-base": "https://s3.amazonaws.com/models.huggingface.co/bert/facebook/dpr-ctx_encoder-single-nq-base/config.json", - "facebook/dpr-question_encoder-single-nq-base": "https://s3.amazonaws.com/models.huggingface.co/bert/facebook/dpr-question_encoder-single-nq-base/config.json", - "facebook/dpr-reader-single-nq-base": "https://s3.amazonaws.com/models.huggingface.co/bert/facebook/dpr-reader-single-nq-base/config.json", - "facebook/dpr-ctx_encoder-multiset-base": "https://s3.amazonaws.com/models.huggingface.co/bert/facebook/dpr-ctx_encoder-multiset-base/config.json", - "facebook/dpr-question_encoder-multiset-base": "https://s3.amazonaws.com/models.huggingface.co/bert/facebook/dpr-question_encoder-multiset-base/config.json", - "facebook/dpr-reader-multiset-base": "https://s3.amazonaws.com/models.huggingface.co/bert/facebook/dpr-reader-multiset-base/config.json", + "facebook/dpr-ctx_encoder-single-nq-base": "https://huggingface.co/facebook/dpr-ctx_encoder-single-nq-base/resolve/main/config.json", + "facebook/dpr-question_encoder-single-nq-base": "https://huggingface.co/facebook/dpr-question_encoder-single-nq-base/resolve/main/config.json", + "facebook/dpr-reader-single-nq-base": "https://huggingface.co/facebook/dpr-reader-single-nq-base/resolve/main/config.json", + "facebook/dpr-ctx_encoder-multiset-base": "https://huggingface.co/facebook/dpr-ctx_encoder-multiset-base/resolve/main/config.json", + "facebook/dpr-question_encoder-multiset-base": "https://huggingface.co/facebook/dpr-question_encoder-multiset-base/resolve/main/config.json", + "facebook/dpr-reader-multiset-base": "https://huggingface.co/facebook/dpr-reader-multiset-base/resolve/main/config.json", } diff --git a/src/transformers/configuration_electra.py b/src/transformers/configuration_electra.py index 066c1d501e..439d80ca67 100644 --- a/src/transformers/configuration_electra.py +++ b/src/transformers/configuration_electra.py @@ -22,12 +22,12 @@ from .utils import logging logger = logging.get_logger(__name__) ELECTRA_PRETRAINED_CONFIG_ARCHIVE_MAP = { - "google/electra-small-generator": "https://s3.amazonaws.com/models.huggingface.co/bert/google/electra-small-generator/config.json", - "google/electra-base-generator": "https://s3.amazonaws.com/models.huggingface.co/bert/google/electra-base-generator/config.json", - "google/electra-large-generator": "https://s3.amazonaws.com/models.huggingface.co/bert/google/electra-large-generator/config.json", - "google/electra-small-discriminator": "https://s3.amazonaws.com/models.huggingface.co/bert/google/electra-small-discriminator/config.json", - "google/electra-base-discriminator": "https://s3.amazonaws.com/models.huggingface.co/bert/google/electra-base-discriminator/config.json", - "google/electra-large-discriminator": "https://s3.amazonaws.com/models.huggingface.co/bert/google/electra-large-discriminator/config.json", + "google/electra-small-generator": "https://huggingface.co/google/electra-small-generator/resolve/main/config.json", + "google/electra-base-generator": "https://huggingface.co/google/electra-base-generator/resolve/main/config.json", + "google/electra-large-generator": "https://huggingface.co/google/electra-large-generator/resolve/main/config.json", + "google/electra-small-discriminator": "https://huggingface.co/google/electra-small-discriminator/resolve/main/config.json", + "google/electra-base-discriminator": "https://huggingface.co/google/electra-base-discriminator/resolve/main/config.json", + "google/electra-large-discriminator": "https://huggingface.co/google/electra-large-discriminator/resolve/main/config.json", } diff --git a/src/transformers/configuration_flaubert.py b/src/transformers/configuration_flaubert.py index 64c02c21a8..508543ecbe 100644 --- a/src/transformers/configuration_flaubert.py +++ b/src/transformers/configuration_flaubert.py @@ -21,10 +21,10 @@ from .utils import logging logger = logging.get_logger(__name__) FLAUBERT_PRETRAINED_CONFIG_ARCHIVE_MAP = { - "flaubert/flaubert_small_cased": "https://s3.amazonaws.com/models.huggingface.co/bert/flaubert/flaubert_small_cased/config.json", - "flaubert/flaubert_base_uncased": "https://s3.amazonaws.com/models.huggingface.co/bert/flaubert/flaubert_base_uncased/config.json", - "flaubert/flaubert_base_cased": "https://s3.amazonaws.com/models.huggingface.co/bert/flaubert/flaubert_base_cased/config.json", - "flaubert/flaubert_large_cased": "https://s3.amazonaws.com/models.huggingface.co/bert/flaubert/flaubert_large_cased/config.json", + "flaubert/flaubert_small_cased": "https://huggingface.co/flaubert/flaubert_small_cased/resolve/main/config.json", + "flaubert/flaubert_base_uncased": "https://huggingface.co/flaubert/flaubert_base_uncased/resolve/main/config.json", + "flaubert/flaubert_base_cased": "https://huggingface.co/flaubert/flaubert_base_cased/resolve/main/config.json", + "flaubert/flaubert_large_cased": "https://huggingface.co/flaubert/flaubert_large_cased/resolve/main/config.json", } diff --git a/src/transformers/configuration_funnel.py b/src/transformers/configuration_funnel.py index 7883aec923..6a463240ad 100644 --- a/src/transformers/configuration_funnel.py +++ b/src/transformers/configuration_funnel.py @@ -21,16 +21,16 @@ from .utils import logging logger = logging.get_logger(__name__) FUNNEL_PRETRAINED_CONFIG_ARCHIVE_MAP = { - "funnel-transformer/small": "https://s3.amazonaws.com/models.huggingface.co/bert/funnel-transformer/small/config.json", - "funnel-transformer/small-base": "https://s3.amazonaws.com/models.huggingface.co/bert/funnel-transformer/small-base/config.json", - "funnel-transformer/medium": "https://s3.amazonaws.com/models.huggingface.co/bert/funnel-transformer/medium/config.json", - "funnel-transformer/medium-base": "https://s3.amazonaws.com/models.huggingface.co/bert/funnel-transformer/medium-base/config.json", - "funnel-transformer/intermediate": "https://s3.amazonaws.com/models.huggingface.co/bert/funnel-transformer/intermediate/config.json", - "funnel-transformer/intermediate-base": "https://s3.amazonaws.com/models.huggingface.co/bert/funnel-transformer/intermediate-base/config.json", - "funnel-transformer/large": "https://s3.amazonaws.com/models.huggingface.co/bert/funnel-transformer/large/config.json", - "funnel-transformer/large-base": "https://s3.amazonaws.com/models.huggingface.co/bert/funnel-transformer/large-base/config.json", - "funnel-transformer/xlarge": "https://s3.amazonaws.com/models.huggingface.co/bert/funnel-transformer/xlarge/config.json", - "funnel-transformer/xlarge-base": "https://s3.amazonaws.com/models.huggingface.co/bert/funnel-transformer/xlarge-base/config.json", + "funnel-transformer/small": "https://huggingface.co/funnel-transformer/small/resolve/main/config.json", + "funnel-transformer/small-base": "https://huggingface.co/funnel-transformer/small-base/resolve/main/config.json", + "funnel-transformer/medium": "https://huggingface.co/funnel-transformer/medium/resolve/main/config.json", + "funnel-transformer/medium-base": "https://huggingface.co/funnel-transformer/medium-base/resolve/main/config.json", + "funnel-transformer/intermediate": "https://huggingface.co/funnel-transformer/intermediate/resolve/main/config.json", + "funnel-transformer/intermediate-base": "https://huggingface.co/funnel-transformer/intermediate-base/resolve/main/config.json", + "funnel-transformer/large": "https://huggingface.co/funnel-transformer/large/resolve/main/config.json", + "funnel-transformer/large-base": "https://huggingface.co/funnel-transformer/large-base/resolve/main/config.json", + "funnel-transformer/xlarge": "https://huggingface.co/funnel-transformer/xlarge/resolve/main/config.json", + "funnel-transformer/xlarge-base": "https://huggingface.co/funnel-transformer/xlarge-base/resolve/main/config.json", } diff --git a/src/transformers/configuration_gpt2.py b/src/transformers/configuration_gpt2.py index 7a054e4bbe..9264263c7c 100644 --- a/src/transformers/configuration_gpt2.py +++ b/src/transformers/configuration_gpt2.py @@ -22,11 +22,11 @@ from .utils import logging logger = logging.get_logger(__name__) GPT2_PRETRAINED_CONFIG_ARCHIVE_MAP = { - "gpt2": "https://s3.amazonaws.com/models.huggingface.co/bert/gpt2-config.json", - "gpt2-medium": "https://s3.amazonaws.com/models.huggingface.co/bert/gpt2-medium-config.json", - "gpt2-large": "https://s3.amazonaws.com/models.huggingface.co/bert/gpt2-large-config.json", - "gpt2-xl": "https://s3.amazonaws.com/models.huggingface.co/bert/gpt2-xl-config.json", - "distilgpt2": "https://s3.amazonaws.com/models.huggingface.co/bert/distilgpt2-config.json", + "gpt2": "https://huggingface.co/gpt2/resolve/main/config.json", + "gpt2-medium": "https://huggingface.co/gpt2-medium/resolve/main/config.json", + "gpt2-large": "https://huggingface.co/gpt2-large/resolve/main/config.json", + "gpt2-xl": "https://huggingface.co/gpt2-xl/resolve/main/config.json", + "distilgpt2": "https://huggingface.co/distilgpt2/resolve/main/config.json", } diff --git a/src/transformers/configuration_layoutlm.py b/src/transformers/configuration_layoutlm.py index f16e17e4ea..1b629ca7d0 100644 --- a/src/transformers/configuration_layoutlm.py +++ b/src/transformers/configuration_layoutlm.py @@ -22,8 +22,8 @@ from .utils import logging logger = logging.get_logger(__name__) LAYOUTLM_PRETRAINED_CONFIG_ARCHIVE_MAP = { - "layoutlm-base-uncased": "https://s3.amazonaws.com/models.huggingface.co/bert/microsoft/layoutlm-base-uncased/config.json", - "layoutlm-large-uncased": "https://s3.amazonaws.com/models.huggingface.co/bert/microsoft/layoutlm-large-uncased/config.json", + "layoutlm-base-uncased": "https://huggingface.co/microsoft/layoutlm-base-uncased/resolve/main/config.json", + "layoutlm-large-uncased": "https://huggingface.co/microsoft/layoutlm-large-uncased/resolve/main/config.json", } diff --git a/src/transformers/configuration_longformer.py b/src/transformers/configuration_longformer.py index 6d5a934086..55178b5fdf 100644 --- a/src/transformers/configuration_longformer.py +++ b/src/transformers/configuration_longformer.py @@ -23,11 +23,11 @@ from .utils import logging logger = logging.get_logger(__name__) LONGFORMER_PRETRAINED_CONFIG_ARCHIVE_MAP = { - "allenai/longformer-base-4096": "https://s3.amazonaws.com/models.huggingface.co/bert/allenai/longformer-base-4096/config.json", - "allenai/longformer-large-4096": "https://s3.amazonaws.com/models.huggingface.co/bert/allenai/longformer-large-4096/config.json", - "allenai/longformer-large-4096-finetuned-triviaqa": "https://s3.amazonaws.com/models.huggingface.co/bert/allenai/longformer-large-4096-finetuned-triviaqa/config.json", - "allenai/longformer-base-4096-extra.pos.embd.only": "https://s3.amazonaws.com/models.huggingface.co/bert/allenai/longformer-base-4096-extra.pos.embd.only/config.json", - "allenai/longformer-large-4096-extra.pos.embd.only": "https://s3.amazonaws.com/models.huggingface.co/bert/allenai/longformer-large-4096-extra.pos.embd.only/config.json", + "allenai/longformer-base-4096": "https://huggingface.co/allenai/longformer-base-4096/resolve/main/config.json", + "allenai/longformer-large-4096": "https://huggingface.co/allenai/longformer-large-4096/resolve/main/config.json", + "allenai/longformer-large-4096-finetuned-triviaqa": "https://huggingface.co/allenai/longformer-large-4096-finetuned-triviaqa/resolve/main/config.json", + "allenai/longformer-base-4096-extra.pos.embd.only": "https://huggingface.co/allenai/longformer-base-4096-extra.pos.embd.only/resolve/main/config.json", + "allenai/longformer-large-4096-extra.pos.embd.only": "https://huggingface.co/allenai/longformer-large-4096-extra.pos.embd.only/resolve/main/config.json", } diff --git a/src/transformers/configuration_marian.py b/src/transformers/configuration_marian.py index 042062a314..d389a0cd22 100644 --- a/src/transformers/configuration_marian.py +++ b/src/transformers/configuration_marian.py @@ -18,7 +18,7 @@ from .configuration_bart import BartConfig PRETRAINED_CONFIG_ARCHIVE_MAP = { - "Helsinki-NLP/opus-mt-en-de": "https://s3.amazonaws.com/models.huggingface.co/bert/Helsinki-NLP/opus-mt-en-de/config.json", + "Helsinki-NLP/opus-mt-en-de": "https://huggingface.co/Helsinki-NLP/opus-mt-en-de/resolve/main/config.json", } diff --git a/src/transformers/configuration_mbart.py b/src/transformers/configuration_mbart.py index b03b6f9777..ecea31633a 100644 --- a/src/transformers/configuration_mbart.py +++ b/src/transformers/configuration_mbart.py @@ -21,8 +21,8 @@ from .utils import logging logger = logging.get_logger(__name__) MBART_PRETRAINED_CONFIG_ARCHIVE_MAP = { - "facebook/mbart-large-en-ro": "https://s3.amazonaws.com/models.huggingface.co/bert/facebook/mbart-large-en-ro/config.json", - "facebook/mbart-large-cc25": "https://s3.amazonaws.com/models.huggingface.co/bert/facebook/mbart-large-cc25/config.json", + "facebook/mbart-large-en-ro": "https://huggingface.co/facebook/mbart-large-en-ro/resolve/main/config.json", + "facebook/mbart-large-cc25": "https://huggingface.co/facebook/mbart-large-cc25/resolve/main/config.json", } diff --git a/src/transformers/configuration_mobilebert.py b/src/transformers/configuration_mobilebert.py index 37493d1fb6..2342fbfcc4 100644 --- a/src/transformers/configuration_mobilebert.py +++ b/src/transformers/configuration_mobilebert.py @@ -19,7 +19,7 @@ from .utils import logging logger = logging.get_logger(__name__) MOBILEBERT_PRETRAINED_CONFIG_ARCHIVE_MAP = { - "mobilebert-uncased": "https://s3.amazonaws.com/models.huggingface.co/bert/google/mobilebert-uncased/config.json" + "mobilebert-uncased": "https://huggingface.co/google/mobilebert-uncased/resolve/main/config.json" } diff --git a/src/transformers/configuration_openai.py b/src/transformers/configuration_openai.py index 2301c36922..26068dd4fb 100644 --- a/src/transformers/configuration_openai.py +++ b/src/transformers/configuration_openai.py @@ -22,7 +22,7 @@ from .utils import logging logger = logging.get_logger(__name__) OPENAI_GPT_PRETRAINED_CONFIG_ARCHIVE_MAP = { - "openai-gpt": "https://s3.amazonaws.com/models.huggingface.co/bert/openai-gpt-config.json" + "openai-gpt": "https://huggingface.co/openai-gpt/resolve/main/config.json" } diff --git a/src/transformers/configuration_prophetnet.py b/src/transformers/configuration_prophetnet.py index 451c93954c..e62a22b09a 100644 --- a/src/transformers/configuration_prophetnet.py +++ b/src/transformers/configuration_prophetnet.py @@ -22,7 +22,7 @@ from .utils import logging logger = logging.get_logger(__name__) PROPHETNET_PRETRAINED_CONFIG_ARCHIVE_MAP = { - "microsoft/prophetnet-large-uncased": "https://s3.amazonaws.com/models.huggingface.co/bert/microsoft/prophetnet-large-uncased/config.json", + "microsoft/prophetnet-large-uncased": "https://huggingface.co/microsoft/prophetnet-large-uncased/resolve/main/config.json", } diff --git a/src/transformers/configuration_retribert.py b/src/transformers/configuration_retribert.py index a68801cbcc..1011c19c9c 100644 --- a/src/transformers/configuration_retribert.py +++ b/src/transformers/configuration_retribert.py @@ -22,7 +22,7 @@ logger = logging.get_logger(__name__) # TODO: upload to AWS RETRIBERT_PRETRAINED_CONFIG_ARCHIVE_MAP = { - "retribert-base-uncased": "https://s3.amazonaws.com/models.huggingface.co/bert/distilbert-base-uncased-config.json", + "retribert-base-uncased": "https://huggingface.co/distilbert-base-uncased/resolve/main/config.json", } diff --git a/src/transformers/configuration_roberta.py b/src/transformers/configuration_roberta.py index 9b393d8f0c..5e214700ed 100644 --- a/src/transformers/configuration_roberta.py +++ b/src/transformers/configuration_roberta.py @@ -22,12 +22,12 @@ from .utils import logging logger = logging.get_logger(__name__) ROBERTA_PRETRAINED_CONFIG_ARCHIVE_MAP = { - "roberta-base": "https://s3.amazonaws.com/models.huggingface.co/bert/roberta-base-config.json", - "roberta-large": "https://s3.amazonaws.com/models.huggingface.co/bert/roberta-large-config.json", - "roberta-large-mnli": "https://s3.amazonaws.com/models.huggingface.co/bert/roberta-large-mnli-config.json", - "distilroberta-base": "https://s3.amazonaws.com/models.huggingface.co/bert/distilroberta-base-config.json", - "roberta-base-openai-detector": "https://s3.amazonaws.com/models.huggingface.co/bert/roberta-base-openai-detector-config.json", - "roberta-large-openai-detector": "https://s3.amazonaws.com/models.huggingface.co/bert/roberta-large-openai-detector-config.json", + "roberta-base": "https://huggingface.co/roberta-base/resolve/main/config.json", + "roberta-large": "https://huggingface.co/roberta-large/resolve/main/config.json", + "roberta-large-mnli": "https://huggingface.co/roberta-large-mnli/resolve/main/config.json", + "distilroberta-base": "https://huggingface.co/distilroberta-base/resolve/main/config.json", + "roberta-base-openai-detector": "https://huggingface.co/roberta-base-openai-detector/resolve/main/config.json", + "roberta-large-openai-detector": "https://huggingface.co/roberta-large-openai-detector/resolve/main/config.json", } diff --git a/src/transformers/configuration_squeezebert.py b/src/transformers/configuration_squeezebert.py index 80f456cb5b..82ce12eb3d 100644 --- a/src/transformers/configuration_squeezebert.py +++ b/src/transformers/configuration_squeezebert.py @@ -21,9 +21,9 @@ from .utils import logging logger = logging.get_logger(__name__) SQUEEZEBERT_PRETRAINED_CONFIG_ARCHIVE_MAP = { - "squeezebert/squeezebert-uncased": "https://s3.amazonaws.com/models.huggingface.co/bert/squeezebert/squeezebert-uncased/config.json", - "squeezebert/squeezebert-mnli": "https://s3.amazonaws.com/models.huggingface.co/bert/squeezebert/squeezebert-mnli/config.json", - "squeezebert/squeezebert-mnli-headless": "https://s3.amazonaws.com/models.huggingface.co/bert/squeezebert/squeezebert-mnli-headless/config.json", + "squeezebert/squeezebert-uncased": "https://huggingface.co/squeezebert/squeezebert-uncased/resolve/main/config.json", + "squeezebert/squeezebert-mnli": "https://huggingface.co/squeezebert/squeezebert-mnli/resolve/main/config.json", + "squeezebert/squeezebert-mnli-headless": "https://huggingface.co/squeezebert/squeezebert-mnli-headless/resolve/main/config.json", } diff --git a/src/transformers/configuration_t5.py b/src/transformers/configuration_t5.py index 49a3e56cce..6c9b9d6929 100644 --- a/src/transformers/configuration_t5.py +++ b/src/transformers/configuration_t5.py @@ -21,11 +21,11 @@ from .utils import logging logger = logging.get_logger(__name__) T5_PRETRAINED_CONFIG_ARCHIVE_MAP = { - "t5-small": "https://s3.amazonaws.com/models.huggingface.co/bert/t5-small-config.json", - "t5-base": "https://s3.amazonaws.com/models.huggingface.co/bert/t5-base-config.json", - "t5-large": "https://s3.amazonaws.com/models.huggingface.co/bert/t5-large-config.json", - "t5-3b": "https://s3.amazonaws.com/models.huggingface.co/bert/t5-3b-config.json", - "t5-11b": "https://s3.amazonaws.com/models.huggingface.co/bert/t5-11b-config.json", + "t5-small": "https://huggingface.co/t5-small/resolve/main/config.json", + "t5-base": "https://huggingface.co/t5-base/resolve/main/config.json", + "t5-large": "https://huggingface.co/t5-large/resolve/main/config.json", + "t5-3b": "https://huggingface.co/t5-3b/resolve/main/config.json", + "t5-11b": "https://huggingface.co/t5-11b/resolve/main/config.json", } diff --git a/src/transformers/configuration_transfo_xl.py b/src/transformers/configuration_transfo_xl.py index ccd88ac44e..03afa6e132 100644 --- a/src/transformers/configuration_transfo_xl.py +++ b/src/transformers/configuration_transfo_xl.py @@ -25,7 +25,7 @@ from .utils import logging logger = logging.get_logger(__name__) TRANSFO_XL_PRETRAINED_CONFIG_ARCHIVE_MAP = { - "transfo-xl-wt103": "https://s3.amazonaws.com/models.huggingface.co/bert/transfo-xl-wt103-config.json", + "transfo-xl-wt103": "https://huggingface.co/transfo-xl-wt103/resolve/main/config.json", } diff --git a/src/transformers/configuration_xlm.py b/src/transformers/configuration_xlm.py index 7e0e2d307e..5903eac763 100644 --- a/src/transformers/configuration_xlm.py +++ b/src/transformers/configuration_xlm.py @@ -21,16 +21,16 @@ from .utils import logging logger = logging.get_logger(__name__) XLM_PRETRAINED_CONFIG_ARCHIVE_MAP = { - "xlm-mlm-en-2048": "https://s3.amazonaws.com/models.huggingface.co/bert/xlm-mlm-en-2048-config.json", - "xlm-mlm-ende-1024": "https://s3.amazonaws.com/models.huggingface.co/bert/xlm-mlm-ende-1024-config.json", - "xlm-mlm-enfr-1024": "https://s3.amazonaws.com/models.huggingface.co/bert/xlm-mlm-enfr-1024-config.json", - "xlm-mlm-enro-1024": "https://s3.amazonaws.com/models.huggingface.co/bert/xlm-mlm-enro-1024-config.json", - "xlm-mlm-tlm-xnli15-1024": "https://s3.amazonaws.com/models.huggingface.co/bert/xlm-mlm-tlm-xnli15-1024-config.json", - "xlm-mlm-xnli15-1024": "https://s3.amazonaws.com/models.huggingface.co/bert/xlm-mlm-xnli15-1024-config.json", - "xlm-clm-enfr-1024": "https://s3.amazonaws.com/models.huggingface.co/bert/xlm-clm-enfr-1024-config.json", - "xlm-clm-ende-1024": "https://s3.amazonaws.com/models.huggingface.co/bert/xlm-clm-ende-1024-config.json", - "xlm-mlm-17-1280": "https://s3.amazonaws.com/models.huggingface.co/bert/xlm-mlm-17-1280-config.json", - "xlm-mlm-100-1280": "https://s3.amazonaws.com/models.huggingface.co/bert/xlm-mlm-100-1280-config.json", + "xlm-mlm-en-2048": "https://huggingface.co/xlm-mlm-en-2048/resolve/main/config.json", + "xlm-mlm-ende-1024": "https://huggingface.co/xlm-mlm-ende-1024/resolve/main/config.json", + "xlm-mlm-enfr-1024": "https://huggingface.co/xlm-mlm-enfr-1024/resolve/main/config.json", + "xlm-mlm-enro-1024": "https://huggingface.co/xlm-mlm-enro-1024/resolve/main/config.json", + "xlm-mlm-tlm-xnli15-1024": "https://huggingface.co/xlm-mlm-tlm-xnli15-1024/resolve/main/config.json", + "xlm-mlm-xnli15-1024": "https://huggingface.co/xlm-mlm-xnli15-1024/resolve/main/config.json", + "xlm-clm-enfr-1024": "https://huggingface.co/xlm-clm-enfr-1024/resolve/main/config.json", + "xlm-clm-ende-1024": "https://huggingface.co/xlm-clm-ende-1024/resolve/main/config.json", + "xlm-mlm-17-1280": "https://huggingface.co/xlm-mlm-17-1280/resolve/main/config.json", + "xlm-mlm-100-1280": "https://huggingface.co/xlm-mlm-100-1280/resolve/main/config.json", } diff --git a/src/transformers/configuration_xlm_prophetnet.py b/src/transformers/configuration_xlm_prophetnet.py index 025632c8e4..6ebf8899e0 100644 --- a/src/transformers/configuration_xlm_prophetnet.py +++ b/src/transformers/configuration_xlm_prophetnet.py @@ -22,7 +22,7 @@ from .utils import logging logger = logging.get_logger(__name__) XLM_PROPHETNET_PRETRAINED_CONFIG_ARCHIVE_MAP = { - "microsoft/xprophetnet-large-wiki100-cased": "https://s3.amazonaws.com/models.huggingface.co/bert/microsoft/xprophetnet-large-wiki100-cased/config.json", + "microsoft/xprophetnet-large-wiki100-cased": "https://huggingface.co/microsoft/xprophetnet-large-wiki100-cased/resolve/main/config.json", } diff --git a/src/transformers/configuration_xlm_roberta.py b/src/transformers/configuration_xlm_roberta.py index d4dd588232..76e93610c0 100644 --- a/src/transformers/configuration_xlm_roberta.py +++ b/src/transformers/configuration_xlm_roberta.py @@ -22,12 +22,12 @@ from .utils import logging logger = logging.get_logger(__name__) XLM_ROBERTA_PRETRAINED_CONFIG_ARCHIVE_MAP = { - "xlm-roberta-base": "https://s3.amazonaws.com/models.huggingface.co/bert/xlm-roberta-base-config.json", - "xlm-roberta-large": "https://s3.amazonaws.com/models.huggingface.co/bert/xlm-roberta-large-config.json", - "xlm-roberta-large-finetuned-conll02-dutch": "https://s3.amazonaws.com/models.huggingface.co/bert/xlm-roberta-large-finetuned-conll02-dutch-config.json", - "xlm-roberta-large-finetuned-conll02-spanish": "https://s3.amazonaws.com/models.huggingface.co/bert/xlm-roberta-large-finetuned-conll02-spanish-config.json", - "xlm-roberta-large-finetuned-conll03-english": "https://s3.amazonaws.com/models.huggingface.co/bert/xlm-roberta-large-finetuned-conll03-english-config.json", - "xlm-roberta-large-finetuned-conll03-german": "https://s3.amazonaws.com/models.huggingface.co/bert/xlm-roberta-large-finetuned-conll03-german-config.json", + "xlm-roberta-base": "https://huggingface.co/xlm-roberta-base/resolve/main/config.json", + "xlm-roberta-large": "https://huggingface.co/xlm-roberta-large/resolve/main/config.json", + "xlm-roberta-large-finetuned-conll02-dutch": "https://huggingface.co/xlm-roberta-large-finetuned-conll02-dutch/resolve/main/config.json", + "xlm-roberta-large-finetuned-conll02-spanish": "https://huggingface.co/xlm-roberta-large-finetuned-conll02-spanish/resolve/main/config.json", + "xlm-roberta-large-finetuned-conll03-english": "https://huggingface.co/xlm-roberta-large-finetuned-conll03-english/resolve/main/config.json", + "xlm-roberta-large-finetuned-conll03-german": "https://huggingface.co/xlm-roberta-large-finetuned-conll03-german/resolve/main/config.json", } diff --git a/src/transformers/configuration_xlnet.py b/src/transformers/configuration_xlnet.py index 4a39d130c3..150f10e3e3 100644 --- a/src/transformers/configuration_xlnet.py +++ b/src/transformers/configuration_xlnet.py @@ -22,8 +22,8 @@ from .utils import logging logger = logging.get_logger(__name__) XLNET_PRETRAINED_CONFIG_ARCHIVE_MAP = { - "xlnet-base-cased": "https://s3.amazonaws.com/models.huggingface.co/bert/xlnet-base-cased-config.json", - "xlnet-large-cased": "https://s3.amazonaws.com/models.huggingface.co/bert/xlnet-large-cased-config.json", + "xlnet-base-cased": "https://huggingface.co/xlnet-base-cased/resolve/main/config.json", + "xlnet-large-cased": "https://huggingface.co/xlnet-large-cased/resolve/main/config.json", } diff --git a/src/transformers/tokenization_albert.py b/src/transformers/tokenization_albert.py index 34d16a7913..10d4df0bb8 100644 --- a/src/transformers/tokenization_albert.py +++ b/src/transformers/tokenization_albert.py @@ -31,14 +31,14 @@ VOCAB_FILES_NAMES = {"vocab_file": "spiece.model"} PRETRAINED_VOCAB_FILES_MAP = { "vocab_file": { - "albert-base-v1": "https://s3.amazonaws.com/models.huggingface.co/bert/albert-base-v1-spiece.model", - "albert-large-v1": "https://s3.amazonaws.com/models.huggingface.co/bert/albert-large-v1-spiece.model", - "albert-xlarge-v1": "https://s3.amazonaws.com/models.huggingface.co/bert/albert-xlarge-v1-spiece.model", - "albert-xxlarge-v1": "https://s3.amazonaws.com/models.huggingface.co/bert/albert-xxlarge-v1-spiece.model", - "albert-base-v2": "https://s3.amazonaws.com/models.huggingface.co/bert/albert-base-v2-spiece.model", - "albert-large-v2": "https://s3.amazonaws.com/models.huggingface.co/bert/albert-large-v2-spiece.model", - "albert-xlarge-v2": "https://s3.amazonaws.com/models.huggingface.co/bert/albert-xlarge-v2-spiece.model", - "albert-xxlarge-v2": "https://s3.amazonaws.com/models.huggingface.co/bert/albert-xxlarge-v2-spiece.model", + "albert-base-v1": "https://huggingface.co/albert-base-v1/resolve/main/spiece.model", + "albert-large-v1": "https://huggingface.co/albert-large-v1/resolve/main/spiece.model", + "albert-xlarge-v1": "https://huggingface.co/albert-xlarge-v1/resolve/main/spiece.model", + "albert-xxlarge-v1": "https://huggingface.co/albert-xxlarge-v1/resolve/main/spiece.model", + "albert-base-v2": "https://huggingface.co/albert-base-v2/resolve/main/spiece.model", + "albert-large-v2": "https://huggingface.co/albert-large-v2/resolve/main/spiece.model", + "albert-xlarge-v2": "https://huggingface.co/albert-xlarge-v2/resolve/main/spiece.model", + "albert-xxlarge-v2": "https://huggingface.co/albert-xxlarge-v2/resolve/main/spiece.model", } } diff --git a/src/transformers/tokenization_albert_fast.py b/src/transformers/tokenization_albert_fast.py index f67cbc9a6d..ac190e77dd 100644 --- a/src/transformers/tokenization_albert_fast.py +++ b/src/transformers/tokenization_albert_fast.py @@ -34,24 +34,24 @@ VOCAB_FILES_NAMES = {"vocab_file": "spiece.model", "tokenizer_file": "tokenizer. PRETRAINED_VOCAB_FILES_MAP = { "vocab_file": { - "albert-base-v1": "https://s3.amazonaws.com/models.huggingface.co/bert/albert-base-v1-spiece.model", - "albert-large-v1": "https://s3.amazonaws.com/models.huggingface.co/bert/albert-large-v1-spiece.model", - "albert-xlarge-v1": "https://s3.amazonaws.com/models.huggingface.co/bert/albert-xlarge-v1-spiece.model", - "albert-xxlarge-v1": "https://s3.amazonaws.com/models.huggingface.co/bert/albert-xxlarge-v1-spiece.model", - "albert-base-v2": "https://s3.amazonaws.com/models.huggingface.co/bert/albert-base-v2-spiece.model", - "albert-large-v2": "https://s3.amazonaws.com/models.huggingface.co/bert/albert-large-v2-spiece.model", - "albert-xlarge-v2": "https://s3.amazonaws.com/models.huggingface.co/bert/albert-xlarge-v2-spiece.model", - "albert-xxlarge-v2": "https://s3.amazonaws.com/models.huggingface.co/bert/albert-xxlarge-v2-spiece.model", + "albert-base-v1": "https://huggingface.co/albert-base-v1/resolve/main/spiece.model", + "albert-large-v1": "https://huggingface.co/albert-large-v1/resolve/main/spiece.model", + "albert-xlarge-v1": "https://huggingface.co/albert-xlarge-v1/resolve/main/spiece.model", + "albert-xxlarge-v1": "https://huggingface.co/albert-xxlarge-v1/resolve/main/spiece.model", + "albert-base-v2": "https://huggingface.co/albert-base-v2/resolve/main/spiece.model", + "albert-large-v2": "https://huggingface.co/albert-large-v2/resolve/main/spiece.model", + "albert-xlarge-v2": "https://huggingface.co/albert-xlarge-v2/resolve/main/spiece.model", + "albert-xxlarge-v2": "https://huggingface.co/albert-xxlarge-v2/resolve/main/spiece.model", }, "tokenizer_file": { - "albert-base-v1": "https://s3.amazonaws.com/models.huggingface.co/bert/albert-base-v1-tokenizer.json", - "albert-large-v1": "https://s3.amazonaws.com/models.huggingface.co/bert/albert-large-v1-tokenizer.json", - "albert-xlarge-v1": "https://s3.amazonaws.com/models.huggingface.co/bert/albert-xlarge-v1-tokenizer.json", - "albert-xxlarge-v1": "https://s3.amazonaws.com/models.huggingface.co/bert/albert-xxlarge-v1-tokenizer.json", - "albert-base-v2": "https://s3.amazonaws.com/models.huggingface.co/bert/albert-base-v2-tokenizer.json", - "albert-large-v2": "https://s3.amazonaws.com/models.huggingface.co/bert/albert-large-v2-tokenizer.json", - "albert-xlarge-v2": "https://s3.amazonaws.com/models.huggingface.co/bert/albert-xlarge-v2-tokenizer.json", - "albert-xxlarge-v2": "https://s3.amazonaws.com/models.huggingface.co/bert/albert-xxlarge-v2-tokenizer.json", + "albert-base-v1": "https://huggingface.co/albert-base-v1/resolve/main/tokenizer.json", + "albert-large-v1": "https://huggingface.co/albert-large-v1/resolve/main/tokenizer.json", + "albert-xlarge-v1": "https://huggingface.co/albert-xlarge-v1/resolve/main/tokenizer.json", + "albert-xxlarge-v1": "https://huggingface.co/albert-xxlarge-v1/resolve/main/tokenizer.json", + "albert-base-v2": "https://huggingface.co/albert-base-v2/resolve/main/tokenizer.json", + "albert-large-v2": "https://huggingface.co/albert-large-v2/resolve/main/tokenizer.json", + "albert-xlarge-v2": "https://huggingface.co/albert-xlarge-v2/resolve/main/tokenizer.json", + "albert-xxlarge-v2": "https://huggingface.co/albert-xxlarge-v2/resolve/main/tokenizer.json", }, } diff --git a/src/transformers/tokenization_bart.py b/src/transformers/tokenization_bart.py index 00b2062d37..bc346074bd 100644 --- a/src/transformers/tokenization_bart.py +++ b/src/transformers/tokenization_bart.py @@ -24,8 +24,8 @@ logger = logging.get_logger(__name__) # vocab and merges same as roberta -vocab_url = "https://s3.amazonaws.com/models.huggingface.co/bert/roberta-large-vocab.json" -merges_url = "https://s3.amazonaws.com/models.huggingface.co/bert/roberta-large-merges.txt" +vocab_url = "https://huggingface.co/roberta-large/resolve/main/vocab.json" +merges_url = "https://huggingface.co/roberta-large/resolve/main/merges.txt" _all_bart_models = [ "facebook/bart-base", "facebook/bart-large", diff --git a/src/transformers/tokenization_bart_fast.py b/src/transformers/tokenization_bart_fast.py index 06adc7e1c7..e27e83a7b5 100644 --- a/src/transformers/tokenization_bart_fast.py +++ b/src/transformers/tokenization_bart_fast.py @@ -25,9 +25,9 @@ logger = logging.get_logger(__name__) # vocab and merges same as roberta -vocab_url = "https://s3.amazonaws.com/models.huggingface.co/bert/roberta-large-vocab.json" -merges_url = "https://s3.amazonaws.com/models.huggingface.co/bert/roberta-large-merges.txt" -tokenizer_url = "https://s3.amazonaws.com/models.huggingface.co/bert/roberta-large-tokenizer.json" +vocab_url = "https://huggingface.co/roberta-large/resolve/main/vocab.json" +merges_url = "https://huggingface.co/roberta-large/resolve/main/merges.txt" +tokenizer_url = "https://huggingface.co/roberta-large/resolve/main/tokenizer.json" _all_bart_models = [ "facebook/bart-base", "facebook/bart-large", diff --git a/src/transformers/tokenization_bert.py b/src/transformers/tokenization_bert.py index 1e58a54266..fe67e31938 100644 --- a/src/transformers/tokenization_bert.py +++ b/src/transformers/tokenization_bert.py @@ -30,24 +30,24 @@ VOCAB_FILES_NAMES = {"vocab_file": "vocab.txt"} PRETRAINED_VOCAB_FILES_MAP = { "vocab_file": { - "bert-base-uncased": "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased-vocab.txt", - "bert-large-uncased": "https://s3.amazonaws.com/models.huggingface.co/bert/bert-large-uncased-vocab.txt", - "bert-base-cased": "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-cased-vocab.txt", - "bert-large-cased": "https://s3.amazonaws.com/models.huggingface.co/bert/bert-large-cased-vocab.txt", - "bert-base-multilingual-uncased": "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-multilingual-uncased-vocab.txt", - "bert-base-multilingual-cased": "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-multilingual-cased-vocab.txt", - "bert-base-chinese": "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-chinese-vocab.txt", + "bert-base-uncased": "https://huggingface.co/bert-base-uncased/resolve/main/vocab.txt", + "bert-large-uncased": "https://huggingface.co/bert-large-uncased/resolve/main/vocab.txt", + "bert-base-cased": "https://huggingface.co/bert-base-cased/resolve/main/vocab.txt", + "bert-large-cased": "https://huggingface.co/bert-large-cased/resolve/main/vocab.txt", + "bert-base-multilingual-uncased": "https://huggingface.co/bert-base-multilingual-uncased/resolve/main/vocab.txt", + "bert-base-multilingual-cased": "https://huggingface.co/bert-base-multilingual-cased/resolve/main/vocab.txt", + "bert-base-chinese": "https://huggingface.co/bert-base-chinese/resolve/main/vocab.txt", "bert-base-german-cased": "https://int-deepset-models-bert.s3.eu-central-1.amazonaws.com/pytorch/bert-base-german-cased-vocab.txt", - "bert-large-uncased-whole-word-masking": "https://s3.amazonaws.com/models.huggingface.co/bert/bert-large-uncased-whole-word-masking-vocab.txt", - "bert-large-cased-whole-word-masking": "https://s3.amazonaws.com/models.huggingface.co/bert/bert-large-cased-whole-word-masking-vocab.txt", - "bert-large-uncased-whole-word-masking-finetuned-squad": "https://s3.amazonaws.com/models.huggingface.co/bert/bert-large-uncased-whole-word-masking-finetuned-squad-vocab.txt", - "bert-large-cased-whole-word-masking-finetuned-squad": "https://s3.amazonaws.com/models.huggingface.co/bert/bert-large-cased-whole-word-masking-finetuned-squad-vocab.txt", - "bert-base-cased-finetuned-mrpc": "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-cased-finetuned-mrpc-vocab.txt", - "bert-base-german-dbmdz-cased": "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-german-dbmdz-cased-vocab.txt", - "bert-base-german-dbmdz-uncased": "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-german-dbmdz-uncased-vocab.txt", - "TurkuNLP/bert-base-finnish-cased-v1": "https://s3.amazonaws.com/models.huggingface.co/bert/TurkuNLP/bert-base-finnish-cased-v1/vocab.txt", - "TurkuNLP/bert-base-finnish-uncased-v1": "https://s3.amazonaws.com/models.huggingface.co/bert/TurkuNLP/bert-base-finnish-uncased-v1/vocab.txt", - "wietsedv/bert-base-dutch-cased": "https://s3.amazonaws.com/models.huggingface.co/bert/wietsedv/bert-base-dutch-cased/vocab.txt", + "bert-large-uncased-whole-word-masking": "https://huggingface.co/bert-large-uncased-whole-word-masking/resolve/main/vocab.txt", + "bert-large-cased-whole-word-masking": "https://huggingface.co/bert-large-cased-whole-word-masking/resolve/main/vocab.txt", + "bert-large-uncased-whole-word-masking-finetuned-squad": "https://huggingface.co/bert-large-uncased-whole-word-masking-finetuned-squad/resolve/main/vocab.txt", + "bert-large-cased-whole-word-masking-finetuned-squad": "https://huggingface.co/bert-large-cased-whole-word-masking-finetuned-squad/resolve/main/vocab.txt", + "bert-base-cased-finetuned-mrpc": "https://huggingface.co/bert-base-cased-finetuned-mrpc/resolve/main/vocab.txt", + "bert-base-german-dbmdz-cased": "https://huggingface.co/bert-base-german-dbmdz-cased/resolve/main/vocab.txt", + "bert-base-german-dbmdz-uncased": "https://huggingface.co/bert-base-german-dbmdz-uncased/resolve/main/vocab.txt", + "TurkuNLP/bert-base-finnish-cased-v1": "https://huggingface.co/TurkuNLP/bert-base-finnish-cased-v1/resolve/main/vocab.txt", + "TurkuNLP/bert-base-finnish-uncased-v1": "https://huggingface.co/TurkuNLP/bert-base-finnish-uncased-v1/resolve/main/vocab.txt", + "wietsedv/bert-base-dutch-cased": "https://huggingface.co/wietsedv/bert-base-dutch-cased/resolve/main/vocab.txt", } } diff --git a/src/transformers/tokenization_bert_fast.py b/src/transformers/tokenization_bert_fast.py index a5b95d8e34..ddd7f6043f 100644 --- a/src/transformers/tokenization_bert_fast.py +++ b/src/transformers/tokenization_bert_fast.py @@ -30,44 +30,44 @@ VOCAB_FILES_NAMES = {"vocab_file": "vocab.txt", "tokenizer_file": "tokenizer.jso PRETRAINED_VOCAB_FILES_MAP = { "vocab_file": { - "bert-base-uncased": "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased-vocab.txt", - "bert-large-uncased": "https://s3.amazonaws.com/models.huggingface.co/bert/bert-large-uncased-vocab.txt", - "bert-base-cased": "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-cased-vocab.txt", - "bert-large-cased": "https://s3.amazonaws.com/models.huggingface.co/bert/bert-large-cased-vocab.txt", - "bert-base-multilingual-uncased": "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-multilingual-uncased-vocab.txt", - "bert-base-multilingual-cased": "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-multilingual-cased-vocab.txt", - "bert-base-chinese": "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-chinese-vocab.txt", + "bert-base-uncased": "https://huggingface.co/bert-base-uncased/resolve/main/vocab.txt", + "bert-large-uncased": "https://huggingface.co/bert-large-uncased/resolve/main/vocab.txt", + "bert-base-cased": "https://huggingface.co/bert-base-cased/resolve/main/vocab.txt", + "bert-large-cased": "https://huggingface.co/bert-large-cased/resolve/main/vocab.txt", + "bert-base-multilingual-uncased": "https://huggingface.co/bert-base-multilingual-uncased/resolve/main/vocab.txt", + "bert-base-multilingual-cased": "https://huggingface.co/bert-base-multilingual-cased/resolve/main/vocab.txt", + "bert-base-chinese": "https://huggingface.co/bert-base-chinese/resolve/main/vocab.txt", "bert-base-german-cased": "https://int-deepset-models-bert.s3.eu-central-1.amazonaws.com/pytorch/bert-base-german-cased-vocab.txt", - "bert-large-uncased-whole-word-masking": "https://s3.amazonaws.com/models.huggingface.co/bert/bert-large-uncased-whole-word-masking-vocab.txt", - "bert-large-cased-whole-word-masking": "https://s3.amazonaws.com/models.huggingface.co/bert/bert-large-cased-whole-word-masking-vocab.txt", - "bert-large-uncased-whole-word-masking-finetuned-squad": "https://s3.amazonaws.com/models.huggingface.co/bert/bert-large-uncased-whole-word-masking-finetuned-squad-vocab.txt", - "bert-large-cased-whole-word-masking-finetuned-squad": "https://s3.amazonaws.com/models.huggingface.co/bert/bert-large-cased-whole-word-masking-finetuned-squad-vocab.txt", - "bert-base-cased-finetuned-mrpc": "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-cased-finetuned-mrpc-vocab.txt", - "bert-base-german-dbmdz-cased": "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-german-dbmdz-cased-vocab.txt", - "bert-base-german-dbmdz-uncased": "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-german-dbmdz-uncased-vocab.txt", - "TurkuNLP/bert-base-finnish-cased-v1": "https://s3.amazonaws.com/models.huggingface.co/bert/TurkuNLP/bert-base-finnish-cased-v1/vocab.txt", - "TurkuNLP/bert-base-finnish-uncased-v1": "https://s3.amazonaws.com/models.huggingface.co/bert/TurkuNLP/bert-base-finnish-uncased-v1/vocab.txt", - "wietsedv/bert-base-dutch-cased": "https://s3.amazonaws.com/models.huggingface.co/bert/wietsedv/bert-base-dutch-cased/vocab.txt", + "bert-large-uncased-whole-word-masking": "https://huggingface.co/bert-large-uncased-whole-word-masking/resolve/main/vocab.txt", + "bert-large-cased-whole-word-masking": "https://huggingface.co/bert-large-cased-whole-word-masking/resolve/main/vocab.txt", + "bert-large-uncased-whole-word-masking-finetuned-squad": "https://huggingface.co/bert-large-uncased-whole-word-masking-finetuned-squad/resolve/main/vocab.txt", + "bert-large-cased-whole-word-masking-finetuned-squad": "https://huggingface.co/bert-large-cased-whole-word-masking-finetuned-squad/resolve/main/vocab.txt", + "bert-base-cased-finetuned-mrpc": "https://huggingface.co/bert-base-cased-finetuned-mrpc/resolve/main/vocab.txt", + "bert-base-german-dbmdz-cased": "https://huggingface.co/bert-base-german-dbmdz-cased/resolve/main/vocab.txt", + "bert-base-german-dbmdz-uncased": "https://huggingface.co/bert-base-german-dbmdz-uncased/resolve/main/vocab.txt", + "TurkuNLP/bert-base-finnish-cased-v1": "https://huggingface.co/TurkuNLP/bert-base-finnish-cased-v1/resolve/main/vocab.txt", + "TurkuNLP/bert-base-finnish-uncased-v1": "https://huggingface.co/TurkuNLP/bert-base-finnish-uncased-v1/resolve/main/vocab.txt", + "wietsedv/bert-base-dutch-cased": "https://huggingface.co/wietsedv/bert-base-dutch-cased/resolve/main/vocab.txt", }, "tokenizer_file": { - "bert-base-uncased": "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased-tokenizer.json", - "bert-large-uncased": "https://s3.amazonaws.com/models.huggingface.co/bert/bert-large-uncased-tokenizer.json", - "bert-base-cased": "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-cased-tokenizer.json", - "bert-large-cased": "https://s3.amazonaws.com/models.huggingface.co/bert/bert-large-cased-tokenizer.json", - "bert-base-multilingual-uncased": "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-multilingual-uncased-tokenizer.json", - "bert-base-multilingual-cased": "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-multilingual-cased-tokenizer.json", - "bert-base-chinese": "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-chinese-tokenizer.json", + "bert-base-uncased": "https://huggingface.co/bert-base-uncased/resolve/main/tokenizer.json", + "bert-large-uncased": "https://huggingface.co/bert-large-uncased/resolve/main/tokenizer.json", + "bert-base-cased": "https://huggingface.co/bert-base-cased/resolve/main/tokenizer.json", + "bert-large-cased": "https://huggingface.co/bert-large-cased/resolve/main/tokenizer.json", + "bert-base-multilingual-uncased": "https://huggingface.co/bert-base-multilingual-uncased/resolve/main/tokenizer.json", + "bert-base-multilingual-cased": "https://huggingface.co/bert-base-multilingual-cased/resolve/main/tokenizer.json", + "bert-base-chinese": "https://huggingface.co/bert-base-chinese/resolve/main/tokenizer.json", "bert-base-german-cased": "https://int-deepset-models-bert.s3.eu-central-1.amazonaws.com/pytorch/bert-base-german-cased-tokenizer.json", - "bert-large-uncased-whole-word-masking": "https://s3.amazonaws.com/models.huggingface.co/bert/bert-large-uncased-whole-word-masking-tokenizer.json", - "bert-large-cased-whole-word-masking": "https://s3.amazonaws.com/models.huggingface.co/bert/bert-large-cased-whole-word-masking-tokenizer.json", - "bert-large-uncased-whole-word-masking-finetuned-squad": "https://s3.amazonaws.com/models.huggingface.co/bert/bert-large-uncased-whole-word-masking-finetuned-squad-tokenizer.json", - "bert-large-cased-whole-word-masking-finetuned-squad": "https://s3.amazonaws.com/models.huggingface.co/bert/bert-large-cased-whole-word-masking-finetuned-squad-tokenizer.json", - "bert-base-cased-finetuned-mrpc": "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-cased-finetuned-mrpc-tokenizer.json", - "bert-base-german-dbmdz-cased": "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-german-dbmdz-cased-tokenizer.json", - "bert-base-german-dbmdz-uncased": "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-german-dbmdz-uncased-tokenizer.json", - "TurkuNLP/bert-base-finnish-cased-v1": "https://s3.amazonaws.com/models.huggingface.co/bert/TurkuNLP/bert-base-finnish-cased-v1/tokenizer.json", - "TurkuNLP/bert-base-finnish-uncased-v1": "https://s3.amazonaws.com/models.huggingface.co/bert/TurkuNLP/bert-base-finnish-uncased-v1/tokenizer.json", - "wietsedv/bert-base-dutch-cased": "https://s3.amazonaws.com/models.huggingface.co/bert/wietsedv/bert-base-dutch-cased/tokenizer.json", + "bert-large-uncased-whole-word-masking": "https://huggingface.co/bert-large-uncased-whole-word-masking/resolve/main/tokenizer.json", + "bert-large-cased-whole-word-masking": "https://huggingface.co/bert-large-cased-whole-word-masking/resolve/main/tokenizer.json", + "bert-large-uncased-whole-word-masking-finetuned-squad": "https://huggingface.co/bert-large-uncased-whole-word-masking-finetuned-squad/resolve/main/tokenizer.json", + "bert-large-cased-whole-word-masking-finetuned-squad": "https://huggingface.co/bert-large-cased-whole-word-masking-finetuned-squad/resolve/main/tokenizer.json", + "bert-base-cased-finetuned-mrpc": "https://huggingface.co/bert-base-cased-finetuned-mrpc/resolve/main/tokenizer.json", + "bert-base-german-dbmdz-cased": "https://huggingface.co/bert-base-german-dbmdz-cased/resolve/main/tokenizer.json", + "bert-base-german-dbmdz-uncased": "https://huggingface.co/bert-base-german-dbmdz-uncased/resolve/main/tokenizer.json", + "TurkuNLP/bert-base-finnish-cased-v1": "https://huggingface.co/TurkuNLP/bert-base-finnish-cased-v1/resolve/main/tokenizer.json", + "TurkuNLP/bert-base-finnish-uncased-v1": "https://huggingface.co/TurkuNLP/bert-base-finnish-uncased-v1/resolve/main/tokenizer.json", + "wietsedv/bert-base-dutch-cased": "https://huggingface.co/wietsedv/bert-base-dutch-cased/resolve/main/tokenizer.json", }, } diff --git a/src/transformers/tokenization_bert_generation.py b/src/transformers/tokenization_bert_generation.py index fe7b749601..e96dc606bd 100644 --- a/src/transformers/tokenization_bert_generation.py +++ b/src/transformers/tokenization_bert_generation.py @@ -30,7 +30,7 @@ logger = logging.get_logger(__name__) VOCAB_FILES_NAMES = {"vocab_file": "spiece.model"} tokenizer_url = ( - "https://s3.amazonaws.com/models.huggingface.co/bert/google/bert_for_seq_generation_L-24_bbc_encoder/spiece.model" + "https://huggingface.co/google/bert_for_seq_generation_L-24_bbc_encoder/resolve/main/spiece.model" ) diff --git a/src/transformers/tokenization_bert_japanese.py b/src/transformers/tokenization_bert_japanese.py index ddf6f451f3..2ef263977c 100644 --- a/src/transformers/tokenization_bert_japanese.py +++ b/src/transformers/tokenization_bert_japanese.py @@ -31,10 +31,10 @@ VOCAB_FILES_NAMES = {"vocab_file": "vocab.txt"} PRETRAINED_VOCAB_FILES_MAP = { "vocab_file": { - "cl-tohoku/bert-base-japanese": "https://s3.amazonaws.com/models.huggingface.co/bert/cl-tohoku/bert-base-japanese/vocab.txt", - "cl-tohoku/bert-base-japanese-whole-word-masking": "https://s3.amazonaws.com/models.huggingface.co/bert/cl-tohoku/bert-base-japanese-whole-word-masking/vocab.txt", - "cl-tohoku/bert-base-japanese-char": "https://s3.amazonaws.com/models.huggingface.co/bert/cl-tohoku/bert-base-japanese-char/vocab.txt", - "cl-tohoku/bert-base-japanese-char-whole-word-masking": "https://s3.amazonaws.com/models.huggingface.co/bert/cl-tohoku/bert-base-japanese-char-whole-word-masking/vocab.txt", + "cl-tohoku/bert-base-japanese": "https://huggingface.co/cl-tohoku/bert-base-japanese/resolve/main/vocab.txt", + "cl-tohoku/bert-base-japanese-whole-word-masking": "https://huggingface.co/cl-tohoku/bert-base-japanese-whole-word-masking/resolve/main/vocab.txt", + "cl-tohoku/bert-base-japanese-char": "https://huggingface.co/cl-tohoku/bert-base-japanese-char/resolve/main/vocab.txt", + "cl-tohoku/bert-base-japanese-char-whole-word-masking": "https://huggingface.co/cl-tohoku/bert-base-japanese-char-whole-word-masking/resolve/main/vocab.txt", } } diff --git a/src/transformers/tokenization_bertweet.py b/src/transformers/tokenization_bertweet.py index 0c77a9fc2c..402fe7708b 100644 --- a/src/transformers/tokenization_bertweet.py +++ b/src/transformers/tokenization_bertweet.py @@ -37,10 +37,10 @@ VOCAB_FILES_NAMES = { PRETRAINED_VOCAB_FILES_MAP = { "vocab_file": { - "vinai/bertweet-base": "https://s3.amazonaws.com/models.huggingface.co/bert/vinai/bertweet-base/vocab.txt", + "vinai/bertweet-base": "https://huggingface.co/vinai/bertweet-base/resolve/main/vocab.txt", }, "merges_file": { - "vinai/bertweet-base": "https://s3.amazonaws.com/models.huggingface.co/bert/vinai/bertweet-base/bpe.codes", + "vinai/bertweet-base": "https://huggingface.co/vinai/bertweet-base/resolve/main/bpe.codes", }, } diff --git a/src/transformers/tokenization_camembert.py b/src/transformers/tokenization_camembert.py index 3abc5d78bb..794f3d8c71 100644 --- a/src/transformers/tokenization_camembert.py +++ b/src/transformers/tokenization_camembert.py @@ -31,7 +31,7 @@ VOCAB_FILES_NAMES = {"vocab_file": "sentencepiece.bpe.model"} PRETRAINED_VOCAB_FILES_MAP = { "vocab_file": { - "camembert-base": "https://s3.amazonaws.com/models.huggingface.co/bert/camembert-base-sentencepiece.bpe.model", + "camembert-base": "https://huggingface.co/camembert-base/resolve/main/sentencepiece.bpe.model", } } diff --git a/src/transformers/tokenization_camembert_fast.py b/src/transformers/tokenization_camembert_fast.py index 31a5066072..4e4cf9557d 100644 --- a/src/transformers/tokenization_camembert_fast.py +++ b/src/transformers/tokenization_camembert_fast.py @@ -36,10 +36,10 @@ VOCAB_FILES_NAMES = {"vocab_file": "sentencepiece.bpe.model", "tokenizer_file": PRETRAINED_VOCAB_FILES_MAP = { "vocab_file": { - "camembert-base": "https://s3.amazonaws.com/models.huggingface.co/bert/camembert-base-sentencepiece.bpe.model", + "camembert-base": "https://huggingface.co/camembert-base/resolve/main/sentencepiece.bpe.model", }, "tokenizer_file": { - "camembert-base": "https://s3.amazonaws.com/models.huggingface.co/bert/camembert-base-tokenizer.json", + "camembert-base": "https://huggingface.co/camembert-base/resolve/main/tokenizer.json", }, } diff --git a/src/transformers/tokenization_deberta.py b/src/transformers/tokenization_deberta.py index 18bad354c5..057dd94a59 100644 --- a/src/transformers/tokenization_deberta.py +++ b/src/transformers/tokenization_deberta.py @@ -42,8 +42,8 @@ VOCAB_FILES_NAMES = {"vocab_file": "bpe_encoder.bin"} PRETRAINED_VOCAB_FILES_MAP = { "vocab_file": { - "microsoft/deberta-base": "https://s3.amazonaws.com/models.huggingface.co/bert/microsoft/deberta-base/bpe_encoder.bin", - "microsoft/deberta-large": "https://s3.amazonaws.com/models.huggingface.co/bert/microsoft/deberta-large/bpe_encoder.bin", + "microsoft/deberta-base": "https://huggingface.co/microsoft/deberta-base/resolve/main/bpe_encoder.bin", + "microsoft/deberta-large": "https://huggingface.co/microsoft/deberta-large/resolve/main/bpe_encoder.bin", } } diff --git a/src/transformers/tokenization_distilbert.py b/src/transformers/tokenization_distilbert.py index f7136b3e36..8f4109ed76 100644 --- a/src/transformers/tokenization_distilbert.py +++ b/src/transformers/tokenization_distilbert.py @@ -24,12 +24,12 @@ VOCAB_FILES_NAMES = {"vocab_file": "vocab.txt"} PRETRAINED_VOCAB_FILES_MAP = { "vocab_file": { - "distilbert-base-uncased": "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased-vocab.txt", - "distilbert-base-uncased-distilled-squad": "https://s3.amazonaws.com/models.huggingface.co/bert/bert-large-uncased-vocab.txt", - "distilbert-base-cased": "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-cased-vocab.txt", - "distilbert-base-cased-distilled-squad": "https://s3.amazonaws.com/models.huggingface.co/bert/bert-large-cased-vocab.txt", - "distilbert-base-german-cased": "https://s3.amazonaws.com/models.huggingface.co/bert/distilbert-base-german-cased-vocab.txt", - "distilbert-base-multilingual-cased": "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-multilingual-cased-vocab.txt", + "distilbert-base-uncased": "https://huggingface.co/bert-base-uncased/resolve/main/vocab.txt", + "distilbert-base-uncased-distilled-squad": "https://huggingface.co/bert-large-uncased/resolve/main/vocab.txt", + "distilbert-base-cased": "https://huggingface.co/bert-base-cased/resolve/main/vocab.txt", + "distilbert-base-cased-distilled-squad": "https://huggingface.co/bert-large-cased/resolve/main/vocab.txt", + "distilbert-base-german-cased": "https://huggingface.co/distilbert-base-german-cased/resolve/main/vocab.txt", + "distilbert-base-multilingual-cased": "https://huggingface.co/bert-base-multilingual-cased/resolve/main/vocab.txt", } } diff --git a/src/transformers/tokenization_distilbert_fast.py b/src/transformers/tokenization_distilbert_fast.py index 5ba84cd10a..7129bc3eec 100644 --- a/src/transformers/tokenization_distilbert_fast.py +++ b/src/transformers/tokenization_distilbert_fast.py @@ -25,20 +25,20 @@ VOCAB_FILES_NAMES = {"vocab_file": "vocab.txt", "tokenizer_file": "tokenizer.jso PRETRAINED_VOCAB_FILES_MAP = { "vocab_file": { - "distilbert-base-uncased": "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased-vocab.txt", - "distilbert-base-uncased-distilled-squad": "https://s3.amazonaws.com/models.huggingface.co/bert/bert-large-uncased-vocab.txt", - "distilbert-base-cased": "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-cased-vocab.txt", - "distilbert-base-cased-distilled-squad": "https://s3.amazonaws.com/models.huggingface.co/bert/bert-large-cased-vocab.txt", - "distilbert-base-german-cased": "https://s3.amazonaws.com/models.huggingface.co/bert/distilbert-base-german-cased-vocab.txt", - "distilbert-base-multilingual-cased": "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-multilingual-cased-vocab.txt", + "distilbert-base-uncased": "https://huggingface.co/bert-base-uncased/resolve/main/vocab.txt", + "distilbert-base-uncased-distilled-squad": "https://huggingface.co/bert-large-uncased/resolve/main/vocab.txt", + "distilbert-base-cased": "https://huggingface.co/bert-base-cased/resolve/main/vocab.txt", + "distilbert-base-cased-distilled-squad": "https://huggingface.co/bert-large-cased/resolve/main/vocab.txt", + "distilbert-base-german-cased": "https://huggingface.co/distilbert-base-german-cased/resolve/main/vocab.txt", + "distilbert-base-multilingual-cased": "https://huggingface.co/bert-base-multilingual-cased/resolve/main/vocab.txt", }, "tokenizer_file": { - "distilbert-base-uncased": "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased-tokenizer.json", - "distilbert-base-uncased-distilled-squad": "https://s3.amazonaws.com/models.huggingface.co/bert/bert-large-uncased-tokenizer.json", - "distilbert-base-cased": "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-cased-tokenizer.json", - "distilbert-base-cased-distilled-squad": "https://s3.amazonaws.com/models.huggingface.co/bert/bert-large-cased-tokenizer.json", - "distilbert-base-german-cased": "https://s3.amazonaws.com/models.huggingface.co/bert/distilbert-base-german-cased-tokenizer.json", - "distilbert-base-multilingual-cased": "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-multilingual-cased-tokenizer.json", + "distilbert-base-uncased": "https://huggingface.co/bert-base-uncased/resolve/main/tokenizer.json", + "distilbert-base-uncased-distilled-squad": "https://huggingface.co/bert-large-uncased/resolve/main/tokenizer.json", + "distilbert-base-cased": "https://huggingface.co/bert-base-cased/resolve/main/tokenizer.json", + "distilbert-base-cased-distilled-squad": "https://huggingface.co/bert-large-cased/resolve/main/tokenizer.json", + "distilbert-base-german-cased": "https://huggingface.co/distilbert-base-german-cased/resolve/main/tokenizer.json", + "distilbert-base-multilingual-cased": "https://huggingface.co/bert-base-multilingual-cased/resolve/main/tokenizer.json", }, } diff --git a/src/transformers/tokenization_dpr.py b/src/transformers/tokenization_dpr.py index 4aca836e63..b48349c168 100644 --- a/src/transformers/tokenization_dpr.py +++ b/src/transformers/tokenization_dpr.py @@ -30,32 +30,32 @@ VOCAB_FILES_NAMES = {"vocab_file": "vocab.txt", "tokenizer_file": "tokenizer.jso CONTEXT_ENCODER_PRETRAINED_VOCAB_FILES_MAP = { "vocab_file": { - "facebook/dpr-ctx_encoder-single-nq-base": "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased-vocab.txt", - "facebook/dpr-ctx_encoder-multiset-base": "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased-vocab.txt", + "facebook/dpr-ctx_encoder-single-nq-base": "https://huggingface.co/bert-base-uncased/resolve/main/vocab.txt", + "facebook/dpr-ctx_encoder-multiset-base": "https://huggingface.co/bert-base-uncased/resolve/main/vocab.txt", }, "tokenizer_file": { - "facebook/dpr-ctx_encoder-single-nq-base": "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased-tokenizer.json", - "facebook/dpr-ctx_encoder-multiset-base": "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased-tokenizer.json", + "facebook/dpr-ctx_encoder-single-nq-base": "https://huggingface.co/bert-base-uncased/resolve/main/tokenizer.json", + "facebook/dpr-ctx_encoder-multiset-base": "https://huggingface.co/bert-base-uncased/resolve/main/tokenizer.json", }, } QUESTION_ENCODER_PRETRAINED_VOCAB_FILES_MAP = { "vocab_file": { - "facebook/dpr-question_encoder-single-nq-base": "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased-vocab.txt", - "facebook/dpr-question_encoder-multiset-base": "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased-vocab.txt", + "facebook/dpr-question_encoder-single-nq-base": "https://huggingface.co/bert-base-uncased/resolve/main/vocab.txt", + "facebook/dpr-question_encoder-multiset-base": "https://huggingface.co/bert-base-uncased/resolve/main/vocab.txt", }, "tokenizer_file": { - "facebook/dpr-question_encoder-single-nq-base": "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased-tokenizer.json", - "facebook/dpr-question_encoder-multiset-base": "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased-tokenizer.json", + "facebook/dpr-question_encoder-single-nq-base": "https://huggingface.co/bert-base-uncased/resolve/main/tokenizer.json", + "facebook/dpr-question_encoder-multiset-base": "https://huggingface.co/bert-base-uncased/resolve/main/tokenizer.json", }, } READER_PRETRAINED_VOCAB_FILES_MAP = { "vocab_file": { - "facebook/dpr-reader-single-nq-base": "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased-vocab.txt", - "facebook/dpr-reader-multiset-base": "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased-vocab.txt", + "facebook/dpr-reader-single-nq-base": "https://huggingface.co/bert-base-uncased/resolve/main/vocab.txt", + "facebook/dpr-reader-multiset-base": "https://huggingface.co/bert-base-uncased/resolve/main/vocab.txt", }, "tokenizer_file": { - "facebook/dpr-reader-single-nq-base": "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased-tokenizer.json", - "facebook/dpr-reader-multiset-base": "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased-tokenizer.json", + "facebook/dpr-reader-single-nq-base": "https://huggingface.co/bert-base-uncased/resolve/main/tokenizer.json", + "facebook/dpr-reader-multiset-base": "https://huggingface.co/bert-base-uncased/resolve/main/tokenizer.json", }, } diff --git a/src/transformers/tokenization_dpr_fast.py b/src/transformers/tokenization_dpr_fast.py index 29e98119ab..7d5f052051 100644 --- a/src/transformers/tokenization_dpr_fast.py +++ b/src/transformers/tokenization_dpr_fast.py @@ -31,32 +31,32 @@ VOCAB_FILES_NAMES = {"vocab_file": "vocab.txt", "tokenizer_file": "tokenizer.jso CONTEXT_ENCODER_PRETRAINED_VOCAB_FILES_MAP = { "vocab_file": { - "facebook/dpr-ctx_encoder-single-nq-base": "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased-vocab.txt", - "facebook/dpr-ctx_encoder-multiset-base": "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased-vocab.txt", + "facebook/dpr-ctx_encoder-single-nq-base": "https://huggingface.co/bert-base-uncased/resolve/main/vocab.txt", + "facebook/dpr-ctx_encoder-multiset-base": "https://huggingface.co/bert-base-uncased/resolve/main/vocab.txt", }, "tokenizer_file": { - "facebook/dpr-ctx_encoder-single-nq-base": "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased-tokenizer.json", - "facebook/dpr-ctx_encoder-multiset-base": "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased-tokenizer.json", + "facebook/dpr-ctx_encoder-single-nq-base": "https://huggingface.co/bert-base-uncased/resolve/main/tokenizer.json", + "facebook/dpr-ctx_encoder-multiset-base": "https://huggingface.co/bert-base-uncased/resolve/main/tokenizer.json", }, } QUESTION_ENCODER_PRETRAINED_VOCAB_FILES_MAP = { "vocab_file": { - "facebook/dpr-question_encoder-single-nq-base": "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased-vocab.txt", - "facebook/dpr-question_encoder-multiset-base": "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased-vocab.txt", + "facebook/dpr-question_encoder-single-nq-base": "https://huggingface.co/bert-base-uncased/resolve/main/vocab.txt", + "facebook/dpr-question_encoder-multiset-base": "https://huggingface.co/bert-base-uncased/resolve/main/vocab.txt", }, "tokenizer_file": { - "facebook/dpr-question_encoder-single-nq-base": "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased-tokenizer.json", - "facebook/dpr-question_encoder-multiset-base": "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased-tokenizer.json", + "facebook/dpr-question_encoder-single-nq-base": "https://huggingface.co/bert-base-uncased/resolve/main/tokenizer.json", + "facebook/dpr-question_encoder-multiset-base": "https://huggingface.co/bert-base-uncased/resolve/main/tokenizer.json", }, } READER_PRETRAINED_VOCAB_FILES_MAP = { "vocab_file": { - "facebook/dpr-reader-single-nq-base": "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased-vocab.txt", - "facebook/dpr-reader-multiset-base": "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased-vocab.txt", + "facebook/dpr-reader-single-nq-base": "https://huggingface.co/bert-base-uncased/resolve/main/vocab.txt", + "facebook/dpr-reader-multiset-base": "https://huggingface.co/bert-base-uncased/resolve/main/vocab.txt", }, "tokenizer_file": { - "facebook/dpr-reader-single-nq-base": "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased-tokenizer.json", - "facebook/dpr-reader-multiset-base": "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased-tokenizer.json", + "facebook/dpr-reader-single-nq-base": "https://huggingface.co/bert-base-uncased/resolve/main/tokenizer.json", + "facebook/dpr-reader-multiset-base": "https://huggingface.co/bert-base-uncased/resolve/main/tokenizer.json", }, } diff --git a/src/transformers/tokenization_electra.py b/src/transformers/tokenization_electra.py index 50a3c1959c..9b0e394bf1 100644 --- a/src/transformers/tokenization_electra.py +++ b/src/transformers/tokenization_electra.py @@ -20,12 +20,12 @@ VOCAB_FILES_NAMES = {"vocab_file": "vocab.txt"} PRETRAINED_VOCAB_FILES_MAP = { "vocab_file": { - "google/electra-small-generator": "https://s3.amazonaws.com/models.huggingface.co/bert/google/electra-small-generator/vocab.txt", - "google/electra-base-generator": "https://s3.amazonaws.com/models.huggingface.co/bert/google/electra-base-generator/vocab.txt", - "google/electra-large-generator": "https://s3.amazonaws.com/models.huggingface.co/bert/google/electra-large-generator/vocab.txt", - "google/electra-small-discriminator": "https://s3.amazonaws.com/models.huggingface.co/bert/google/electra-small-discriminator/vocab.txt", - "google/electra-base-discriminator": "https://s3.amazonaws.com/models.huggingface.co/bert/google/electra-base-discriminator/vocab.txt", - "google/electra-large-discriminator": "https://s3.amazonaws.com/models.huggingface.co/bert/google/electra-large-discriminator/vocab.txt", + "google/electra-small-generator": "https://huggingface.co/google/electra-small-generator/resolve/main/vocab.txt", + "google/electra-base-generator": "https://huggingface.co/google/electra-base-generator/resolve/main/vocab.txt", + "google/electra-large-generator": "https://huggingface.co/google/electra-large-generator/resolve/main/vocab.txt", + "google/electra-small-discriminator": "https://huggingface.co/google/electra-small-discriminator/resolve/main/vocab.txt", + "google/electra-base-discriminator": "https://huggingface.co/google/electra-base-discriminator/resolve/main/vocab.txt", + "google/electra-large-discriminator": "https://huggingface.co/google/electra-large-discriminator/resolve/main/vocab.txt", } } diff --git a/src/transformers/tokenization_electra_fast.py b/src/transformers/tokenization_electra_fast.py index 3ada305766..470b936d72 100644 --- a/src/transformers/tokenization_electra_fast.py +++ b/src/transformers/tokenization_electra_fast.py @@ -21,20 +21,20 @@ VOCAB_FILES_NAMES = {"vocab_file": "vocab.txt", "tokenizer_file": "tokenizer.jso PRETRAINED_VOCAB_FILES_MAP = { "vocab_file": { - "google/electra-small-generator": "https://s3.amazonaws.com/models.huggingface.co/bert/google/electra-small-generator/vocab.txt", - "google/electra-base-generator": "https://s3.amazonaws.com/models.huggingface.co/bert/google/electra-base-generator/vocab.txt", - "google/electra-large-generator": "https://s3.amazonaws.com/models.huggingface.co/bert/google/electra-large-generator/vocab.txt", - "google/electra-small-discriminator": "https://s3.amazonaws.com/models.huggingface.co/bert/google/electra-small-discriminator/vocab.txt", - "google/electra-base-discriminator": "https://s3.amazonaws.com/models.huggingface.co/bert/google/electra-base-discriminator/vocab.txt", - "google/electra-large-discriminator": "https://s3.amazonaws.com/models.huggingface.co/bert/google/electra-large-discriminator/vocab.txt", + "google/electra-small-generator": "https://huggingface.co/google/electra-small-generator/resolve/main/vocab.txt", + "google/electra-base-generator": "https://huggingface.co/google/electra-base-generator/resolve/main/vocab.txt", + "google/electra-large-generator": "https://huggingface.co/google/electra-large-generator/resolve/main/vocab.txt", + "google/electra-small-discriminator": "https://huggingface.co/google/electra-small-discriminator/resolve/main/vocab.txt", + "google/electra-base-discriminator": "https://huggingface.co/google/electra-base-discriminator/resolve/main/vocab.txt", + "google/electra-large-discriminator": "https://huggingface.co/google/electra-large-discriminator/resolve/main/vocab.txt", }, "tokenizer_file": { - "google/electra-small-generator": "https://s3.amazonaws.com/models.huggingface.co/bert/google/electra-small-generator/tokenizer.json", - "google/electra-base-generator": "https://s3.amazonaws.com/models.huggingface.co/bert/google/electra-base-generator/tokenizer.json", - "google/electra-large-generator": "https://s3.amazonaws.com/models.huggingface.co/bert/google/electra-large-generator/tokenizer.json", - "google/electra-small-discriminator": "https://s3.amazonaws.com/models.huggingface.co/bert/google/electra-small-discriminator/tokenizer.json", - "google/electra-base-discriminator": "https://s3.amazonaws.com/models.huggingface.co/bert/google/electra-base-discriminator/tokenizer.json", - "google/electra-large-discriminator": "https://s3.amazonaws.com/models.huggingface.co/bert/google/electra-large-discriminator/tokenizer.json", + "google/electra-small-generator": "https://huggingface.co/google/electra-small-generator/resolve/main/tokenizer.json", + "google/electra-base-generator": "https://huggingface.co/google/electra-base-generator/resolve/main/tokenizer.json", + "google/electra-large-generator": "https://huggingface.co/google/electra-large-generator/resolve/main/tokenizer.json", + "google/electra-small-discriminator": "https://huggingface.co/google/electra-small-discriminator/resolve/main/tokenizer.json", + "google/electra-base-discriminator": "https://huggingface.co/google/electra-base-discriminator/resolve/main/tokenizer.json", + "google/electra-large-discriminator": "https://huggingface.co/google/electra-large-discriminator/resolve/main/tokenizer.json", }, } diff --git a/src/transformers/tokenization_flaubert.py b/src/transformers/tokenization_flaubert.py index b5e36e01b6..b81793ecfc 100644 --- a/src/transformers/tokenization_flaubert.py +++ b/src/transformers/tokenization_flaubert.py @@ -32,16 +32,16 @@ VOCAB_FILES_NAMES = { PRETRAINED_VOCAB_FILES_MAP = { "vocab_file": { - "flaubert/flaubert_small_cased": "https://s3.amazonaws.com/models.huggingface.co/bert/flaubert/flaubert_small_cased/vocab.json", - "flaubert/flaubert_base_uncased": "https://s3.amazonaws.com/models.huggingface.co/bert/flaubert/flaubert_base_uncased/vocab.json", - "flaubert/flaubert_base_cased": "https://s3.amazonaws.com/models.huggingface.co/bert/flaubert/flaubert_base_cased/vocab.json", - "flaubert/flaubert_large_cased": "https://s3.amazonaws.com/models.huggingface.co/bert/flaubert/flaubert_large_cased/vocab.json", + "flaubert/flaubert_small_cased": "https://huggingface.co/flaubert/flaubert_small_cased/resolve/main/vocab.json", + "flaubert/flaubert_base_uncased": "https://huggingface.co/flaubert/flaubert_base_uncased/resolve/main/vocab.json", + "flaubert/flaubert_base_cased": "https://huggingface.co/flaubert/flaubert_base_cased/resolve/main/vocab.json", + "flaubert/flaubert_large_cased": "https://huggingface.co/flaubert/flaubert_large_cased/resolve/main/vocab.json", }, "merges_file": { - "flaubert/flaubert_small_cased": "https://s3.amazonaws.com/models.huggingface.co/bert/flaubert/flaubert_small_cased/merges.txt", - "flaubert/flaubert_base_uncased": "https://s3.amazonaws.com/models.huggingface.co/bert/flaubert/flaubert_base_uncased/merges.txt", - "flaubert/flaubert_base_cased": "https://s3.amazonaws.com/models.huggingface.co/bert/flaubert/flaubert_base_cased/merges.txt", - "flaubert/flaubert_large_cased": "https://s3.amazonaws.com/models.huggingface.co/bert/flaubert/flaubert_large_cased/merges.txt", + "flaubert/flaubert_small_cased": "https://huggingface.co/flaubert/flaubert_small_cased/resolve/main/merges.txt", + "flaubert/flaubert_base_uncased": "https://huggingface.co/flaubert/flaubert_base_uncased/resolve/main/merges.txt", + "flaubert/flaubert_base_cased": "https://huggingface.co/flaubert/flaubert_base_cased/resolve/main/merges.txt", + "flaubert/flaubert_large_cased": "https://huggingface.co/flaubert/flaubert_large_cased/resolve/main/merges.txt", }, } diff --git a/src/transformers/tokenization_funnel.py b/src/transformers/tokenization_funnel.py index cb8694066d..68df5744e9 100644 --- a/src/transformers/tokenization_funnel.py +++ b/src/transformers/tokenization_funnel.py @@ -39,16 +39,16 @@ _model_names = [ PRETRAINED_VOCAB_FILES_MAP = { "vocab_file": { - "funnel-transformer/small": "https://s3.amazonaws.com/models.huggingface.co/bert/funnel-transformer/small/vocab.txt", - "funnel-transformer/small-base": "https://s3.amazonaws.com/models.huggingface.co/bert/funnel-transformer/small-base/vocab.txt", - "funnel-transformer/medium": "https://s3.amazonaws.com/models.huggingface.co/bert/funnel-transformer/medium/vocab.txt", - "funnel-transformer/medium-base": "https://s3.amazonaws.com/models.huggingface.co/bert/funnel-transformer/medium-base/vocab.txt", - "funnel-transformer/intermediate": "https://s3.amazonaws.com/models.huggingface.co/bert/funnel-transformer/intermediate/vocab.txt", - "funnel-transformer/intermediate-base": "https://s3.amazonaws.com/models.huggingface.co/bert/funnel-transformer/intermediate-base/vocab.txt", - "funnel-transformer/large": "https://s3.amazonaws.com/models.huggingface.co/bert/funnel-transformer/large/vocab.txt", - "funnel-transformer/large-base": "https://s3.amazonaws.com/models.huggingface.co/bert/funnel-transformer/large-base/vocab.txt", - "funnel-transformer/xlarge": "https://s3.amazonaws.com/models.huggingface.co/bert/funnel-transformer/xlarge/vocab.txt", - "funnel-transformer/xlarge-base": "https://s3.amazonaws.com/models.huggingface.co/bert/funnel-transformer/xlarge-base/vocab.txt", + "funnel-transformer/small": "https://huggingface.co/funnel-transformer/small/resolve/main/vocab.txt", + "funnel-transformer/small-base": "https://huggingface.co/funnel-transformer/small-base/resolve/main/vocab.txt", + "funnel-transformer/medium": "https://huggingface.co/funnel-transformer/medium/resolve/main/vocab.txt", + "funnel-transformer/medium-base": "https://huggingface.co/funnel-transformer/medium-base/resolve/main/vocab.txt", + "funnel-transformer/intermediate": "https://huggingface.co/funnel-transformer/intermediate/resolve/main/vocab.txt", + "funnel-transformer/intermediate-base": "https://huggingface.co/funnel-transformer/intermediate-base/resolve/main/vocab.txt", + "funnel-transformer/large": "https://huggingface.co/funnel-transformer/large/resolve/main/vocab.txt", + "funnel-transformer/large-base": "https://huggingface.co/funnel-transformer/large-base/resolve/main/vocab.txt", + "funnel-transformer/xlarge": "https://huggingface.co/funnel-transformer/xlarge/resolve/main/vocab.txt", + "funnel-transformer/xlarge-base": "https://huggingface.co/funnel-transformer/xlarge-base/resolve/main/vocab.txt", } } PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES = {f"funnel-transformer/{name}": 512 for name in _model_names} diff --git a/src/transformers/tokenization_funnel_fast.py b/src/transformers/tokenization_funnel_fast.py index 50ef3b4990..bc24846c66 100644 --- a/src/transformers/tokenization_funnel_fast.py +++ b/src/transformers/tokenization_funnel_fast.py @@ -40,28 +40,28 @@ _model_names = [ PRETRAINED_VOCAB_FILES_MAP = { "vocab_file": { - "funnel-transformer/small": "https://s3.amazonaws.com/models.huggingface.co/bert/funnel-transformer/small/vocab.txt", - "funnel-transformer/small-base": "https://s3.amazonaws.com/models.huggingface.co/bert/funnel-transformer/small-base/vocab.txt", - "funnel-transformer/medium": "https://s3.amazonaws.com/models.huggingface.co/bert/funnel-transformer/medium/vocab.txt", - "funnel-transformer/medium-base": "https://s3.amazonaws.com/models.huggingface.co/bert/funnel-transformer/medium-base/vocab.txt", - "funnel-transformer/intermediate": "https://s3.amazonaws.com/models.huggingface.co/bert/funnel-transformer/intermediate/vocab.txt", - "funnel-transformer/intermediate-base": "https://s3.amazonaws.com/models.huggingface.co/bert/funnel-transformer/intermediate-base/vocab.txt", - "funnel-transformer/large": "https://s3.amazonaws.com/models.huggingface.co/bert/funnel-transformer/large/vocab.txt", - "funnel-transformer/large-base": "https://s3.amazonaws.com/models.huggingface.co/bert/funnel-transformer/large-base/vocab.txt", - "funnel-transformer/xlarge": "https://s3.amazonaws.com/models.huggingface.co/bert/funnel-transformer/xlarge/vocab.txt", - "funnel-transformer/xlarge-base": "https://s3.amazonaws.com/models.huggingface.co/bert/funnel-transformer/xlarge-base/vocab.txt", + "funnel-transformer/small": "https://huggingface.co/funnel-transformer/small/resolve/main/vocab.txt", + "funnel-transformer/small-base": "https://huggingface.co/funnel-transformer/small-base/resolve/main/vocab.txt", + "funnel-transformer/medium": "https://huggingface.co/funnel-transformer/medium/resolve/main/vocab.txt", + "funnel-transformer/medium-base": "https://huggingface.co/funnel-transformer/medium-base/resolve/main/vocab.txt", + "funnel-transformer/intermediate": "https://huggingface.co/funnel-transformer/intermediate/resolve/main/vocab.txt", + "funnel-transformer/intermediate-base": "https://huggingface.co/funnel-transformer/intermediate-base/resolve/main/vocab.txt", + "funnel-transformer/large": "https://huggingface.co/funnel-transformer/large/resolve/main/vocab.txt", + "funnel-transformer/large-base": "https://huggingface.co/funnel-transformer/large-base/resolve/main/vocab.txt", + "funnel-transformer/xlarge": "https://huggingface.co/funnel-transformer/xlarge/resolve/main/vocab.txt", + "funnel-transformer/xlarge-base": "https://huggingface.co/funnel-transformer/xlarge-base/resolve/main/vocab.txt", }, "tokenizer_file": { - "funnel-transformer/small": "https://s3.amazonaws.com/models.huggingface.co/bert/funnel-transformer/small/tokenizer.json", - "funnel-transformer/small-base": "https://s3.amazonaws.com/models.huggingface.co/bert/funnel-transformer/small-base/tokenizer.json", - "funnel-transformer/medium": "https://s3.amazonaws.com/models.huggingface.co/bert/funnel-transformer/medium/tokenizer.json", - "funnel-transformer/medium-base": "https://s3.amazonaws.com/models.huggingface.co/bert/funnel-transformer/medium-base/tokenizer.json", - "funnel-transformer/intermediate": "https://s3.amazonaws.com/models.huggingface.co/bert/funnel-transformer/intermediate/tokenizer.json", - "funnel-transformer/intermediate-base": "https://s3.amazonaws.com/models.huggingface.co/bert/funnel-transformer/intermediate-base/tokenizer.json", - "funnel-transformer/large": "https://s3.amazonaws.com/models.huggingface.co/bert/funnel-transformer/large/tokenizer.json", - "funnel-transformer/large-base": "https://s3.amazonaws.com/models.huggingface.co/bert/funnel-transformer/large-base/tokenizer.json", - "funnel-transformer/xlarge": "https://s3.amazonaws.com/models.huggingface.co/bert/funnel-transformer/xlarge/tokenizer.json", - "funnel-transformer/xlarge-base": "https://s3.amazonaws.com/models.huggingface.co/bert/funnel-transformer/xlarge-base/tokenizer.json", + "funnel-transformer/small": "https://huggingface.co/funnel-transformer/small/resolve/main/tokenizer.json", + "funnel-transformer/small-base": "https://huggingface.co/funnel-transformer/small-base/resolve/main/tokenizer.json", + "funnel-transformer/medium": "https://huggingface.co/funnel-transformer/medium/resolve/main/tokenizer.json", + "funnel-transformer/medium-base": "https://huggingface.co/funnel-transformer/medium-base/resolve/main/tokenizer.json", + "funnel-transformer/intermediate": "https://huggingface.co/funnel-transformer/intermediate/resolve/main/tokenizer.json", + "funnel-transformer/intermediate-base": "https://huggingface.co/funnel-transformer/intermediate-base/resolve/main/tokenizer.json", + "funnel-transformer/large": "https://huggingface.co/funnel-transformer/large/resolve/main/tokenizer.json", + "funnel-transformer/large-base": "https://huggingface.co/funnel-transformer/large-base/resolve/main/tokenizer.json", + "funnel-transformer/xlarge": "https://huggingface.co/funnel-transformer/xlarge/resolve/main/tokenizer.json", + "funnel-transformer/xlarge-base": "https://huggingface.co/funnel-transformer/xlarge-base/resolve/main/tokenizer.json", }, } PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES = {f"funnel-transformer/{name}": 512 for name in _model_names} diff --git a/src/transformers/tokenization_gpt2.py b/src/transformers/tokenization_gpt2.py index aceaa85cc7..f4a7e54912 100644 --- a/src/transformers/tokenization_gpt2.py +++ b/src/transformers/tokenization_gpt2.py @@ -36,18 +36,18 @@ VOCAB_FILES_NAMES = { PRETRAINED_VOCAB_FILES_MAP = { "vocab_file": { - "gpt2": "https://s3.amazonaws.com/models.huggingface.co/bert/gpt2-vocab.json", - "gpt2-medium": "https://s3.amazonaws.com/models.huggingface.co/bert/gpt2-medium-vocab.json", - "gpt2-large": "https://s3.amazonaws.com/models.huggingface.co/bert/gpt2-large-vocab.json", - "gpt2-xl": "https://s3.amazonaws.com/models.huggingface.co/bert/gpt2-xl-vocab.json", - "distilgpt2": "https://s3.amazonaws.com/models.huggingface.co/bert/distilgpt2-vocab.json", + "gpt2": "https://huggingface.co/gpt2/resolve/main/vocab.json", + "gpt2-medium": "https://huggingface.co/gpt2-medium/resolve/main/vocab.json", + "gpt2-large": "https://huggingface.co/gpt2-large/resolve/main/vocab.json", + "gpt2-xl": "https://huggingface.co/gpt2-xl/resolve/main/vocab.json", + "distilgpt2": "https://huggingface.co/distilgpt2/resolve/main/vocab.json", }, "merges_file": { - "gpt2": "https://s3.amazonaws.com/models.huggingface.co/bert/gpt2-merges.txt", - "gpt2-medium": "https://s3.amazonaws.com/models.huggingface.co/bert/gpt2-medium-merges.txt", - "gpt2-large": "https://s3.amazonaws.com/models.huggingface.co/bert/gpt2-large-merges.txt", - "gpt2-xl": "https://s3.amazonaws.com/models.huggingface.co/bert/gpt2-xl-merges.txt", - "distilgpt2": "https://s3.amazonaws.com/models.huggingface.co/bert/distilgpt2-merges.txt", + "gpt2": "https://huggingface.co/gpt2/resolve/main/merges.txt", + "gpt2-medium": "https://huggingface.co/gpt2-medium/resolve/main/merges.txt", + "gpt2-large": "https://huggingface.co/gpt2-large/resolve/main/merges.txt", + "gpt2-xl": "https://huggingface.co/gpt2-xl/resolve/main/merges.txt", + "distilgpt2": "https://huggingface.co/distilgpt2/resolve/main/merges.txt", }, } diff --git a/src/transformers/tokenization_gpt2_fast.py b/src/transformers/tokenization_gpt2_fast.py index cb4a6dfe30..54c4942211 100644 --- a/src/transformers/tokenization_gpt2_fast.py +++ b/src/transformers/tokenization_gpt2_fast.py @@ -33,25 +33,25 @@ VOCAB_FILES_NAMES = {"vocab_file": "vocab.json", "merges_file": "merges.txt", "t PRETRAINED_VOCAB_FILES_MAP = { "vocab_file": { - "gpt2": "https://s3.amazonaws.com/models.huggingface.co/bert/gpt2-vocab.json", - "gpt2-medium": "https://s3.amazonaws.com/models.huggingface.co/bert/gpt2-medium-vocab.json", - "gpt2-large": "https://s3.amazonaws.com/models.huggingface.co/bert/gpt2-large-vocab.json", - "gpt2-xl": "https://s3.amazonaws.com/models.huggingface.co/bert/gpt2-xl-vocab.json", - "distilgpt2": "https://s3.amazonaws.com/models.huggingface.co/bert/distilgpt2-vocab.json", + "gpt2": "https://huggingface.co/gpt2/resolve/main/vocab.json", + "gpt2-medium": "https://huggingface.co/gpt2-medium/resolve/main/vocab.json", + "gpt2-large": "https://huggingface.co/gpt2-large/resolve/main/vocab.json", + "gpt2-xl": "https://huggingface.co/gpt2-xl/resolve/main/vocab.json", + "distilgpt2": "https://huggingface.co/distilgpt2/resolve/main/vocab.json", }, "merges_file": { - "gpt2": "https://s3.amazonaws.com/models.huggingface.co/bert/gpt2-merges.txt", - "gpt2-medium": "https://s3.amazonaws.com/models.huggingface.co/bert/gpt2-medium-merges.txt", - "gpt2-large": "https://s3.amazonaws.com/models.huggingface.co/bert/gpt2-large-merges.txt", - "gpt2-xl": "https://s3.amazonaws.com/models.huggingface.co/bert/gpt2-xl-merges.txt", - "distilgpt2": "https://s3.amazonaws.com/models.huggingface.co/bert/distilgpt2-merges.txt", + "gpt2": "https://huggingface.co/gpt2/resolve/main/merges.txt", + "gpt2-medium": "https://huggingface.co/gpt2-medium/resolve/main/merges.txt", + "gpt2-large": "https://huggingface.co/gpt2-large/resolve/main/merges.txt", + "gpt2-xl": "https://huggingface.co/gpt2-xl/resolve/main/merges.txt", + "distilgpt2": "https://huggingface.co/distilgpt2/resolve/main/merges.txt", }, "tokenizer_file": { - "gpt2": "https://s3.amazonaws.com/models.huggingface.co/bert/gpt2-tokenizer.json", - "gpt2-medium": "https://s3.amazonaws.com/models.huggingface.co/bert/gpt2-medium-tokenizer.json", - "gpt2-large": "https://s3.amazonaws.com/models.huggingface.co/bert/gpt2-large-tokenizer.json", - "gpt2-xl": "https://s3.amazonaws.com/models.huggingface.co/bert/gpt2-xl-tokenizer.json", - "distilgpt2": "https://s3.amazonaws.com/models.huggingface.co/bert/distilgpt2-tokenizer.json", + "gpt2": "https://huggingface.co/gpt2/resolve/main/tokenizer.json", + "gpt2-medium": "https://huggingface.co/gpt2-medium/resolve/main/tokenizer.json", + "gpt2-large": "https://huggingface.co/gpt2-large/resolve/main/tokenizer.json", + "gpt2-xl": "https://huggingface.co/gpt2-xl/resolve/main/tokenizer.json", + "distilgpt2": "https://huggingface.co/distilgpt2/resolve/main/tokenizer.json", }, } diff --git a/src/transformers/tokenization_layoutlm.py b/src/transformers/tokenization_layoutlm.py index 0a3c3e7bff..61ae88e5dc 100644 --- a/src/transformers/tokenization_layoutlm.py +++ b/src/transformers/tokenization_layoutlm.py @@ -25,8 +25,8 @@ VOCAB_FILES_NAMES = {"vocab_file": "vocab.txt"} PRETRAINED_VOCAB_FILES_MAP = { "vocab_file": { - "microsoft/layoutlm-base-uncased": "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased-vocab.txt", - "microsoft/layoutlm-large-uncased": "https://s3.amazonaws.com/models.huggingface.co/bert/bert-large-uncased-vocab.txt", + "microsoft/layoutlm-base-uncased": "https://huggingface.co/bert-base-uncased/resolve/main/vocab.txt", + "microsoft/layoutlm-large-uncased": "https://huggingface.co/bert-large-uncased/resolve/main/vocab.txt", } } diff --git a/src/transformers/tokenization_layoutlm_fast.py b/src/transformers/tokenization_layoutlm_fast.py index 1e71a31e4b..4d9598cae2 100644 --- a/src/transformers/tokenization_layoutlm_fast.py +++ b/src/transformers/tokenization_layoutlm_fast.py @@ -26,12 +26,12 @@ VOCAB_FILES_NAMES = {"vocab_file": "vocab.txt", "tokenizer_file": "tokenizer.jso PRETRAINED_VOCAB_FILES_MAP = { "vocab_file": { - "microsoft/layoutlm-base-uncased": "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased-vocab.txt", - "microsoft/layoutlm-large-uncased": "https://s3.amazonaws.com/models.huggingface.co/bert/bert-large-uncased-vocab.txt", + "microsoft/layoutlm-base-uncased": "https://huggingface.co/bert-base-uncased/resolve/main/vocab.txt", + "microsoft/layoutlm-large-uncased": "https://huggingface.co/bert-large-uncased/resolve/main/vocab.txt", }, "tokenizer_file": { - "microsoft/layoutlm-base-uncased": "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased-tokenizer.json", - "microsoft/layoutlm-large-uncased": "https://s3.amazonaws.com/models.huggingface.co/bert/bert-large-uncased-tokenizer.json", + "microsoft/layoutlm-base-uncased": "https://huggingface.co/bert-base-uncased/resolve/main/tokenizer.json", + "microsoft/layoutlm-large-uncased": "https://huggingface.co/bert-large-uncased/resolve/main/tokenizer.json", }, } diff --git a/src/transformers/tokenization_longformer.py b/src/transformers/tokenization_longformer.py index 3679ed81a9..365dc856d9 100644 --- a/src/transformers/tokenization_longformer.py +++ b/src/transformers/tokenization_longformer.py @@ -21,8 +21,8 @@ logger = logging.get_logger(__name__) # vocab and merges same as roberta -vocab_url = "https://s3.amazonaws.com/models.huggingface.co/bert/roberta-large-vocab.json" -merges_url = "https://s3.amazonaws.com/models.huggingface.co/bert/roberta-large-merges.txt" +vocab_url = "https://huggingface.co/roberta-large/resolve/main/vocab.json" +merges_url = "https://huggingface.co/roberta-large/resolve/main/merges.txt" _all_longformer_models = [ "allenai/longformer-base-4096", "allenai/longformer-large-4096", diff --git a/src/transformers/tokenization_longformer_fast.py b/src/transformers/tokenization_longformer_fast.py index 8e4dff49ed..8cab26e59f 100644 --- a/src/transformers/tokenization_longformer_fast.py +++ b/src/transformers/tokenization_longformer_fast.py @@ -22,9 +22,9 @@ logger = logging.get_logger(__name__) # vocab and merges same as roberta -vocab_url = "https://s3.amazonaws.com/models.huggingface.co/bert/roberta-large-vocab.json" -merges_url = "https://s3.amazonaws.com/models.huggingface.co/bert/roberta-large-merges.txt" -tokenizer_url = "https://s3.amazonaws.com/models.huggingface.co/bert/roberta-large-tokenizer.json" +vocab_url = "https://huggingface.co/roberta-large/resolve/main/vocab.json" +merges_url = "https://huggingface.co/roberta-large/resolve/main/merges.txt" +tokenizer_url = "https://huggingface.co/roberta-large/resolve/main/tokenizer.json" _all_longformer_models = [ "allenai/longformer-base-4096", "allenai/longformer-large-4096", diff --git a/src/transformers/tokenization_lxmert.py b/src/transformers/tokenization_lxmert.py index b8bf3478b6..272b2fa348 100644 --- a/src/transformers/tokenization_lxmert.py +++ b/src/transformers/tokenization_lxmert.py @@ -28,7 +28,7 @@ VOCAB_FILES_NAMES = {"vocab_file": "vocab.txt"} #################################################### PRETRAINED_VOCAB_FILES_MAP = { "vocab_file": { - "unc-nlp/lxmert-base-uncased": "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased-vocab.txt", + "unc-nlp/lxmert-base-uncased": "https://huggingface.co/bert-base-uncased/resolve/main/vocab.txt", } } diff --git a/src/transformers/tokenization_lxmert_fast.py b/src/transformers/tokenization_lxmert_fast.py index e9048414f8..740a22f6b9 100644 --- a/src/transformers/tokenization_lxmert_fast.py +++ b/src/transformers/tokenization_lxmert_fast.py @@ -29,10 +29,10 @@ VOCAB_FILES_NAMES = {"vocab_file": "vocab.txt", "tokenizer_file": "tokenizer.jso #################################################### PRETRAINED_VOCAB_FILES_MAP = { "vocab_file": { - "unc-nlp/lxmert-base-uncased": "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased-vocab.txt", + "unc-nlp/lxmert-base-uncased": "https://huggingface.co/bert-base-uncased/resolve/main/vocab.txt", }, "tokenizer_file": { - "unc-nlp/lxmert-base-uncased": "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased-tokenizer.json", + "unc-nlp/lxmert-base-uncased": "https://huggingface.co/bert-base-uncased/resolve/main/tokenizer.json", }, } diff --git a/src/transformers/tokenization_marian.py b/src/transformers/tokenization_marian.py index 1819b50f0a..cd3ba59336 100644 --- a/src/transformers/tokenization_marian.py +++ b/src/transformers/tokenization_marian.py @@ -31,7 +31,7 @@ PRETRAINED_VOCAB_FILES_MAP = { PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES = {"Helsinki-NLP/opus-mt-en-de": 512} PRETRAINED_INIT_CONFIGURATION = {} -# Example URL https://s3.amazonaws.com/models.huggingface.co/bert/Helsinki-NLP/opus-mt-en-de/vocab.json +# Example URL https://huggingface.co/Helsinki-NLP/opus-mt-en-de/resolve/main/vocab.json class MarianTokenizer(PreTrainedTokenizer): diff --git a/src/transformers/tokenization_mbart.py b/src/transformers/tokenization_mbart.py index 4392395e3e..44cf760be2 100644 --- a/src/transformers/tokenization_mbart.py +++ b/src/transformers/tokenization_mbart.py @@ -25,7 +25,7 @@ from .utils import logging logger = logging.get_logger(__name__) _all_mbart_models = ["facebook/mbart-large-en-ro", "facebook/mbart-large-cc25"] -SPM_URL = "https://s3.amazonaws.com/models.huggingface.co/bert/facebook/mbart-large-en-ro/sentence.bpe.model" +SPM_URL = "https://huggingface.co/facebook/mbart-large-en-ro/resolve/main/sentence.bpe.model" FAIRSEQ_LANGUAGE_CODES = [ "ar_AR", diff --git a/src/transformers/tokenization_mbart_fast.py b/src/transformers/tokenization_mbart_fast.py index 0eadfb8546..3cd7b0ae39 100644 --- a/src/transformers/tokenization_mbart_fast.py +++ b/src/transformers/tokenization_mbart_fast.py @@ -33,8 +33,8 @@ else: logger = logging.get_logger(__name__) _all_mbart_models = ["facebook/mbart-large-en-ro", "facebook/mbart-large-cc25"] -SPM_URL = "https://s3.amazonaws.com/models.huggingface.co/bert/facebook/mbart-large-en-ro/sentence.bpe.model" -tokenizer_URL = "https://s3.amazonaws.com/models.huggingface.co/bert/facebook/mbart-large-en-ro/tokenizer.json" +SPM_URL = "https://huggingface.co/facebook/mbart-large-en-ro/resolve/main/sentence.bpe.model" +tokenizer_URL = "https://huggingface.co/facebook/mbart-large-en-ro/resolve/main/tokenizer.json" FAIRSEQ_LANGUAGE_CODES = [ "ar_AR", diff --git a/src/transformers/tokenization_mobilebert.py b/src/transformers/tokenization_mobilebert.py index 90cc6f45b0..f27c1eca61 100644 --- a/src/transformers/tokenization_mobilebert.py +++ b/src/transformers/tokenization_mobilebert.py @@ -23,7 +23,7 @@ VOCAB_FILES_NAMES = {"vocab_file": "vocab.txt"} PRETRAINED_VOCAB_FILES_MAP = { "vocab_file": { - "mobilebert-uncased": "https://s3.amazonaws.com/models.huggingface.co/bert/google/mobilebert-uncased/vocab.txt" + "mobilebert-uncased": "https://huggingface.co/google/mobilebert-uncased/resolve/main/vocab.txt" } } diff --git a/src/transformers/tokenization_mobilebert_fast.py b/src/transformers/tokenization_mobilebert_fast.py index 63dca58368..3e03084ab4 100644 --- a/src/transformers/tokenization_mobilebert_fast.py +++ b/src/transformers/tokenization_mobilebert_fast.py @@ -24,10 +24,10 @@ VOCAB_FILES_NAMES = {"vocab_file": "vocab.txt", "tokenizer_file": "tokenizer.jso PRETRAINED_VOCAB_FILES_MAP = { "vocab_file": { - "mobilebert-uncased": "https://s3.amazonaws.com/models.huggingface.co/bert/google/mobilebert-uncased/vocab.txt" + "mobilebert-uncased": "https://huggingface.co/google/mobilebert-uncased/resolve/main/vocab.txt" }, "tokenizer_file": { - "mobilebert-uncased": "https://s3.amazonaws.com/models.huggingface.co/bert/google/mobilebert-uncased/tokenizer.json" + "mobilebert-uncased": "https://huggingface.co/google/mobilebert-uncased/resolve/main/tokenizer.json" }, } diff --git a/src/transformers/tokenization_openai.py b/src/transformers/tokenization_openai.py index b47fbf29d9..6bf1307855 100644 --- a/src/transformers/tokenization_openai.py +++ b/src/transformers/tokenization_openai.py @@ -33,8 +33,8 @@ VOCAB_FILES_NAMES = { } PRETRAINED_VOCAB_FILES_MAP = { - "vocab_file": {"openai-gpt": "https://s3.amazonaws.com/models.huggingface.co/bert/openai-gpt-vocab.json"}, - "merges_file": {"openai-gpt": "https://s3.amazonaws.com/models.huggingface.co/bert/openai-gpt-merges.txt"}, + "vocab_file": {"openai-gpt": "https://huggingface.co/openai-gpt/resolve/main/vocab.json"}, + "merges_file": {"openai-gpt": "https://huggingface.co/openai-gpt/resolve/main/merges.txt"}, } PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES = { diff --git a/src/transformers/tokenization_openai_fast.py b/src/transformers/tokenization_openai_fast.py index 8b18b3d1d3..7286b31217 100644 --- a/src/transformers/tokenization_openai_fast.py +++ b/src/transformers/tokenization_openai_fast.py @@ -27,9 +27,9 @@ logger = logging.get_logger(__name__) VOCAB_FILES_NAMES = {"vocab_file": "vocab.json", "merges_file": "merges.txt", "tokenizer_file": "tokenizer.json"} PRETRAINED_VOCAB_FILES_MAP = { - "vocab_file": {"openai-gpt": "https://s3.amazonaws.com/models.huggingface.co/bert/openai-gpt-vocab.json"}, - "merges_file": {"openai-gpt": "https://s3.amazonaws.com/models.huggingface.co/bert/openai-gpt-merges.txt"}, - "tokenizer_file": {"openai-gpt": "https://s3.amazonaws.com/models.huggingface.co/bert/openai-gpt-tokenizer.json"}, + "vocab_file": {"openai-gpt": "https://huggingface.co/openai-gpt/resolve/main/vocab.json"}, + "merges_file": {"openai-gpt": "https://huggingface.co/openai-gpt/resolve/main/merges.txt"}, + "tokenizer_file": {"openai-gpt": "https://huggingface.co/openai-gpt/resolve/main/tokenizer.json"}, } PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES = { diff --git a/src/transformers/tokenization_phobert.py b/src/transformers/tokenization_phobert.py index 7e36eee660..a9a4d3dd7d 100644 --- a/src/transformers/tokenization_phobert.py +++ b/src/transformers/tokenization_phobert.py @@ -34,12 +34,12 @@ VOCAB_FILES_NAMES = { PRETRAINED_VOCAB_FILES_MAP = { "vocab_file": { - "vinai/phobert-base": "https://s3.amazonaws.com/models.huggingface.co/bert/vinai/phobert-base/vocab.txt", - "vinai/phobert-large": "https://s3.amazonaws.com/models.huggingface.co/bert/vinai/phobert-large/vocab.txt", + "vinai/phobert-base": "https://huggingface.co/vinai/phobert-base/resolve/main/vocab.txt", + "vinai/phobert-large": "https://huggingface.co/vinai/phobert-large/resolve/main/vocab.txt", }, "merges_file": { - "vinai/phobert-base": "https://s3.amazonaws.com/models.huggingface.co/bert/vinai/phobert-base/bpe.codes", - "vinai/phobert-large": "https://s3.amazonaws.com/models.huggingface.co/bert/vinai/phobert-large/bpe.codes", + "vinai/phobert-base": "https://huggingface.co/vinai/phobert-base/resolve/main/bpe.codes", + "vinai/phobert-large": "https://huggingface.co/vinai/phobert-large/resolve/main/bpe.codes", }, } diff --git a/src/transformers/tokenization_prophetnet.py b/src/transformers/tokenization_prophetnet.py index 6936c032f9..541ad47e28 100644 --- a/src/transformers/tokenization_prophetnet.py +++ b/src/transformers/tokenization_prophetnet.py @@ -28,7 +28,7 @@ VOCAB_FILES_NAMES = {"vocab_file": "prophetnet.tokenizer"} PRETRAINED_VOCAB_FILES_MAP = { "vocab_file": { - "microsoft/prophetnet-large-uncased": "https://s3.amazonaws.com/models.huggingface.co/bert/microsoft/prophetnet-large-uncased/prophetnet.tokenizer", + "microsoft/prophetnet-large-uncased": "https://huggingface.co/microsoft/prophetnet-large-uncased/resolve/main/prophetnet.tokenizer", } } diff --git a/src/transformers/tokenization_retribert.py b/src/transformers/tokenization_retribert.py index 9da6e2ad61..8627d639bd 100644 --- a/src/transformers/tokenization_retribert.py +++ b/src/transformers/tokenization_retribert.py @@ -24,7 +24,7 @@ VOCAB_FILES_NAMES = {"vocab_file": "vocab.txt"} PRETRAINED_VOCAB_FILES_MAP = { "vocab_file": { - "yjernite/retribert-base-uncased": "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased-vocab.txt", + "yjernite/retribert-base-uncased": "https://huggingface.co/bert-base-uncased/resolve/main/vocab.txt", } } diff --git a/src/transformers/tokenization_retribert_fast.py b/src/transformers/tokenization_retribert_fast.py index 1d9cf5feb6..c137caa893 100644 --- a/src/transformers/tokenization_retribert_fast.py +++ b/src/transformers/tokenization_retribert_fast.py @@ -25,10 +25,10 @@ VOCAB_FILES_NAMES = {"vocab_file": "vocab.txt", "tokenizer_file": "tokenizer.jso PRETRAINED_VOCAB_FILES_MAP = { "vocab_file": { - "yjernite/retribert-base-uncased": "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased-vocab.txt", + "yjernite/retribert-base-uncased": "https://huggingface.co/bert-base-uncased/resolve/main/vocab.txt", }, "tokenizer_file": { - "yjernite/retribert-base-uncased": "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased-tokenizer.json", + "yjernite/retribert-base-uncased": "https://huggingface.co/bert-base-uncased/resolve/main/tokenizer.json", }, } diff --git a/src/transformers/tokenization_roberta.py b/src/transformers/tokenization_roberta.py index f37c14ae5b..9d7731baef 100644 --- a/src/transformers/tokenization_roberta.py +++ b/src/transformers/tokenization_roberta.py @@ -31,20 +31,20 @@ VOCAB_FILES_NAMES = { PRETRAINED_VOCAB_FILES_MAP = { "vocab_file": { - "roberta-base": "https://s3.amazonaws.com/models.huggingface.co/bert/roberta-base-vocab.json", - "roberta-large": "https://s3.amazonaws.com/models.huggingface.co/bert/roberta-large-vocab.json", - "roberta-large-mnli": "https://s3.amazonaws.com/models.huggingface.co/bert/roberta-large-mnli-vocab.json", - "distilroberta-base": "https://s3.amazonaws.com/models.huggingface.co/bert/distilroberta-base-vocab.json", - "roberta-base-openai-detector": "https://s3.amazonaws.com/models.huggingface.co/bert/roberta-base-vocab.json", - "roberta-large-openai-detector": "https://s3.amazonaws.com/models.huggingface.co/bert/roberta-large-vocab.json", + "roberta-base": "https://huggingface.co/roberta-base/resolve/main/vocab.json", + "roberta-large": "https://huggingface.co/roberta-large/resolve/main/vocab.json", + "roberta-large-mnli": "https://huggingface.co/roberta-large-mnli/resolve/main/vocab.json", + "distilroberta-base": "https://huggingface.co/distilroberta-base/resolve/main/vocab.json", + "roberta-base-openai-detector": "https://huggingface.co/roberta-base/resolve/main/vocab.json", + "roberta-large-openai-detector": "https://huggingface.co/roberta-large/resolve/main/vocab.json", }, "merges_file": { - "roberta-base": "https://s3.amazonaws.com/models.huggingface.co/bert/roberta-base-merges.txt", - "roberta-large": "https://s3.amazonaws.com/models.huggingface.co/bert/roberta-large-merges.txt", - "roberta-large-mnli": "https://s3.amazonaws.com/models.huggingface.co/bert/roberta-large-mnli-merges.txt", - "distilroberta-base": "https://s3.amazonaws.com/models.huggingface.co/bert/distilroberta-base-merges.txt", - "roberta-base-openai-detector": "https://s3.amazonaws.com/models.huggingface.co/bert/roberta-base-merges.txt", - "roberta-large-openai-detector": "https://s3.amazonaws.com/models.huggingface.co/bert/roberta-large-merges.txt", + "roberta-base": "https://huggingface.co/roberta-base/resolve/main/merges.txt", + "roberta-large": "https://huggingface.co/roberta-large/resolve/main/merges.txt", + "roberta-large-mnli": "https://huggingface.co/roberta-large-mnli/resolve/main/merges.txt", + "distilroberta-base": "https://huggingface.co/distilroberta-base/resolve/main/merges.txt", + "roberta-base-openai-detector": "https://huggingface.co/roberta-base/resolve/main/merges.txt", + "roberta-large-openai-detector": "https://huggingface.co/roberta-large/resolve/main/merges.txt", }, } diff --git a/src/transformers/tokenization_roberta_fast.py b/src/transformers/tokenization_roberta_fast.py index 3709aec944..9ddaa514c3 100644 --- a/src/transformers/tokenization_roberta_fast.py +++ b/src/transformers/tokenization_roberta_fast.py @@ -27,28 +27,28 @@ VOCAB_FILES_NAMES = {"vocab_file": "vocab.json", "merges_file": "merges.txt", "t PRETRAINED_VOCAB_FILES_MAP = { "vocab_file": { - "roberta-base": "https://s3.amazonaws.com/models.huggingface.co/bert/roberta-base-vocab.json", - "roberta-large": "https://s3.amazonaws.com/models.huggingface.co/bert/roberta-large-vocab.json", - "roberta-large-mnli": "https://s3.amazonaws.com/models.huggingface.co/bert/roberta-large-mnli-vocab.json", - "distilroberta-base": "https://s3.amazonaws.com/models.huggingface.co/bert/distilroberta-base-vocab.json", - "roberta-base-openai-detector": "https://s3.amazonaws.com/models.huggingface.co/bert/roberta-base-vocab.json", - "roberta-large-openai-detector": "https://s3.amazonaws.com/models.huggingface.co/bert/roberta-large-vocab.json", + "roberta-base": "https://huggingface.co/roberta-base/resolve/main/vocab.json", + "roberta-large": "https://huggingface.co/roberta-large/resolve/main/vocab.json", + "roberta-large-mnli": "https://huggingface.co/roberta-large-mnli/resolve/main/vocab.json", + "distilroberta-base": "https://huggingface.co/distilroberta-base/resolve/main/vocab.json", + "roberta-base-openai-detector": "https://huggingface.co/roberta-base/resolve/main/vocab.json", + "roberta-large-openai-detector": "https://huggingface.co/roberta-large/resolve/main/vocab.json", }, "merges_file": { - "roberta-base": "https://s3.amazonaws.com/models.huggingface.co/bert/roberta-base-merges.txt", - "roberta-large": "https://s3.amazonaws.com/models.huggingface.co/bert/roberta-large-merges.txt", - "roberta-large-mnli": "https://s3.amazonaws.com/models.huggingface.co/bert/roberta-large-mnli-merges.txt", - "distilroberta-base": "https://s3.amazonaws.com/models.huggingface.co/bert/distilroberta-base-merges.txt", - "roberta-base-openai-detector": "https://s3.amazonaws.com/models.huggingface.co/bert/roberta-base-merges.txt", - "roberta-large-openai-detector": "https://s3.amazonaws.com/models.huggingface.co/bert/roberta-large-merges.txt", + "roberta-base": "https://huggingface.co/roberta-base/resolve/main/merges.txt", + "roberta-large": "https://huggingface.co/roberta-large/resolve/main/merges.txt", + "roberta-large-mnli": "https://huggingface.co/roberta-large-mnli/resolve/main/merges.txt", + "distilroberta-base": "https://huggingface.co/distilroberta-base/resolve/main/merges.txt", + "roberta-base-openai-detector": "https://huggingface.co/roberta-base/resolve/main/merges.txt", + "roberta-large-openai-detector": "https://huggingface.co/roberta-large/resolve/main/merges.txt", }, "tokenizer_file": { - "roberta-base": "https://s3.amazonaws.com/models.huggingface.co/bert/roberta-base-tokenizer.json", - "roberta-large": "https://s3.amazonaws.com/models.huggingface.co/bert/roberta-large-tokenizer.json", - "roberta-large-mnli": "https://s3.amazonaws.com/models.huggingface.co/bert/roberta-large-mnli-tokenizer.json", - "distilroberta-base": "https://s3.amazonaws.com/models.huggingface.co/bert/distilroberta-base-tokenizer.json", - "roberta-base-openai-detector": "https://s3.amazonaws.com/models.huggingface.co/bert/roberta-base-tokenizer.json", - "roberta-large-openai-detector": "https://s3.amazonaws.com/models.huggingface.co/bert/roberta-large-tokenizer.json", + "roberta-base": "https://huggingface.co/roberta-base/resolve/main/tokenizer.json", + "roberta-large": "https://huggingface.co/roberta-large/resolve/main/tokenizer.json", + "roberta-large-mnli": "https://huggingface.co/roberta-large-mnli/resolve/main/tokenizer.json", + "distilroberta-base": "https://huggingface.co/distilroberta-base/resolve/main/tokenizer.json", + "roberta-base-openai-detector": "https://huggingface.co/roberta-base/resolve/main/tokenizer.json", + "roberta-large-openai-detector": "https://huggingface.co/roberta-large/resolve/main/tokenizer.json", }, } diff --git a/src/transformers/tokenization_squeezebert.py b/src/transformers/tokenization_squeezebert.py index 1e5ead4443..8629597b8e 100644 --- a/src/transformers/tokenization_squeezebert.py +++ b/src/transformers/tokenization_squeezebert.py @@ -24,9 +24,9 @@ VOCAB_FILES_NAMES = {"vocab_file": "vocab.txt"} PRETRAINED_VOCAB_FILES_MAP = { "vocab_file": { - "squeezebert/squeezebert-uncased": "https://s3.amazonaws.com/models.huggingface.co/bert/squeezebert/squeezebert-uncased/vocab.txt", - "squeezebert/squeezebert-mnli": "https://s3.amazonaws.com/models.huggingface.co/bert/squeezebert/squeezebert-mnli/vocab.txt", - "squeezebert/squeezebert-mnli-headless": "https://s3.amazonaws.com/models.huggingface.co/bert/squeezebert/squeezebert-mnli-headless/vocab.txt", + "squeezebert/squeezebert-uncased": "https://huggingface.co/squeezebert/squeezebert-uncased/resolve/main/vocab.txt", + "squeezebert/squeezebert-mnli": "https://huggingface.co/squeezebert/squeezebert-mnli/resolve/main/vocab.txt", + "squeezebert/squeezebert-mnli-headless": "https://huggingface.co/squeezebert/squeezebert-mnli-headless/resolve/main/vocab.txt", } } diff --git a/src/transformers/tokenization_squeezebert_fast.py b/src/transformers/tokenization_squeezebert_fast.py index 2576e19a8f..faa84b99a7 100644 --- a/src/transformers/tokenization_squeezebert_fast.py +++ b/src/transformers/tokenization_squeezebert_fast.py @@ -25,14 +25,14 @@ VOCAB_FILES_NAMES = {"vocab_file": "vocab.txt", "tokenizer_file": "tokenizer.jso PRETRAINED_VOCAB_FILES_MAP = { "vocab_file": { - "squeezebert/squeezebert-uncased": "https://s3.amazonaws.com/models.huggingface.co/bert/squeezebert/squeezebert-uncased/vocab.txt", - "squeezebert/squeezebert-mnli": "https://s3.amazonaws.com/models.huggingface.co/bert/squeezebert/squeezebert-mnli/vocab.txt", - "squeezebert/squeezebert-mnli-headless": "https://s3.amazonaws.com/models.huggingface.co/bert/squeezebert/squeezebert-mnli-headless/vocab.txt", + "squeezebert/squeezebert-uncased": "https://huggingface.co/squeezebert/squeezebert-uncased/resolve/main/vocab.txt", + "squeezebert/squeezebert-mnli": "https://huggingface.co/squeezebert/squeezebert-mnli/resolve/main/vocab.txt", + "squeezebert/squeezebert-mnli-headless": "https://huggingface.co/squeezebert/squeezebert-mnli-headless/resolve/main/vocab.txt", }, "tokenizer_file": { - "squeezebert/squeezebert-uncased": "https://s3.amazonaws.com/models.huggingface.co/bert/squeezebert/squeezebert-uncased/tokenizer.json", - "squeezebert/squeezebert-mnli": "https://s3.amazonaws.com/models.huggingface.co/bert/squeezebert/squeezebert-mnli/tokenizer.json", - "squeezebert/squeezebert-mnli-headless": "https://s3.amazonaws.com/models.huggingface.co/bert/squeezebert/squeezebert-mnli-headless/tokenizer.json", + "squeezebert/squeezebert-uncased": "https://huggingface.co/squeezebert/squeezebert-uncased/resolve/main/tokenizer.json", + "squeezebert/squeezebert-mnli": "https://huggingface.co/squeezebert/squeezebert-mnli/resolve/main/tokenizer.json", + "squeezebert/squeezebert-mnli-headless": "https://huggingface.co/squeezebert/squeezebert-mnli-headless/resolve/main/tokenizer.json", }, } diff --git a/src/transformers/tokenization_t5.py b/src/transformers/tokenization_t5.py index bc69fc5d56..d23e79e5db 100644 --- a/src/transformers/tokenization_t5.py +++ b/src/transformers/tokenization_t5.py @@ -43,11 +43,11 @@ VOCAB_FILES_NAMES = {"vocab_file": "spiece.model"} #################################################### PRETRAINED_VOCAB_FILES_MAP = { "vocab_file": { - "t5-small": "https://s3.amazonaws.com/models.huggingface.co/bert/t5-spiece.model", - "t5-base": "https://s3.amazonaws.com/models.huggingface.co/bert/t5-spiece.model", - "t5-large": "https://s3.amazonaws.com/models.huggingface.co/bert/t5-spiece.model", - "t5-3b": "https://s3.amazonaws.com/models.huggingface.co/bert/t5-spiece.model", - "t5-11b": "https://s3.amazonaws.com/models.huggingface.co/bert/t5-spiece.model", + "t5-small": "https://huggingface.co/t5-small/resolve/main/spiece.model", + "t5-base": "https://huggingface.co/t5-base/resolve/main/spiece.model", + "t5-large": "https://huggingface.co/t5-large/resolve/main/spiece.model", + "t5-3b": "https://huggingface.co/t5-3b/resolve/main/spiece.model", + "t5-11b": "https://huggingface.co/t5-11b/resolve/main/spiece.model", } } diff --git a/src/transformers/tokenization_t5_fast.py b/src/transformers/tokenization_t5_fast.py index 0f96d51fb7..e64d8ca724 100644 --- a/src/transformers/tokenization_t5_fast.py +++ b/src/transformers/tokenization_t5_fast.py @@ -46,18 +46,18 @@ VOCAB_FILES_NAMES = {"vocab_file": "spiece.model", "tokenizer_file": "tokenizer. #################################################### PRETRAINED_VOCAB_FILES_MAP = { "vocab_file": { - "t5-small": "https://s3.amazonaws.com/models.huggingface.co/bert/t5-spiece.model", - "t5-base": "https://s3.amazonaws.com/models.huggingface.co/bert/t5-spiece.model", - "t5-large": "https://s3.amazonaws.com/models.huggingface.co/bert/t5-spiece.model", - "t5-3b": "https://s3.amazonaws.com/models.huggingface.co/bert/t5-spiece.model", - "t5-11b": "https://s3.amazonaws.com/models.huggingface.co/bert/t5-spiece.model", + "t5-small": "https://huggingface.co/t5-small/resolve/main/spiece.model", + "t5-base": "https://huggingface.co/t5-base/resolve/main/spiece.model", + "t5-large": "https://huggingface.co/t5-large/resolve/main/spiece.model", + "t5-3b": "https://huggingface.co/t5-3b/resolve/main/spiece.model", + "t5-11b": "https://huggingface.co/t5-11b/resolve/main/spiece.model", }, "tokenizer_file": { - "t5-small": "https://s3.amazonaws.com/models.huggingface.co/bert/t5-tokenizer.json", - "t5-base": "https://s3.amazonaws.com/models.huggingface.co/bert/t5-tokenizer.json", - "t5-large": "https://s3.amazonaws.com/models.huggingface.co/bert/t5-tokenizer.json", - "t5-3b": "https://s3.amazonaws.com/models.huggingface.co/bert/t5-tokenizer.json", - "t5-11b": "https://s3.amazonaws.com/models.huggingface.co/bert/t5-tokenizer.json", + "t5-small": "https://huggingface.co/t5-small/resolve/main/tokenizer.json", + "t5-base": "https://huggingface.co/t5-base/resolve/main/tokenizer.json", + "t5-large": "https://huggingface.co/t5-large/resolve/main/tokenizer.json", + "t5-3b": "https://huggingface.co/t5-3b/resolve/main/tokenizer.json", + "t5-11b": "https://huggingface.co/t5-11b/resolve/main/tokenizer.json", }, } diff --git a/src/transformers/tokenization_transfo_xl.py b/src/transformers/tokenization_transfo_xl.py index d6b1f3a4f4..9d728a0756 100644 --- a/src/transformers/tokenization_transfo_xl.py +++ b/src/transformers/tokenization_transfo_xl.py @@ -48,7 +48,7 @@ VOCAB_FILES_NAMES = { PRETRAINED_VOCAB_FILES_MAP = { "pretrained_vocab_file": { - "transfo-xl-wt103": "https://s3.amazonaws.com/models.huggingface.co/bert/transfo-xl-wt103-vocab.pkl", + "transfo-xl-wt103": "https://huggingface.co/transfo-xl-wt103/resolve/main/vocab.pkl", } } @@ -57,7 +57,7 @@ PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES = { } PRETRAINED_CORPUS_ARCHIVE_MAP = { - "transfo-xl-wt103": "https://s3.amazonaws.com/models.huggingface.co/bert/transfo-xl-wt103-corpus.bin", + "transfo-xl-wt103": "https://huggingface.co/transfo-xl-wt103/resolve/main/corpus.bin", } CORPUS_NAME = "corpus.bin" diff --git a/src/transformers/tokenization_xlm.py b/src/transformers/tokenization_xlm.py index df6eb74113..577cdc6efa 100644 --- a/src/transformers/tokenization_xlm.py +++ b/src/transformers/tokenization_xlm.py @@ -37,28 +37,28 @@ VOCAB_FILES_NAMES = { PRETRAINED_VOCAB_FILES_MAP = { "vocab_file": { - "xlm-mlm-en-2048": "https://s3.amazonaws.com/models.huggingface.co/bert/xlm-mlm-en-2048-vocab.json", - "xlm-mlm-ende-1024": "https://s3.amazonaws.com/models.huggingface.co/bert/xlm-mlm-ende-1024-vocab.json", - "xlm-mlm-enfr-1024": "https://s3.amazonaws.com/models.huggingface.co/bert/xlm-mlm-enfr-1024-vocab.json", - "xlm-mlm-enro-1024": "https://s3.amazonaws.com/models.huggingface.co/bert/xlm-mlm-enro-1024-vocab.json", - "xlm-mlm-tlm-xnli15-1024": "https://s3.amazonaws.com/models.huggingface.co/bert/xlm-mlm-tlm-xnli15-1024-vocab.json", - "xlm-mlm-xnli15-1024": "https://s3.amazonaws.com/models.huggingface.co/bert/xlm-mlm-xnli15-1024-vocab.json", - "xlm-clm-enfr-1024": "https://s3.amazonaws.com/models.huggingface.co/bert/xlm-clm-enfr-1024-vocab.json", - "xlm-clm-ende-1024": "https://s3.amazonaws.com/models.huggingface.co/bert/xlm-clm-ende-1024-vocab.json", - "xlm-mlm-17-1280": "https://s3.amazonaws.com/models.huggingface.co/bert/xlm-mlm-17-1280-vocab.json", - "xlm-mlm-100-1280": "https://s3.amazonaws.com/models.huggingface.co/bert/xlm-mlm-100-1280-vocab.json", + "xlm-mlm-en-2048": "https://huggingface.co/xlm-mlm-en-2048/resolve/main/vocab.json", + "xlm-mlm-ende-1024": "https://huggingface.co/xlm-mlm-ende-1024/resolve/main/vocab.json", + "xlm-mlm-enfr-1024": "https://huggingface.co/xlm-mlm-enfr-1024/resolve/main/vocab.json", + "xlm-mlm-enro-1024": "https://huggingface.co/xlm-mlm-enro-1024/resolve/main/vocab.json", + "xlm-mlm-tlm-xnli15-1024": "https://huggingface.co/xlm-mlm-tlm-xnli15-1024/resolve/main/vocab.json", + "xlm-mlm-xnli15-1024": "https://huggingface.co/xlm-mlm-xnli15-1024/resolve/main/vocab.json", + "xlm-clm-enfr-1024": "https://huggingface.co/xlm-clm-enfr-1024/resolve/main/vocab.json", + "xlm-clm-ende-1024": "https://huggingface.co/xlm-clm-ende-1024/resolve/main/vocab.json", + "xlm-mlm-17-1280": "https://huggingface.co/xlm-mlm-17-1280/resolve/main/vocab.json", + "xlm-mlm-100-1280": "https://huggingface.co/xlm-mlm-100-1280/resolve/main/vocab.json", }, "merges_file": { - "xlm-mlm-en-2048": "https://s3.amazonaws.com/models.huggingface.co/bert/xlm-mlm-en-2048-merges.txt", - "xlm-mlm-ende-1024": "https://s3.amazonaws.com/models.huggingface.co/bert/xlm-mlm-ende-1024-merges.txt", - "xlm-mlm-enfr-1024": "https://s3.amazonaws.com/models.huggingface.co/bert/xlm-mlm-enfr-1024-merges.txt", - "xlm-mlm-enro-1024": "https://s3.amazonaws.com/models.huggingface.co/bert/xlm-mlm-enro-1024-merges.txt", - "xlm-mlm-tlm-xnli15-1024": "https://s3.amazonaws.com/models.huggingface.co/bert/xlm-mlm-tlm-xnli15-1024-merges.txt", - "xlm-mlm-xnli15-1024": "https://s3.amazonaws.com/models.huggingface.co/bert/xlm-mlm-xnli15-1024-merges.txt", - "xlm-clm-enfr-1024": "https://s3.amazonaws.com/models.huggingface.co/bert/xlm-mlm-enfr-1024-merges.txt", - "xlm-clm-ende-1024": "https://s3.amazonaws.com/models.huggingface.co/bert/xlm-mlm-ende-1024-merges.txt", - "xlm-mlm-17-1280": "https://s3.amazonaws.com/models.huggingface.co/bert/xlm-mlm-17-1280-merges.txt", - "xlm-mlm-100-1280": "https://s3.amazonaws.com/models.huggingface.co/bert/xlm-mlm-100-1280-merges.txt", + "xlm-mlm-en-2048": "https://huggingface.co/xlm-mlm-en-2048/resolve/main/merges.txt", + "xlm-mlm-ende-1024": "https://huggingface.co/xlm-mlm-ende-1024/resolve/main/merges.txt", + "xlm-mlm-enfr-1024": "https://huggingface.co/xlm-mlm-enfr-1024/resolve/main/merges.txt", + "xlm-mlm-enro-1024": "https://huggingface.co/xlm-mlm-enro-1024/resolve/main/merges.txt", + "xlm-mlm-tlm-xnli15-1024": "https://huggingface.co/xlm-mlm-tlm-xnli15-1024/resolve/main/merges.txt", + "xlm-mlm-xnli15-1024": "https://huggingface.co/xlm-mlm-xnli15-1024/resolve/main/merges.txt", + "xlm-clm-enfr-1024": "https://huggingface.co/xlm-mlm-enfr-1024/resolve/main/merges.txt", + "xlm-clm-ende-1024": "https://huggingface.co/xlm-mlm-ende-1024/resolve/main/merges.txt", + "xlm-mlm-17-1280": "https://huggingface.co/xlm-mlm-17-1280/resolve/main/merges.txt", + "xlm-mlm-100-1280": "https://huggingface.co/xlm-mlm-100-1280/resolve/main/merges.txt", }, } diff --git a/src/transformers/tokenization_xlm_roberta.py b/src/transformers/tokenization_xlm_roberta.py index 031b6ab072..892abbd826 100644 --- a/src/transformers/tokenization_xlm_roberta.py +++ b/src/transformers/tokenization_xlm_roberta.py @@ -33,12 +33,12 @@ VOCAB_FILES_NAMES = {"vocab_file": "sentencepiece.bpe.model"} PRETRAINED_VOCAB_FILES_MAP = { "vocab_file": { - "xlm-roberta-base": "https://s3.amazonaws.com/models.huggingface.co/bert/xlm-roberta-base-sentencepiece.bpe.model", - "xlm-roberta-large": "https://s3.amazonaws.com/models.huggingface.co/bert/xlm-roberta-large-sentencepiece.bpe.model", - "xlm-roberta-large-finetuned-conll02-dutch": "https://s3.amazonaws.com/models.huggingface.co/bert/xlm-roberta-large-finetuned-conll02-dutch-sentencepiece.bpe.model", - "xlm-roberta-large-finetuned-conll02-spanish": "https://s3.amazonaws.com/models.huggingface.co/bert/xlm-roberta-large-finetuned-conll02-spanish-sentencepiece.bpe.model", - "xlm-roberta-large-finetuned-conll03-english": "https://s3.amazonaws.com/models.huggingface.co/bert/xlm-roberta-large-finetuned-conll03-english-sentencepiece.bpe.model", - "xlm-roberta-large-finetuned-conll03-german": "https://s3.amazonaws.com/models.huggingface.co/bert/xlm-roberta-large-finetuned-conll03-german-sentencepiece.bpe.model", + "xlm-roberta-base": "https://huggingface.co/xlm-roberta-base/resolve/main/sentencepiece.bpe.model", + "xlm-roberta-large": "https://huggingface.co/xlm-roberta-large/resolve/main/sentencepiece.bpe.model", + "xlm-roberta-large-finetuned-conll02-dutch": "https://huggingface.co/xlm-roberta-large-finetuned-conll02-dutch/resolve/main/sentencepiece.bpe.model", + "xlm-roberta-large-finetuned-conll02-spanish": "https://huggingface.co/xlm-roberta-large-finetuned-conll02-spanish/resolve/main/sentencepiece.bpe.model", + "xlm-roberta-large-finetuned-conll03-english": "https://huggingface.co/xlm-roberta-large-finetuned-conll03-english/resolve/main/sentencepiece.bpe.model", + "xlm-roberta-large-finetuned-conll03-german": "https://huggingface.co/xlm-roberta-large-finetuned-conll03-german/resolve/main/sentencepiece.bpe.model", } } diff --git a/src/transformers/tokenization_xlm_roberta_fast.py b/src/transformers/tokenization_xlm_roberta_fast.py index 27878706d1..4c59d2d79f 100644 --- a/src/transformers/tokenization_xlm_roberta_fast.py +++ b/src/transformers/tokenization_xlm_roberta_fast.py @@ -36,20 +36,20 @@ VOCAB_FILES_NAMES = {"vocab_file": "sentencepiece.bpe.model", "tokenizer_file": PRETRAINED_VOCAB_FILES_MAP = { "vocab_file": { - "xlm-roberta-base": "https://s3.amazonaws.com/models.huggingface.co/bert/xlm-roberta-base-sentencepiece.bpe.model", - "xlm-roberta-large": "https://s3.amazonaws.com/models.huggingface.co/bert/xlm-roberta-large-sentencepiece.bpe.model", - "xlm-roberta-large-finetuned-conll02-dutch": "https://s3.amazonaws.com/models.huggingface.co/bert/xlm-roberta-large-finetuned-conll02-dutch-sentencepiece.bpe.model", - "xlm-roberta-large-finetuned-conll02-spanish": "https://s3.amazonaws.com/models.huggingface.co/bert/xlm-roberta-large-finetuned-conll02-spanish-sentencepiece.bpe.model", - "xlm-roberta-large-finetuned-conll03-english": "https://s3.amazonaws.com/models.huggingface.co/bert/xlm-roberta-large-finetuned-conll03-english-sentencepiece.bpe.model", - "xlm-roberta-large-finetuned-conll03-german": "https://s3.amazonaws.com/models.huggingface.co/bert/xlm-roberta-large-finetuned-conll03-german-sentencepiece.bpe.model", + "xlm-roberta-base": "https://huggingface.co/xlm-roberta-base/resolve/main/sentencepiece.bpe.model", + "xlm-roberta-large": "https://huggingface.co/xlm-roberta-large/resolve/main/sentencepiece.bpe.model", + "xlm-roberta-large-finetuned-conll02-dutch": "https://huggingface.co/xlm-roberta-large-finetuned-conll02-dutch/resolve/main/sentencepiece.bpe.model", + "xlm-roberta-large-finetuned-conll02-spanish": "https://huggingface.co/xlm-roberta-large-finetuned-conll02-spanish/resolve/main/sentencepiece.bpe.model", + "xlm-roberta-large-finetuned-conll03-english": "https://huggingface.co/xlm-roberta-large-finetuned-conll03-english/resolve/main/sentencepiece.bpe.model", + "xlm-roberta-large-finetuned-conll03-german": "https://huggingface.co/xlm-roberta-large-finetuned-conll03-german/resolve/main/sentencepiece.bpe.model", }, "tokenizer_file": { - "xlm-roberta-base": "https://s3.amazonaws.com/models.huggingface.co/bert/xlm-roberta-base-tokenizer.json", - "xlm-roberta-large": "https://s3.amazonaws.com/models.huggingface.co/bert/xlm-roberta-large-tokenizer.json", - "xlm-roberta-large-finetuned-conll02-dutch": "https://s3.amazonaws.com/models.huggingface.co/bert/xlm-roberta-large-finetuned-conll02-dutch-tokenizer.json", - "xlm-roberta-large-finetuned-conll02-spanish": "https://s3.amazonaws.com/models.huggingface.co/bert/xlm-roberta-large-finetuned-conll02-spanish-tokenizer.json", - "xlm-roberta-large-finetuned-conll03-english": "https://s3.amazonaws.com/models.huggingface.co/bert/xlm-roberta-large-finetuned-conll03-english-tokenizer.json", - "xlm-roberta-large-finetuned-conll03-german": "https://s3.amazonaws.com/models.huggingface.co/bert/xlm-roberta-large-finetuned-conll03-german-tokenizer.json", + "xlm-roberta-base": "https://huggingface.co/xlm-roberta-base/resolve/main/tokenizer.json", + "xlm-roberta-large": "https://huggingface.co/xlm-roberta-large/resolve/main/tokenizer.json", + "xlm-roberta-large-finetuned-conll02-dutch": "https://huggingface.co/xlm-roberta-large-finetuned-conll02-dutch/resolve/main/tokenizer.json", + "xlm-roberta-large-finetuned-conll02-spanish": "https://huggingface.co/xlm-roberta-large-finetuned-conll02-spanish/resolve/main/tokenizer.json", + "xlm-roberta-large-finetuned-conll03-english": "https://huggingface.co/xlm-roberta-large-finetuned-conll03-english/resolve/main/tokenizer.json", + "xlm-roberta-large-finetuned-conll03-german": "https://huggingface.co/xlm-roberta-large-finetuned-conll03-german/resolve/main/tokenizer.json", }, } diff --git a/src/transformers/tokenization_xlnet.py b/src/transformers/tokenization_xlnet.py index fb3b9465a4..4aaad1604a 100644 --- a/src/transformers/tokenization_xlnet.py +++ b/src/transformers/tokenization_xlnet.py @@ -33,8 +33,8 @@ VOCAB_FILES_NAMES = {"vocab_file": "spiece.model"} PRETRAINED_VOCAB_FILES_MAP = { "vocab_file": { - "xlnet-base-cased": "https://s3.amazonaws.com/models.huggingface.co/bert/xlnet-base-cased-spiece.model", - "xlnet-large-cased": "https://s3.amazonaws.com/models.huggingface.co/bert/xlnet-large-cased-spiece.model", + "xlnet-base-cased": "https://huggingface.co/xlnet-base-cased/resolve/main/spiece.model", + "xlnet-large-cased": "https://huggingface.co/xlnet-large-cased/resolve/main/spiece.model", } } diff --git a/src/transformers/tokenization_xlnet_fast.py b/src/transformers/tokenization_xlnet_fast.py index 671b39da96..336090124c 100644 --- a/src/transformers/tokenization_xlnet_fast.py +++ b/src/transformers/tokenization_xlnet_fast.py @@ -36,12 +36,12 @@ VOCAB_FILES_NAMES = {"vocab_file": "spiece.model", "tokenizer_file": "tokenizer. PRETRAINED_VOCAB_FILES_MAP = { "vocab_file": { - "xlnet-base-cased": "https://s3.amazonaws.com/models.huggingface.co/bert/xlnet-base-cased-spiece.model", - "xlnet-large-cased": "https://s3.amazonaws.com/models.huggingface.co/bert/xlnet-large-cased-spiece.model", + "xlnet-base-cased": "https://huggingface.co/xlnet-base-cased/resolve/main/spiece.model", + "xlnet-large-cased": "https://huggingface.co/xlnet-large-cased/resolve/main/spiece.model", }, "tokenizer_file": { - "xlnet-base-cased": "https://s3.amazonaws.com/models.huggingface.co/bert/xlnet-base-cased-tokenizer.json", - "xlnet-large-cased": "https://s3.amazonaws.com/models.huggingface.co/bert/xlnet-large-cased-tokenizer.json", + "xlnet-base-cased": "https://huggingface.co/xlnet-base-cased/resolve/main/tokenizer.json", + "xlnet-large-cased": "https://huggingface.co/xlnet-large-cased/resolve/main/tokenizer.json", }, }