From d4c2cb402d6674211726fd5f4803d1090664e438 Mon Sep 17 00:00:00 2001 From: Julien Chaumond Date: Tue, 2 Jun 2020 09:39:33 -0400 Subject: [PATCH] Kill model archive maps (#4636) * Kill model archive maps * Fixup * Also kill model_archive_map for MaskedBertPreTrainedModel * Unhook config_archive_map * Tokenizers: align with model id changes * make style && make quality * Fix CI --- docs/source/pretrained_models.rst | 34 +-- examples/adversarial/test_hans.py | 9 +- examples/contrib/mm-imdb/run_mmimdb.py | 66 +---- examples/contrib/run_swag.py | 46 +--- .../distillation/run_squad_w_distillation.py | 5 +- .../emmental/configuration_bert_masked.py | 2 - .../emmental/modeling_bert_masked.py | 8 +- examples/movement-pruning/masked_run_glue.py | 4 +- examples/movement-pruning/masked_run_squad.py | 4 +- examples/question-answering/run_squad.py | 4 +- .../bertabs/configuration_bertabs.py | 1 - .../summarization/bertabs/modeling_bertabs.py | 7 +- examples/text-classification/README.md | 3 +- examples/text-classification/run_xnli.py | 57 ++-- examples/token-classification/run_ner.py | 2 +- src/transformers/__init__.py | 68 +++-- src/transformers/configuration_albert.py | 7 +- src/transformers/configuration_auto.py | 50 ++-- src/transformers/configuration_bart.py | 11 +- src/transformers/configuration_bert.py | 20 +- src/transformers/configuration_camembert.py | 1 - src/transformers/configuration_ctrl.py | 7 +- src/transformers/configuration_distilbert.py | 5 - src/transformers/configuration_electra.py | 5 - src/transformers/configuration_flaubert.py | 9 +- src/transformers/configuration_gpt2.py | 5 - src/transformers/configuration_longformer.py | 7 +- src/transformers/configuration_marian.py | 3 +- src/transformers/configuration_openai.py | 7 +- src/transformers/configuration_reformer.py | 5 - src/transformers/configuration_roberta.py | 7 +- src/transformers/configuration_t5.py | 1 - src/transformers/configuration_transfo_xl.py | 7 +- src/transformers/configuration_utils.py | 
37 +-- src/transformers/configuration_xlm.py | 5 - src/transformers/configuration_xlm_roberta.py | 1 - src/transformers/configuration_xlnet.py | 5 - .../convert_pytorch_checkpoint_to_tf2.py | 187 +------------ src/transformers/modeling_albert.py | 23 +- src/transformers/modeling_auto.py | 226 ++++++--------- src/transformers/modeling_bart.py | 17 +- src/transformers/modeling_bert.py | 50 ++-- src/transformers/modeling_camembert.py | 17 +- src/transformers/modeling_ctrl.py | 6 +- src/transformers/modeling_distilbert.py | 20 +- src/transformers/modeling_electra.py | 18 +- src/transformers/modeling_flaubert.py | 18 +- src/transformers/modeling_gpt2.py | 16 +- src/transformers/modeling_longformer.py | 21 +- src/transformers/modeling_marian.py | 7 +- src/transformers/modeling_openai.py | 6 +- src/transformers/modeling_reformer.py | 10 +- src/transformers/modeling_roberta.py | 23 +- src/transformers/modeling_t5.py | 16 +- src/transformers/modeling_tf_albert.py | 22 +- src/transformers/modeling_tf_auto.py | 258 +++++++----------- src/transformers/modeling_tf_bert.py | 46 ++-- src/transformers/modeling_tf_camembert.py | 8 +- src/transformers/modeling_tf_ctrl.py | 6 +- src/transformers/modeling_tf_distilbert.py | 18 +- src/transformers/modeling_tf_electra.py | 18 +- src/transformers/modeling_tf_flaubert.py | 7 +- src/transformers/modeling_tf_gpt2.py | 16 +- src/transformers/modeling_tf_openai.py | 6 +- src/transformers/modeling_tf_roberta.py | 14 +- src/transformers/modeling_tf_t5.py | 16 +- src/transformers/modeling_tf_transfo_xl.py | 8 +- src/transformers/modeling_tf_utils.py | 31 +-- src/transformers/modeling_tf_xlm.py | 26 +- src/transformers/modeling_tf_xlm_roberta.py | 8 +- src/transformers/modeling_tf_xlnet.py | 10 +- src/transformers/modeling_transfo_xl.py | 8 +- src/transformers/modeling_utils.py | 34 +-- src/transformers/modeling_xlm.py | 26 +- src/transformers/modeling_xlm_roberta.py | 22 +- src/transformers/modeling_xlnet.py | 10 +- 
src/transformers/tokenization_auto.py | 74 +++-- src/transformers/tokenization_bert.py | 18 +- .../tokenization_bert_japanese.py | 24 +- src/transformers/tokenization_utils.py | 8 +- .../adding_a_new_example_script/run_xxx.py | 4 +- .../adding_a_new_model/configuration_xxx.py | 1 - .../adding_a_new_model/modeling_tf_xxx.py | 13 +- templates/adding_a_new_model/modeling_xxx.py | 13 +- .../tests/test_modeling_xxx.py | 4 +- tests/test_modeling_albert.py | 4 +- tests/test_modeling_auto.py | 14 +- tests/test_modeling_bart.py | 8 +- tests/test_modeling_bert.py | 4 +- tests/test_modeling_common.py | 4 +- tests/test_modeling_ctrl.py | 4 +- tests/test_modeling_distilbert.py | 2 +- tests/test_modeling_electra.py | 4 +- tests/test_modeling_flaubert.py | 4 +- tests/test_modeling_gpt2.py | 4 +- tests/test_modeling_openai.py | 4 +- tests/test_modeling_reformer.py | 4 +- tests/test_modeling_roberta.py | 4 +- tests/test_modeling_t5.py | 4 +- tests/test_modeling_tf_albert.py | 4 +- tests/test_modeling_tf_auto.py | 10 +- tests/test_modeling_tf_bert.py | 2 +- tests/test_modeling_tf_ctrl.py | 4 +- tests/test_modeling_tf_distilbert.py | 2 +- tests/test_modeling_tf_electra.py | 2 +- tests/test_modeling_tf_gpt2.py | 4 +- tests/test_modeling_tf_openai_gpt.py | 4 +- tests/test_modeling_tf_roberta.py | 4 +- tests/test_modeling_tf_transfo_xl.py | 4 +- tests/test_modeling_tf_xlm.py | 4 +- tests/test_modeling_tf_xlnet.py | 4 +- tests/test_modeling_transfo_xl.py | 4 +- tests/test_modeling_xlm.py | 4 +- tests/test_modeling_xlnet.py | 4 +- tests/test_tokenization_bert_japanese.py | 4 +- 115 files changed, 792 insertions(+), 1323 deletions(-) diff --git a/docs/source/pretrained_models.rst b/docs/source/pretrained_models.rst index 254471a6cd..d6ecda5f14 100644 --- a/docs/source/pretrained_models.rst +++ b/docs/source/pretrained_models.rst @@ -63,33 +63,33 @@ For a list that includes community-uploaded models, refer to `https://huggingfac | | | | Trained on uncased German text by DBMDZ | | | | (see 
`details on dbmdz repository `__). | | +------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------+ -| | ``bert-base-japanese`` | | 12-layer, 768-hidden, 12-heads, 110M parameters. | +| | ``cl-tohoku/bert-base-japanese`` | | 12-layer, 768-hidden, 12-heads, 110M parameters. | | | | | Trained on Japanese text. Text is tokenized with MeCab and WordPiece. | | | | | `MeCab `__ is required for tokenization. | | | | (see `details on cl-tohoku repository `__). | | +------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------+ -| | ``bert-base-japanese-whole-word-masking`` | | 12-layer, 768-hidden, 12-heads, 110M parameters. | +| | ``cl-tohoku/bert-base-japanese-whole-word-masking`` | | 12-layer, 768-hidden, 12-heads, 110M parameters. | | | | | Trained on Japanese text using Whole-Word-Masking. Text is tokenized with MeCab and WordPiece. | | | | | `MeCab `__ is required for tokenization. | | | | (see `details on cl-tohoku repository `__). | | +------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------+ -| | ``bert-base-japanese-char`` | | 12-layer, 768-hidden, 12-heads, 110M parameters. | +| | ``cl-tohoku/bert-base-japanese-char`` | | 12-layer, 768-hidden, 12-heads, 110M parameters. | | | | | Trained on Japanese text. Text is tokenized into characters. | | | | (see `details on cl-tohoku repository `__). 
| | +------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------+ -| | ``bert-base-japanese-char-whole-word-masking`` | | 12-layer, 768-hidden, 12-heads, 110M parameters. | +| | ``cl-tohoku/bert-base-japanese-char-whole-word-masking`` | | 12-layer, 768-hidden, 12-heads, 110M parameters. | | | | | Trained on Japanese text using Whole-Word-Masking. Text is tokenized into characters. | | | | (see `details on cl-tohoku repository `__). | | +------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------+ -| | ``bert-base-finnish-cased-v1`` | | 12-layer, 768-hidden, 12-heads, 110M parameters. | +| | ``TurkuNLP/bert-base-finnish-cased-v1`` | | 12-layer, 768-hidden, 12-heads, 110M parameters. | | | | | Trained on cased Finnish text. | | | | (see `details on turkunlp.org `__). | | +------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------+ -| | ``bert-base-finnish-uncased-v1`` | | 12-layer, 768-hidden, 12-heads, 110M parameters. | +| | ``TurkuNLP/bert-base-finnish-uncased-v1`` | | 12-layer, 768-hidden, 12-heads, 110M parameters. | | | | | Trained on uncased Finnish text. | | | | (see `details on turkunlp.org `__). | | +------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------+ -| | ``bert-base-dutch-cased`` | | 12-layer, 768-hidden, 12-heads, 110M parameters. | +| | ``wietsedv/bert-base-dutch-cased`` | | 12-layer, 768-hidden, 12-heads, 110M parameters. | | | | | Trained on cased Dutch text. 
| | | | (see `details on wietsedv repository `__). | +-------------------+------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------+ @@ -259,32 +259,32 @@ For a list that includes community-uploaded models, refer to `https://huggingfac | | ``xlm-roberta-large`` | | ~355M parameters with 24-layers, 1027-hidden-state, 4096 feed-forward hidden-state, 16-heads, | | | | | Trained on 2.5 TB of newly created clean CommonCrawl data in 100 languages | +-------------------+------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------+ -| FlauBERT | ``flaubert-small-cased`` | | 6-layer, 512-hidden, 8-heads, 54M parameters | +| FlauBERT | ``flaubert/flaubert_small_cased`` | | 6-layer, 512-hidden, 8-heads, 54M parameters | | | | | FlauBERT small architecture | | | | (see `details `__) | | +------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------+ -| | ``flaubert-base-uncased`` | | 12-layer, 768-hidden, 12-heads, 137M parameters | +| | ``flaubert/flaubert_base_uncased`` | | 12-layer, 768-hidden, 12-heads, 137M parameters | | | | | FlauBERT base architecture with uncased vocabulary | | | | (see `details `__) | | +------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------+ -| | ``flaubert-base-cased`` | | 12-layer, 768-hidden, 12-heads, 138M parameters | +| | ``flaubert/flaubert_base_cased`` | | 12-layer, 768-hidden, 12-heads, 138M parameters | | | | | FlauBERT base architecture with cased vocabulary | | | | (see `details `__) | | 
+------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------+ -| | ``flaubert-large-cased`` | | 24-layer, 1024-hidden, 16-heads, 373M parameters | +| | ``flaubert/flaubert_large_cased`` | | 24-layer, 1024-hidden, 16-heads, 373M parameters | | | | | FlauBERT large architecture | | | | (see `details `__) | +-------------------+------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------+ -| Bart | ``bart-large`` | | 24-layer, 1024-hidden, 16-heads, 406M parameters | +| Bart | ``facebook/bart-large`` | | 24-layer, 1024-hidden, 16-heads, 406M parameters | | | | (see `details `_) | | +------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------+ -| | ``bart-large-mnli`` | | Adds a 2 layer classification head with 1 million parameters | +| | ``facebook/bart-large-mnli`` | | Adds a 2 layer classification head with 1 million parameters | | | | | bart-large base architecture with a classification head, finetuned on MNLI | | +------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------+ -| | ``bart-large-cnn`` | | 12-layer, 1024-hidden, 16-heads, 406M parameters (same as base) | +| | ``facebook/bart-large-cnn`` | | 12-layer, 1024-hidden, 16-heads, 406M parameters (same as base) | | | | | bart-large base architecture finetuned on cnn summarization task | | 
+------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------+ -| | ``mbart-large-en-ro`` | | 12-layer, 1024-hidden, 16-heads, 880M parameters | +| | ``facebook/mbart-large-en-ro`` | | 12-layer, 1024-hidden, 16-heads, 880M parameters | | | | | bart-large architecture pretrained on cc25 multilingual data , finetuned on WMT english romanian translation. | +-------------------+------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------+ | DialoGPT | ``DialoGPT-small`` | | 12-layer, 768-hidden, 12-heads, 124M parameters | @@ -305,9 +305,9 @@ For a list that includes community-uploaded models, refer to `https://huggingfac | MarianMT | ``Helsinki-NLP/opus-mt-{src}-{tgt}`` | | 12-layer, 512-hidden, 8-heads, ~74M parameter Machine translation models. Parameter counts vary depending on vocab size. 
| | | | | (see `model list `_) | +-------------------+------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------+ -| Longformer | ``longformer-base-4096`` | | 12-layer, 768-hidden, 12-heads, ~149M parameters | +| Longformer | ``allenai/longformer-base-4096`` | | 12-layer, 768-hidden, 12-heads, ~149M parameters | | | | | Starting from RoBERTa-base checkpoint, trained on documents of max length 4,096 | | +------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------+ -| | ``longformer-large-4096`` | | 24-layer, 1024-hidden, 16-heads, ~435M parameters | +| | ``allenai/longformer-large-4096`` | | 24-layer, 1024-hidden, 16-heads, ~435M parameters | | | | | Starting from RoBERTa-large checkpoint, trained on documents of max length 4,096 | +-------------------+------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------+ diff --git a/examples/adversarial/test_hans.py b/examples/adversarial/test_hans.py index 99b6f24671..d22fbc8122 100644 --- a/examples/adversarial/test_hans.py +++ b/examples/adversarial/test_hans.py @@ -65,13 +65,6 @@ except ImportError: logger = logging.getLogger(__name__) -ALL_MODELS = sum( - ( - tuple(conf.pretrained_config_archive_map.keys()) - for conf in (BertConfig, XLNetConfig, XLMConfig, RobertaConfig, DistilBertConfig) - ), - (), -) MODEL_CLASSES = { "bert": (BertConfig, BertForSequenceClassification, BertTokenizer), @@ -389,7 +382,7 @@ def main(): default=None, type=str, required=True, - help="Path to pre-trained model or shortcut name selected in the list: " + ", ".join(ALL_MODELS), + help="Path to pretrained model or model identifier from 
huggingface.co/models", ) parser.add_argument( "--task_name", diff --git a/examples/contrib/mm-imdb/run_mmimdb.py b/examples/contrib/mm-imdb/run_mmimdb.py index f74ea575b7..7a3bcbda43 100644 --- a/examples/contrib/mm-imdb/run_mmimdb.py +++ b/examples/contrib/mm-imdb/run_mmimdb.py @@ -34,26 +34,11 @@ from tqdm import tqdm, trange from transformers import ( WEIGHTS_NAME, AdamW, - AlbertConfig, - AlbertModel, - AlbertTokenizer, - BertConfig, - BertModel, - BertTokenizer, - DistilBertConfig, - DistilBertModel, - DistilBertTokenizer, + AutoConfig, + AutoModel, + AutoTokenizer, MMBTConfig, MMBTForClassification, - RobertaConfig, - RobertaModel, - RobertaTokenizer, - XLMConfig, - XLMModel, - XLMTokenizer, - XLNetConfig, - XLNetModel, - XLNetTokenizer, get_linear_schedule_with_warmup, ) from utils_mmimdb import ImageEncoder, JsonlDataset, collate_fn, get_image_transforms, get_mmimdb_labels @@ -67,23 +52,6 @@ except ImportError: logger = logging.getLogger(__name__) -ALL_MODELS = sum( - ( - tuple(conf.pretrained_config_archive_map.keys()) - for conf in (BertConfig, XLNetConfig, XLMConfig, RobertaConfig, DistilBertConfig) - ), - (), -) - -MODEL_CLASSES = { - "bert": (BertConfig, BertModel, BertTokenizer), - "xlnet": (XLNetConfig, XLNetModel, XLNetTokenizer), - "xlm": (XLMConfig, XLMModel, XLMTokenizer), - "roberta": (RobertaConfig, RobertaModel, RobertaTokenizer), - "distilbert": (DistilBertConfig, DistilBertModel, DistilBertTokenizer), - "albert": (AlbertConfig, AlbertModel, AlbertTokenizer), -} - def set_seed(args): random.seed(args.seed) @@ -351,19 +319,12 @@ def main(): required=True, help="The input data dir. 
Should contain the .jsonl files for MMIMDB.", ) - parser.add_argument( - "--model_type", - default=None, - type=str, - required=True, - help="Model type selected in the list: " + ", ".join(MODEL_CLASSES.keys()), - ) parser.add_argument( "--model_name_or_path", default=None, type=str, required=True, - help="Path to pre-trained model or shortcut name selected in the list: " + ", ".join(ALL_MODELS), + help="Path to pretrained model or model identifier from huggingface.co/models", ) parser.add_argument( "--output_dir", @@ -385,7 +346,7 @@ def main(): ) parser.add_argument( "--cache_dir", - default="", + default=None, type=str, help="Where do you want to store the pre-trained models downloaded from s3", ) @@ -526,18 +487,14 @@ def main(): # Setup model labels = get_mmimdb_labels() num_labels = len(labels) - args.model_type = args.model_type.lower() - config_class, model_class, tokenizer_class = MODEL_CLASSES[args.model_type] - transformer_config = config_class.from_pretrained( - args.config_name if args.config_name else args.model_name_or_path - ) - tokenizer = tokenizer_class.from_pretrained( + transformer_config = AutoConfig.from_pretrained(args.config_name if args.config_name else args.model_name_or_path) + tokenizer = AutoTokenizer.from_pretrained( args.tokenizer_name if args.tokenizer_name else args.model_name_or_path, do_lower_case=args.do_lower_case, - cache_dir=args.cache_dir if args.cache_dir else None, + cache_dir=args.cache_dir, ) - transformer = model_class.from_pretrained( - args.model_name_or_path, config=transformer_config, cache_dir=args.cache_dir if args.cache_dir else None + transformer = AutoModel.from_pretrained( + args.model_name_or_path, config=transformer_config, cache_dir=args.cache_dir ) img_encoder = ImageEncoder(args) config = MMBTConfig(transformer_config, num_labels=num_labels) @@ -583,13 +540,12 @@ def main(): # Load a trained model and vocabulary that you have fine-tuned model = MMBTForClassification(config, transformer, img_encoder) 
model.load_state_dict(torch.load(os.path.join(args.output_dir, WEIGHTS_NAME))) - tokenizer = tokenizer_class.from_pretrained(args.output_dir) + tokenizer = AutoTokenizer.from_pretrained(args.output_dir) model.to(args.device) # Evaluation results = {} if args.do_eval and args.local_rank in [-1, 0]: - tokenizer = tokenizer_class.from_pretrained(args.output_dir, do_lower_case=args.do_lower_case) checkpoints = [args.output_dir] if args.eval_all_checkpoints: checkpoints = list( diff --git a/examples/contrib/run_swag.py b/examples/contrib/run_swag.py index 96a16d8df5..24f9ecca64 100644 --- a/examples/contrib/run_swag.py +++ b/examples/contrib/run_swag.py @@ -31,14 +31,8 @@ from torch.utils.data import DataLoader, RandomSampler, SequentialSampler, Tenso from torch.utils.data.distributed import DistributedSampler from tqdm import tqdm, trange -from transformers import ( - WEIGHTS_NAME, - AdamW, - BertConfig, - BertForMultipleChoice, - BertTokenizer, - get_linear_schedule_with_warmup, -) +from transformers import WEIGHTS_NAME, AdamW, AutoConfig, AutoTokenizer, get_linear_schedule_with_warmup +from transformers.modeling_auto import AutoModelForMultipleChoice try: @@ -49,12 +43,6 @@ except ImportError: logger = logging.getLogger(__name__) -ALL_MODELS = sum((tuple(conf.pretrained_config_archive_map.keys()) for conf in [BertConfig]), ()) - -MODEL_CLASSES = { - "bert": (BertConfig, BertForMultipleChoice, BertTokenizer), -} - class SwagExample(object): """A single training/test example for the SWAG dataset.""" @@ -492,19 +480,12 @@ def main(): required=True, help="SWAG csv for predictions. 
E.g., val.csv or test.csv", ) - parser.add_argument( - "--model_type", - default=None, - type=str, - required=True, - help="Model type selected in the list: " + ", ".join(MODEL_CLASSES.keys()), - ) parser.add_argument( "--model_name_or_path", default=None, type=str, required=True, - help="Path to pre-trained model or shortcut name selected in the list: " + ", ".join(ALL_MODELS), + help="Path to pretrained model or model identifier from huggingface.co/models", ) parser.add_argument( "--output_dir", @@ -536,9 +517,6 @@ def main(): parser.add_argument( "--evaluate_during_training", action="store_true", help="Rul evaluation during training at each logging step." ) - parser.add_argument( - "--do_lower_case", action="store_true", help="Set this flag if you are using an uncased model." - ) parser.add_argument("--per_gpu_train_batch_size", default=8, type=int, help="Batch size per GPU/CPU for training.") parser.add_argument( @@ -652,13 +630,9 @@ def main(): if args.local_rank not in [-1, 0]: torch.distributed.barrier() # Make sure only the first process in distributed training will download model & vocab - args.model_type = args.model_type.lower() - config_class, model_class, tokenizer_class = MODEL_CLASSES[args.model_type] - config = config_class.from_pretrained(args.config_name if args.config_name else args.model_name_or_path) - tokenizer = tokenizer_class.from_pretrained( - args.tokenizer_name if args.tokenizer_name else args.model_name_or_path, do_lower_case=args.do_lower_case - ) - model = model_class.from_pretrained( + config = AutoConfig.from_pretrained(args.config_name if args.config_name else args.model_name_or_path) + tokenizer = AutoTokenizer.from_pretrained(args.tokenizer_name if args.tokenizer_name else args.model_name_or_path,) + model = AutoModelForMultipleChoice.from_pretrained( args.model_name_or_path, from_tf=bool(".ckpt" in args.model_name_or_path), config=config ) @@ -694,8 +668,8 @@ def main(): torch.save(args, os.path.join(args.output_dir, 
"training_args.bin")) # Load a trained model and vocabulary that you have fine-tuned - model = model_class.from_pretrained(args.output_dir) - tokenizer = tokenizer_class.from_pretrained(args.output_dir) + model = AutoModelForMultipleChoice.from_pretrained(args.output_dir) + tokenizer = AutoTokenizer.from_pretrained(args.output_dir) model.to(args.device) # Evaluation - we can ask to evaluate all the checkpoints (sub-directories) in a directory @@ -718,8 +692,8 @@ def main(): for checkpoint in checkpoints: # Reload the model global_step = checkpoint.split("-")[-1] if len(checkpoints) > 1 else "" - model = model_class.from_pretrained(checkpoint) - tokenizer = tokenizer_class.from_pretrained(checkpoint) + model = AutoModelForMultipleChoice.from_pretrained(checkpoint) + tokenizer = AutoTokenizer.from_pretrained(checkpoint) model.to(args.device) # Evaluate diff --git a/examples/distillation/run_squad_w_distillation.py b/examples/distillation/run_squad_w_distillation.py index 12a5f3f175..ff9b381561 100644 --- a/examples/distillation/run_squad_w_distillation.py +++ b/examples/distillation/run_squad_w_distillation.py @@ -67,9 +67,6 @@ except ImportError: logger = logging.getLogger(__name__) -ALL_MODELS = sum( - (tuple(conf.pretrained_config_archive_map.keys()) for conf in (BertConfig, XLNetConfig, XLMConfig)), () -) MODEL_CLASSES = { "bert": (BertConfig, BertForQuestionAnswering, BertTokenizer), @@ -505,7 +502,7 @@ def main(): default=None, type=str, required=True, - help="Path to pre-trained model or shortcut name selected in the list: " + ", ".join(ALL_MODELS), + help="Path to pretrained model or model identifier from huggingface.co/models", ) parser.add_argument( "--output_dir", diff --git a/examples/movement-pruning/emmental/configuration_bert_masked.py b/examples/movement-pruning/emmental/configuration_bert_masked.py index 2af4ea683a..66d78b0c8f 100644 --- a/examples/movement-pruning/emmental/configuration_bert_masked.py +++ 
b/examples/movement-pruning/emmental/configuration_bert_masked.py @@ -19,7 +19,6 @@ and adapts it to the specificities of MaskedBert (`pruning_method`, `mask_init` import logging -from transformers.configuration_bert import BERT_PRETRAINED_CONFIG_ARCHIVE_MAP from transformers.configuration_utils import PretrainedConfig @@ -31,7 +30,6 @@ class MaskedBertConfig(PretrainedConfig): A class replicating the `~transformers.BertConfig` with additional parameters for pruning/masking configuration. """ - pretrained_config_archive_map = BERT_PRETRAINED_CONFIG_ARCHIVE_MAP model_type = "masked_bert" def __init__( diff --git a/examples/movement-pruning/emmental/modeling_bert_masked.py b/examples/movement-pruning/emmental/modeling_bert_masked.py index f2a045fe53..a87718fa3a 100644 --- a/examples/movement-pruning/emmental/modeling_bert_masked.py +++ b/examples/movement-pruning/emmental/modeling_bert_masked.py @@ -29,12 +29,7 @@ from torch.nn import CrossEntropyLoss, MSELoss from emmental import MaskedBertConfig from emmental.modules import MaskedLinear from transformers.file_utils import add_start_docstrings, add_start_docstrings_to_callable -from transformers.modeling_bert import ( - ACT2FN, - BERT_PRETRAINED_MODEL_ARCHIVE_MAP, - BertLayerNorm, - load_tf_weights_in_bert, -) +from transformers.modeling_bert import ACT2FN, BertLayerNorm, load_tf_weights_in_bert from transformers.modeling_utils import PreTrainedModel, prune_linear_layer @@ -395,7 +390,6 @@ class MaskedBertPreTrainedModel(PreTrainedModel): """ config_class = MaskedBertConfig - pretrained_model_archive_map = BERT_PRETRAINED_MODEL_ARCHIVE_MAP load_tf_weights = load_tf_weights_in_bert base_model_prefix = "bert" diff --git a/examples/movement-pruning/masked_run_glue.py b/examples/movement-pruning/masked_run_glue.py index 331bb87c3f..46107ac28e 100644 --- a/examples/movement-pruning/masked_run_glue.py +++ b/examples/movement-pruning/masked_run_glue.py @@ -53,8 +53,6 @@ except ImportError: logger = 
logging.getLogger(__name__) -ALL_MODELS = sum((tuple(conf.pretrained_config_archive_map.keys()) for conf in (BertConfig,)), (),) - MODEL_CLASSES = { "bert": (BertConfig, BertForSequenceClassification, BertTokenizer), "masked_bert": (MaskedBertConfig, MaskedBertForSequenceClassification, BertTokenizer), @@ -576,7 +574,7 @@ def main(): default=None, type=str, required=True, - help="Path to pre-trained model or shortcut name selected in the list: " + ", ".join(ALL_MODELS), + help="Path to pretrained model or model identifier from huggingface.co/models", ) parser.add_argument( "--task_name", diff --git a/examples/movement-pruning/masked_run_squad.py b/examples/movement-pruning/masked_run_squad.py index 1df626e1dd..b85ee7569c 100644 --- a/examples/movement-pruning/masked_run_squad.py +++ b/examples/movement-pruning/masked_run_squad.py @@ -57,8 +57,6 @@ except ImportError: logger = logging.getLogger(__name__) -ALL_MODELS = sum((tuple(conf.pretrained_config_archive_map.keys()) for conf in (BertConfig,)), (),) - MODEL_CLASSES = { "bert": (BertConfig, BertForQuestionAnswering, BertTokenizer), "masked_bert": (MaskedBertConfig, MaskedBertForQuestionAnswering, BertTokenizer), @@ -673,7 +671,7 @@ def main(): default=None, type=str, required=True, - help="Path to pre-trained model or shortcut name selected in the list: " + ", ".join(ALL_MODELS), + help="Path to pretrained model or model identifier from huggingface.co/models", ) parser.add_argument( "--output_dir", diff --git a/examples/question-answering/run_squad.py b/examples/question-answering/run_squad.py index 892138d188..3fe887d650 100644 --- a/examples/question-answering/run_squad.py +++ b/examples/question-answering/run_squad.py @@ -58,8 +58,6 @@ logger = logging.getLogger(__name__) MODEL_CONFIG_CLASSES = list(MODEL_FOR_QUESTION_ANSWERING_MAPPING.keys()) MODEL_TYPES = tuple(conf.model_type for conf in MODEL_CONFIG_CLASSES) -ALL_MODELS = sum((tuple(conf.pretrained_config_archive_map.keys()) for conf in 
MODEL_CONFIG_CLASSES), (),) - def set_seed(args): random.seed(args.seed) @@ -491,7 +489,7 @@ def main(): default=None, type=str, required=True, - help="Path to pre-trained model or shortcut name selected in the list: " + ", ".join(ALL_MODELS), + help="Path to pretrained model or model identifier from huggingface.co/models", ) parser.add_argument( "--output_dir", diff --git a/examples/summarization/bertabs/configuration_bertabs.py b/examples/summarization/bertabs/configuration_bertabs.py index 77acc84e8f..ac79d20062 100644 --- a/examples/summarization/bertabs/configuration_bertabs.py +++ b/examples/summarization/bertabs/configuration_bertabs.py @@ -61,7 +61,6 @@ class BertAbsConfig(PretrainedConfig): the decoder. """ - pretrained_config_archive_map = BERTABS_FINETUNED_CONFIG_MAP model_type = "bertabs" def __init__( diff --git a/examples/summarization/bertabs/modeling_bertabs.py b/examples/summarization/bertabs/modeling_bertabs.py index 71c901a21f..c1d04e8c71 100644 --- a/examples/summarization/bertabs/modeling_bertabs.py +++ b/examples/summarization/bertabs/modeling_bertabs.py @@ -33,14 +33,13 @@ from transformers import BertConfig, BertModel, PreTrainedModel MAX_SIZE = 5000 -BERTABS_FINETUNED_MODEL_MAP = { - "bertabs-finetuned-cnndm": "https://cdn.huggingface.co/remi/bertabs-finetuned-cnndm-extractive-abstractive-summarization/pytorch_model.bin", -} +BERTABS_FINETUNED_MODEL_ARCHIVE_LIST = [ + "remi/bertabs-finetuned-cnndm-extractive-abstractive-summarization", +] class BertAbsPreTrainedModel(PreTrainedModel): config_class = BertAbsConfig - pretrained_model_archive_map = BERTABS_FINETUNED_MODEL_MAP load_tf_weights = False base_model_prefix = "bert" diff --git a/examples/text-classification/README.md b/examples/text-classification/README.md index 39279208c9..b4f8bd2b6f 100644 --- a/examples/text-classification/README.md +++ b/examples/text-classification/README.md @@ -258,7 +258,7 @@ TEST RESULTS {'val_loss': tensor(0.0707), 'precision': 0.852427800698191, 'recal 
Based on the script [`run_xnli.py`](https://github.com/huggingface/transformers/blob/master/examples/text-classification/run_xnli.py). -[XNLI](https://www.nyu.edu/projects/bowman/xnli/) is crowd-sourced dataset based on [MultiNLI](http://www.nyu.edu/projects/bowman/multinli/). It is an evaluation benchmark for cross-lingual text representations. Pairs of text are labeled with textual entailment annotations for 15 different languages (including both high-resource language such as English and low-resource languages such as Swahili). +[XNLI](https://www.nyu.edu/projects/bowman/xnli/) is a crowd-sourced dataset based on [MultiNLI](http://www.nyu.edu/projects/bowman/multinli/). It is an evaluation benchmark for cross-lingual text representations. Pairs of text are labeled with textual entailment annotations for 15 different languages (including both high-resource language such as English and low-resource languages such as Swahili). #### Fine-tuning on XNLI @@ -273,7 +273,6 @@ on a single tesla V100 16GB. The data for XNLI can be downloaded with the follow export XNLI_DIR=/path/to/XNLI python run_xnli.py \ - --model_type bert \ --model_name_or_path bert-base-multilingual-cased \ --language de \ --train_language en \ diff --git a/examples/text-classification/run_xnli.py b/examples/text-classification/run_xnli.py index d902d22cd2..f0ee5120fa 100644 --- a/examples/text-classification/run_xnli.py +++ b/examples/text-classification/run_xnli.py @@ -13,7 +13,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Finetuning multi-lingual models on XNLI (Bert, DistilBERT, XLM). +""" Finetuning multi-lingual models on XNLI (e.g. Bert, DistilBERT, XLM). 
Adapted from `examples/text-classification/run_glue.py`""" @@ -32,15 +32,9 @@ from tqdm import tqdm, trange from transformers import ( WEIGHTS_NAME, AdamW, - BertConfig, - BertForSequenceClassification, - BertTokenizer, - DistilBertConfig, - DistilBertForSequenceClassification, - DistilBertTokenizer, - XLMConfig, - XLMForSequenceClassification, - XLMTokenizer, + AutoConfig, + AutoModelForSequenceClassification, + AutoTokenizer, get_linear_schedule_with_warmup, ) from transformers import glue_convert_examples_to_features as convert_examples_to_features @@ -57,16 +51,6 @@ except ImportError: logger = logging.getLogger(__name__) -ALL_MODELS = sum( - (tuple(conf.pretrained_config_archive_map.keys()) for conf in (BertConfig, DistilBertConfig, XLMConfig)), () -) - -MODEL_CLASSES = { - "bert": (BertConfig, BertForSequenceClassification, BertTokenizer), - "xlm": (XLMConfig, XLMForSequenceClassification, XLMTokenizer), - "distilbert": (DistilBertConfig, DistilBertForSequenceClassification, DistilBertTokenizer), -} - def set_seed(args): random.seed(args.seed) @@ -377,19 +361,12 @@ def main(): required=True, help="The input data dir. 
Should contain the .tsv files (or other data files) for the task.", ) - parser.add_argument( - "--model_type", - default=None, - type=str, - required=True, - help="Model type selected in the list: " + ", ".join(MODEL_CLASSES.keys()), - ) parser.add_argument( "--model_name_or_path", default=None, type=str, required=True, - help="Path to pre-trained model or shortcut name selected in the list: " + ", ".join(ALL_MODELS), + help="Path to pretrained model or model identifier from huggingface.co/models", ) parser.add_argument( "--language", @@ -421,7 +398,7 @@ def main(): ) parser.add_argument( "--cache_dir", - default="", + default=None, type=str, help="Where do you want to store the pre-trained models downloaded from s3", ) @@ -562,24 +539,23 @@ def main(): if args.local_rank not in [-1, 0]: torch.distributed.barrier() # Make sure only the first process in distributed training will download model & vocab - args.model_type = args.model_type.lower() - config_class, model_class, tokenizer_class = MODEL_CLASSES[args.model_type] - config = config_class.from_pretrained( + config = AutoConfig.from_pretrained( args.config_name if args.config_name else args.model_name_or_path, num_labels=num_labels, finetuning_task=args.task_name, - cache_dir=args.cache_dir if args.cache_dir else None, + cache_dir=args.cache_dir, ) - tokenizer = tokenizer_class.from_pretrained( + args.model_type = config.model_type + tokenizer = AutoTokenizer.from_pretrained( args.tokenizer_name if args.tokenizer_name else args.model_name_or_path, do_lower_case=args.do_lower_case, - cache_dir=args.cache_dir if args.cache_dir else None, + cache_dir=args.cache_dir, ) - model = model_class.from_pretrained( + model = AutoModelForSequenceClassification.from_pretrained( args.model_name_or_path, from_tf=bool(".ckpt" in args.model_name_or_path), config=config, - cache_dir=args.cache_dir if args.cache_dir else None, + cache_dir=args.cache_dir, ) if args.local_rank == 0: @@ -614,14 +590,13 @@ def main(): torch.save(args, 
os.path.join(args.output_dir, "training_args.bin")) # Load a trained model and vocabulary that you have fine-tuned - model = model_class.from_pretrained(args.output_dir) - tokenizer = tokenizer_class.from_pretrained(args.output_dir) + model = AutoModelForSequenceClassification.from_pretrained(args.output_dir) + tokenizer = AutoTokenizer.from_pretrained(args.output_dir) model.to(args.device) # Evaluation results = {} if args.do_eval and args.local_rank in [-1, 0]: - tokenizer = tokenizer_class.from_pretrained(args.output_dir, do_lower_case=args.do_lower_case) checkpoints = [args.output_dir] if args.eval_all_checkpoints: checkpoints = list( @@ -633,7 +608,7 @@ def main(): global_step = checkpoint.split("-")[-1] if len(checkpoints) > 1 else "" prefix = checkpoint.split("/")[-1] if checkpoint.find("checkpoint") != -1 else "" - model = model_class.from_pretrained(checkpoint) + model = AutoModelForSequenceClassification.from_pretrained(checkpoint) model.to(args.device) result = evaluate(args, model, tokenizer, prefix=prefix) result = dict((k + "_{}".format(global_step), v) for k, v in result.items()) diff --git a/examples/token-classification/run_ner.py b/examples/token-classification/run_ner.py index 7f79fb6d4e..a5698f5838 100644 --- a/examples/token-classification/run_ner.py +++ b/examples/token-classification/run_ner.py @@ -13,7 +13,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Fine-tuning the library models for named entity recognition on CoNLL-2003 (Bert or Roberta). """ +""" Fine-tuning the library models for named entity recognition on CoNLL-2003. 
""" import logging diff --git a/src/transformers/__init__.py b/src/transformers/__init__.py index 6c392478bd..d28681854c 100755 --- a/src/transformers/__init__.py +++ b/src/transformers/__init__.py @@ -159,7 +159,6 @@ if is_torch_available(): AutoModelWithLMHead, AutoModelForTokenClassification, AutoModelForMultipleChoice, - ALL_PRETRAINED_MODEL_ARCHIVE_MAP, MODEL_MAPPING, MODEL_FOR_PRETRAINING_MAPPING, MODEL_WITH_LM_HEAD_MAPPING, @@ -180,7 +179,7 @@ if is_torch_available(): BertForTokenClassification, BertForQuestionAnswering, load_tf_weights_in_bert, - BERT_PRETRAINED_MODEL_ARCHIVE_MAP, + BERT_PRETRAINED_MODEL_ARCHIVE_LIST, BertLayer, ) from .modeling_openai import ( @@ -189,7 +188,7 @@ if is_torch_available(): OpenAIGPTLMHeadModel, OpenAIGPTDoubleHeadsModel, load_tf_weights_in_openai_gpt, - OPENAI_GPT_PRETRAINED_MODEL_ARCHIVE_MAP, + OPENAI_GPT_PRETRAINED_MODEL_ARCHIVE_LIST, ) from .modeling_transfo_xl import ( TransfoXLPreTrainedModel, @@ -197,7 +196,7 @@ if is_torch_available(): TransfoXLLMHeadModel, AdaptiveEmbedding, load_tf_weights_in_transfo_xl, - TRANSFO_XL_PRETRAINED_MODEL_ARCHIVE_MAP, + TRANSFO_XL_PRETRAINED_MODEL_ARCHIVE_LIST, ) from .modeling_gpt2 import ( GPT2PreTrainedModel, @@ -205,9 +204,9 @@ if is_torch_available(): GPT2LMHeadModel, GPT2DoubleHeadsModel, load_tf_weights_in_gpt2, - GPT2_PRETRAINED_MODEL_ARCHIVE_MAP, + GPT2_PRETRAINED_MODEL_ARCHIVE_LIST, ) - from .modeling_ctrl import CTRLPreTrainedModel, CTRLModel, CTRLLMHeadModel, CTRL_PRETRAINED_MODEL_ARCHIVE_MAP + from .modeling_ctrl import CTRLPreTrainedModel, CTRLModel, CTRLLMHeadModel, CTRL_PRETRAINED_MODEL_ARCHIVE_LIST from .modeling_xlnet import ( XLNetPreTrainedModel, XLNetModel, @@ -218,7 +217,7 @@ if is_torch_available(): XLNetForQuestionAnsweringSimple, XLNetForQuestionAnswering, load_tf_weights_in_xlnet, - XLNET_PRETRAINED_MODEL_ARCHIVE_MAP, + XLNET_PRETRAINED_MODEL_ARCHIVE_LIST, ) from .modeling_xlm import ( XLMPreTrainedModel, @@ -228,13 +227,13 @@ if is_torch_available(): 
XLMForTokenClassification, XLMForQuestionAnswering, XLMForQuestionAnsweringSimple, - XLM_PRETRAINED_MODEL_ARCHIVE_MAP, + XLM_PRETRAINED_MODEL_ARCHIVE_LIST, ) from .modeling_bart import ( BartForSequenceClassification, BartModel, BartForConditionalGeneration, - BART_PRETRAINED_MODEL_ARCHIVE_MAP, + BART_PRETRAINED_MODEL_ARCHIVE_LIST, ) from .modeling_marian import MarianMTModel from .tokenization_marian import MarianTokenizer @@ -245,7 +244,7 @@ if is_torch_available(): RobertaForMultipleChoice, RobertaForTokenClassification, RobertaForQuestionAnswering, - ROBERTA_PRETRAINED_MODEL_ARCHIVE_MAP, + ROBERTA_PRETRAINED_MODEL_ARCHIVE_LIST, ) from .modeling_distilbert import ( DistilBertPreTrainedModel, @@ -254,7 +253,7 @@ if is_torch_available(): DistilBertForSequenceClassification, DistilBertForQuestionAnswering, DistilBertForTokenClassification, - DISTILBERT_PRETRAINED_MODEL_ARCHIVE_MAP, + DISTILBERT_PRETRAINED_MODEL_ARCHIVE_LIST, ) from .modeling_camembert import ( CamembertForMaskedLM, @@ -263,7 +262,7 @@ if is_torch_available(): CamembertForMultipleChoice, CamembertForTokenClassification, CamembertForQuestionAnswering, - CAMEMBERT_PRETRAINED_MODEL_ARCHIVE_MAP, + CAMEMBERT_PRETRAINED_MODEL_ARCHIVE_LIST, ) from .modeling_encoder_decoder import EncoderDecoderModel from .modeling_t5 import ( @@ -271,7 +270,7 @@ if is_torch_available(): T5Model, T5ForConditionalGeneration, load_tf_weights_in_t5, - T5_PRETRAINED_MODEL_ARCHIVE_MAP, + T5_PRETRAINED_MODEL_ARCHIVE_LIST, ) from .modeling_albert import ( AlbertPreTrainedModel, @@ -282,7 +281,7 @@ if is_torch_available(): AlbertForQuestionAnswering, AlbertForTokenClassification, load_tf_weights_in_albert, - ALBERT_PRETRAINED_MODEL_ARCHIVE_MAP, + ALBERT_PRETRAINED_MODEL_ARCHIVE_LIST, ) from .modeling_xlm_roberta import ( XLMRobertaForMaskedLM, @@ -290,7 +289,7 @@ if is_torch_available(): XLMRobertaForMultipleChoice, XLMRobertaForSequenceClassification, XLMRobertaForTokenClassification, - XLM_ROBERTA_PRETRAINED_MODEL_ARCHIVE_MAP, + 
XLM_ROBERTA_PRETRAINED_MODEL_ARCHIVE_LIST, ) from .modeling_mmbt import ModalEmbeddings, MMBTModel, MMBTForClassification @@ -300,7 +299,7 @@ if is_torch_available(): FlaubertForSequenceClassification, FlaubertForQuestionAnswering, FlaubertForQuestionAnsweringSimple, - FLAUBERT_PRETRAINED_MODEL_ARCHIVE_MAP, + FLAUBERT_PRETRAINED_MODEL_ARCHIVE_LIST, ) from .modeling_electra import ( @@ -311,7 +310,7 @@ if is_torch_available(): ElectraForSequenceClassification, ElectraModel, load_tf_weights_in_electra, - ELECTRA_PRETRAINED_MODEL_ARCHIVE_MAP, + ELECTRA_PRETRAINED_MODEL_ARCHIVE_LIST, ) from .modeling_reformer import ( @@ -319,7 +318,7 @@ if is_torch_available(): ReformerLayer, ReformerModel, ReformerModelWithLMHead, - REFORMER_PRETRAINED_MODEL_ARCHIVE_MAP, + REFORMER_PRETRAINED_MODEL_ARCHIVE_LIST, ) from .modeling_longformer import ( @@ -329,7 +328,7 @@ if is_torch_available(): LongformerForMultipleChoice, LongformerForTokenClassification, LongformerForQuestionAnswering, - LONGFORMER_PRETRAINED_MODEL_ARCHIVE_MAP, + LONGFORMER_PRETRAINED_MODEL_ARCHIVE_LIST, ) # Optimization @@ -367,7 +366,6 @@ if is_tf_available(): TFAutoModelForQuestionAnswering, TFAutoModelWithLMHead, TFAutoModelForTokenClassification, - TF_ALL_PRETRAINED_MODEL_ARCHIVE_MAP, TF_MODEL_MAPPING, TF_MODEL_FOR_PRETRAINING_MAPPING, TF_MODEL_WITH_LM_HEAD_MAPPING, @@ -388,7 +386,7 @@ if is_tf_available(): TFBertForMultipleChoice, TFBertForTokenClassification, TFBertForQuestionAnswering, - TF_BERT_PRETRAINED_MODEL_ARCHIVE_MAP, + TF_BERT_PRETRAINED_MODEL_ARCHIVE_LIST, ) from .modeling_tf_gpt2 import ( @@ -397,7 +395,7 @@ if is_tf_available(): TFGPT2Model, TFGPT2LMHeadModel, TFGPT2DoubleHeadsModel, - TF_GPT2_PRETRAINED_MODEL_ARCHIVE_MAP, + TF_GPT2_PRETRAINED_MODEL_ARCHIVE_LIST, ) from .modeling_tf_openai import ( @@ -406,7 +404,7 @@ if is_tf_available(): TFOpenAIGPTModel, TFOpenAIGPTLMHeadModel, TFOpenAIGPTDoubleHeadsModel, - TF_OPENAI_GPT_PRETRAINED_MODEL_ARCHIVE_MAP, + 
TF_OPENAI_GPT_PRETRAINED_MODEL_ARCHIVE_LIST, ) from .modeling_tf_transfo_xl import ( @@ -414,7 +412,7 @@ if is_tf_available(): TFTransfoXLMainLayer, TFTransfoXLModel, TFTransfoXLLMHeadModel, - TF_TRANSFO_XL_PRETRAINED_MODEL_ARCHIVE_MAP, + TF_TRANSFO_XL_PRETRAINED_MODEL_ARCHIVE_LIST, TFAdaptiveEmbedding, ) @@ -426,7 +424,7 @@ if is_tf_available(): TFXLNetForSequenceClassification, TFXLNetForTokenClassification, TFXLNetForQuestionAnsweringSimple, - TF_XLNET_PRETRAINED_MODEL_ARCHIVE_MAP, + TF_XLNET_PRETRAINED_MODEL_ARCHIVE_LIST, ) from .modeling_tf_xlm import ( @@ -436,7 +434,7 @@ if is_tf_available(): TFXLMWithLMHeadModel, TFXLMForSequenceClassification, TFXLMForQuestionAnsweringSimple, - TF_XLM_PRETRAINED_MODEL_ARCHIVE_MAP, + TF_XLM_PRETRAINED_MODEL_ARCHIVE_LIST, ) from .modeling_tf_xlm_roberta import ( @@ -444,7 +442,7 @@ if is_tf_available(): TFXLMRobertaModel, TFXLMRobertaForSequenceClassification, TFXLMRobertaForTokenClassification, - TF_XLM_ROBERTA_PRETRAINED_MODEL_ARCHIVE_MAP, + TF_XLM_ROBERTA_PRETRAINED_MODEL_ARCHIVE_LIST, ) from .modeling_tf_roberta import ( @@ -455,7 +453,7 @@ if is_tf_available(): TFRobertaForSequenceClassification, TFRobertaForTokenClassification, TFRobertaForQuestionAnswering, - TF_ROBERTA_PRETRAINED_MODEL_ARCHIVE_MAP, + TF_ROBERTA_PRETRAINED_MODEL_ARCHIVE_LIST, ) from .modeling_tf_camembert import ( @@ -463,14 +461,14 @@ if is_tf_available(): TFCamembertForMaskedLM, TFCamembertForSequenceClassification, TFCamembertForTokenClassification, - TF_CAMEMBERT_PRETRAINED_MODEL_ARCHIVE_MAP, + TF_CAMEMBERT_PRETRAINED_MODEL_ARCHIVE_LIST, ) from .modeling_tf_flaubert import ( TFFlaubertModel, TFFlaubertWithLMHeadModel, TFFlaubertForSequenceClassification, - TF_FLAUBERT_PRETRAINED_MODEL_ARCHIVE_MAP, + TF_FLAUBERT_PRETRAINED_MODEL_ARCHIVE_LIST, ) from .modeling_tf_distilbert import ( @@ -481,14 +479,14 @@ if is_tf_available(): TFDistilBertForSequenceClassification, TFDistilBertForTokenClassification, TFDistilBertForQuestionAnswering, - 
TF_DISTILBERT_PRETRAINED_MODEL_ARCHIVE_MAP, + TF_DISTILBERT_PRETRAINED_MODEL_ARCHIVE_LIST, ) from .modeling_tf_ctrl import ( TFCTRLPreTrainedModel, TFCTRLModel, TFCTRLLMHeadModel, - TF_CTRL_PRETRAINED_MODEL_ARCHIVE_MAP, + TF_CTRL_PRETRAINED_MODEL_ARCHIVE_LIST, ) from .modeling_tf_albert import ( @@ -500,14 +498,14 @@ if is_tf_available(): TFAlbertForMultipleChoice, TFAlbertForSequenceClassification, TFAlbertForQuestionAnswering, - TF_ALBERT_PRETRAINED_MODEL_ARCHIVE_MAP, + TF_ALBERT_PRETRAINED_MODEL_ARCHIVE_LIST, ) from .modeling_tf_t5 import ( TFT5PreTrainedModel, TFT5Model, TFT5ForConditionalGeneration, - TF_T5_PRETRAINED_MODEL_ARCHIVE_MAP, + TF_T5_PRETRAINED_MODEL_ARCHIVE_LIST, ) from .modeling_tf_electra import ( @@ -516,7 +514,7 @@ if is_tf_available(): TFElectraForPreTraining, TFElectraForMaskedLM, TFElectraForTokenClassification, - TF_ELECTRA_PRETRAINED_MODEL_ARCHIVE_MAP, + TF_ELECTRA_PRETRAINED_MODEL_ARCHIVE_LIST, ) # Optimization diff --git a/src/transformers/configuration_albert.py b/src/transformers/configuration_albert.py index 2f3ae0df74..899d5da388 100644 --- a/src/transformers/configuration_albert.py +++ b/src/transformers/configuration_albert.py @@ -32,7 +32,7 @@ ALBERT_PRETRAINED_CONFIG_ARCHIVE_MAP = { class AlbertConfig(PretrainedConfig): r""" - This is the configuration class to store the configuration of an :class:`~transformers.AlbertModel`. + This is the configuration class to store the configuration of a :class:`~transformers.AlbertModel`. It is used to instantiate an ALBERT model according to the specified arguments, defining the model architecture. Instantiating a configuration with the defaults will yield a similar configuration to that of the ALBERT `xxlarge `__ architecture. @@ -97,13 +97,8 @@ class AlbertConfig(PretrainedConfig): # Accessing the model configuration configuration = model.config - - Attributes: - pretrained_config_archive_map (Dict[str, str]): - A dictionary containing all the available pre-trained checkpoints. 
""" - pretrained_config_archive_map = ALBERT_PRETRAINED_CONFIG_ARCHIVE_MAP model_type = "albert" def __init__( diff --git a/src/transformers/configuration_auto.py b/src/transformers/configuration_auto.py index 89909a2ad4..e9ba1de597 100644 --- a/src/transformers/configuration_auto.py +++ b/src/transformers/configuration_auto.py @@ -113,12 +113,12 @@ class AutoConfig: ) @classmethod - def for_model(cls, model_type, *args, **kwargs): - for pattern, config_class in CONFIG_MAPPING.items(): - if pattern in model_type: - return config_class(*args, **kwargs) + def for_model(cls, model_type: str, *args, **kwargs): + if model_type in CONFIG_MAPPING: + config_class = CONFIG_MAPPING[model_type] + return config_class(*args, **kwargs) raise ValueError( - "Unrecognized model identifier in {}. Should contain one of {}".format( + "Unrecognized model identifier: {}. Should contain one of {}".format( model_type, ", ".join(CONFIG_MAPPING.keys()) ) ) @@ -130,24 +130,24 @@ class AutoConfig: The configuration class to instantiate is selected based on the `model_type` property of the config object, or when it's missing, - falling back to using pattern matching on the `pretrained_model_name_or_path` string. 
- - contains `t5`: :class:`~transformers.T5Config` (T5 model) - - contains `distilbert`: :class:`~transformers.DistilBertConfig` (DistilBERT model) - - contains `albert`: :class:`~transformers.AlbertConfig` (ALBERT model) - - contains `camembert`: :class:`~transformers.CamembertConfig` (CamemBERT model) - - contains `xlm-roberta`: :class:`~transformers.XLMRobertaConfig` (XLM-RoBERTa model) - - contains `longformer`: :class:`~transformers.LongformerConfig` (Longformer model) - - contains `roberta`: :class:`~transformers.RobertaConfig` (RoBERTa model) - - contains `reformer`: :class:`~transformers.ReformerConfig` (Reformer model) - - contains `bert`: :class:`~transformers.BertConfig` (Bert model) - - contains `openai-gpt`: :class:`~transformers.OpenAIGPTConfig` (OpenAI GPT model) - - contains `gpt2`: :class:`~transformers.GPT2Config` (OpenAI GPT-2 model) - - contains `transfo-xl`: :class:`~transformers.TransfoXLConfig` (Transformer-XL model) - - contains `xlnet`: :class:`~transformers.XLNetConfig` (XLNet model) - - contains `xlm`: :class:`~transformers.XLMConfig` (XLM model) - - contains `ctrl` : :class:`~transformers.CTRLConfig` (CTRL model) - - contains `flaubert` : :class:`~transformers.FlaubertConfig` (Flaubert model) - - contains `electra` : :class:`~transformers.ElectraConfig` (ELECTRA model) + falling back to using pattern matching on the `pretrained_model_name_or_path` string: + - `t5`: :class:`~transformers.T5Config` (T5 model) + - `distilbert`: :class:`~transformers.DistilBertConfig` (DistilBERT model) + - `albert`: :class:`~transformers.AlbertConfig` (ALBERT model) + - `camembert`: :class:`~transformers.CamembertConfig` (CamemBERT model) + - `xlm-roberta`: :class:`~transformers.XLMRobertaConfig` (XLM-RoBERTa model) + - `longformer`: :class:`~transformers.LongformerConfig` (Longformer model) + - `roberta`: :class:`~transformers.RobertaConfig` (RoBERTa model) + - `reformer`: :class:`~transformers.ReformerConfig` (Reformer model) + - `bert`: 
:class:`~transformers.BertConfig` (Bert model) + - `openai-gpt`: :class:`~transformers.OpenAIGPTConfig` (OpenAI GPT model) + - `gpt2`: :class:`~transformers.GPT2Config` (OpenAI GPT-2 model) + - `transfo-xl`: :class:`~transformers.TransfoXLConfig` (Transformer-XL model) + - `xlnet`: :class:`~transformers.XLNetConfig` (XLNet model) + - `xlm`: :class:`~transformers.XLMConfig` (XLM model) + - `ctrl` : :class:`~transformers.CTRLConfig` (CTRL model) + - `flaubert` : :class:`~transformers.FlaubertConfig` (Flaubert model) + - `electra` : :class:`~transformers.ElectraConfig` (ELECTRA model) Args: pretrained_model_name_or_path (:obj:`string`): @@ -193,9 +193,7 @@ class AutoConfig: assert unused_kwargs == {'foo': False} """ - config_dict, _ = PretrainedConfig.get_config_dict( - pretrained_model_name_or_path, pretrained_config_archive_map=ALL_PRETRAINED_CONFIG_ARCHIVE_MAP, **kwargs - ) + config_dict, _ = PretrainedConfig.get_config_dict(pretrained_model_name_or_path, **kwargs) if "model_type" in config_dict: config_class = CONFIG_MAPPING[config_dict["model_type"]] diff --git a/src/transformers/configuration_bart.py b/src/transformers/configuration_bart.py index 6732db90d7..3b9778ff23 100644 --- a/src/transformers/configuration_bart.py +++ b/src/transformers/configuration_bart.py @@ -23,11 +23,11 @@ from .configuration_utils import PretrainedConfig logger = logging.getLogger(__name__) BART_PRETRAINED_CONFIG_ARCHIVE_MAP = { - "bart-large": "https://s3.amazonaws.com/models.huggingface.co/bert/facebook/bart-large/config.json", - "bart-large-mnli": "https://s3.amazonaws.com/models.huggingface.co/bert/facebook/bart-large-mnli/config.json", - "bart-large-cnn": "https://s3.amazonaws.com/models.huggingface.co/bert/facebook/bart-large-cnn/config.json", - "bart-large-xsum": "https://s3.amazonaws.com/models.huggingface.co/bert/facebook/bart-large-xsum/config.json", - "mbart-large-en-ro": "https://s3.amazonaws.com/models.huggingface.co/bert/facebook/mbart-large-en-ro/config.json", + 
"facebook/bart-large": "https://s3.amazonaws.com/models.huggingface.co/bert/facebook/bart-large/config.json", + "facebook/bart-large-mnli": "https://s3.amazonaws.com/models.huggingface.co/bert/facebook/bart-large-mnli/config.json", + "facebook/bart-large-cnn": "https://s3.amazonaws.com/models.huggingface.co/bert/facebook/bart-large-cnn/config.json", + "facebook/bart-large-xsum": "https://s3.amazonaws.com/models.huggingface.co/bert/facebook/bart-large-xsum/config.json", + "facebook/mbart-large-en-ro": "https://s3.amazonaws.com/models.huggingface.co/bert/facebook/mbart-large-en-ro/config.json", } @@ -36,7 +36,6 @@ class BartConfig(PretrainedConfig): Configuration class for Bart. Parameters are renamed from the fairseq implementation """ model_type = "bart" - pretrained_config_archive_map = BART_PRETRAINED_CONFIG_ARCHIVE_MAP def __init__( self, diff --git a/src/transformers/configuration_bert.py b/src/transformers/configuration_bert.py index 5026954468..d03f573c54 100644 --- a/src/transformers/configuration_bert.py +++ b/src/transformers/configuration_bert.py @@ -39,13 +39,14 @@ BERT_PRETRAINED_CONFIG_ARCHIVE_MAP = { "bert-base-cased-finetuned-mrpc": "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-cased-finetuned-mrpc-config.json", "bert-base-german-dbmdz-cased": "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-german-dbmdz-cased-config.json", "bert-base-german-dbmdz-uncased": "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-german-dbmdz-uncased-config.json", - "bert-base-japanese": "https://s3.amazonaws.com/models.huggingface.co/bert/cl-tohoku/bert-base-japanese/config.json", - "bert-base-japanese-whole-word-masking": "https://s3.amazonaws.com/models.huggingface.co/bert/cl-tohoku/bert-base-japanese-whole-word-masking/config.json", - "bert-base-japanese-char": "https://s3.amazonaws.com/models.huggingface.co/bert/cl-tohoku/bert-base-japanese-char/config.json", - "bert-base-japanese-char-whole-word-masking": 
"https://s3.amazonaws.com/models.huggingface.co/bert/cl-tohoku/bert-base-japanese-char-whole-word-masking/config.json", - "bert-base-finnish-cased-v1": "https://s3.amazonaws.com/models.huggingface.co/bert/TurkuNLP/bert-base-finnish-cased-v1/config.json", - "bert-base-finnish-uncased-v1": "https://s3.amazonaws.com/models.huggingface.co/bert/TurkuNLP/bert-base-finnish-uncased-v1/config.json", - "bert-base-dutch-cased": "https://s3.amazonaws.com/models.huggingface.co/bert/wietsedv/bert-base-dutch-cased/config.json", + "cl-tohoku/bert-base-japanese": "https://s3.amazonaws.com/models.huggingface.co/bert/cl-tohoku/bert-base-japanese/config.json", + "cl-tohoku/bert-base-japanese-whole-word-masking": "https://s3.amazonaws.com/models.huggingface.co/bert/cl-tohoku/bert-base-japanese-whole-word-masking/config.json", + "cl-tohoku/bert-base-japanese-char": "https://s3.amazonaws.com/models.huggingface.co/bert/cl-tohoku/bert-base-japanese-char/config.json", + "cl-tohoku/bert-base-japanese-char-whole-word-masking": "https://s3.amazonaws.com/models.huggingface.co/bert/cl-tohoku/bert-base-japanese-char-whole-word-masking/config.json", + "TurkuNLP/bert-base-finnish-cased-v1": "https://s3.amazonaws.com/models.huggingface.co/bert/TurkuNLP/bert-base-finnish-cased-v1/config.json", + "TurkuNLP/bert-base-finnish-uncased-v1": "https://s3.amazonaws.com/models.huggingface.co/bert/TurkuNLP/bert-base-finnish-uncased-v1/config.json", + "wietsedv/bert-base-dutch-cased": "https://s3.amazonaws.com/models.huggingface.co/bert/wietsedv/bert-base-dutch-cased/config.json", + # See all BERT models at https://huggingface.co/models?filter=bert } @@ -102,12 +103,7 @@ class BertConfig(PretrainedConfig): # Accessing the model configuration configuration = model.config - - Attributes: - pretrained_config_archive_map (Dict[str, str]): - A dictionary containing all the available pre-trained checkpoints. 
""" - pretrained_config_archive_map = BERT_PRETRAINED_CONFIG_ARCHIVE_MAP model_type = "bert" def __init__( diff --git a/src/transformers/configuration_camembert.py b/src/transformers/configuration_camembert.py index f930fe2ece..6ca969beae 100644 --- a/src/transformers/configuration_camembert.py +++ b/src/transformers/configuration_camembert.py @@ -36,5 +36,4 @@ class CamembertConfig(RobertaConfig): superclass for the appropriate documentation alongside usage examples. """ - pretrained_config_archive_map = CAMEMBERT_PRETRAINED_CONFIG_ARCHIVE_MAP model_type = "camembert" diff --git a/src/transformers/configuration_ctrl.py b/src/transformers/configuration_ctrl.py index 4daba2a97a..68e75e6adf 100644 --- a/src/transformers/configuration_ctrl.py +++ b/src/transformers/configuration_ctrl.py @@ -27,7 +27,7 @@ CTRL_PRETRAINED_CONFIG_ARCHIVE_MAP = {"ctrl": "https://storage.googleapis.com/sf class CTRLConfig(PretrainedConfig): """ - This is the configuration class to store the configuration of an :class:`~transformers.CTRLModel`. + This is the configuration class to store the configuration of a :class:`~transformers.CTRLModel`. It is used to instantiate an CTRL model according to the specified arguments, defining the model architecture. Instantiating a configuration with the defaults will yield a similar configuration to that of the `ctrl `__ architecture from SalesForce. @@ -76,13 +76,8 @@ class CTRLConfig(PretrainedConfig): # Accessing the model configuration configuration = model.config - - Attributes: - pretrained_config_archive_map (Dict[str, str]): - A dictionary containing all the available pre-trained checkpoints. 
""" - pretrained_config_archive_map = CTRL_PRETRAINED_CONFIG_ARCHIVE_MAP model_type = "ctrl" def __init__( diff --git a/src/transformers/configuration_distilbert.py b/src/transformers/configuration_distilbert.py index 2bb838bd98..09f37d9638 100644 --- a/src/transformers/configuration_distilbert.py +++ b/src/transformers/configuration_distilbert.py @@ -90,12 +90,7 @@ class DistilBertConfig(PretrainedConfig): # Accessing the model configuration configuration = model.config - - Attributes: - pretrained_config_archive_map (Dict[str, str]): - A dictionary containing all the available pre-trained checkpoints. """ - pretrained_config_archive_map = DISTILBERT_PRETRAINED_CONFIG_ARCHIVE_MAP model_type = "distilbert" def __init__( diff --git a/src/transformers/configuration_electra.py b/src/transformers/configuration_electra.py index 8cfba54be0..c673e601ca 100644 --- a/src/transformers/configuration_electra.py +++ b/src/transformers/configuration_electra.py @@ -89,12 +89,7 @@ class ElectraConfig(PretrainedConfig): # Accessing the model configuration configuration = model.config - - Attributes: - pretrained_config_archive_map (Dict[str, str]): - A dictionary containing all the available pre-trained checkpoints. 
""" - pretrained_config_archive_map = ELECTRA_PRETRAINED_CONFIG_ARCHIVE_MAP model_type = "electra" def __init__( diff --git a/src/transformers/configuration_flaubert.py b/src/transformers/configuration_flaubert.py index c807f63d38..7b3cc886b2 100644 --- a/src/transformers/configuration_flaubert.py +++ b/src/transformers/configuration_flaubert.py @@ -23,10 +23,10 @@ from .configuration_xlm import XLMConfig logger = logging.getLogger(__name__) FLAUBERT_PRETRAINED_CONFIG_ARCHIVE_MAP = { - "flaubert-small-cased": "https://s3.amazonaws.com/models.huggingface.co/bert/flaubert/flaubert_small_cased/config.json", - "flaubert-base-uncased": "https://s3.amazonaws.com/models.huggingface.co/bert/flaubert/flaubert_base_uncased/config.json", - "flaubert-base-cased": "https://s3.amazonaws.com/models.huggingface.co/bert/flaubert/flaubert_base_cased/config.json", - "flaubert-large-cased": "https://s3.amazonaws.com/models.huggingface.co/bert/flaubert/flaubert_large_cased/config.json", + "flaubert/flaubert_small_cased": "https://s3.amazonaws.com/models.huggingface.co/bert/flaubert/flaubert_small_cased/config.json", + "flaubert/flaubert_base_uncased": "https://s3.amazonaws.com/models.huggingface.co/bert/flaubert/flaubert_base_uncased/config.json", + "flaubert/flaubert_base_cased": "https://s3.amazonaws.com/models.huggingface.co/bert/flaubert/flaubert_base_cased/config.json", + "flaubert/flaubert_large_cased": "https://s3.amazonaws.com/models.huggingface.co/bert/flaubert/flaubert_large_cased/config.json", } @@ -142,7 +142,6 @@ class FlaubertConfig(XLMConfig): text in a given language. 
""" - pretrained_config_archive_map = FLAUBERT_PRETRAINED_CONFIG_ARCHIVE_MAP model_type = "flaubert" def __init__(self, layerdrop=0.0, pre_norm=False, pad_token_id=2, bos_token_id=0, **kwargs): diff --git a/src/transformers/configuration_gpt2.py b/src/transformers/configuration_gpt2.py index 0e85a91821..0d282637dd 100644 --- a/src/transformers/configuration_gpt2.py +++ b/src/transformers/configuration_gpt2.py @@ -110,13 +110,8 @@ class GPT2Config(PretrainedConfig): # Accessing the model configuration configuration = model.config - - Attributes: - pretrained_config_archive_map (Dict[str, str]): - A dictionary containing all the available pre-trained checkpoints. """ - pretrained_config_archive_map = GPT2_PRETRAINED_CONFIG_ARCHIVE_MAP model_type = "gpt2" def __init__( diff --git a/src/transformers/configuration_longformer.py b/src/transformers/configuration_longformer.py index bccdc6c6e7..e8daf49e11 100644 --- a/src/transformers/configuration_longformer.py +++ b/src/transformers/configuration_longformer.py @@ -33,7 +33,7 @@ LONGFORMER_PRETRAINED_CONFIG_ARCHIVE_MAP = { class LongformerConfig(RobertaConfig): r""" - This is the configuration class to store the configuration of an :class:`~transformers.LongformerModel`. + This is the configuration class to store the configuration of a :class:`~transformers.LongformerModel`. It is used to instantiate an Longformer model according to the specified arguments, defining the model architecture. Instantiating a configuration with the defaults will yield a similar configuration to that of the RoBERTa `roberta-base `__ architecture with a sequence length 4,096. @@ -59,12 +59,7 @@ class LongformerConfig(RobertaConfig): # Accessing the model configuration configuration = model.config - - Attributes: - pretrained_config_archive_map (Dict[str, str]): - A dictionary containing all the available pre-trained checkpoints. 
""" - pretrained_config_archive_map = LONGFORMER_PRETRAINED_CONFIG_ARCHIVE_MAP model_type = "longformer" def __init__(self, attention_window: Union[List[int], int] = 512, sep_token_id: int = 2, **kwargs): diff --git a/src/transformers/configuration_marian.py b/src/transformers/configuration_marian.py index c792c5de3c..019f4948d5 100644 --- a/src/transformers/configuration_marian.py +++ b/src/transformers/configuration_marian.py @@ -18,10 +18,9 @@ from .configuration_bart import BartConfig PRETRAINED_CONFIG_ARCHIVE_MAP = { - "marian-en-de": "https://s3.amazonaws.com/models.huggingface.co/bert/Helsinki-NLP/opus-mt-en-de/config.json", + "Helsinki-NLP/opus-mt-en-de": "https://s3.amazonaws.com/models.huggingface.co/bert/Helsinki-NLP/opus-mt-en-de/config.json", } class MarianConfig(BartConfig): model_type = "marian" - pretrained_config_archive_map = PRETRAINED_CONFIG_ARCHIVE_MAP diff --git a/src/transformers/configuration_openai.py b/src/transformers/configuration_openai.py index 528558144a..39080196a1 100644 --- a/src/transformers/configuration_openai.py +++ b/src/transformers/configuration_openai.py @@ -30,7 +30,7 @@ OPENAI_GPT_PRETRAINED_CONFIG_ARCHIVE_MAP = { class OpenAIGPTConfig(PretrainedConfig): """ - This is the configuration class to store the configuration of an :class:`~transformers.OpenAIGPTModel`. + This is the configuration class to store the configuration of a :class:`~transformers.OpenAIGPTModel`. It is used to instantiate an GPT model according to the specified arguments, defining the model architecture. Instantiating a configuration with the defaults will yield a similar configuration to that of the `GPT `__ architecture from OpenAI. @@ -108,13 +108,8 @@ class OpenAIGPTConfig(PretrainedConfig): # Accessing the model configuration configuration = model.config - - Attributes: - pretrained_config_archive_map (Dict[str, str]): - A dictionary containing all the available pre-trained checkpoints. 
""" - pretrained_config_archive_map = OPENAI_GPT_PRETRAINED_CONFIG_ARCHIVE_MAP model_type = "openai-gpt" def __init__( diff --git a/src/transformers/configuration_reformer.py b/src/transformers/configuration_reformer.py index 04b50ffffc..2deabcaf94 100644 --- a/src/transformers/configuration_reformer.py +++ b/src/transformers/configuration_reformer.py @@ -135,12 +135,7 @@ class ReformerConfig(PretrainedConfig): # Accessing the model configuration configuration = model.config - - Attributes: - pretrained_config_archive_map (Dict[str, str]): - A dictionary containing all the available pre-trained checkpoints. """ - pretrained_config_archive_map = REFORMER_PRETRAINED_CONFIG_ARCHIVE_MAP model_type = "reformer" def __init__( diff --git a/src/transformers/configuration_roberta.py b/src/transformers/configuration_roberta.py index 80bb34e77b..a0ac8dcc96 100644 --- a/src/transformers/configuration_roberta.py +++ b/src/transformers/configuration_roberta.py @@ -35,7 +35,7 @@ ROBERTA_PRETRAINED_CONFIG_ARCHIVE_MAP = { class RobertaConfig(BertConfig): r""" - This is the configuration class to store the configuration of an :class:`~transformers.RobertaModel`. + This is the configuration class to store the configuration of a :class:`~transformers.RobertaModel`. It is used to instantiate an RoBERTa model according to the specified arguments, defining the model architecture. Instantiating a configuration with the defaults will yield a similar configuration to that of the BERT `bert-base-uncased `__ architecture. @@ -59,12 +59,7 @@ class RobertaConfig(BertConfig): # Accessing the model configuration configuration = model.config - - Attributes: - pretrained_config_archive_map (Dict[str, str]): - A dictionary containing all the available pre-trained checkpoints. 
""" - pretrained_config_archive_map = ROBERTA_PRETRAINED_CONFIG_ARCHIVE_MAP model_type = "roberta" def __init__(self, pad_token_id=1, bos_token_id=0, eos_token_id=2, **kwargs): diff --git a/src/transformers/configuration_t5.py b/src/transformers/configuration_t5.py index a3098ef4f2..05c1d87b88 100644 --- a/src/transformers/configuration_t5.py +++ b/src/transformers/configuration_t5.py @@ -59,7 +59,6 @@ class T5Config(PretrainedConfig): initializer_factor: A factor for initializing all weight matrices (should be kept to 1.0, used for initialization testing). layer_norm_eps: The epsilon used by LayerNorm. """ - pretrained_config_archive_map = T5_PRETRAINED_CONFIG_ARCHIVE_MAP model_type = "t5" def __init__( diff --git a/src/transformers/configuration_transfo_xl.py b/src/transformers/configuration_transfo_xl.py index 2e484d327c..8e26616696 100644 --- a/src/transformers/configuration_transfo_xl.py +++ b/src/transformers/configuration_transfo_xl.py @@ -30,7 +30,7 @@ TRANSFO_XL_PRETRAINED_CONFIG_ARCHIVE_MAP = { class TransfoXLConfig(PretrainedConfig): """ - This is the configuration class to store the configuration of an :class:`~transformers.TransfoXLModel`. + This is the configuration class to store the configuration of a :class:`~transformers.TransfoXLModel`. It is used to instantiate a Transformer XL model according to the specified arguments, defining the model architecture. Instantiating a configuration with the defaults will yield a similar configuration to that of the `Transformer XL `__ architecture. @@ -110,13 +110,8 @@ class TransfoXLConfig(PretrainedConfig): # Accessing the model configuration configuration = model.config - - Attributes: - pretrained_config_archive_map (Dict[str, str]): - A dictionary containing all the available pre-trained checkpoints. 
""" - pretrained_config_archive_map = TRANSFO_XL_PRETRAINED_CONFIG_ARCHIVE_MAP model_type = "transfo-xl" def __init__( diff --git a/src/transformers/configuration_utils.py b/src/transformers/configuration_utils.py index 56a1a16925..a411e1d730 100644 --- a/src/transformers/configuration_utils.py +++ b/src/transformers/configuration_utils.py @@ -20,7 +20,7 @@ import copy import json import logging import os -from typing import Dict, Optional, Tuple +from typing import Dict, Tuple from .file_utils import CONFIG_NAME, cached_path, hf_bucket_url, is_remote_url @@ -37,7 +37,6 @@ class PretrainedConfig(object): It only affects the model's configuration. Class attributes (overridden by derived classes): - - ``pretrained_config_archive_map``: a python ``dict`` with `shortcut names` (string) as keys and `url` (string) of associated pretrained model configurations as values. - ``model_type``: a string that identifies the model type, that we serialize into the JSON file, and that we use to recreate the correct object in :class:`~transformers.AutoConfig`. Args: @@ -52,7 +51,6 @@ class PretrainedConfig(object): torchscript (:obj:`bool`, `optional`, defaults to :obj:`False`): Is the model used with Torchscript (for PyTorch models). """ - pretrained_config_archive_map: Dict[str, str] = {} model_type: str = "" def __init__(self, **kwargs): @@ -204,9 +202,7 @@ class PretrainedConfig(object): return cls.from_dict(config_dict, **kwargs) @classmethod - def get_config_dict( - cls, pretrained_model_name_or_path: str, pretrained_config_archive_map: Optional[Dict] = None, **kwargs - ) -> Tuple[Dict, Dict]: + def get_config_dict(cls, pretrained_model_name_or_path: str, **kwargs) -> Tuple[Dict, Dict]: """ From a `pretrained_model_name_or_path`, resolve to a dictionary of parameters, to be used for instantiating a Config using `from_dict`. 
@@ -214,8 +210,6 @@ class PretrainedConfig(object): Parameters: pretrained_model_name_or_path (:obj:`string`): The identifier of the pre-trained checkpoint from which we want the dictionary of parameters. - pretrained_config_archive_map: (:obj:`Dict[str, str]`, `optional`) Dict: - A map of `shortcut names` to `url`. By default, will use the current class attribute. Returns: :obj:`Tuple[Dict, Dict]`: The dictionary that will be used to instantiate the configuration object. @@ -227,12 +221,7 @@ class PretrainedConfig(object): proxies = kwargs.pop("proxies", None) local_files_only = kwargs.pop("local_files_only", False) - if pretrained_config_archive_map is None: - pretrained_config_archive_map = cls.pretrained_config_archive_map - - if pretrained_model_name_or_path in pretrained_config_archive_map: - config_file = pretrained_config_archive_map[pretrained_model_name_or_path] - elif os.path.isdir(pretrained_model_name_or_path): + if os.path.isdir(pretrained_model_name_or_path): config_file = os.path.join(pretrained_model_name_or_path, CONFIG_NAME) elif os.path.isfile(pretrained_model_name_or_path) or is_remote_url(pretrained_model_name_or_path): config_file = pretrained_model_name_or_path @@ -255,21 +244,11 @@ class PretrainedConfig(object): config_dict = cls._dict_from_json_file(resolved_config_file) except EnvironmentError: - if pretrained_model_name_or_path in pretrained_config_archive_map: - msg = "Couldn't reach server at '{}' to download pretrained model configuration file.".format( - config_file - ) - else: - msg = ( - "Can't load '{}'. Make sure that:\n\n" - "- '{}' is a correct model identifier listed on 'https://huggingface.co/models'\n\n" - "- or '{}' is the correct path to a directory containing a '{}' file\n\n".format( - pretrained_model_name_or_path, - pretrained_model_name_or_path, - pretrained_model_name_or_path, - CONFIG_NAME, - ) - ) + msg = ( + f"Can't load config for '{pretrained_model_name_or_path}'. 
Make sure that:\n\n" + f"- '{pretrained_model_name_or_path}' is a correct model identifier listed on 'https://huggingface.co/models'\n\n" + f"- or '{pretrained_model_name_or_path}' is the correct path to a directory containing a {CONFIG_NAME} file\n\n" + ) raise EnvironmentError(msg) except json.JSONDecodeError: diff --git a/src/transformers/configuration_xlm.py b/src/transformers/configuration_xlm.py index 73fbd99a19..1655119f41 100644 --- a/src/transformers/configuration_xlm.py +++ b/src/transformers/configuration_xlm.py @@ -152,13 +152,8 @@ class XLMConfig(PretrainedConfig): # Accessing the model configuration configuration = model.config - - Attributes: - pretrained_config_archive_map (Dict[str, str]): - A dictionary containing all the available pre-trained checkpoints. """ - pretrained_config_archive_map = XLM_PRETRAINED_CONFIG_ARCHIVE_MAP model_type = "xlm" def __init__( diff --git a/src/transformers/configuration_xlm_roberta.py b/src/transformers/configuration_xlm_roberta.py index 330bc0d41f..3a4a0c5185 100644 --- a/src/transformers/configuration_xlm_roberta.py +++ b/src/transformers/configuration_xlm_roberta.py @@ -39,5 +39,4 @@ class XLMRobertaConfig(RobertaConfig): superclass for the appropriate documentation alongside usage examples. """ - pretrained_config_archive_map = XLM_ROBERTA_PRETRAINED_CONFIG_ARCHIVE_MAP model_type = "xlm-roberta" diff --git a/src/transformers/configuration_xlnet.py b/src/transformers/configuration_xlnet.py index 109d74fb25..cfd09bdd84 100644 --- a/src/transformers/configuration_xlnet.py +++ b/src/transformers/configuration_xlnet.py @@ -122,13 +122,8 @@ class XLNetConfig(PretrainedConfig): # Accessing the model configuration configuration = model.config - - Attributes: - pretrained_config_archive_map (Dict[str, str]): - A dictionary containing all the available pre-trained checkpoints. 
""" - pretrained_config_archive_map = XLNET_PRETRAINED_CONFIG_ARCHIVE_MAP model_type = "xlnet" def __init__( diff --git a/src/transformers/convert_pytorch_checkpoint_to_tf2.py b/src/transformers/convert_pytorch_checkpoint_to_tf2.py index 084450de21..91dd2e8908 100755 --- a/src/transformers/convert_pytorch_checkpoint_to_tf2.py +++ b/src/transformers/convert_pytorch_checkpoint_to_tf2.py @@ -32,6 +32,7 @@ from transformers import ( ROBERTA_PRETRAINED_CONFIG_ARCHIVE_MAP, T5_PRETRAINED_CONFIG_ARCHIVE_MAP, TRANSFO_XL_PRETRAINED_CONFIG_ARCHIVE_MAP, + WEIGHTS_NAME, XLM_PRETRAINED_CONFIG_ARCHIVE_MAP, XLM_ROBERTA_PRETRAINED_CONFIG_ARCHIVE_MAP, XLNET_PRETRAINED_CONFIG_ARCHIVE_MAP, @@ -70,6 +71,7 @@ from transformers import ( XLMRobertaConfig, XLNetConfig, cached_path, + hf_bucket_url, is_torch_available, load_pytorch_checkpoint_in_tf2_model, ) @@ -82,261 +84,103 @@ if is_torch_available(): BertForPreTraining, BertForQuestionAnswering, BertForSequenceClassification, - BERT_PRETRAINED_MODEL_ARCHIVE_MAP, GPT2LMHeadModel, - GPT2_PRETRAINED_MODEL_ARCHIVE_MAP, XLNetLMHeadModel, - XLNET_PRETRAINED_MODEL_ARCHIVE_MAP, XLMWithLMHeadModel, - XLM_PRETRAINED_MODEL_ARCHIVE_MAP, - XLM_ROBERTA_PRETRAINED_MODEL_ARCHIVE_MAP, XLMRobertaForMaskedLM, TransfoXLLMHeadModel, - TRANSFO_XL_PRETRAINED_MODEL_ARCHIVE_MAP, OpenAIGPTLMHeadModel, - OPENAI_GPT_PRETRAINED_MODEL_ARCHIVE_MAP, RobertaForMaskedLM, RobertaForSequenceClassification, - ROBERTA_PRETRAINED_MODEL_ARCHIVE_MAP, CamembertForMaskedLM, - CamembertForSequenceClassification, - CAMEMBERT_PRETRAINED_MODEL_ARCHIVE_MAP, - FLAUBERT_PRETRAINED_MODEL_ARCHIVE_MAP, FlaubertWithLMHeadModel, DistilBertForMaskedLM, DistilBertForQuestionAnswering, - DistilBertForSequenceClassification, - DISTILBERT_PRETRAINED_MODEL_ARCHIVE_MAP, CTRLLMHeadModel, - CTRL_PRETRAINED_MODEL_ARCHIVE_MAP, AlbertForPreTraining, - ALBERT_PRETRAINED_MODEL_ARCHIVE_MAP, T5ForConditionalGeneration, - T5_PRETRAINED_MODEL_ARCHIVE_MAP, ElectraForPreTraining, - 
ELECTRA_PRETRAINED_MODEL_ARCHIVE_MAP, - ) -else: - ( - BertForPreTraining, - BertForQuestionAnswering, - BertForSequenceClassification, - BERT_PRETRAINED_MODEL_ARCHIVE_MAP, - GPT2LMHeadModel, - GPT2_PRETRAINED_MODEL_ARCHIVE_MAP, - XLNetLMHeadModel, - XLNET_PRETRAINED_MODEL_ARCHIVE_MAP, - XLMWithLMHeadModel, - XLM_PRETRAINED_MODEL_ARCHIVE_MAP, - XLM_ROBERTA_PRETRAINED_MODEL_ARCHIVE_MAP, - XLMRobertaForMaskedLM, - TransfoXLLMHeadModel, - TRANSFO_XL_PRETRAINED_MODEL_ARCHIVE_MAP, - OpenAIGPTLMHeadModel, - OPENAI_GPT_PRETRAINED_MODEL_ARCHIVE_MAP, - RobertaForMaskedLM, - RobertaForSequenceClassification, - ROBERTA_PRETRAINED_MODEL_ARCHIVE_MAP, - CamembertForMaskedLM, - CamembertForSequenceClassification, - CAMEMBERT_PRETRAINED_MODEL_ARCHIVE_MAP, - FLAUBERT_PRETRAINED_MODEL_ARCHIVE_MAP, - FlaubertWithLMHeadModel, - DistilBertForMaskedLM, - DistilBertForSequenceClassification, - DistilBertForQuestionAnswering, - DISTILBERT_PRETRAINED_MODEL_ARCHIVE_MAP, - CTRLLMHeadModel, - CTRL_PRETRAINED_MODEL_ARCHIVE_MAP, - AlbertForPreTraining, - ALBERT_PRETRAINED_MODEL_ARCHIVE_MAP, - T5ForConditionalGeneration, - T5_PRETRAINED_MODEL_ARCHIVE_MAP, - ElectraForPreTraining, - ELECTRA_PRETRAINED_MODEL_ARCHIVE_MAP, - ) = ( - None, - None, - None, - None, - None, - None, - None, - None, - None, - None, - None, - None, - None, - None, - None, - None, - None, - None, - None, - None, - None, - None, - None, - None, - None, - None, - None, - None, - None, - None, - None, - None, - None, - None, - None, - None, ) logging.basicConfig(level=logging.INFO) MODEL_CLASSES = { - "bert": ( - BertConfig, - TFBertForPreTraining, - BertForPreTraining, - BERT_PRETRAINED_MODEL_ARCHIVE_MAP, - BERT_PRETRAINED_CONFIG_ARCHIVE_MAP, - ), + "bert": (BertConfig, TFBertForPreTraining, BertForPreTraining, BERT_PRETRAINED_CONFIG_ARCHIVE_MAP,), "bert-large-uncased-whole-word-masking-finetuned-squad": ( BertConfig, TFBertForQuestionAnswering, BertForQuestionAnswering, - BERT_PRETRAINED_MODEL_ARCHIVE_MAP, 
BERT_PRETRAINED_CONFIG_ARCHIVE_MAP, ), "bert-large-cased-whole-word-masking-finetuned-squad": ( BertConfig, TFBertForQuestionAnswering, BertForQuestionAnswering, - BERT_PRETRAINED_MODEL_ARCHIVE_MAP, BERT_PRETRAINED_CONFIG_ARCHIVE_MAP, ), "bert-base-cased-finetuned-mrpc": ( BertConfig, TFBertForSequenceClassification, BertForSequenceClassification, - BERT_PRETRAINED_MODEL_ARCHIVE_MAP, BERT_PRETRAINED_CONFIG_ARCHIVE_MAP, ), - "gpt2": ( - GPT2Config, - TFGPT2LMHeadModel, - GPT2LMHeadModel, - GPT2_PRETRAINED_MODEL_ARCHIVE_MAP, - GPT2_PRETRAINED_CONFIG_ARCHIVE_MAP, - ), - "xlnet": ( - XLNetConfig, - TFXLNetLMHeadModel, - XLNetLMHeadModel, - XLNET_PRETRAINED_MODEL_ARCHIVE_MAP, - XLNET_PRETRAINED_CONFIG_ARCHIVE_MAP, - ), - "xlm": ( - XLMConfig, - TFXLMWithLMHeadModel, - XLMWithLMHeadModel, - XLM_PRETRAINED_MODEL_ARCHIVE_MAP, - XLM_PRETRAINED_CONFIG_ARCHIVE_MAP, - ), + "gpt2": (GPT2Config, TFGPT2LMHeadModel, GPT2LMHeadModel, GPT2_PRETRAINED_CONFIG_ARCHIVE_MAP,), + "xlnet": (XLNetConfig, TFXLNetLMHeadModel, XLNetLMHeadModel, XLNET_PRETRAINED_CONFIG_ARCHIVE_MAP,), + "xlm": (XLMConfig, TFXLMWithLMHeadModel, XLMWithLMHeadModel, XLM_PRETRAINED_CONFIG_ARCHIVE_MAP,), "xlm-roberta": ( XLMRobertaConfig, TFXLMRobertaForMaskedLM, XLMRobertaForMaskedLM, - XLM_ROBERTA_PRETRAINED_MODEL_ARCHIVE_MAP, XLM_ROBERTA_PRETRAINED_CONFIG_ARCHIVE_MAP, ), "transfo-xl": ( TransfoXLConfig, TFTransfoXLLMHeadModel, TransfoXLLMHeadModel, - TRANSFO_XL_PRETRAINED_MODEL_ARCHIVE_MAP, TRANSFO_XL_PRETRAINED_CONFIG_ARCHIVE_MAP, ), "openai-gpt": ( OpenAIGPTConfig, TFOpenAIGPTLMHeadModel, OpenAIGPTLMHeadModel, - OPENAI_GPT_PRETRAINED_MODEL_ARCHIVE_MAP, OPENAI_GPT_PRETRAINED_CONFIG_ARCHIVE_MAP, ), - "roberta": ( - RobertaConfig, - TFRobertaForMaskedLM, - RobertaForMaskedLM, - ROBERTA_PRETRAINED_MODEL_ARCHIVE_MAP, - ROBERTA_PRETRAINED_CONFIG_ARCHIVE_MAP, - ), + "roberta": (RobertaConfig, TFRobertaForMaskedLM, RobertaForMaskedLM, ROBERTA_PRETRAINED_CONFIG_ARCHIVE_MAP,), "roberta-large-mnli": ( RobertaConfig, 
TFRobertaForSequenceClassification, RobertaForSequenceClassification, - ROBERTA_PRETRAINED_MODEL_ARCHIVE_MAP, ROBERTA_PRETRAINED_CONFIG_ARCHIVE_MAP, ), "camembert": ( CamembertConfig, TFCamembertForMaskedLM, CamembertForMaskedLM, - CAMEMBERT_PRETRAINED_MODEL_ARCHIVE_MAP, CAMEMBERT_PRETRAINED_CONFIG_ARCHIVE_MAP, ), "flaubert": ( FlaubertConfig, TFFlaubertWithLMHeadModel, FlaubertWithLMHeadModel, - FLAUBERT_PRETRAINED_MODEL_ARCHIVE_MAP, FLAUBERT_PRETRAINED_CONFIG_ARCHIVE_MAP, ), "distilbert": ( DistilBertConfig, TFDistilBertForMaskedLM, DistilBertForMaskedLM, - DISTILBERT_PRETRAINED_MODEL_ARCHIVE_MAP, DISTILBERT_PRETRAINED_CONFIG_ARCHIVE_MAP, ), "distilbert-base-distilled-squad": ( DistilBertConfig, TFDistilBertForQuestionAnswering, DistilBertForQuestionAnswering, - DISTILBERT_PRETRAINED_MODEL_ARCHIVE_MAP, DISTILBERT_PRETRAINED_CONFIG_ARCHIVE_MAP, ), - "ctrl": ( - CTRLConfig, - TFCTRLLMHeadModel, - CTRLLMHeadModel, - CTRL_PRETRAINED_MODEL_ARCHIVE_MAP, - CTRL_PRETRAINED_CONFIG_ARCHIVE_MAP, - ), - "albert": ( - AlbertConfig, - TFAlbertForPreTraining, - AlbertForPreTraining, - ALBERT_PRETRAINED_MODEL_ARCHIVE_MAP, - ALBERT_PRETRAINED_CONFIG_ARCHIVE_MAP, - ), - "t5": ( - T5Config, - TFT5ForConditionalGeneration, - T5ForConditionalGeneration, - T5_PRETRAINED_MODEL_ARCHIVE_MAP, - T5_PRETRAINED_CONFIG_ARCHIVE_MAP, - ), - "electra": ( - ElectraConfig, - TFElectraForPreTraining, - ElectraForPreTraining, - ELECTRA_PRETRAINED_MODEL_ARCHIVE_MAP, - ELECTRA_PRETRAINED_CONFIG_ARCHIVE_MAP, - ), + "ctrl": (CTRLConfig, TFCTRLLMHeadModel, CTRLLMHeadModel, CTRL_PRETRAINED_CONFIG_ARCHIVE_MAP,), + "albert": (AlbertConfig, TFAlbertForPreTraining, AlbertForPreTraining, ALBERT_PRETRAINED_CONFIG_ARCHIVE_MAP,), + "t5": (T5Config, TFT5ForConditionalGeneration, T5ForConditionalGeneration, T5_PRETRAINED_CONFIG_ARCHIVE_MAP,), + "electra": (ElectraConfig, TFElectraForPreTraining, ElectraForPreTraining, ELECTRA_PRETRAINED_CONFIG_ARCHIVE_MAP,), } @@ -346,7 +190,7 @@ def convert_pt_checkpoint_to_tf( if 
model_type not in MODEL_CLASSES: raise ValueError("Unrecognized model type, should be one of {}.".format(list(MODEL_CLASSES.keys()))) - config_class, model_class, pt_model_class, aws_model_maps, aws_config_map = MODEL_CLASSES[model_type] + config_class, model_class, pt_model_class, aws_config_map = MODEL_CLASSES[model_type] # Initialise TF model if config_file in aws_config_map: @@ -358,10 +202,9 @@ def convert_pt_checkpoint_to_tf( tf_model = model_class(config) # Load weights from tf checkpoint - if pytorch_checkpoint_path in aws_model_maps: - pytorch_checkpoint_path = cached_path( - aws_model_maps[pytorch_checkpoint_path], force_download=not use_cached_models - ) + if pytorch_checkpoint_path in aws_config_map.keys(): + pytorch_checkpoint_url = hf_bucket_url(pytorch_checkpoint_path, filename=WEIGHTS_NAME) + pytorch_checkpoint_path = cached_path(pytorch_checkpoint_url, force_download=not use_cached_models) # Load PyTorch checkpoint in tf2 model: tf_model = load_pytorch_checkpoint_in_tf2_model(tf_model, pytorch_checkpoint_path) diff --git a/src/transformers/modeling_albert.py b/src/transformers/modeling_albert.py index 4461c888a5..2cf4b34f24 100644 --- a/src/transformers/modeling_albert.py +++ b/src/transformers/modeling_albert.py @@ -31,16 +31,17 @@ from .modeling_utils import PreTrainedModel logger = logging.getLogger(__name__) -ALBERT_PRETRAINED_MODEL_ARCHIVE_MAP = { - "albert-base-v1": "https://cdn.huggingface.co/albert-base-v1-pytorch_model.bin", - "albert-large-v1": "https://cdn.huggingface.co/albert-large-v1-pytorch_model.bin", - "albert-xlarge-v1": "https://cdn.huggingface.co/albert-xlarge-v1-pytorch_model.bin", - "albert-xxlarge-v1": "https://cdn.huggingface.co/albert-xxlarge-v1-pytorch_model.bin", - "albert-base-v2": "https://cdn.huggingface.co/albert-base-v2-pytorch_model.bin", - "albert-large-v2": "https://cdn.huggingface.co/albert-large-v2-pytorch_model.bin", - "albert-xlarge-v2": "https://cdn.huggingface.co/albert-xlarge-v2-pytorch_model.bin", - 
"albert-xxlarge-v2": "https://cdn.huggingface.co/albert-xxlarge-v2-pytorch_model.bin", -} +ALBERT_PRETRAINED_MODEL_ARCHIVE_LIST = [ + "albert-base-v1", + "albert-large-v1", + "albert-xlarge-v1", + "albert-xxlarge-v1", + "albert-base-v2", + "albert-large-v2", + "albert-xlarge-v2", + "albert-xxlarge-v2", + # See all ALBERT models at https://huggingface.co/models?filter=albert +] def load_tf_weights_in_albert(model, config, tf_checkpoint_path): @@ -365,7 +366,6 @@ class AlbertPreTrainedModel(PreTrainedModel): """ config_class = AlbertConfig - pretrained_model_archive_map = ALBERT_PRETRAINED_MODEL_ARCHIVE_MAP base_model_prefix = "albert" def _init_weights(self, module): @@ -439,7 +439,6 @@ ALBERT_INPUTS_DOCSTRING = r""" class AlbertModel(AlbertPreTrainedModel): config_class = AlbertConfig - pretrained_model_archive_map = ALBERT_PRETRAINED_MODEL_ARCHIVE_MAP load_tf_weights = load_tf_weights_in_albert base_model_prefix = "albert" diff --git a/src/transformers/modeling_auto.py b/src/transformers/modeling_auto.py index de97af688c..278bdbb39c 100644 --- a/src/transformers/modeling_auto.py +++ b/src/transformers/modeling_auto.py @@ -43,7 +43,6 @@ from .configuration_auto import ( from .configuration_marian import MarianConfig from .configuration_utils import PretrainedConfig from .modeling_albert import ( - ALBERT_PRETRAINED_MODEL_ARCHIVE_MAP, AlbertForMaskedLM, AlbertForPreTraining, AlbertForQuestionAnswering, @@ -51,14 +50,8 @@ from .modeling_albert import ( AlbertForTokenClassification, AlbertModel, ) -from .modeling_bart import ( - BART_PRETRAINED_MODEL_ARCHIVE_MAP, - BartForConditionalGeneration, - BartForSequenceClassification, - BartModel, -) +from .modeling_bart import BartForConditionalGeneration, BartForSequenceClassification, BartModel from .modeling_bert import ( - BERT_PRETRAINED_MODEL_ARCHIVE_MAP, BertForMaskedLM, BertForMultipleChoice, BertForPreTraining, @@ -68,16 +61,14 @@ from .modeling_bert import ( BertModel, ) from .modeling_camembert import ( - 
CAMEMBERT_PRETRAINED_MODEL_ARCHIVE_MAP, CamembertForMaskedLM, CamembertForMultipleChoice, CamembertForSequenceClassification, CamembertForTokenClassification, CamembertModel, ) -from .modeling_ctrl import CTRL_PRETRAINED_MODEL_ARCHIVE_MAP, CTRLLMHeadModel, CTRLModel +from .modeling_ctrl import CTRLLMHeadModel, CTRLModel from .modeling_distilbert import ( - DISTILBERT_PRETRAINED_MODEL_ARCHIVE_MAP, DistilBertForMaskedLM, DistilBertForQuestionAnswering, DistilBertForSequenceClassification, @@ -85,7 +76,6 @@ from .modeling_distilbert import ( DistilBertModel, ) from .modeling_electra import ( - ELECTRA_PRETRAINED_MODEL_ARCHIVE_MAP, ElectraForMaskedLM, ElectraForPreTraining, ElectraForSequenceClassification, @@ -94,15 +84,13 @@ from .modeling_electra import ( ) from .modeling_encoder_decoder import EncoderDecoderModel from .modeling_flaubert import ( - FLAUBERT_PRETRAINED_MODEL_ARCHIVE_MAP, FlaubertForQuestionAnsweringSimple, FlaubertForSequenceClassification, FlaubertModel, FlaubertWithLMHeadModel, ) -from .modeling_gpt2 import GPT2_PRETRAINED_MODEL_ARCHIVE_MAP, GPT2LMHeadModel, GPT2Model +from .modeling_gpt2 import GPT2LMHeadModel, GPT2Model from .modeling_longformer import ( - LONGFORMER_PRETRAINED_MODEL_ARCHIVE_MAP, LongformerForMaskedLM, LongformerForMultipleChoice, LongformerForQuestionAnswering, @@ -111,10 +99,9 @@ from .modeling_longformer import ( LongformerModel, ) from .modeling_marian import MarianMTModel -from .modeling_openai import OPENAI_GPT_PRETRAINED_MODEL_ARCHIVE_MAP, OpenAIGPTLMHeadModel, OpenAIGPTModel +from .modeling_openai import OpenAIGPTLMHeadModel, OpenAIGPTModel from .modeling_reformer import ReformerModel, ReformerModelWithLMHead from .modeling_roberta import ( - ROBERTA_PRETRAINED_MODEL_ARCHIVE_MAP, RobertaForMaskedLM, RobertaForMultipleChoice, RobertaForQuestionAnswering, @@ -122,10 +109,9 @@ from .modeling_roberta import ( RobertaForTokenClassification, RobertaModel, ) -from .modeling_t5 import T5_PRETRAINED_MODEL_ARCHIVE_MAP, 
T5ForConditionalGeneration, T5Model -from .modeling_transfo_xl import TRANSFO_XL_PRETRAINED_MODEL_ARCHIVE_MAP, TransfoXLLMHeadModel, TransfoXLModel +from .modeling_t5 import T5ForConditionalGeneration, T5Model +from .modeling_transfo_xl import TransfoXLLMHeadModel, TransfoXLModel from .modeling_xlm import ( - XLM_PRETRAINED_MODEL_ARCHIVE_MAP, XLMForQuestionAnsweringSimple, XLMForSequenceClassification, XLMForTokenClassification, @@ -133,7 +119,6 @@ from .modeling_xlm import ( XLMWithLMHeadModel, ) from .modeling_xlm_roberta import ( - XLM_ROBERTA_PRETRAINED_MODEL_ARCHIVE_MAP, XLMRobertaForMaskedLM, XLMRobertaForMultipleChoice, XLMRobertaForSequenceClassification, @@ -141,7 +126,6 @@ from .modeling_xlm_roberta import ( XLMRobertaModel, ) from .modeling_xlnet import ( - XLNET_PRETRAINED_MODEL_ARCHIVE_MAP, XLNetForMultipleChoice, XLNetForQuestionAnsweringSimple, XLNetForSequenceClassification, @@ -154,30 +138,6 @@ from .modeling_xlnet import ( logger = logging.getLogger(__name__) -ALL_PRETRAINED_MODEL_ARCHIVE_MAP = dict( - (key, value) - for pretrained_map in [ - BERT_PRETRAINED_MODEL_ARCHIVE_MAP, - BART_PRETRAINED_MODEL_ARCHIVE_MAP, - OPENAI_GPT_PRETRAINED_MODEL_ARCHIVE_MAP, - TRANSFO_XL_PRETRAINED_MODEL_ARCHIVE_MAP, - GPT2_PRETRAINED_MODEL_ARCHIVE_MAP, - CTRL_PRETRAINED_MODEL_ARCHIVE_MAP, - XLNET_PRETRAINED_MODEL_ARCHIVE_MAP, - XLM_PRETRAINED_MODEL_ARCHIVE_MAP, - ROBERTA_PRETRAINED_MODEL_ARCHIVE_MAP, - DISTILBERT_PRETRAINED_MODEL_ARCHIVE_MAP, - ALBERT_PRETRAINED_MODEL_ARCHIVE_MAP, - CAMEMBERT_PRETRAINED_MODEL_ARCHIVE_MAP, - T5_PRETRAINED_MODEL_ARCHIVE_MAP, - FLAUBERT_PRETRAINED_MODEL_ARCHIVE_MAP, - XLM_ROBERTA_PRETRAINED_MODEL_ARCHIVE_MAP, - ELECTRA_PRETRAINED_MODEL_ARCHIVE_MAP, - LONGFORMER_PRETRAINED_MODEL_ARCHIVE_MAP, - ] - for key, value, in pretrained_map.items() -) - MODEL_MAPPING = OrderedDict( [ (T5Config, T5Model), @@ -372,29 +332,26 @@ class AutoModel: The `from_pretrained()` method takes care of returning the correct model class instance based on the 
`model_type` property of the config object, or when it's missing, - falling back to using pattern matching on the `pretrained_model_name_or_path` string. + falling back to using pattern matching on the `pretrained_model_name_or_path` string: + - `t5`: :class:`~transformers.T5Model` (T5 model) + - `distilbert`: :class:`~transformers.DistilBertModel` (DistilBERT model) + - `albert`: :class:`~transformers.AlbertModel` (ALBERT model) + - `camembert`: :class:`~transformers.CamembertModel` (CamemBERT model) + - `xlm-roberta`: :class:`~transformers.XLMRobertaModel` (XLM-RoBERTa model) + - `longformer` :class:`~transformers.LongformerModel` (Longformer model) + - `roberta`: :class:`~transformers.RobertaModel` (RoBERTa model) + - `bert`: :class:`~transformers.BertModel` (Bert model) + - `openai-gpt`: :class:`~transformers.OpenAIGPTModel` (OpenAI GPT model) + - `gpt2`: :class:`~transformers.GPT2Model` (OpenAI GPT-2 model) + - `transfo-xl`: :class:`~transformers.TransfoXLModel` (Transformer-XL model) + - `xlnet`: :class:`~transformers.XLNetModel` (XLNet model) + - `xlm`: :class:`~transformers.XLMModel` (XLM model) + - `ctrl`: :class:`~transformers.CTRLModel` (Salesforce CTRL model) + - `flaubert`: :class:`~transformers.FlaubertModel` (Flaubert model) + - `electra`: :class:`~transformers.ElectraModel` (Electra model) - The base model class to instantiate is selected as the first pattern matching - in the `pretrained_model_name_or_path` string (in the following order): - - contains `t5`: :class:`~transformers.T5Model` (T5 model) - - contains `distilbert`: :class:`~transformers.DistilBertModel` (DistilBERT model) - - contains `albert`: :class:`~transformers.AlbertModel` (ALBERT model) - - contains `camembert`: :class:`~transformers.CamembertModel` (CamemBERT model) - - contains `xlm-roberta`: :class:`~transformers.XLMRobertaModel` (XLM-RoBERTa model) - - contains `longformer` :class:`~transformers.LongformerModel` (Longformer model) - - contains `roberta`: 
:class:`~transformers.RobertaModel` (RoBERTa model) - - contains `bert`: :class:`~transformers.BertModel` (Bert model) - - contains `openai-gpt`: :class:`~transformers.OpenAIGPTModel` (OpenAI GPT model) - - contains `gpt2`: :class:`~transformers.GPT2Model` (OpenAI GPT-2 model) - - contains `transfo-xl`: :class:`~transformers.TransfoXLModel` (Transformer-XL model) - - contains `xlnet`: :class:`~transformers.XLNetModel` (XLNet model) - - contains `xlm`: :class:`~transformers.XLMModel` (XLM model) - - contains `ctrl`: :class:`~transformers.CTRLModel` (Salesforce CTRL model) - - contains `flaubert`: :class:`~transformers.FlaubertModel` (Flaubert model) - - contains `electra`: :class:`~transformers.ElectraModel` (Electra model) - - The model is set in evaluation mode by default using `model.eval()` (Dropout modules are deactivated) - To train the model, you should first set it back in training mode with `model.train()` + The model is set in evaluation mode by default using `model.eval()` (Dropout modules are deactivated) + To train the model, you should first set it back in training mode with `model.train()` Args: pretrained_model_name_or_path: either: @@ -528,26 +485,23 @@ class AutoModelForPreTraining: The `from_pretrained()` method takes care of returning the correct model class instance based on the `model_type` property of the config object, or when it's missing, - falling back to using pattern matching on the `pretrained_model_name_or_path` string. 
- - The model class to instantiate is selected as the first pattern matching - in the `pretrained_model_name_or_path` string (in the following order): - - contains `t5`: :class:`~transformers.T5ModelWithLMHead` (T5 model) - - contains `distilbert`: :class:`~transformers.DistilBertForMaskedLM` (DistilBERT model) - - contains `albert`: :class:`~transformers.AlbertForMaskedLM` (ALBERT model) - - contains `camembert`: :class:`~transformers.CamembertForMaskedLM` (CamemBERT model) - - contains `xlm-roberta`: :class:`~transformers.XLMRobertaForMaskedLM` (XLM-RoBERTa model) - - contains `longformer`: :class:`~transformers.LongformerForMaskedLM` (Longformer model) - - contains `roberta`: :class:`~transformers.RobertaForMaskedLM` (RoBERTa model) - - contains `bert`: :class:`~transformers.BertForPreTraining` (Bert model) - - contains `openai-gpt`: :class:`~transformers.OpenAIGPTLMHeadModel` (OpenAI GPT model) - - contains `gpt2`: :class:`~transformers.GPT2LMHeadModel` (OpenAI GPT-2 model) - - contains `transfo-xl`: :class:`~transformers.TransfoXLLMHeadModel` (Transformer-XL model) - - contains `xlnet`: :class:`~transformers.XLNetLMHeadModel` (XLNet model) - - contains `xlm`: :class:`~transformers.XLMWithLMHeadModel` (XLM model) - - contains `ctrl`: :class:`~transformers.CTRLLMHeadModel` (Salesforce CTRL model) - - contains `flaubert`: :class:`~transformers.FlaubertWithLMHeadModel` (Flaubert model) - - contains `electra`: :class:`~transformers.ElectraForPreTraining` (Electra model) + falling back to using pattern matching on the `pretrained_model_name_or_path` string: + - `t5`: :class:`~transformers.T5ModelWithLMHead` (T5 model) + - `distilbert`: :class:`~transformers.DistilBertForMaskedLM` (DistilBERT model) + - `albert`: :class:`~transformers.AlbertForMaskedLM` (ALBERT model) + - `camembert`: :class:`~transformers.CamembertForMaskedLM` (CamemBERT model) + - `xlm-roberta`: :class:`~transformers.XLMRobertaForMaskedLM` (XLM-RoBERTa model) + - `longformer`: 
:class:`~transformers.LongformerForMaskedLM` (Longformer model) + - `roberta`: :class:`~transformers.RobertaForMaskedLM` (RoBERTa model) + - `bert`: :class:`~transformers.BertForPreTraining` (Bert model) + - `openai-gpt`: :class:`~transformers.OpenAIGPTLMHeadModel` (OpenAI GPT model) + - `gpt2`: :class:`~transformers.GPT2LMHeadModel` (OpenAI GPT-2 model) + - `transfo-xl`: :class:`~transformers.TransfoXLLMHeadModel` (Transformer-XL model) + - `xlnet`: :class:`~transformers.XLNetLMHeadModel` (XLNet model) + - `xlm`: :class:`~transformers.XLMWithLMHeadModel` (XLM model) + - `ctrl`: :class:`~transformers.CTRLLMHeadModel` (Salesforce CTRL model) + - `flaubert`: :class:`~transformers.FlaubertWithLMHeadModel` (Flaubert model) + - `electra`: :class:`~transformers.ElectraForPreTraining` (Electra model) The model is set in evaluation mode by default using `model.eval()` (Dropout modules are deactivated) To train the model, you should first set it back in training mode with `model.train()` @@ -679,26 +633,23 @@ class AutoModelWithLMHead: The `from_pretrained()` method takes care of returning the correct model class instance based on the `model_type` property of the config object, or when it's missing, - falling back to using pattern matching on the `pretrained_model_name_or_path` string. 
- - The model class to instantiate is selected as the first pattern matching - in the `pretrained_model_name_or_path` string (in the following order): - - contains `t5`: :class:`~transformers.T5ModelWithLMHead` (T5 model) - - contains `distilbert`: :class:`~transformers.DistilBertForMaskedLM` (DistilBERT model) - - contains `albert`: :class:`~transformers.AlbertForMaskedLM` (ALBERT model) - - contains `camembert`: :class:`~transformers.CamembertForMaskedLM` (CamemBERT model) - - contains `xlm-roberta`: :class:`~transformers.XLMRobertaForMaskedLM` (XLM-RoBERTa model) - - contains `longformer`: :class:`~transformers.LongformerForMaskedLM` (Longformer model) - - contains `roberta`: :class:`~transformers.RobertaForMaskedLM` (RoBERTa model) - - contains `bert`: :class:`~transformers.BertForMaskedLM` (Bert model) - - contains `openai-gpt`: :class:`~transformers.OpenAIGPTLMHeadModel` (OpenAI GPT model) - - contains `gpt2`: :class:`~transformers.GPT2LMHeadModel` (OpenAI GPT-2 model) - - contains `transfo-xl`: :class:`~transformers.TransfoXLLMHeadModel` (Transformer-XL model) - - contains `xlnet`: :class:`~transformers.XLNetLMHeadModel` (XLNet model) - - contains `xlm`: :class:`~transformers.XLMWithLMHeadModel` (XLM model) - - contains `ctrl`: :class:`~transformers.CTRLLMHeadModel` (Salesforce CTRL model) - - contains `flaubert`: :class:`~transformers.FlaubertWithLMHeadModel` (Flaubert model) - - contains `electra`: :class:`~transformers.ElectraForMaskedLM` (Electra model) + falling back to using pattern matching on the `pretrained_model_name_or_path` string: + - `t5`: :class:`~transformers.T5ModelWithLMHead` (T5 model) + - `distilbert`: :class:`~transformers.DistilBertForMaskedLM` (DistilBERT model) + - `albert`: :class:`~transformers.AlbertForMaskedLM` (ALBERT model) + - `camembert`: :class:`~transformers.CamembertForMaskedLM` (CamemBERT model) + - `xlm-roberta`: :class:`~transformers.XLMRobertaForMaskedLM` (XLM-RoBERTa model) + - `longformer`: 
:class:`~transformers.LongformerForMaskedLM` (Longformer model) + - `roberta`: :class:`~transformers.RobertaForMaskedLM` (RoBERTa model) + - `bert`: :class:`~transformers.BertForMaskedLM` (Bert model) + - `openai-gpt`: :class:`~transformers.OpenAIGPTLMHeadModel` (OpenAI GPT model) + - `gpt2`: :class:`~transformers.GPT2LMHeadModel` (OpenAI GPT-2 model) + - `transfo-xl`: :class:`~transformers.TransfoXLLMHeadModel` (Transformer-XL model) + - `xlnet`: :class:`~transformers.XLNetLMHeadModel` (XLNet model) + - `xlm`: :class:`~transformers.XLMWithLMHeadModel` (XLM model) + - `ctrl`: :class:`~transformers.CTRLLMHeadModel` (Salesforce CTRL model) + - `flaubert`: :class:`~transformers.FlaubertWithLMHeadModel` (Flaubert model) + - `electra`: :class:`~transformers.ElectraForMaskedLM` (Electra model) The model is set in evaluation mode by default using `model.eval()` (Dropout modules are deactivated) To train the model, you should first set it back in training mode with `model.train()` @@ -830,18 +781,15 @@ class AutoModelForSequenceClassification: The `from_pretrained()` method takes care of returning the correct model class instance based on the `model_type` property of the config object, or when it's missing, - falling back to using pattern matching on the `pretrained_model_name_or_path` string. 
- - The model class to instantiate is selected as the first pattern matching - in the `pretrained_model_name_or_path` string (in the following order): - - contains `distilbert`: :class:`~transformers.DistilBertForSequenceClassification` (DistilBERT model) - - contains `albert`: :class:`~transformers.AlbertForSequenceClassification` (ALBERT model) - - contains `camembert`: :class:`~transformers.CamembertForSequenceClassification` (CamemBERT model) - - contains `xlm-roberta`: :class:`~transformers.XLMRobertaForSequenceClassification` (XLM-RoBERTa model) - - contains `roberta`: :class:`~transformers.RobertaForSequenceClassification` (RoBERTa model) - - contains `bert`: :class:`~transformers.BertForSequenceClassification` (Bert model) - - contains `xlnet`: :class:`~transformers.XLNetForSequenceClassification` (XLNet model) - - contains `flaubert`: :class:`~transformers.FlaubertForSequenceClassification` (Flaubert model) + falling back to using pattern matching on the `pretrained_model_name_or_path` string: + - `distilbert`: :class:`~transformers.DistilBertForSequenceClassification` (DistilBERT model) + - `albert`: :class:`~transformers.AlbertForSequenceClassification` (ALBERT model) + - `camembert`: :class:`~transformers.CamembertForSequenceClassification` (CamemBERT model) + - `xlm-roberta`: :class:`~transformers.XLMRobertaForSequenceClassification` (XLM-RoBERTa model) + - `roberta`: :class:`~transformers.RobertaForSequenceClassification` (RoBERTa model) + - `bert`: :class:`~transformers.BertForSequenceClassification` (Bert model) + - `xlnet`: :class:`~transformers.XLNetForSequenceClassification` (XLNet model) + - `flaubert`: :class:`~transformers.FlaubertForSequenceClassification` (Flaubert model) The model is set in evaluation mode by default using `model.eval()` (Dropout modules are deactivated) To train the model, you should first set it back in training mode with `model.train()` @@ -979,16 +927,13 @@ class AutoModelForQuestionAnswering: The `from_pretrained()` 
method takes care of returning the correct model class instance based on the `model_type` property of the config object, or when it's missing, - falling back to using pattern matching on the `pretrained_model_name_or_path` string. - - The model class to instantiate is selected as the first pattern matching - in the `pretrained_model_name_or_path` string (in the following order): - - contains `distilbert`: :class:`~transformers.DistilBertForQuestionAnswering` (DistilBERT model) - - contains `albert`: :class:`~transformers.AlbertForQuestionAnswering` (ALBERT model) - - contains `bert`: :class:`~transformers.BertForQuestionAnswering` (Bert model) - - contains `xlnet`: :class:`~transformers.XLNetForQuestionAnswering` (XLNet model) - - contains `xlm`: :class:`~transformers.XLMForQuestionAnswering` (XLM model) - - contains `flaubert`: :class:`~transformers.FlaubertForQuestionAnswering` (XLM model) + falling back to using pattern matching on the `pretrained_model_name_or_path` string: + - `distilbert`: :class:`~transformers.DistilBertForQuestionAnswering` (DistilBERT model) + - `albert`: :class:`~transformers.AlbertForQuestionAnswering` (ALBERT model) + - `bert`: :class:`~transformers.BertForQuestionAnswering` (Bert model) + - `xlnet`: :class:`~transformers.XLNetForQuestionAnswering` (XLNet model) + - `xlm`: :class:`~transformers.XLMForQuestionAnswering` (XLM model) + - `flaubert`: :class:`~transformers.FlaubertForQuestionAnswering` (Flaubert model) The model is set in evaluation mode by default using `model.eval()` (Dropout modules are deactivated) To train the model, you should first set it back in training mode with `model.train()` @@ -1127,18 +1072,15 @@ class AutoModelForTokenClassification: The `from_pretrained()` method takes care of returning the correct model class instance based on the `model_type` property of the config object, or when it's missing, - falling back to using pattern matching on the `pretrained_model_name_or_path` string. 
- - The model class to instantiate is selected as the first pattern matching - in the `pretrained_model_name_or_path` string (in the following order): - - contains `distilbert`: :class:`~transformers.DistilBertForTokenClassification` (DistilBERT model) - - contains `xlm`: :class:`~transformers.XLMForTokenClassification` (XLM model) - - contains `xlm-roberta`: :class:`~transformers.XLMRobertaForTokenClassification` (XLM-RoBERTa?Para model) - - contains `camembert`: :class:`~transformers.CamembertForTokenClassification` (Camembert model) - - contains `bert`: :class:`~transformers.BertForTokenClassification` (Bert model) - - contains `xlnet`: :class:`~transformers.XLNetForTokenClassification` (XLNet model) - - contains `roberta`: :class:`~transformers.RobertaForTokenClassification` (Roberta model) - - contains `electra`: :class:`~transformers.ElectraForTokenClassification` (Electra model) + falling back to using pattern matching on the `pretrained_model_name_or_path` string: + - `distilbert`: :class:`~transformers.DistilBertForTokenClassification` (DistilBERT model) + - `xlm`: :class:`~transformers.XLMForTokenClassification` (XLM model) + - `xlm-roberta`: :class:`~transformers.XLMRobertaForTokenClassification` (XLM-RoBERTa model) + - `camembert`: :class:`~transformers.CamembertForTokenClassification` (Camembert model) + - `bert`: :class:`~transformers.BertForTokenClassification` (Bert model) + - `xlnet`: :class:`~transformers.XLNetForTokenClassification` (XLNet model) + - `roberta`: :class:`~transformers.RobertaForTokenClassification` (Roberta model) + - `electra`: :class:`~transformers.ElectraForTokenClassification` (Electra model) The model is set in evaluation mode by default using `model.eval()` (Dropout modules are deactivated) To train the model, you should first set it back in training mode with `model.train()` diff --git a/src/transformers/modeling_bart.py b/src/transformers/modeling_bart.py index 654067b228..929944b248 100644 --- 
a/src/transformers/modeling_bart.py +++ b/src/transformers/modeling_bart.py @@ -32,13 +32,15 @@ from .modeling_utils import PreTrainedModel, create_position_ids_from_input_ids logger = logging.getLogger(__name__) -BART_PRETRAINED_MODEL_ARCHIVE_MAP = { - "bart-large": "https://cdn.huggingface.co/facebook/bart-large/pytorch_model.bin", - "bart-large-mnli": "https://cdn.huggingface.co/facebook/bart-large-mnli/pytorch_model.bin", - "bart-large-cnn": "https://cdn.huggingface.co/facebook/bart-large-cnn/pytorch_model.bin", - "bart-large-xsum": "https://cdn.huggingface.co/facebook/bart-large-xsum/pytorch_model.bin", - "mbart-large-en-ro": "https://cdn.huggingface.co/facebook/mbart-large-en-ro/pytorch_model.bin", -} +BART_PRETRAINED_MODEL_ARCHIVE_LIST = [ + "facebook/bart-large", + "facebook/bart-large-mnli", + "facebook/bart-large-cnn", + "facebook/bart-large-xsum", + "facebook/mbart-large-en-ro", + # See all BART models at https://huggingface.co/models?filter=bart +] + BART_START_DOCSTRING = r""" @@ -118,7 +120,6 @@ def _prepare_bart_decoder_inputs( class PretrainedBartModel(PreTrainedModel): config_class = BartConfig base_model_prefix = "model" - pretrained_model_archive_map = BART_PRETRAINED_MODEL_ARCHIVE_MAP def _init_weights(self, module): std = self.config.init_std diff --git a/src/transformers/modeling_bert.py b/src/transformers/modeling_bert.py index a7c6ea6c0b..877bb55fc2 100644 --- a/src/transformers/modeling_bert.py +++ b/src/transformers/modeling_bert.py @@ -32,30 +32,31 @@ from .modeling_utils import PreTrainedModel, prune_linear_layer logger = logging.getLogger(__name__) -BERT_PRETRAINED_MODEL_ARCHIVE_MAP = { - "bert-base-uncased": "https://cdn.huggingface.co/bert-base-uncased-pytorch_model.bin", - "bert-large-uncased": "https://cdn.huggingface.co/bert-large-uncased-pytorch_model.bin", - "bert-base-cased": "https://cdn.huggingface.co/bert-base-cased-pytorch_model.bin", - "bert-large-cased": "https://cdn.huggingface.co/bert-large-cased-pytorch_model.bin", - 
"bert-base-multilingual-uncased": "https://cdn.huggingface.co/bert-base-multilingual-uncased-pytorch_model.bin", - "bert-base-multilingual-cased": "https://cdn.huggingface.co/bert-base-multilingual-cased-pytorch_model.bin", - "bert-base-chinese": "https://cdn.huggingface.co/bert-base-chinese-pytorch_model.bin", - "bert-base-german-cased": "https://cdn.huggingface.co/bert-base-german-cased-pytorch_model.bin", - "bert-large-uncased-whole-word-masking": "https://cdn.huggingface.co/bert-large-uncased-whole-word-masking-pytorch_model.bin", - "bert-large-cased-whole-word-masking": "https://cdn.huggingface.co/bert-large-cased-whole-word-masking-pytorch_model.bin", - "bert-large-uncased-whole-word-masking-finetuned-squad": "https://cdn.huggingface.co/bert-large-uncased-whole-word-masking-finetuned-squad-pytorch_model.bin", - "bert-large-cased-whole-word-masking-finetuned-squad": "https://cdn.huggingface.co/bert-large-cased-whole-word-masking-finetuned-squad-pytorch_model.bin", - "bert-base-cased-finetuned-mrpc": "https://cdn.huggingface.co/bert-base-cased-finetuned-mrpc-pytorch_model.bin", - "bert-base-german-dbmdz-cased": "https://cdn.huggingface.co/bert-base-german-dbmdz-cased-pytorch_model.bin", - "bert-base-german-dbmdz-uncased": "https://cdn.huggingface.co/bert-base-german-dbmdz-uncased-pytorch_model.bin", - "bert-base-japanese": "https://cdn.huggingface.co/cl-tohoku/bert-base-japanese/pytorch_model.bin", - "bert-base-japanese-whole-word-masking": "https://cdn.huggingface.co/cl-tohoku/bert-base-japanese-whole-word-masking/pytorch_model.bin", - "bert-base-japanese-char": "https://cdn.huggingface.co/cl-tohoku/bert-base-japanese-char/pytorch_model.bin", - "bert-base-japanese-char-whole-word-masking": "https://cdn.huggingface.co/cl-tohoku/bert-base-japanese-char-whole-word-masking/pytorch_model.bin", - "bert-base-finnish-cased-v1": "https://cdn.huggingface.co/TurkuNLP/bert-base-finnish-cased-v1/pytorch_model.bin", - "bert-base-finnish-uncased-v1": 
"https://cdn.huggingface.co/TurkuNLP/bert-base-finnish-uncased-v1/pytorch_model.bin", - "bert-base-dutch-cased": "https://cdn.huggingface.co/wietsedv/bert-base-dutch-cased/pytorch_model.bin", -} +BERT_PRETRAINED_MODEL_ARCHIVE_LIST = [ + "bert-base-uncased", + "bert-large-uncased", + "bert-base-cased", + "bert-large-cased", + "bert-base-multilingual-uncased", + "bert-base-multilingual-cased", + "bert-base-chinese", + "bert-base-german-cased", + "bert-large-uncased-whole-word-masking", + "bert-large-cased-whole-word-masking", + "bert-large-uncased-whole-word-masking-finetuned-squad", + "bert-large-cased-whole-word-masking-finetuned-squad", + "bert-base-cased-finetuned-mrpc", + "bert-base-german-dbmdz-cased", + "bert-base-german-dbmdz-uncased", + "cl-tohoku/bert-base-japanese", + "cl-tohoku/bert-base-japanese-whole-word-masking", + "cl-tohoku/bert-base-japanese-char", + "cl-tohoku/bert-base-japanese-char-whole-word-masking", + "TurkuNLP/bert-base-finnish-cased-v1", + "TurkuNLP/bert-base-finnish-uncased-v1", + "wietsedv/bert-base-dutch-cased", + # See all BERT models at https://huggingface.co/models?filter=bert +] def load_tf_weights_in_bert(model, config, tf_checkpoint_path): @@ -513,7 +514,6 @@ class BertPreTrainedModel(PreTrainedModel): """ config_class = BertConfig - pretrained_model_archive_map = BERT_PRETRAINED_MODEL_ARCHIVE_MAP load_tf_weights = load_tf_weights_in_bert base_model_prefix = "bert" diff --git a/src/transformers/modeling_camembert.py b/src/transformers/modeling_camembert.py index 511c4abf7c..f341bd943a 100644 --- a/src/transformers/modeling_camembert.py +++ b/src/transformers/modeling_camembert.py @@ -31,11 +31,12 @@ from .modeling_roberta import ( logger = logging.getLogger(__name__) -CAMEMBERT_PRETRAINED_MODEL_ARCHIVE_MAP = { - "camembert-base": "https://cdn.huggingface.co/camembert-base-pytorch_model.bin", - "umberto-commoncrawl-cased-v1": "https://cdn.huggingface.co/Musixmatch/umberto-commoncrawl-cased-v1/pytorch_model.bin", - 
"umberto-wikipedia-uncased-v1": "https://cdn.huggingface.co/Musixmatch/umberto-wikipedia-uncased-v1/pytorch_model.bin", -} +CAMEMBERT_PRETRAINED_MODEL_ARCHIVE_LIST = [ + "camembert-base", + "Musixmatch/umberto-commoncrawl-cased-v1", + "Musixmatch/umberto-wikipedia-uncased-v1", + # See all CamemBERT models at https://huggingface.co/models?filter=camembert +] CAMEMBERT_START_DOCSTRING = r""" @@ -62,7 +63,6 @@ class CamembertModel(RobertaModel): """ config_class = CamembertConfig - pretrained_model_archive_map = CAMEMBERT_PRETRAINED_MODEL_ARCHIVE_MAP @add_start_docstrings( @@ -75,7 +75,6 @@ class CamembertForMaskedLM(RobertaForMaskedLM): """ config_class = CamembertConfig - pretrained_model_archive_map = CAMEMBERT_PRETRAINED_MODEL_ARCHIVE_MAP @add_start_docstrings( @@ -90,7 +89,6 @@ class CamembertForSequenceClassification(RobertaForSequenceClassification): """ config_class = CamembertConfig - pretrained_model_archive_map = CAMEMBERT_PRETRAINED_MODEL_ARCHIVE_MAP @add_start_docstrings( @@ -105,7 +103,6 @@ class CamembertForMultipleChoice(RobertaForMultipleChoice): """ config_class = CamembertConfig - pretrained_model_archive_map = CAMEMBERT_PRETRAINED_MODEL_ARCHIVE_MAP @add_start_docstrings( @@ -120,7 +117,6 @@ class CamembertForTokenClassification(RobertaForTokenClassification): """ config_class = CamembertConfig - pretrained_model_archive_map = CAMEMBERT_PRETRAINED_MODEL_ARCHIVE_MAP @add_start_docstrings( @@ -135,4 +131,3 @@ class CamembertForQuestionAnswering(RobertaForQuestionAnswering): """ config_class = CamembertConfig - pretrained_model_archive_map = CAMEMBERT_PRETRAINED_MODEL_ARCHIVE_MAP diff --git a/src/transformers/modeling_ctrl.py b/src/transformers/modeling_ctrl.py index b5ea24714a..4f2d063f1b 100644 --- a/src/transformers/modeling_ctrl.py +++ b/src/transformers/modeling_ctrl.py @@ -30,7 +30,10 @@ from .modeling_utils import Conv1D, PreTrainedModel logger = logging.getLogger(__name__) -CTRL_PRETRAINED_MODEL_ARCHIVE_MAP = {"ctrl": 
"https://storage.googleapis.com/sf-ctrl/pytorch/seqlen256_v1.bin"} +CTRL_PRETRAINED_MODEL_ARCHIVE_LIST = [ + "ctrl" + # See all CTRL models at https://huggingface.co/models?filter=ctrl +] def angle_defn(pos, i, d_model_size): @@ -178,7 +181,6 @@ class CTRLPreTrainedModel(PreTrainedModel): """ config_class = CTRLConfig - pretrained_model_archive_map = CTRL_PRETRAINED_MODEL_ARCHIVE_MAP base_model_prefix = "transformer" def _init_weights(self, module): diff --git a/src/transformers/modeling_distilbert.py b/src/transformers/modeling_distilbert.py index 86470fcd6b..ee74bd6744 100644 --- a/src/transformers/modeling_distilbert.py +++ b/src/transformers/modeling_distilbert.py @@ -36,15 +36,16 @@ from .modeling_utils import PreTrainedModel, prune_linear_layer logger = logging.getLogger(__name__) -DISTILBERT_PRETRAINED_MODEL_ARCHIVE_MAP = { - "distilbert-base-uncased": "https://cdn.huggingface.co/distilbert-base-uncased-pytorch_model.bin", - "distilbert-base-uncased-distilled-squad": "https://cdn.huggingface.co/distilbert-base-uncased-distilled-squad-pytorch_model.bin", - "distilbert-base-cased": "https://cdn.huggingface.co/distilbert-base-cased-pytorch_model.bin", - "distilbert-base-cased-distilled-squad": "https://cdn.huggingface.co/distilbert-base-cased-distilled-squad-pytorch_model.bin", - "distilbert-base-german-cased": "https://cdn.huggingface.co/distilbert-base-german-cased-pytorch_model.bin", - "distilbert-base-multilingual-cased": "https://cdn.huggingface.co/distilbert-base-multilingual-cased-pytorch_model.bin", - "distilbert-base-uncased-finetuned-sst-2-english": "https://cdn.huggingface.co/distilbert-base-uncased-finetuned-sst-2-english-pytorch_model.bin", -} +DISTILBERT_PRETRAINED_MODEL_ARCHIVE_LIST = [ + "distilbert-base-uncased", + "distilbert-base-uncased-distilled-squad", + "distilbert-base-cased", + "distilbert-base-cased-distilled-squad", + "distilbert-base-german-cased", + "distilbert-base-multilingual-cased", + 
"distilbert-base-uncased-finetuned-sst-2-english", + # See all DistilBERT models at https://huggingface.co/models?filter=distilbert +] # UTILS AND BUILDING BLOCKS OF THE ARCHITECTURE # @@ -327,7 +328,6 @@ class DistilBertPreTrainedModel(PreTrainedModel): """ config_class = DistilBertConfig - pretrained_model_archive_map = DISTILBERT_PRETRAINED_MODEL_ARCHIVE_MAP load_tf_weights = None base_model_prefix = "distilbert" diff --git a/src/transformers/modeling_electra.py b/src/transformers/modeling_electra.py index 46b6a418d1..b1c346c6e4 100644 --- a/src/transformers/modeling_electra.py +++ b/src/transformers/modeling_electra.py @@ -14,14 +14,15 @@ from .modeling_bert import BertEmbeddings, BertEncoder, BertLayerNorm, BertPreTr logger = logging.getLogger(__name__) -ELECTRA_PRETRAINED_MODEL_ARCHIVE_MAP = { - "google/electra-small-generator": "https://cdn.huggingface.co/google/electra-small-generator/pytorch_model.bin", - "google/electra-base-generator": "https://cdn.huggingface.co/google/electra-base-generator/pytorch_model.bin", - "google/electra-large-generator": "https://cdn.huggingface.co/google/electra-large-generator/pytorch_model.bin", - "google/electra-small-discriminator": "https://cdn.huggingface.co/google/electra-small-discriminator/pytorch_model.bin", - "google/electra-base-discriminator": "https://cdn.huggingface.co/google/electra-base-discriminator/pytorch_model.bin", - "google/electra-large-discriminator": "https://cdn.huggingface.co/google/electra-large-discriminator/pytorch_model.bin", -} +ELECTRA_PRETRAINED_MODEL_ARCHIVE_LIST = [ + "google/electra-small-generator", + "google/electra-base-generator", + "google/electra-large-generator", + "google/electra-small-discriminator", + "google/electra-base-discriminator", + "google/electra-large-discriminator", + # See all ELECTRA models at https://huggingface.co/models?filter=electra +] def load_tf_weights_in_electra(model, config, tf_checkpoint_path, discriminator_or_generator="discriminator"): @@ -160,7 +161,6 
@@ class ElectraPreTrainedModel(BertPreTrainedModel): """ config_class = ElectraConfig - pretrained_model_archive_map = ELECTRA_PRETRAINED_MODEL_ARCHIVE_MAP load_tf_weights = load_tf_weights_in_electra base_model_prefix = "electra" diff --git a/src/transformers/modeling_flaubert.py b/src/transformers/modeling_flaubert.py index 3a8d98cbcc..24abd11580 100644 --- a/src/transformers/modeling_flaubert.py +++ b/src/transformers/modeling_flaubert.py @@ -35,12 +35,13 @@ from .modeling_xlm import ( logger = logging.getLogger(__name__) -FLAUBERT_PRETRAINED_MODEL_ARCHIVE_MAP = { - "flaubert-small-cased": "https://cdn.huggingface.co/flaubert/flaubert_small_cased/pytorch_model.bin", - "flaubert-base-uncased": "https://cdn.huggingface.co/flaubert/flaubert_base_uncased/pytorch_model.bin", - "flaubert-base-cased": "https://cdn.huggingface.co/flaubert/flaubert_base_cased/pytorch_model.bin", - "flaubert-large-cased": "https://cdn.huggingface.co/flaubert/flaubert_large_cased/pytorch_model.bin", -} +FLAUBERT_PRETRAINED_MODEL_ARCHIVE_LIST = [ + "flaubert/flaubert_small_cased", + "flaubert/flaubert_base_uncased", + "flaubert/flaubert_base_cased", + "flaubert/flaubert_large_cased", + # See all Flaubert models at https://huggingface.co/models?filter=flaubert +] FLAUBERT_START_DOCSTRING = r""" @@ -109,7 +110,6 @@ FLAUBERT_INPUTS_DOCSTRING = r""" class FlaubertModel(XLMModel): config_class = FlaubertConfig - pretrained_model_archive_map = FLAUBERT_PRETRAINED_MODEL_ARCHIVE_MAP def __init__(self, config): # , dico, is_encoder, with_output): super().__init__(config) @@ -304,7 +304,6 @@ class FlaubertWithLMHeadModel(XLMWithLMHeadModel): """ config_class = FlaubertConfig - pretrained_model_archive_map = FLAUBERT_PRETRAINED_MODEL_ARCHIVE_MAP def __init__(self, config): super().__init__(config) @@ -324,7 +323,6 @@ class FlaubertForSequenceClassification(XLMForSequenceClassification): """ config_class = FlaubertConfig - pretrained_model_archive_map = FLAUBERT_PRETRAINED_MODEL_ARCHIVE_MAP def 
__init__(self, config): super().__init__(config) @@ -344,7 +342,6 @@ class FlaubertForQuestionAnsweringSimple(XLMForQuestionAnsweringSimple): """ config_class = FlaubertConfig - pretrained_model_archive_map = FLAUBERT_PRETRAINED_MODEL_ARCHIVE_MAP def __init__(self, config): super().__init__(config) @@ -364,7 +361,6 @@ class FlaubertForQuestionAnswering(XLMForQuestionAnswering): """ config_class = FlaubertConfig - pretrained_model_archive_map = FLAUBERT_PRETRAINED_MODEL_ARCHIVE_MAP def __init__(self, config): super().__init__(config) diff --git a/src/transformers/modeling_gpt2.py b/src/transformers/modeling_gpt2.py index 8e8cd2db6e..c59cc506a5 100644 --- a/src/transformers/modeling_gpt2.py +++ b/src/transformers/modeling_gpt2.py @@ -31,13 +31,14 @@ from .modeling_utils import Conv1D, PreTrainedModel, SequenceSummary, prune_conv logger = logging.getLogger(__name__) -GPT2_PRETRAINED_MODEL_ARCHIVE_MAP = { - "gpt2": "https://cdn.huggingface.co/gpt2-pytorch_model.bin", - "gpt2-medium": "https://cdn.huggingface.co/gpt2-medium-pytorch_model.bin", - "gpt2-large": "https://cdn.huggingface.co/gpt2-large-pytorch_model.bin", - "gpt2-xl": "https://cdn.huggingface.co/gpt2-xl-pytorch_model.bin", - "distilgpt2": "https://cdn.huggingface.co/distilgpt2-pytorch_model.bin", -} +GPT2_PRETRAINED_MODEL_ARCHIVE_LIST = [ + "gpt2", + "gpt2-medium", + "gpt2-large", + "gpt2-xl", + "distilgpt2", + # See all GPT-2 models at https://huggingface.co/models?filter=gpt2 +] def load_tf_weights_in_gpt2(model, config, gpt2_checkpoint_path): @@ -251,7 +252,6 @@ class GPT2PreTrainedModel(PreTrainedModel): """ config_class = GPT2Config - pretrained_model_archive_map = GPT2_PRETRAINED_MODEL_ARCHIVE_MAP load_tf_weights = load_tf_weights_in_gpt2 base_model_prefix = "transformer" diff --git a/src/transformers/modeling_longformer.py b/src/transformers/modeling_longformer.py index d254c115fa..89a677aa9b 100644 --- a/src/transformers/modeling_longformer.py +++ b/src/transformers/modeling_longformer.py @@ -30,13 
+30,14 @@ from .modeling_roberta import RobertaLMHead, RobertaModel logger = logging.getLogger(__name__) -LONGFORMER_PRETRAINED_MODEL_ARCHIVE_MAP = { - "allenai/longformer-base-4096": "https://s3.amazonaws.com/models.huggingface.co/bert/allenai/longformer-base-4096/pytorch_model.bin", - "allenai/longformer-large-4096": "https://s3.amazonaws.com/models.huggingface.co/bert/allenai/longformer-large-4096/pytorch_model.bin", - "allenai/longformer-large-4096-finetuned-triviaqa": "https://s3.amazonaws.com/models.huggingface.co/bert/allenai/longformer-large-4096-finetuned-triviaqa/pytorch_model.bin", - "allenai/longformer-base-4096-extra.pos.embd.only": "https://s3.amazonaws.com/models.huggingface.co/bert/allenai/longformer-base-4096-extra.pos.embd.only/pytorch_model.bin", - "allenai/longformer-large-4096-extra.pos.embd.only": "https://s3.amazonaws.com/models.huggingface.co/bert/allenai/longformer-large-4096-extra.pos.embd.only/pytorch_model.bin", -} +LONGFORMER_PRETRAINED_MODEL_ARCHIVE_LIST = [ + "allenai/longformer-base-4096", + "allenai/longformer-large-4096", + "allenai/longformer-large-4096-finetuned-triviaqa", + "allenai/longformer-base-4096-extra.pos.embd.only", + "allenai/longformer-large-4096-extra.pos.embd.only", + # See all Longformer models at https://huggingface.co/models?filter=longformer +] def _get_question_end_index(input_ids, sep_token_id): @@ -513,7 +514,6 @@ class LongformerModel(RobertaModel): """ config_class = LongformerConfig - pretrained_model_archive_map = LONGFORMER_PRETRAINED_MODEL_ARCHIVE_MAP base_model_prefix = "longformer" def __init__(self, config): @@ -685,7 +685,6 @@ class LongformerModel(RobertaModel): @add_start_docstrings("""Longformer Model with a `language modeling` head on top. 
""", LONGFORMER_START_DOCSTRING) class LongformerForMaskedLM(BertPreTrainedModel): config_class = LongformerConfig - pretrained_model_archive_map = LONGFORMER_PRETRAINED_MODEL_ARCHIVE_MAP base_model_prefix = "longformer" def __init__(self, config): @@ -776,7 +775,6 @@ class LongformerForMaskedLM(BertPreTrainedModel): ) class LongformerForSequenceClassification(BertPreTrainedModel): config_class = LongformerConfig - pretrained_model_archive_map = LONGFORMER_PRETRAINED_MODEL_ARCHIVE_MAP base_model_prefix = "longformer" def __init__(self, config): @@ -893,7 +891,6 @@ class LongformerClassificationHead(nn.Module): ) class LongformerForQuestionAnswering(BertPreTrainedModel): config_class = LongformerConfig - pretrained_model_archive_map = LONGFORMER_PRETRAINED_MODEL_ARCHIVE_MAP base_model_prefix = "longformer" def __init__(self, config): @@ -1018,7 +1015,6 @@ class LongformerForQuestionAnswering(BertPreTrainedModel): ) class LongformerForTokenClassification(BertPreTrainedModel): config_class = LongformerConfig - pretrained_model_archive_map = LONGFORMER_PRETRAINED_MODEL_ARCHIVE_MAP base_model_prefix = "longformer" def __init__(self, config): @@ -1119,7 +1115,6 @@ class LongformerForTokenClassification(BertPreTrainedModel): ) class LongformerForMultipleChoice(BertPreTrainedModel): config_class = LongformerConfig - pretrained_model_archive_map = LONGFORMER_PRETRAINED_MODEL_ARCHIVE_MAP base_model_prefix = "longformer" def __init__(self, config): diff --git a/src/transformers/modeling_marian.py b/src/transformers/modeling_marian.py index 701eedda43..65ef91f64d 100644 --- a/src/transformers/modeling_marian.py +++ b/src/transformers/modeling_marian.py @@ -18,6 +18,11 @@ from transformers.modeling_bart import BartForConditionalGeneration +MARIAN_PRETRAINED_MODEL_ARCHIVE_LIST = [ + # See all Marian models at https://huggingface.co/models?search=Helsinki-NLP +] + + class MarianMTModel(BartForConditionalGeneration): r""" Pytorch version of marian-nmt's transformer.h (c++). 
Designed for the OPUS-NMT translation checkpoints. @@ -41,8 +46,6 @@ class MarianMTModel(BartForConditionalGeneration): """ - pretrained_model_archive_map = {} # see https://huggingface.co/models?search=Helsinki-NLP - def prepare_logits_for_generation(self, logits, cur_len, max_length): logits[:, self.config.pad_token_id] = float("-inf") if cur_len == max_length - 1 and self.config.eos_token_id is not None: diff --git a/src/transformers/modeling_openai.py b/src/transformers/modeling_openai.py index f6a74c05df..077a1ca2a0 100644 --- a/src/transformers/modeling_openai.py +++ b/src/transformers/modeling_openai.py @@ -33,7 +33,10 @@ from .modeling_utils import Conv1D, PreTrainedModel, SequenceSummary, prune_conv logger = logging.getLogger(__name__) -OPENAI_GPT_PRETRAINED_MODEL_ARCHIVE_MAP = {"openai-gpt": "https://cdn.huggingface.co/openai-gpt-pytorch_model.bin"} +OPENAI_GPT_PRETRAINED_MODEL_ARCHIVE_LIST = [ + "openai-gpt", + # See all OpenAI GPT models at https://huggingface.co/models?filter=openai-gpt +] def load_tf_weights_in_openai_gpt(model, config, openai_checkpoint_folder_path): @@ -252,7 +255,6 @@ class OpenAIGPTPreTrainedModel(PreTrainedModel): """ config_class = OpenAIGPTConfig - pretrained_model_archive_map = OPENAI_GPT_PRETRAINED_MODEL_ARCHIVE_MAP load_tf_weights = load_tf_weights_in_openai_gpt base_model_prefix = "transformer" diff --git a/src/transformers/modeling_reformer.py b/src/transformers/modeling_reformer.py index 8e32d06ff6..d48823a412 100644 --- a/src/transformers/modeling_reformer.py +++ b/src/transformers/modeling_reformer.py @@ -35,10 +35,11 @@ from .modeling_utils import PreTrainedModel, apply_chunking_to_forward logger = logging.getLogger(__name__) -REFORMER_PRETRAINED_MODEL_ARCHIVE_MAP = { - "google/reformer-crime-and-punishment": "https://cdn.huggingface.co/google/reformer-crime-and-punishment/pytorch_model.bin", - "google/reformer-enwik8": "https://cdn.huggingface.co/google/reformer-enwik8/pytorch_model.bin", -} 
+REFORMER_PRETRAINED_MODEL_ARCHIVE_LIST = [ + "google/reformer-crime-and-punishment", + "google/reformer-enwik8", + # See all Reformer models at https://huggingface.co/models?filter=reformer +] def mish(x): @@ -1373,7 +1374,6 @@ class ReformerPreTrainedModel(PreTrainedModel): """ config_class = ReformerConfig - pretrained_model_archive_map = REFORMER_PRETRAINED_MODEL_ARCHIVE_MAP base_model_prefix = "reformer" @property diff --git a/src/transformers/modeling_roberta.py b/src/transformers/modeling_roberta.py index 2d085e3a8a..175075a2fb 100644 --- a/src/transformers/modeling_roberta.py +++ b/src/transformers/modeling_roberta.py @@ -30,14 +30,15 @@ from .modeling_utils import create_position_ids_from_input_ids logger = logging.getLogger(__name__) -ROBERTA_PRETRAINED_MODEL_ARCHIVE_MAP = { - "roberta-base": "https://cdn.huggingface.co/roberta-base-pytorch_model.bin", - "roberta-large": "https://cdn.huggingface.co/roberta-large-pytorch_model.bin", - "roberta-large-mnli": "https://cdn.huggingface.co/roberta-large-mnli-pytorch_model.bin", - "distilroberta-base": "https://cdn.huggingface.co/distilroberta-base-pytorch_model.bin", - "roberta-base-openai-detector": "https://cdn.huggingface.co/roberta-base-openai-detector-pytorch_model.bin", - "roberta-large-openai-detector": "https://cdn.huggingface.co/roberta-large-openai-detector-pytorch_model.bin", -} +ROBERTA_PRETRAINED_MODEL_ARCHIVE_LIST = [ + "roberta-base", + "roberta-large", + "roberta-large-mnli", + "distilroberta-base", + "roberta-base-openai-detector", + "roberta-large-openai-detector", + # See all RoBERTa models at https://huggingface.co/models?filter=roberta +] class RobertaEmbeddings(BertEmbeddings): @@ -142,7 +143,6 @@ class RobertaModel(BertModel): """ config_class = RobertaConfig - pretrained_model_archive_map = ROBERTA_PRETRAINED_MODEL_ARCHIVE_MAP base_model_prefix = "roberta" def __init__(self, config): @@ -161,7 +161,6 @@ class RobertaModel(BertModel): @add_start_docstrings("""RoBERTa Model with a `language 
modeling` head on top. """, ROBERTA_START_DOCSTRING) class RobertaForMaskedLM(BertPreTrainedModel): config_class = RobertaConfig - pretrained_model_archive_map = ROBERTA_PRETRAINED_MODEL_ARCHIVE_MAP base_model_prefix = "roberta" def __init__(self, config): @@ -276,7 +275,6 @@ class RobertaLMHead(nn.Module): ) class RobertaForSequenceClassification(BertPreTrainedModel): config_class = RobertaConfig - pretrained_model_archive_map = ROBERTA_PRETRAINED_MODEL_ARCHIVE_MAP base_model_prefix = "roberta" def __init__(self, config): @@ -367,7 +365,6 @@ class RobertaForSequenceClassification(BertPreTrainedModel): ) class RobertaForMultipleChoice(BertPreTrainedModel): config_class = RobertaConfig - pretrained_model_archive_map = ROBERTA_PRETRAINED_MODEL_ARCHIVE_MAP base_model_prefix = "roberta" def __init__(self, config): @@ -466,7 +463,6 @@ class RobertaForMultipleChoice(BertPreTrainedModel): ) class RobertaForTokenClassification(BertPreTrainedModel): config_class = RobertaConfig - pretrained_model_archive_map = ROBERTA_PRETRAINED_MODEL_ARCHIVE_MAP base_model_prefix = "roberta" def __init__(self, config): @@ -586,7 +582,6 @@ class RobertaClassificationHead(nn.Module): ) class RobertaForQuestionAnswering(BertPreTrainedModel): config_class = RobertaConfig - pretrained_model_archive_map = ROBERTA_PRETRAINED_MODEL_ARCHIVE_MAP base_model_prefix = "roberta" def __init__(self, config): diff --git a/src/transformers/modeling_t5.py b/src/transformers/modeling_t5.py index f5b74e7a79..90d094eb54 100644 --- a/src/transformers/modeling_t5.py +++ b/src/transformers/modeling_t5.py @@ -36,13 +36,14 @@ logger = logging.getLogger(__name__) # This dict contrains shortcut names and associated url # for the pretrained weights provided with the models #################################################### -T5_PRETRAINED_MODEL_ARCHIVE_MAP = { - "t5-small": "https://cdn.huggingface.co/t5-small-pytorch_model.bin", - "t5-base": "https://cdn.huggingface.co/t5-base-pytorch_model.bin", - "t5-large": 
"https://cdn.huggingface.co/t5-large-pytorch_model.bin", - "t5-3b": "https://cdn.huggingface.co/t5-3b-pytorch_model.bin", - "t5-11b": "https://cdn.huggingface.co/t5-11b-pytorch_model.bin", -} +T5_PRETRAINED_MODEL_ARCHIVE_LIST = [ + "t5-small", + "t5-base", + "t5-large", + "t5-3b", + "t5-11b", + # See all T5 models at https://huggingface.co/models?filter=t5 +] #################################################### @@ -555,7 +556,6 @@ class T5PreTrainedModel(PreTrainedModel): """ config_class = T5Config - pretrained_model_archive_map = T5_PRETRAINED_MODEL_ARCHIVE_MAP load_tf_weights = load_tf_weights_in_t5 base_model_prefix = "transformer" diff --git a/src/transformers/modeling_tf_albert.py b/src/transformers/modeling_tf_albert.py index da7c3d458f..e2d33d329d 100644 --- a/src/transformers/modeling_tf_albert.py +++ b/src/transformers/modeling_tf_albert.py @@ -29,16 +29,17 @@ from .tokenization_utils import BatchEncoding logger = logging.getLogger(__name__) -TF_ALBERT_PRETRAINED_MODEL_ARCHIVE_MAP = { - "albert-base-v1": "https://cdn.huggingface.co/albert-base-v1-with-prefix-tf_model.h5", - "albert-large-v1": "https://cdn.huggingface.co/albert-large-v1-with-prefix-tf_model.h5", - "albert-xlarge-v1": "https://cdn.huggingface.co/albert-xlarge-v1-with-prefix-tf_model.h5", - "albert-xxlarge-v1": "https://cdn.huggingface.co/albert-xxlarge-v1-with-prefix-tf_model.h5", - "albert-base-v2": "https://cdn.huggingface.co/albert-base-v2-with-prefix-tf_model.h5", - "albert-large-v2": "https://cdn.huggingface.co/albert-large-v2-with-prefix-tf_model.h5", - "albert-xlarge-v2": "https://cdn.huggingface.co/albert-xlarge-v2-with-prefix-tf_model.h5", - "albert-xxlarge-v2": "https://cdn.huggingface.co/albert-xxlarge-v2-with-prefix-tf_model.h5", -} +TF_ALBERT_PRETRAINED_MODEL_ARCHIVE_LIST = [ + "albert-base-v1", + "albert-large-v1", + "albert-xlarge-v1", + "albert-xxlarge-v1", + "albert-base-v2", + "albert-large-v2", + "albert-xlarge-v2", + "albert-xxlarge-v2", + # See all ALBERT models at 
https://huggingface.co/models?filter=albert +] class TFAlbertEmbeddings(tf.keras.layers.Layer): @@ -440,7 +441,6 @@ class TFAlbertPreTrainedModel(TFPreTrainedModel): """ config_class = AlbertConfig - pretrained_model_archive_map = TF_ALBERT_PRETRAINED_MODEL_ARCHIVE_MAP base_model_prefix = "albert" diff --git a/src/transformers/modeling_tf_auto.py b/src/transformers/modeling_tf_auto.py index 8f40734edd..cd300019d8 100644 --- a/src/transformers/modeling_tf_auto.py +++ b/src/transformers/modeling_tf_auto.py @@ -34,7 +34,6 @@ from .configuration_auto import ( ) from .configuration_utils import PretrainedConfig from .modeling_tf_albert import ( - TF_ALBERT_PRETRAINED_MODEL_ARCHIVE_MAP, TFAlbertForMaskedLM, TFAlbertForMultipleChoice, TFAlbertForPreTraining, @@ -43,7 +42,6 @@ from .modeling_tf_albert import ( TFAlbertModel, ) from .modeling_tf_bert import ( - TF_BERT_PRETRAINED_MODEL_ARCHIVE_MAP, TFBertForMaskedLM, TFBertForMultipleChoice, TFBertForPreTraining, @@ -52,40 +50,32 @@ from .modeling_tf_bert import ( TFBertForTokenClassification, TFBertModel, ) -from .modeling_tf_ctrl import TF_CTRL_PRETRAINED_MODEL_ARCHIVE_MAP, TFCTRLLMHeadModel, TFCTRLModel +from .modeling_tf_ctrl import TFCTRLLMHeadModel, TFCTRLModel from .modeling_tf_distilbert import ( - TF_DISTILBERT_PRETRAINED_MODEL_ARCHIVE_MAP, TFDistilBertForMaskedLM, TFDistilBertForQuestionAnswering, TFDistilBertForSequenceClassification, TFDistilBertForTokenClassification, TFDistilBertModel, ) -from .modeling_tf_gpt2 import TF_GPT2_PRETRAINED_MODEL_ARCHIVE_MAP, TFGPT2LMHeadModel, TFGPT2Model -from .modeling_tf_openai import TF_OPENAI_GPT_PRETRAINED_MODEL_ARCHIVE_MAP, TFOpenAIGPTLMHeadModel, TFOpenAIGPTModel +from .modeling_tf_gpt2 import TFGPT2LMHeadModel, TFGPT2Model +from .modeling_tf_openai import TFOpenAIGPTLMHeadModel, TFOpenAIGPTModel from .modeling_tf_roberta import ( - TF_ROBERTA_PRETRAINED_MODEL_ARCHIVE_MAP, TFRobertaForMaskedLM, TFRobertaForQuestionAnswering, TFRobertaForSequenceClassification, 
TFRobertaForTokenClassification, TFRobertaModel, ) -from .modeling_tf_t5 import TF_T5_PRETRAINED_MODEL_ARCHIVE_MAP, TFT5ForConditionalGeneration, TFT5Model -from .modeling_tf_transfo_xl import ( - TF_TRANSFO_XL_PRETRAINED_MODEL_ARCHIVE_MAP, - TFTransfoXLLMHeadModel, - TFTransfoXLModel, -) +from .modeling_tf_t5 import TFT5ForConditionalGeneration, TFT5Model +from .modeling_tf_transfo_xl import TFTransfoXLLMHeadModel, TFTransfoXLModel from .modeling_tf_xlm import ( - TF_XLM_PRETRAINED_MODEL_ARCHIVE_MAP, TFXLMForQuestionAnsweringSimple, TFXLMForSequenceClassification, TFXLMModel, TFXLMWithLMHeadModel, ) from .modeling_tf_xlnet import ( - TF_XLNET_PRETRAINED_MODEL_ARCHIVE_MAP, TFXLNetForQuestionAnsweringSimple, TFXLNetForSequenceClassification, TFXLNetForTokenClassification, @@ -97,24 +87,6 @@ from .modeling_tf_xlnet import ( logger = logging.getLogger(__name__) -TF_ALL_PRETRAINED_MODEL_ARCHIVE_MAP = dict( - (key, value) - for pretrained_map in [ - TF_BERT_PRETRAINED_MODEL_ARCHIVE_MAP, - TF_OPENAI_GPT_PRETRAINED_MODEL_ARCHIVE_MAP, - TF_TRANSFO_XL_PRETRAINED_MODEL_ARCHIVE_MAP, - TF_GPT2_PRETRAINED_MODEL_ARCHIVE_MAP, - TF_CTRL_PRETRAINED_MODEL_ARCHIVE_MAP, - TF_XLNET_PRETRAINED_MODEL_ARCHIVE_MAP, - TF_XLM_PRETRAINED_MODEL_ARCHIVE_MAP, - TF_ROBERTA_PRETRAINED_MODEL_ARCHIVE_MAP, - TF_DISTILBERT_PRETRAINED_MODEL_ARCHIVE_MAP, - TF_ALBERT_PRETRAINED_MODEL_ARCHIVE_MAP, - TF_T5_PRETRAINED_MODEL_ARCHIVE_MAP, - ] - for key, value, in pretrained_map.items() -) - TF_MODEL_MAPPING = OrderedDict( [ (T5Config, TFT5Model), @@ -208,20 +180,17 @@ class TFAutoModel(object): The `from_pretrained()` method takes care of returning the correct model class instance based on the `model_type` property of the config object, or when it's missing, - falling back to using pattern matching on the `pretrained_model_name_or_path` string. 
- - The base model class to instantiate is selected as the first pattern matching - in the `pretrained_model_name_or_path` string (in the following order): - - contains `t5`: TFT5Model (T5 model) - - contains `distilbert`: TFDistilBertModel (DistilBERT model) - - contains `roberta`: TFRobertaModel (RoBERTa model) - - contains `bert`: TFBertModel (Bert model) - - contains `openai-gpt`: TFOpenAIGPTModel (OpenAI GPT model) - - contains `gpt2`: TFGPT2Model (OpenAI GPT-2 model) - - contains `transfo-xl`: TFTransfoXLModel (Transformer-XL model) - - contains `xlnet`: TFXLNetModel (XLNet model) - - contains `xlm`: TFXLMModel (XLM model) - - contains `ctrl`: TFCTRLModel (CTRL model) + falling back to using pattern matching on the `pretrained_model_name_or_path` string: + - `t5`: TFT5Model (T5 model) + - `distilbert`: TFDistilBertModel (DistilBERT model) + - `roberta`: TFRobertaModel (RoBERTa model) + - `bert`: TFBertModel (Bert model) + - `openai-gpt`: TFOpenAIGPTModel (OpenAI GPT model) + - `gpt2`: TFGPT2Model (OpenAI GPT-2 model) + - `transfo-xl`: TFTransfoXLModel (Transformer-XL model) + - `xlnet`: TFXLNetModel (XLNet model) + - `xlm`: TFXLMModel (XLM model) + - `ctrl`: TFCTRLModel (CTRL model) This class cannot be instantiated using `__init__()` (throws an error). """ @@ -276,17 +245,18 @@ class TFAutoModel(object): r""" Instantiates one of the base model classes of the library from a pre-trained model configuration. 
- The model class to instantiate is selected as the first pattern matching - in the `pretrained_model_name_or_path` string (in the following order): - - contains `t5`: TFT5Model (T5 model) - - contains `distilbert`: TFDistilBertModel (DistilBERT model) - - contains `roberta`: TFRobertaModel (RoBERTa model) - - contains `bert`: TFTFBertModel (Bert model) - - contains `openai-gpt`: TFOpenAIGPTModel (OpenAI GPT model) - - contains `gpt2`: TFGPT2Model (OpenAI GPT-2 model) - - contains `transfo-xl`: TFTransfoXLModel (Transformer-XL model) - - contains `xlnet`: TFXLNetModel (XLNet model) - - contains `ctrl`: TFCTRLModel (CTRL model) + The `from_pretrained()` method takes care of returning the correct model class instance + based on the `model_type` property of the config object, or when it's missing, + falling back to using pattern matching on the `pretrained_model_name_or_path` string: + - `t5`: TFT5Model (T5 model) + - `distilbert`: TFDistilBertModel (DistilBERT model) + - `roberta`: TFRobertaModel (RoBERTa model) + - `bert`: TFBertModel (Bert model) + - `openai-gpt`: TFOpenAIGPTModel (OpenAI GPT model) + - `gpt2`: TFGPT2Model (OpenAI GPT-2 model) + - `transfo-xl`: TFTransfoXLModel (Transformer-XL model) + - `xlnet`: TFXLNetModel (XLNet model) + - `ctrl`: TFCTRLModel (CTRL model) Params: pretrained_model_name_or_path: either: @@ -424,21 +394,18 @@ class TFAutoModelForPreTraining(object): The `from_pretrained()` method takes care of returning the correct model class instance based on the `model_type` property of the config object, or when it's missing, - falling back to using pattern matching on the `pretrained_model_name_or_path` string. 
- - The model class to instantiate is selected as the first pattern matching - in the `pretrained_model_name_or_path` string (in the following order): - - contains `t5`: :class:`~transformers.TFT5ModelWithLMHead` (T5 model) - - contains `distilbert`: :class:`~transformers.TFDistilBertForMaskedLM` (DistilBERT model) - - contains `albert`: :class:`~transformers.TFAlbertForPreTraining` (ALBERT model) - - contains `roberta`: :class:`~transformers.TFRobertaForMaskedLM` (RoBERTa model) - - contains `bert`: :class:`~transformers.TFBertForPreTraining` (Bert model) - - contains `openai-gpt`: :class:`~transformers.TFOpenAIGPTLMHeadModel` (OpenAI GPT model) - - contains `gpt2`: :class:`~transformers.TFGPT2LMHeadModel` (OpenAI GPT-2 model) - - contains `transfo-xl`: :class:`~transformers.TFTransfoXLLMHeadModel` (Transformer-XL model) - - contains `xlnet`: :class:`~transformers.TFXLNetLMHeadModel` (XLNet model) - - contains `xlm`: :class:`~transformers.TFXLMWithLMHeadModel` (XLM model) - - contains `ctrl`: :class:`~transformers.TFCTRLLMHeadModel` (Salesforce CTRL model) + falling back to using pattern matching on the `pretrained_model_name_or_path` string: + - `t5`: :class:`~transformers.TFT5ModelWithLMHead` (T5 model) + - `distilbert`: :class:`~transformers.TFDistilBertForMaskedLM` (DistilBERT model) + - `albert`: :class:`~transformers.TFAlbertForPreTraining` (ALBERT model) + - `roberta`: :class:`~transformers.TFRobertaForMaskedLM` (RoBERTa model) + - `bert`: :class:`~transformers.TFBertForPreTraining` (Bert model) + - `openai-gpt`: :class:`~transformers.TFOpenAIGPTLMHeadModel` (OpenAI GPT model) + - `gpt2`: :class:`~transformers.TFGPT2LMHeadModel` (OpenAI GPT-2 model) + - `transfo-xl`: :class:`~transformers.TFTransfoXLLMHeadModel` (Transformer-XL model) + - `xlnet`: :class:`~transformers.TFXLNetLMHeadModel` (XLNet model) + - `xlm`: :class:`~transformers.TFXLMWithLMHeadModel` (XLM model) + - `ctrl`: :class:`~transformers.TFCTRLLMHeadModel` (Salesforce CTRL model) The model is 
set in evaluation mode by default using `model.eval()` (Dropout modules are deactivated) To train the model, you should first set it back in training mode with `model.train()` @@ -525,20 +492,17 @@ class TFAutoModelWithLMHead(object): The `from_pretrained()` method takes care of returning the correct model class instance based on the `model_type` property of the config object, or when it's missing, - falling back to using pattern matching on the `pretrained_model_name_or_path` string. - - The model class to instantiate is selected as the first pattern matching - in the `pretrained_model_name_or_path` string (in the following order): - - contains `t5`: TFT5ForConditionalGeneration (T5 model) - - contains `distilbert`: TFDistilBertForMaskedLM (DistilBERT model) - - contains `roberta`: TFRobertaForMaskedLM (RoBERTa model) - - contains `bert`: TFBertForMaskedLM (Bert model) - - contains `openai-gpt`: TFOpenAIGPTLMHeadModel (OpenAI GPT model) - - contains `gpt2`: TFGPT2LMHeadModel (OpenAI GPT-2 model) - - contains `transfo-xl`: TFTransfoXLLMHeadModel (Transformer-XL model) - - contains `xlnet`: TFXLNetLMHeadModel (XLNet model) - - contains `xlm`: TFXLMWithLMHeadModel (XLM model) - - contains `ctrl`: TFCTRLLMHeadModel (CTRL model) + falling back to using pattern matching on the `pretrained_model_name_or_path` string: + - `t5`: TFT5ForConditionalGeneration (T5 model) + - `distilbert`: TFDistilBertForMaskedLM (DistilBERT model) + - `roberta`: TFRobertaForMaskedLM (RoBERTa model) + - `bert`: TFBertForMaskedLM (Bert model) + - `openai-gpt`: TFOpenAIGPTLMHeadModel (OpenAI GPT model) + - `gpt2`: TFGPT2LMHeadModel (OpenAI GPT-2 model) + - `transfo-xl`: TFTransfoXLLMHeadModel (Transformer-XL model) + - `xlnet`: TFXLNetLMHeadModel (XLNet model) + - `xlm`: TFXLMWithLMHeadModel (XLM model) + - `ctrl`: TFCTRLLMHeadModel (CTRL model) This class cannot be instantiated using `__init__()` (throws an error). 
""" @@ -595,20 +559,17 @@ class TFAutoModelWithLMHead(object): The `from_pretrained()` method takes care of returning the correct model class instance based on the `model_type` property of the config object, or when it's missing, - falling back to using pattern matching on the `pretrained_model_name_or_path` string. - - The model class to instantiate is selected as the first pattern matching - in the `pretrained_model_name_or_path` string (in the following order): - - contains `t5`: TFT5ForConditionalGeneration (T5 model) - - contains `distilbert`: TFDistilBertForMaskedLM (DistilBERT model) - - contains `roberta`: TFRobertaForMaskedLM (RoBERTa model) - - contains `bert`: TFBertForMaskedLM (Bert model) - - contains `openai-gpt`: TFOpenAIGPTLMHeadModel (OpenAI GPT model) - - contains `gpt2`: TFGPT2LMHeadModel (OpenAI GPT-2 model) - - contains `transfo-xl`: TFTransfoXLLMHeadModel (Transformer-XL model) - - contains `xlnet`: TFXLNetLMHeadModel (XLNet model) - - contains `xlm`: TFXLMWithLMHeadModel (XLM model) - - contains `ctrl`: TFCTRLLMHeadModel (CTRL model) + falling back to using pattern matching on the `pretrained_model_name_or_path` string: + - `t5`: TFT5ForConditionalGeneration (T5 model) + - `distilbert`: TFDistilBertForMaskedLM (DistilBERT model) + - `roberta`: TFRobertaForMaskedLM (RoBERTa model) + - `bert`: TFBertForMaskedLM (Bert model) + - `openai-gpt`: TFOpenAIGPTLMHeadModel (OpenAI GPT model) + - `gpt2`: TFGPT2LMHeadModel (OpenAI GPT-2 model) + - `transfo-xl`: TFTransfoXLLMHeadModel (Transformer-XL model) + - `xlnet`: TFXLNetLMHeadModel (XLNet model) + - `xlm`: TFXLMWithLMHeadModel (XLM model) + - `ctrl`: TFCTRLLMHeadModel (CTRL model) Params: pretrained_model_name_or_path: either: @@ -694,12 +655,9 @@ class TFAutoModelForMultipleChoice: The `from_pretrained()` method takes care of returning the correct model class instance based on the `model_type` property of the config object, or when it's missing, - falling back to using pattern matching on the 
`pretrained_model_name_or_path` string. - - The model class to instantiate is selected as the first pattern matching - in the `pretrained_model_name_or_path` string (in the following order): - - contains `albert`: TFAlbertForMultipleChoice (Albert model) - - contains `bert`: TFBertForMultipleChoice (Bert model) + falling back to using pattern matching on the `pretrained_model_name_or_path` string: + - `albert`: TFAlbertForMultipleChoice (Albert model) + - `bert`: TFBertForMultipleChoice (Bert model) This class cannot be instantiated using `__init__()` (throws an error). """ @@ -751,12 +709,9 @@ class TFAutoModelForMultipleChoice: The `from_pretrained()` method takes care of returning the correct model class instance based on the `model_type` property of the config object, or when it's missing, - falling back to using pattern matching on the `pretrained_model_name_or_path` string. - - The model class to instantiate is selected as the first pattern matching - in the `pretrained_model_name_or_path` string (in the following order): - - contains `albert`: TFRobertaForMultiple (Albert model) - - contains `bert`: TFBertForMultipleChoice (Bert model) + falling back to using pattern matching on the `pretrained_model_name_or_path` string: + - `albert`: TFAlbertForMultipleChoice (Albert model) + - `bert`: TFBertForMultipleChoice (Bert model) The model is set in evaluation mode by default using `model.eval()` (Dropout modules are deactivated) To train the model, you should first set it back in training mode with `model.train()` @@ -847,15 +802,12 @@ class TFAutoModelForSequenceClassification(object): The `from_pretrained()` method takes care of returning the correct model class instance based on the `model_type` property of the config object, or when it's missing, - falling back to using pattern matching on the `pretrained_model_name_or_path` string. 
- - The model class to instantiate is selected as the first pattern matching - in the `pretrained_model_name_or_path` string (in the following order): - - contains `distilbert`: TFDistilBertForSequenceClassification (DistilBERT model) - - contains `roberta`: TFRobertaForSequenceClassification (RoBERTa model) - - contains `bert`: TFBertForSequenceClassification (Bert model) - - contains `xlnet`: TFXLNetForSequenceClassification (XLNet model) - - contains `xlm`: TFXLMForSequenceClassification (XLM model) + falling back to using pattern matching on the `pretrained_model_name_or_path` string: + - `distilbert`: TFDistilBertForSequenceClassification (DistilBERT model) + - `roberta`: TFRobertaForSequenceClassification (RoBERTa model) + - `bert`: TFBertForSequenceClassification (Bert model) + - `xlnet`: TFXLNetForSequenceClassification (XLNet model) + - `xlm`: TFXLMForSequenceClassification (XLM model) This class cannot be instantiated using `__init__()` (throws an error). """ @@ -910,15 +862,12 @@ class TFAutoModelForSequenceClassification(object): The `from_pretrained()` method takes care of returning the correct model class instance based on the `model_type` property of the config object, or when it's missing, - falling back to using pattern matching on the `pretrained_model_name_or_path` string. 
- - The model class to instantiate is selected as the first pattern matching - in the `pretrained_model_name_or_path` string (in the following order): - - contains `distilbert`: TFDistilBertForSequenceClassification (DistilBERT model) - - contains `roberta`: TFRobertaForSequenceClassification (RoBERTa model) - - contains `bert`: TFBertForSequenceClassification (Bert model) - - contains `xlnet`: TFXLNetForSequenceClassification (XLNet model) - - contains `xlm`: TFXLMForSequenceClassification (XLM model) + falling back to using pattern matching on the `pretrained_model_name_or_path` string: + - `distilbert`: TFDistilBertForSequenceClassification (DistilBERT model) + - `roberta`: TFRobertaForSequenceClassification (RoBERTa model) + - `bert`: TFBertForSequenceClassification (Bert model) + - `xlnet`: TFXLNetForSequenceClassification (XLNet model) + - `xlm`: TFXLMForSequenceClassification (XLM model) The model is set in evaluation mode by default using `model.eval()` (Dropout modules are deactivated) To train the model, you should first set it back in training mode with `model.train()` @@ -1009,16 +958,13 @@ class TFAutoModelForQuestionAnswering(object): The `from_pretrained()` method takes care of returning the correct model class instance based on the `model_type` property of the config object, or when it's missing, - falling back to using pattern matching on the `pretrained_model_name_or_path` string. 
- - The model class to instantiate is selected as the first pattern matching - in the `pretrained_model_name_or_path` string (in the following order): - - contains `distilbert`: TFDistilBertForQuestionAnswering (DistilBERT model) - - contains `albert`: TFAlbertForQuestionAnswering (ALBERT model) - - contains `roberta`: TFRobertaForQuestionAnswering (RoBERTa model) - - contains `bert`: TFBertForQuestionAnswering (Bert model) - - contains `xlnet`: TFXLNetForQuestionAnswering (XLNet model) - - contains `xlm`: TFXLMForQuestionAnswering (XLM model) + falling back to using pattern matching on the `pretrained_model_name_or_path` string: + - `distilbert`: TFDistilBertForQuestionAnswering (DistilBERT model) + - `albert`: TFAlbertForQuestionAnswering (ALBERT model) + - `roberta`: TFRobertaForQuestionAnswering (RoBERTa model) + - `bert`: TFBertForQuestionAnswering (Bert model) + - `xlnet`: TFXLNetForQuestionAnswering (XLNet model) + - `xlm`: TFXLMForQuestionAnswering (XLM model) This class cannot be instantiated using `__init__()` (throws an error). """ @@ -1074,16 +1020,13 @@ class TFAutoModelForQuestionAnswering(object): The `from_pretrained()` method takes care of returning the correct model class instance based on the `model_type` property of the config object, or when it's missing, - falling back to using pattern matching on the `pretrained_model_name_or_path` string. 
- - The model class to instantiate is selected as the first pattern matching - in the `pretrained_model_name_or_path` string (in the following order): - - contains `distilbert`: TFDistilBertForQuestionAnswering (DistilBERT model) - - contains `albert`: TFAlbertForQuestionAnswering (ALBERT model) - - contains `roberta`: TFRobertaForQuestionAnswering (RoBERTa model) - - contains `bert`: TFBertForQuestionAnswering (Bert model) - - contains `xlnet`: TFXLNetForQuestionAnswering (XLNet model) - - contains `xlm`: TFXLMForQuestionAnswering (XLM model) + falling back to using pattern matching on the `pretrained_model_name_or_path` string: + - `distilbert`: TFDistilBertForQuestionAnswering (DistilBERT model) + - `albert`: TFAlbertForQuestionAnswering (ALBERT model) + - `roberta`: TFRobertaForQuestionAnswering (RoBERTa model) + - `bert`: TFBertForQuestionAnswering (Bert model) + - `xlnet`: TFXLNetForQuestionAnswering (XLNet model) + - `xlm`: TFXLMForQuestionAnswering (XLM model) The model is set in evaluation mode by default using `model.eval()` (Dropout modules are deactivated) To train the model, you should first set it back in training mode with `model.train()` @@ -1215,14 +1158,11 @@ class TFAutoModelForTokenClassification: The `from_pretrained()` method takes care of returning the correct model class instance based on the `model_type` property of the config object, or when it's missing, - falling back to using pattern matching on the `pretrained_model_name_or_path` string. 
- - The model class to instantiate is selected as the first pattern matching - in the `pretrained_model_name_or_path` string (in the following order): - - contains `bert`: BertForTokenClassification (Bert model) - - contains `xlnet`: XLNetForTokenClassification (XLNet model) - - contains `distilbert`: DistilBertForTokenClassification (DistilBert model) - - contains `roberta`: RobertaForTokenClassification (Roberta model) + falling back to using pattern matching on the `pretrained_model_name_or_path` string: + - `bert`: TFBertForTokenClassification (Bert model) + - `xlnet`: TFXLNetForTokenClassification (XLNet model) + - `distilbert`: TFDistilBertForTokenClassification (DistilBert model) + - `roberta`: TFRobertaForTokenClassification (Roberta model) The model is set in evaluation mode by default using `model.eval()` (Dropout modules are deactivated) To train the model, you should first set it back in training mode with `model.train()` diff --git a/src/transformers/modeling_tf_bert.py b/src/transformers/modeling_tf_bert.py index 0973e1fd40..5ec6aeb7d0 100644 --- a/src/transformers/modeling_tf_bert.py +++ b/src/transformers/modeling_tf_bert.py @@ -30,28 +30,29 @@ from .tokenization_utils import BatchEncoding logger = logging.getLogger(__name__) -TF_BERT_PRETRAINED_MODEL_ARCHIVE_MAP = { - "bert-base-uncased": "https://cdn.huggingface.co/bert-base-uncased-tf_model.h5", - "bert-large-uncased": "https://cdn.huggingface.co/bert-large-uncased-tf_model.h5", - "bert-base-cased": "https://cdn.huggingface.co/bert-base-cased-tf_model.h5", - "bert-large-cased": "https://cdn.huggingface.co/bert-large-cased-tf_model.h5", - "bert-base-multilingual-uncased": "https://cdn.huggingface.co/bert-base-multilingual-uncased-tf_model.h5", - "bert-base-multilingual-cased": "https://cdn.huggingface.co/bert-base-multilingual-cased-tf_model.h5", - "bert-base-chinese": "https://cdn.huggingface.co/bert-base-chinese-tf_model.h5", - "bert-base-german-cased": 
"https://cdn.huggingface.co/bert-base-german-cased-tf_model.h5", - "bert-large-uncased-whole-word-masking": "https://cdn.huggingface.co/bert-large-uncased-whole-word-masking-tf_model.h5", - "bert-large-cased-whole-word-masking": "https://cdn.huggingface.co/bert-large-cased-whole-word-masking-tf_model.h5", - "bert-large-uncased-whole-word-masking-finetuned-squad": "https://cdn.huggingface.co/bert-large-uncased-whole-word-masking-finetuned-squad-tf_model.h5", - "bert-large-cased-whole-word-masking-finetuned-squad": "https://cdn.huggingface.co/bert-large-cased-whole-word-masking-finetuned-squad-tf_model.h5", - "bert-base-cased-finetuned-mrpc": "https://cdn.huggingface.co/bert-base-cased-finetuned-mrpc-tf_model.h5", - "bert-base-japanese": "https://cdn.huggingface.co/cl-tohoku/bert-base-japanese/tf_model.h5", - "bert-base-japanese-whole-word-masking": "https://cdn.huggingface.co/cl-tohoku/bert-base-japanese-whole-word-masking/tf_model.h5", - "bert-base-japanese-char": "https://cdn.huggingface.co/cl-tohoku/bert-base-japanese-char/tf_model.h5", - "bert-base-japanese-char-whole-word-masking": "https://cdn.huggingface.co/cl-tohoku/bert-base-japanese-char-whole-word-masking/tf_model.h5", - "bert-base-finnish-cased-v1": "https://cdn.huggingface.co/TurkuNLP/bert-base-finnish-cased-v1/tf_model.h5", - "bert-base-finnish-uncased-v1": "https://cdn.huggingface.co/TurkuNLP/bert-base-finnish-uncased-v1/tf_model.h5", - "bert-base-dutch-cased": "https://cdn.huggingface.co/wietsedv/bert-base-dutch-cased/tf_model.h5", -} +TF_BERT_PRETRAINED_MODEL_ARCHIVE_LIST = [ + "bert-base-uncased", + "bert-large-uncased", + "bert-base-cased", + "bert-large-cased", + "bert-base-multilingual-uncased", + "bert-base-multilingual-cased", + "bert-base-chinese", + "bert-base-german-cased", + "bert-large-uncased-whole-word-masking", + "bert-large-cased-whole-word-masking", + "bert-large-uncased-whole-word-masking-finetuned-squad", + "bert-large-cased-whole-word-masking-finetuned-squad", + 
"bert-base-cased-finetuned-mrpc", + "cl-tohoku/bert-base-japanese", + "cl-tohoku/bert-base-japanese-whole-word-masking", + "cl-tohoku/bert-base-japanese-char", + "cl-tohoku/bert-base-japanese-char-whole-word-masking", + "TurkuNLP/bert-base-finnish-cased-v1", + "TurkuNLP/bert-base-finnish-uncased-v1", + "wietsedv/bert-base-dutch-cased", + # See all BERT models at https://huggingface.co/models?filter=bert +] def gelu(x): @@ -585,7 +586,6 @@ class TFBertPreTrainedModel(TFPreTrainedModel): """ config_class = BertConfig - pretrained_model_archive_map = TF_BERT_PRETRAINED_MODEL_ARCHIVE_MAP base_model_prefix = "bert" diff --git a/src/transformers/modeling_tf_camembert.py b/src/transformers/modeling_tf_camembert.py index 11318654c3..e0c44371bc 100644 --- a/src/transformers/modeling_tf_camembert.py +++ b/src/transformers/modeling_tf_camembert.py @@ -30,7 +30,9 @@ from .modeling_tf_roberta import ( logger = logging.getLogger(__name__) -TF_CAMEMBERT_PRETRAINED_MODEL_ARCHIVE_MAP = {} +TF_CAMEMBERT_PRETRAINED_MODEL_ARCHIVE_LIST = [ + # See all CamemBERT models at https://huggingface.co/models?filter=camembert +] CAMEMBERT_START_DOCSTRING = r""" @@ -72,7 +74,6 @@ class TFCamembertModel(TFRobertaModel): """ config_class = CamembertConfig - pretrained_model_archive_map = TF_CAMEMBERT_PRETRAINED_MODEL_ARCHIVE_MAP @add_start_docstrings( @@ -85,7 +86,6 @@ class TFCamembertForMaskedLM(TFRobertaForMaskedLM): """ config_class = CamembertConfig - pretrained_model_archive_map = TF_CAMEMBERT_PRETRAINED_MODEL_ARCHIVE_MAP @add_start_docstrings( @@ -100,7 +100,6 @@ class TFCamembertForSequenceClassification(TFRobertaForSequenceClassification): """ config_class = CamembertConfig - pretrained_model_archive_map = TF_CAMEMBERT_PRETRAINED_MODEL_ARCHIVE_MAP @add_start_docstrings( @@ -115,4 +114,3 @@ class TFCamembertForTokenClassification(TFRobertaForTokenClassification): """ config_class = CamembertConfig - pretrained_model_archive_map = TF_CAMEMBERT_PRETRAINED_MODEL_ARCHIVE_MAP diff --git 
a/src/transformers/modeling_tf_ctrl.py b/src/transformers/modeling_tf_ctrl.py index 868df4748a..4660477825 100644 --- a/src/transformers/modeling_tf_ctrl.py +++ b/src/transformers/modeling_tf_ctrl.py @@ -29,7 +29,10 @@ from .tokenization_utils import BatchEncoding logger = logging.getLogger(__name__) -TF_CTRL_PRETRAINED_MODEL_ARCHIVE_MAP = {"ctrl": "https://cdn.huggingface.co/ctrl-tf_model.h5"} +TF_CTRL_PRETRAINED_MODEL_ARCHIVE_LIST = [ + "ctrl" + # See all CTRL models at https://huggingface.co/models?filter=ctrl +] def angle_defn(pos, i, d_model_size): @@ -379,7 +382,6 @@ class TFCTRLPreTrainedModel(TFPreTrainedModel): """ config_class = CTRLConfig - pretrained_model_archive_map = TF_CTRL_PRETRAINED_MODEL_ARCHIVE_MAP base_model_prefix = "transformer" diff --git a/src/transformers/modeling_tf_distilbert.py b/src/transformers/modeling_tf_distilbert.py index d582fdf422..2a34d4f514 100644 --- a/src/transformers/modeling_tf_distilbert.py +++ b/src/transformers/modeling_tf_distilbert.py @@ -31,14 +31,15 @@ from .tokenization_utils import BatchEncoding logger = logging.getLogger(__name__) -TF_DISTILBERT_PRETRAINED_MODEL_ARCHIVE_MAP = { - "distilbert-base-uncased": "https://cdn.huggingface.co/distilbert-base-uncased-tf_model.h5", - "distilbert-base-uncased-distilled-squad": "https://cdn.huggingface.co/distilbert-base-uncased-distilled-squad-tf_model.h5", - "distilbert-base-cased": "https://cdn.huggingface.co/distilbert-base-cased-tf_model.h5", - "distilbert-base-cased-distilled-squad": "https://cdn.huggingface.co/distilbert-base-cased-distilled-squad-tf_model.h5", - "distilbert-base-multilingual-cased": "https://cdn.huggingface.co/distilbert-base-multilingual-cased-tf_model.h5", - "distilbert-base-uncased-finetuned-sst-2-english": "https://cdn.huggingface.co/distilbert-base-uncased-finetuned-sst-2-english-tf_model.h5", -} +TF_DISTILBERT_PRETRAINED_MODEL_ARCHIVE_LIST = [ + "distilbert-base-uncased", + "distilbert-base-uncased-distilled-squad", + "distilbert-base-cased", + 
"distilbert-base-cased-distilled-squad", + "distilbert-base-multilingual-cased", + "distilbert-base-uncased-finetuned-sst-2-english", + # See all DistilBERT models at https://huggingface.co/models?filter=distilbert +] # UTILS AND BUILDING BLOCKS OF THE ARCHITECTURE # @@ -467,7 +468,6 @@ class TFDistilBertPreTrainedModel(TFPreTrainedModel): """ config_class = DistilBertConfig - pretrained_model_archive_map = TF_DISTILBERT_PRETRAINED_MODEL_ARCHIVE_MAP base_model_prefix = "distilbert" diff --git a/src/transformers/modeling_tf_electra.py b/src/transformers/modeling_tf_electra.py index ad1455b647..37dae2c647 100644 --- a/src/transformers/modeling_tf_electra.py +++ b/src/transformers/modeling_tf_electra.py @@ -13,14 +13,15 @@ from .tokenization_utils import BatchEncoding logger = logging.getLogger(__name__) -TF_ELECTRA_PRETRAINED_MODEL_ARCHIVE_MAP = { - "google/electra-small-generator": "https://cdn.huggingface.co/google/electra-small-generator/tf_model.h5", - "google/electra-base-generator": "https://cdn.huggingface.co/google/electra-base-generator/tf_model.h5", - "google/electra-large-generator": "https://cdn.huggingface.co/google/electra-large-generator/tf_model.h5", - "google/electra-small-discriminator": "https://cdn.huggingface.co/google/electra-small-discriminator/tf_model.h5", - "google/electra-base-discriminator": "https://cdn.huggingface.co/google/electra-base-discriminator/tf_model.h5", - "google/electra-large-discriminator": "https://cdn.huggingface.co/google/electra-large-discriminator/tf_model.h5", -} +TF_ELECTRA_PRETRAINED_MODEL_ARCHIVE_LIST = [ + "google/electra-small-generator", + "google/electra-base-generator", + "google/electra-large-generator", + "google/electra-small-discriminator", + "google/electra-base-discriminator", + "google/electra-large-discriminator", + # See all ELECTRA models at https://huggingface.co/models?filter=electra +] class TFElectraEmbeddings(tf.keras.layers.Layer): @@ -160,7 +161,6 @@ class 
TFElectraGeneratorPredictions(tf.keras.layers.Layer): class TFElectraPreTrainedModel(TFBertPreTrainedModel): config_class = ElectraConfig - pretrained_model_archive_map = TF_ELECTRA_PRETRAINED_MODEL_ARCHIVE_MAP base_model_prefix = "electra" def get_extended_attention_mask(self, attention_mask, input_shape): diff --git a/src/transformers/modeling_tf_flaubert.py b/src/transformers/modeling_tf_flaubert.py index ca94a380a7..be4cd27a31 100644 --- a/src/transformers/modeling_tf_flaubert.py +++ b/src/transformers/modeling_tf_flaubert.py @@ -35,7 +35,9 @@ from .tokenization_utils import BatchEncoding logger = logging.getLogger(__name__) -TF_FLAUBERT_PRETRAINED_MODEL_ARCHIVE_MAP = {} +TF_FLAUBERT_PRETRAINED_MODEL_ARCHIVE_LIST = [ + # See all Flaubert models at https://huggingface.co/models?filter=flaubert +] FLAUBERT_START_DOCSTRING = r""" @@ -104,7 +106,6 @@ FLAUBERT_INPUTS_DOCSTRING = r""" ) class TFFlaubertModel(TFXLMModel): config_class = FlaubertConfig - pretrained_model_archive_map = TF_FLAUBERT_PRETRAINED_MODEL_ARCHIVE_MAP def __init__(self, config, *inputs, **kwargs): super().__init__(config, *inputs, **kwargs) @@ -309,7 +310,6 @@ class TFFlaubertMainLayer(TFXLMMainLayer): ) class TFFlaubertWithLMHeadModel(TFXLMWithLMHeadModel): config_class = FlaubertConfig - pretrained_model_archive_map = TF_FLAUBERT_PRETRAINED_MODEL_ARCHIVE_MAP def __init__(self, config, *inputs, **kwargs): super().__init__(config, *inputs, **kwargs) @@ -323,7 +323,6 @@ class TFFlaubertWithLMHeadModel(TFXLMWithLMHeadModel): ) class TFFlaubertForSequenceClassification(TFXLMForSequenceClassification): config_class = FlaubertConfig - pretrained_model_archive_map = TF_FLAUBERT_PRETRAINED_MODEL_ARCHIVE_MAP def __init__(self, config, *inputs, **kwargs): super().__init__(config, *inputs, **kwargs) diff --git a/src/transformers/modeling_tf_gpt2.py b/src/transformers/modeling_tf_gpt2.py index 50d2752732..047c4afa52 100644 --- a/src/transformers/modeling_tf_gpt2.py +++ 
b/src/transformers/modeling_tf_gpt2.py @@ -37,13 +37,14 @@ from .tokenization_utils import BatchEncoding logger = logging.getLogger(__name__) -TF_GPT2_PRETRAINED_MODEL_ARCHIVE_MAP = { - "gpt2": "https://cdn.huggingface.co/gpt2-tf_model.h5", - "gpt2-medium": "https://cdn.huggingface.co/gpt2-medium-tf_model.h5", - "gpt2-large": "https://cdn.huggingface.co/gpt2-large-tf_model.h5", - "gpt2-xl": "https://cdn.huggingface.co/gpt2-xl-tf_model.h5", - "distilgpt2": "https://cdn.huggingface.co/distilgpt2-tf_model.h5", -} +TF_GPT2_PRETRAINED_MODEL_ARCHIVE_LIST = [ + "gpt2", + "gpt2-medium", + "gpt2-large", + "gpt2-xl", + "distilgpt2", + # See all GPT-2 models at https://huggingface.co/models?filter=gpt2 +] def gelu(x): @@ -389,7 +390,6 @@ class TFGPT2PreTrainedModel(TFPreTrainedModel): """ config_class = GPT2Config - pretrained_model_archive_map = TF_GPT2_PRETRAINED_MODEL_ARCHIVE_MAP base_model_prefix = "transformer" diff --git a/src/transformers/modeling_tf_openai.py b/src/transformers/modeling_tf_openai.py index e2189604ba..5f8eabee65 100644 --- a/src/transformers/modeling_tf_openai.py +++ b/src/transformers/modeling_tf_openai.py @@ -36,7 +36,10 @@ from .tokenization_utils import BatchEncoding logger = logging.getLogger(__name__) -TF_OPENAI_GPT_PRETRAINED_MODEL_ARCHIVE_MAP = {"openai-gpt": "https://cdn.huggingface.co/openai-gpt-tf_model.h5"} +TF_OPENAI_GPT_PRETRAINED_MODEL_ARCHIVE_LIST = [ + "openai-gpt", + # See all OpenAI GPT models at https://huggingface.co/models?filter=openai-gpt +] def gelu(x): @@ -349,7 +352,6 @@ class TFOpenAIGPTPreTrainedModel(TFPreTrainedModel): """ config_class = OpenAIGPTConfig - pretrained_model_archive_map = TF_OPENAI_GPT_PRETRAINED_MODEL_ARCHIVE_MAP base_model_prefix = "transformer" diff --git a/src/transformers/modeling_tf_roberta.py b/src/transformers/modeling_tf_roberta.py index 9e91f4adac..40446d9ea4 100644 --- a/src/transformers/modeling_tf_roberta.py +++ b/src/transformers/modeling_tf_roberta.py @@ -28,12 +28,13 @@ from 
.modeling_tf_utils import TFPreTrainedModel, get_initializer, shape_list logger = logging.getLogger(__name__) -TF_ROBERTA_PRETRAINED_MODEL_ARCHIVE_MAP = { - "roberta-base": "https://cdn.huggingface.co/roberta-base-tf_model.h5", - "roberta-large": "https://cdn.huggingface.co/roberta-large-tf_model.h5", - "roberta-large-mnli": "https://cdn.huggingface.co/roberta-large-mnli-tf_model.h5", - "distilroberta-base": "https://cdn.huggingface.co/distilroberta-base-tf_model.h5", -} +TF_ROBERTA_PRETRAINED_MODEL_ARCHIVE_LIST = [ + "roberta-base", + "roberta-large", + "roberta-large-mnli", + "distilroberta-base", + # See all RoBERTa models at https://huggingface.co/models?filter=roberta +] class TFRobertaEmbeddings(TFBertEmbeddings): @@ -100,7 +101,6 @@ class TFRobertaPreTrainedModel(TFPreTrainedModel): """ config_class = RobertaConfig - pretrained_model_archive_map = TF_ROBERTA_PRETRAINED_MODEL_ARCHIVE_MAP base_model_prefix = "roberta" diff --git a/src/transformers/modeling_tf_t5.py b/src/transformers/modeling_tf_t5.py index 8f429bb354..f59d26bed7 100644 --- a/src/transformers/modeling_tf_t5.py +++ b/src/transformers/modeling_tf_t5.py @@ -30,13 +30,14 @@ from .modeling_tf_utils import TFPreTrainedModel, TFSharedEmbeddings, shape_list logger = logging.getLogger(__name__) -TF_T5_PRETRAINED_MODEL_ARCHIVE_MAP = { - "t5-small": "https://cdn.huggingface.co/t5-small-tf_model.h5", - "t5-base": "https://cdn.huggingface.co/t5-base-tf_model.h5", - "t5-large": "https://cdn.huggingface.co/t5-large-tf_model.h5", - "t5-3b": "https://cdn.huggingface.co/t5-3b-tf_model.h5", - "t5-11b": "https://cdn.huggingface.co/t5-11b-tf_model.h5", -} +TF_T5_PRETRAINED_MODEL_ARCHIVE_LIST = [ + "t5-small", + "t5-base", + "t5-large", + "t5-3b", + "t5-11b", + # See all T5 models at https://huggingface.co/models?filter=t5 +] #################################################### # TF 2.0 Models are constructed using Keras imperative API by sub-classing @@ -720,7 +721,6 @@ class 
TFT5PreTrainedModel(TFPreTrainedModel): """ config_class = T5Config - pretrained_model_archive_map = TF_T5_PRETRAINED_MODEL_ARCHIVE_MAP base_model_prefix = "transformer" @property diff --git a/src/transformers/modeling_tf_transfo_xl.py b/src/transformers/modeling_tf_transfo_xl.py index 83a641740c..038b768a8c 100644 --- a/src/transformers/modeling_tf_transfo_xl.py +++ b/src/transformers/modeling_tf_transfo_xl.py @@ -30,9 +30,10 @@ from .tokenization_utils import BatchEncoding logger = logging.getLogger(__name__) -TF_TRANSFO_XL_PRETRAINED_MODEL_ARCHIVE_MAP = { - "transfo-xl-wt103": "https://cdn.huggingface.co/transfo-xl-wt103-tf_model.h5", -} +TF_TRANSFO_XL_PRETRAINED_MODEL_ARCHIVE_LIST = [ + "transfo-xl-wt103", + # See all Transformer XL models at https://huggingface.co/models?filter=transfo-xl +] class TFPositionalEmbedding(tf.keras.layers.Layer): @@ -630,7 +631,6 @@ class TFTransfoXLPreTrainedModel(TFPreTrainedModel): """ config_class = TransfoXLConfig - pretrained_model_archive_map = TF_TRANSFO_XL_PRETRAINED_MODEL_ARCHIVE_MAP base_model_prefix = "transformer" diff --git a/src/transformers/modeling_tf_utils.py b/src/transformers/modeling_tf_utils.py index 1ee5b60682..e0bbeb0d0b 100644 --- a/src/transformers/modeling_tf_utils.py +++ b/src/transformers/modeling_tf_utils.py @@ -112,7 +112,6 @@ class TFPreTrainedModel(tf.keras.Model, TFModelUtilsMixin): Class attributes (overridden by derived classes): - ``config_class``: a class derived from :class:`~transformers.PretrainedConfig` to use as configuration class for this model architecture. - - ``pretrained_model_archive_map``: a python ``dict`` of with `short-cut-names` (string) as keys and `url` (string) of associated pretrained weights as values. 
- ``load_tf_weights``: a python ``method`` for loading a TensorFlow checkpoint in a PyTorch model, taking as arguments: - ``model``: an instance of the relevant subclass of :class:`~transformers.PreTrainedModel`, @@ -122,7 +121,6 @@ class TFPreTrainedModel(tf.keras.Model, TFModelUtilsMixin): - ``base_model_prefix``: a string indicating the attribute associated to the base model in derived classes of the same architecture adding modules on top of the base model. """ config_class = None - pretrained_model_archive_map = {} base_model_prefix = "" @property @@ -338,9 +336,7 @@ class TFPreTrainedModel(tf.keras.Model, TFModelUtilsMixin): # Load model if pretrained_model_name_or_path is not None: - if pretrained_model_name_or_path in cls.pretrained_model_archive_map: - archive_file = cls.pretrained_model_archive_map[pretrained_model_name_or_path] - elif os.path.isdir(pretrained_model_name_or_path): + if os.path.isdir(pretrained_model_name_or_path): if os.path.isfile(os.path.join(pretrained_model_name_or_path, TF2_WEIGHTS_NAME)): # Load from a TF 2.0 checkpoint archive_file = os.path.join(pretrained_model_name_or_path, TF2_WEIGHTS_NAME) @@ -364,8 +360,8 @@ class TFPreTrainedModel(tf.keras.Model, TFModelUtilsMixin): use_cdn=use_cdn, ) - # redirect to the cache, if necessary try: + # Load from URL or cache if already cached resolved_archive_file = cached_path( archive_file, cache_dir=cache_dir, @@ -373,20 +369,15 @@ class TFPreTrainedModel(tf.keras.Model, TFModelUtilsMixin): resume_download=resume_download, proxies=proxies, ) - except EnvironmentError as e: - if pretrained_model_name_or_path in cls.pretrained_model_archive_map: - logger.error("Couldn't reach server at '{}' to download pretrained weights.".format(archive_file)) - else: - logger.error( - "Model name '{}' was not found in model name list ({}). 
" - "We assumed '{}' was a path or url but couldn't find any file " - "associated to this path or url.".format( - pretrained_model_name_or_path, - ", ".join(cls.pretrained_model_archive_map.keys()), - archive_file, - ) - ) - raise e + if resolved_archive_file is None: + raise EnvironmentError + except EnvironmentError: + msg = ( + f"Can't load weights for '{pretrained_model_name_or_path}'. Make sure that:\n\n" + f"- '{pretrained_model_name_or_path}' is a correct model identifier listed on 'https://huggingface.co/models'\n\n" + f"- or '{pretrained_model_name_or_path}' is the correct path to a directory containing a file named one of {TF2_WEIGHTS_NAME}, {WEIGHTS_NAME}.\n\n" + ) + raise EnvironmentError(msg) if resolved_archive_file == archive_file: logger.info("loading weights file {}".format(archive_file)) else: diff --git a/src/transformers/modeling_tf_xlm.py b/src/transformers/modeling_tf_xlm.py index d82cbab58a..659aa15022 100644 --- a/src/transformers/modeling_tf_xlm.py +++ b/src/transformers/modeling_tf_xlm.py @@ -31,18 +31,19 @@ from .tokenization_utils import BatchEncoding logger = logging.getLogger(__name__) -TF_XLM_PRETRAINED_MODEL_ARCHIVE_MAP = { - "xlm-mlm-en-2048": "https://cdn.huggingface.co/xlm-mlm-en-2048-tf_model.h5", - "xlm-mlm-ende-1024": "https://cdn.huggingface.co/xlm-mlm-ende-1024-tf_model.h5", - "xlm-mlm-enfr-1024": "https://cdn.huggingface.co/xlm-mlm-enfr-1024-tf_model.h5", - "xlm-mlm-enro-1024": "https://cdn.huggingface.co/xlm-mlm-enro-1024-tf_model.h5", - "xlm-mlm-tlm-xnli15-1024": "https://cdn.huggingface.co/xlm-mlm-tlm-xnli15-1024-tf_model.h5", - "xlm-mlm-xnli15-1024": "https://cdn.huggingface.co/xlm-mlm-xnli15-1024-tf_model.h5", - "xlm-clm-enfr-1024": "https://cdn.huggingface.co/xlm-clm-enfr-1024-tf_model.h5", - "xlm-clm-ende-1024": "https://cdn.huggingface.co/xlm-clm-ende-1024-tf_model.h5", - "xlm-mlm-17-1280": "https://cdn.huggingface.co/xlm-mlm-17-1280-tf_model.h5", - "xlm-mlm-100-1280": 
"https://cdn.huggingface.co/xlm-mlm-100-1280-tf_model.h5", -} +TF_XLM_PRETRAINED_MODEL_ARCHIVE_LIST = [ + "xlm-mlm-en-2048", + "xlm-mlm-ende-1024", + "xlm-mlm-enfr-1024", + "xlm-mlm-enro-1024", + "xlm-mlm-tlm-xnli15-1024", + "xlm-mlm-xnli15-1024", + "xlm-clm-enfr-1024", + "xlm-clm-ende-1024", + "xlm-mlm-17-1280", + "xlm-mlm-100-1280", + # See all XLM models at https://huggingface.co/models?filter=xlm +] def create_sinusoidal_embeddings(n_pos, dim, out): @@ -470,7 +471,6 @@ class TFXLMPreTrainedModel(TFPreTrainedModel): """ config_class = XLMConfig - pretrained_model_archive_map = TF_XLM_PRETRAINED_MODEL_ARCHIVE_MAP base_model_prefix = "transformer" @property diff --git a/src/transformers/modeling_tf_xlm_roberta.py b/src/transformers/modeling_tf_xlm_roberta.py index 8b1efdb65d..dee354e0ea 100644 --- a/src/transformers/modeling_tf_xlm_roberta.py +++ b/src/transformers/modeling_tf_xlm_roberta.py @@ -30,7 +30,9 @@ from .modeling_tf_roberta import ( logger = logging.getLogger(__name__) -TF_XLM_ROBERTA_PRETRAINED_MODEL_ARCHIVE_MAP = {} +TF_XLM_ROBERTA_PRETRAINED_MODEL_ARCHIVE_LIST = [ + # See all XLM-RoBERTa models at https://huggingface.co/models?filter=xlm-roberta +] XLM_ROBERTA_START_DOCSTRING = r""" @@ -72,7 +74,6 @@ class TFXLMRobertaModel(TFRobertaModel): """ config_class = XLMRobertaConfig - pretrained_model_archive_map = TF_XLM_ROBERTA_PRETRAINED_MODEL_ARCHIVE_MAP @add_start_docstrings( @@ -85,7 +86,6 @@ class TFXLMRobertaForMaskedLM(TFRobertaForMaskedLM): """ config_class = XLMRobertaConfig - pretrained_model_archive_map = TF_XLM_ROBERTA_PRETRAINED_MODEL_ARCHIVE_MAP @add_start_docstrings( @@ -100,7 +100,6 @@ class TFXLMRobertaForSequenceClassification(TFRobertaForSequenceClassification): """ config_class = XLMRobertaConfig - pretrained_model_archive_map = TF_XLM_ROBERTA_PRETRAINED_MODEL_ARCHIVE_MAP @add_start_docstrings( @@ -115,4 +114,3 @@ class TFXLMRobertaForTokenClassification(TFRobertaForTokenClassification): """ config_class = XLMRobertaConfig - 
pretrained_model_archive_map = TF_XLM_ROBERTA_PRETRAINED_MODEL_ARCHIVE_MAP diff --git a/src/transformers/modeling_tf_xlnet.py b/src/transformers/modeling_tf_xlnet.py index b2b5d974e2..15a4566f72 100644 --- a/src/transformers/modeling_tf_xlnet.py +++ b/src/transformers/modeling_tf_xlnet.py @@ -37,10 +37,11 @@ from .tokenization_utils import BatchEncoding logger = logging.getLogger(__name__) -TF_XLNET_PRETRAINED_MODEL_ARCHIVE_MAP = { - "xlnet-base-cased": "https://cdn.huggingface.co/xlnet-base-cased-tf_model.h5", - "xlnet-large-cased": "https://cdn.huggingface.co/xlnet-large-cased-tf_model.h5", -} +TF_XLNET_PRETRAINED_MODEL_ARCHIVE_LIST = [ + "xlnet-base-cased", + "xlnet-large-cased", + # See all XLNet models at https://huggingface.co/models?filter=xlnet +] def gelu(x): @@ -701,7 +702,6 @@ class TFXLNetPreTrainedModel(TFPreTrainedModel): """ config_class = XLNetConfig - pretrained_model_archive_map = TF_XLNET_PRETRAINED_MODEL_ARCHIVE_MAP base_model_prefix = "transformer" diff --git a/src/transformers/modeling_transfo_xl.py b/src/transformers/modeling_transfo_xl.py index 85224ba86e..25c3592999 100644 --- a/src/transformers/modeling_transfo_xl.py +++ b/src/transformers/modeling_transfo_xl.py @@ -33,9 +33,10 @@ from .modeling_utils import PreTrainedModel logger = logging.getLogger(__name__) -TRANSFO_XL_PRETRAINED_MODEL_ARCHIVE_MAP = { - "transfo-xl-wt103": "https://cdn.huggingface.co/transfo-xl-wt103-pytorch_model.bin", -} +TRANSFO_XL_PRETRAINED_MODEL_ARCHIVE_LIST = [ + "transfo-xl-wt103", + # See all Transformer XL models at https://huggingface.co/models?filter=transfo-xl +] def build_tf_to_pytorch_map(model, config): @@ -453,7 +454,6 @@ class TransfoXLPreTrainedModel(PreTrainedModel): """ config_class = TransfoXLConfig - pretrained_model_archive_map = TRANSFO_XL_PRETRAINED_MODEL_ARCHIVE_MAP load_tf_weights = load_tf_weights_in_transfo_xl base_model_prefix = "transformer" diff --git a/src/transformers/modeling_utils.py b/src/transformers/modeling_utils.py index 
84d415d05d..42ebf0057e 100644 --- a/src/transformers/modeling_utils.py +++ b/src/transformers/modeling_utils.py @@ -110,6 +110,9 @@ class ModuleUtilsMixin: @property def device(self) -> device: + """ + Get torch.device from module, assuming that the whole module has one device. + """ try: return next(self.parameters()).device except StopIteration: @@ -125,6 +128,9 @@ class ModuleUtilsMixin: @property def dtype(self) -> dtype: + """ + Get torch.dtype from module, assuming that the whole module has one dtype. + """ try: return next(self.parameters()).dtype except StopIteration: @@ -249,7 +255,6 @@ class PreTrainedModel(nn.Module, ModuleUtilsMixin): Class attributes (overridden by derived classes): - ``config_class``: a class derived from :class:`~transformers.PretrainedConfig` to use as configuration class for this model architecture. - - ``pretrained_model_archive_map``: a python ``dict`` of with `short-cut-names` (string) as keys and `url` (string) of associated pretrained weights as values. - ``load_tf_weights``: a python ``method`` for loading a TensorFlow checkpoint in a PyTorch model, taking as arguments: - ``model``: an instance of the relevant subclass of :class:`~transformers.PreTrainedModel`, @@ -259,7 +264,6 @@ class PreTrainedModel(nn.Module, ModuleUtilsMixin): - ``base_model_prefix``: a string indicating the attribute associated to the base model in derived classes of the same architecture adding modules on top of the base model. 
""" config_class = None - pretrained_model_archive_map = {} base_model_prefix = "" @property @@ -587,9 +591,7 @@ class PreTrainedModel(nn.Module, ModuleUtilsMixin): # Load model if pretrained_model_name_or_path is not None: - if pretrained_model_name_or_path in cls.pretrained_model_archive_map: - archive_file = cls.pretrained_model_archive_map[pretrained_model_name_or_path] - elif os.path.isdir(pretrained_model_name_or_path): + if os.path.isdir(pretrained_model_name_or_path): if from_tf and os.path.isfile(os.path.join(pretrained_model_name_or_path, TF_WEIGHTS_NAME + ".index")): # Load from a TF 1.0 checkpoint archive_file = os.path.join(pretrained_model_name_or_path, TF_WEIGHTS_NAME + ".index") @@ -622,8 +624,8 @@ class PreTrainedModel(nn.Module, ModuleUtilsMixin): use_cdn=use_cdn, ) - # redirect to the cache, if necessary try: + # Load from URL or cache if already cached resolved_archive_file = cached_path( archive_file, cache_dir=cache_dir, @@ -632,20 +634,14 @@ class PreTrainedModel(nn.Module, ModuleUtilsMixin): resume_download=resume_download, local_files_only=local_files_only, ) + if resolved_archive_file is None: + raise EnvironmentError except EnvironmentError: - if pretrained_model_name_or_path in cls.pretrained_model_archive_map: - msg = "Couldn't reach server at '{}' to download pretrained weights.".format(archive_file) - else: - msg = ( - "Model name '{}' was not found in model name list ({}). " - "We assumed '{}' was a path or url to model weight files named one of {} but " - "couldn't find any such file at this path or url.".format( - pretrained_model_name_or_path, - ", ".join(cls.pretrained_model_archive_map.keys()), - archive_file, - [WEIGHTS_NAME, TF2_WEIGHTS_NAME, TF_WEIGHTS_NAME], - ) - ) + msg = ( + f"Can't load weights for '{pretrained_model_name_or_path}'. 
Make sure that:\n\n" + f"- '{pretrained_model_name_or_path}' is a correct model identifier listed on 'https://huggingface.co/models'\n\n" + f"- or '{pretrained_model_name_or_path}' is the correct path to a directory containing a file named one of {WEIGHTS_NAME}, {TF2_WEIGHTS_NAME}, {TF_WEIGHTS_NAME}.\n\n" + ) raise EnvironmentError(msg) if resolved_archive_file == archive_file: diff --git a/src/transformers/modeling_xlm.py b/src/transformers/modeling_xlm.py index b9ef58015d..44187db704 100644 --- a/src/transformers/modeling_xlm.py +++ b/src/transformers/modeling_xlm.py @@ -34,18 +34,19 @@ from .modeling_utils import PreTrainedModel, SequenceSummary, SQuADHead, prune_l logger = logging.getLogger(__name__) -XLM_PRETRAINED_MODEL_ARCHIVE_MAP = { - "xlm-mlm-en-2048": "https://cdn.huggingface.co/xlm-mlm-en-2048-pytorch_model.bin", - "xlm-mlm-ende-1024": "https://cdn.huggingface.co/xlm-mlm-ende-1024-pytorch_model.bin", - "xlm-mlm-enfr-1024": "https://cdn.huggingface.co/xlm-mlm-enfr-1024-pytorch_model.bin", - "xlm-mlm-enro-1024": "https://cdn.huggingface.co/xlm-mlm-enro-1024-pytorch_model.bin", - "xlm-mlm-tlm-xnli15-1024": "https://cdn.huggingface.co/xlm-mlm-tlm-xnli15-1024-pytorch_model.bin", - "xlm-mlm-xnli15-1024": "https://cdn.huggingface.co/xlm-mlm-xnli15-1024-pytorch_model.bin", - "xlm-clm-enfr-1024": "https://cdn.huggingface.co/xlm-clm-enfr-1024-pytorch_model.bin", - "xlm-clm-ende-1024": "https://cdn.huggingface.co/xlm-clm-ende-1024-pytorch_model.bin", - "xlm-mlm-17-1280": "https://cdn.huggingface.co/xlm-mlm-17-1280-pytorch_model.bin", - "xlm-mlm-100-1280": "https://cdn.huggingface.co/xlm-mlm-100-1280-pytorch_model.bin", -} +XLM_PRETRAINED_MODEL_ARCHIVE_LIST = [ + "xlm-mlm-en-2048", + "xlm-mlm-ende-1024", + "xlm-mlm-enfr-1024", + "xlm-mlm-enro-1024", + "xlm-mlm-tlm-xnli15-1024", + "xlm-mlm-xnli15-1024", + "xlm-clm-enfr-1024", + "xlm-clm-ende-1024", + "xlm-mlm-17-1280", + "xlm-mlm-100-1280", + # See all XLM models at https://huggingface.co/models?filter=xlm +] def 
create_sinusoidal_embeddings(n_pos, dim, out): @@ -207,7 +208,6 @@ class XLMPreTrainedModel(PreTrainedModel): """ config_class = XLMConfig - pretrained_model_archive_map = XLM_PRETRAINED_MODEL_ARCHIVE_MAP load_tf_weights = None base_model_prefix = "transformer" diff --git a/src/transformers/modeling_xlm_roberta.py b/src/transformers/modeling_xlm_roberta.py index 63c978e1fb..132621a731 100644 --- a/src/transformers/modeling_xlm_roberta.py +++ b/src/transformers/modeling_xlm_roberta.py @@ -31,14 +31,15 @@ from .modeling_roberta import ( logger = logging.getLogger(__name__) -XLM_ROBERTA_PRETRAINED_MODEL_ARCHIVE_MAP = { - "xlm-roberta-base": "https://cdn.huggingface.co/xlm-roberta-base-pytorch_model.bin", - "xlm-roberta-large": "https://cdn.huggingface.co/xlm-roberta-large-pytorch_model.bin", - "xlm-roberta-large-finetuned-conll02-dutch": "https://cdn.huggingface.co/xlm-roberta-large-finetuned-conll02-dutch-pytorch_model.bin", - "xlm-roberta-large-finetuned-conll02-spanish": "https://cdn.huggingface.co/xlm-roberta-large-finetuned-conll02-spanish-pytorch_model.bin", - "xlm-roberta-large-finetuned-conll03-english": "https://cdn.huggingface.co/xlm-roberta-large-finetuned-conll03-english-pytorch_model.bin", - "xlm-roberta-large-finetuned-conll03-german": "https://cdn.huggingface.co/xlm-roberta-large-finetuned-conll03-german-pytorch_model.bin", -} +XLM_ROBERTA_PRETRAINED_MODEL_ARCHIVE_LIST = [ + "xlm-roberta-base", + "xlm-roberta-large", + "xlm-roberta-large-finetuned-conll02-dutch", + "xlm-roberta-large-finetuned-conll02-spanish", + "xlm-roberta-large-finetuned-conll03-english", + "xlm-roberta-large-finetuned-conll03-german", + # See all XLM-RoBERTa models at https://huggingface.co/models?filter=xlm-roberta +] XLM_ROBERTA_START_DOCSTRING = r""" @@ -65,7 +66,6 @@ class XLMRobertaModel(RobertaModel): """ config_class = XLMRobertaConfig - pretrained_model_archive_map = XLM_ROBERTA_PRETRAINED_MODEL_ARCHIVE_MAP @add_start_docstrings( @@ -78,7 +78,6 @@ class 
XLMRobertaForMaskedLM(RobertaForMaskedLM): """ config_class = XLMRobertaConfig - pretrained_model_archive_map = XLM_ROBERTA_PRETRAINED_MODEL_ARCHIVE_MAP @add_start_docstrings( @@ -93,7 +92,6 @@ class XLMRobertaForSequenceClassification(RobertaForSequenceClassification): """ config_class = XLMRobertaConfig - pretrained_model_archive_map = XLM_ROBERTA_PRETRAINED_MODEL_ARCHIVE_MAP @add_start_docstrings( @@ -108,7 +106,6 @@ class XLMRobertaForMultipleChoice(RobertaForMultipleChoice): """ config_class = XLMRobertaConfig - pretrained_model_archive_map = XLM_ROBERTA_PRETRAINED_MODEL_ARCHIVE_MAP @add_start_docstrings( @@ -123,4 +120,3 @@ class XLMRobertaForTokenClassification(RobertaForTokenClassification): """ config_class = XLMRobertaConfig - pretrained_model_archive_map = XLM_ROBERTA_PRETRAINED_MODEL_ARCHIVE_MAP diff --git a/src/transformers/modeling_xlnet.py b/src/transformers/modeling_xlnet.py index 9dfbae4f6f..522460a22e 100644 --- a/src/transformers/modeling_xlnet.py +++ b/src/transformers/modeling_xlnet.py @@ -32,10 +32,11 @@ from .modeling_utils import PoolerAnswerClass, PoolerEndLogits, PoolerStartLogit logger = logging.getLogger(__name__) -XLNET_PRETRAINED_MODEL_ARCHIVE_MAP = { - "xlnet-base-cased": "https://cdn.huggingface.co/xlnet-base-cased-pytorch_model.bin", - "xlnet-large-cased": "https://cdn.huggingface.co/xlnet-large-cased-pytorch_model.bin", -} +XLNET_PRETRAINED_MODEL_ARCHIVE_LIST = [ + "xlnet-base-cased", + "xlnet-large-cased", + # See all XLNet models at https://huggingface.co/models?filter=xlnet +] def build_tf_xlnet_to_pytorch_map(model, config, tf_weights=None): @@ -459,7 +460,6 @@ class XLNetPreTrainedModel(PreTrainedModel): """ config_class = XLNetConfig - pretrained_model_archive_map = XLNET_PRETRAINED_MODEL_ARCHIVE_MAP load_tf_weights = load_tf_weights_in_xlnet base_model_prefix = "transformer" diff --git a/src/transformers/tokenization_auto.py b/src/transformers/tokenization_auto.py index 3ae453a468..0a00779890 100644 --- 
a/src/transformers/tokenization_auto.py +++ b/src/transformers/tokenization_auto.py @@ -97,27 +97,24 @@ class AutoTokenizer: when created with the `AutoTokenizer.from_pretrained(pretrained_model_name_or_path)` class method. - The `from_pretrained()` method take care of returning the correct tokenizer class instance + The `from_pretrained()` method takes care of returning the correct tokenizer class instance based on the `model_type` property of the config object, or when it's missing, - falling back to using pattern matching on the `pretrained_model_name_or_path` string. - - The tokenizer class to instantiate is selected as the first pattern matching - in the `pretrained_model_name_or_path` string (in the following order): - - contains `t5`: T5Tokenizer (T5 model) - - contains `distilbert`: DistilBertTokenizer (DistilBert model) - - contains `albert`: AlbertTokenizer (ALBERT model) - - contains `camembert`: CamembertTokenizer (CamemBERT model) - - contains `xlm-roberta`: XLMRobertaTokenizer (XLM-RoBERTa model) - - contains `longformer`: LongformerTokenizer (AllenAI Longformer model) - - contains `roberta`: RobertaTokenizer (RoBERTa model) - - contains `bert`: BertTokenizer (Bert model) - - contains `openai-gpt`: OpenAIGPTTokenizer (OpenAI GPT model) - - contains `gpt2`: GPT2Tokenizer (OpenAI GPT-2 model) - - contains `transfo-xl`: TransfoXLTokenizer (Transformer-XL model) - - contains `xlnet`: XLNetTokenizer (XLNet model) - - contains `xlm`: XLMTokenizer (XLM model) - - contains `ctrl`: CTRLTokenizer (Salesforce CTRL model) - - contains `electra`: ElectraTokenizer (Google ELECTRA model) + falling back to using pattern matching on the `pretrained_model_name_or_path` string: + - `t5`: T5Tokenizer (T5 model) + - `distilbert`: DistilBertTokenizer (DistilBert model) + - `albert`: AlbertTokenizer (ALBERT model) + - `camembert`: CamembertTokenizer (CamemBERT model) + - `xlm-roberta`: XLMRobertaTokenizer (XLM-RoBERTa model) + - `longformer`: LongformerTokenizer (AllenAI 
Longformer model) + - `roberta`: RobertaTokenizer (RoBERTa model) + - `bert`: BertTokenizer (Bert model) + - `openai-gpt`: OpenAIGPTTokenizer (OpenAI GPT model) + - `gpt2`: GPT2Tokenizer (OpenAI GPT-2 model) + - `transfo-xl`: TransfoXLTokenizer (Transformer-XL model) + - `xlnet`: XLNetTokenizer (XLNet model) + - `xlm`: XLMTokenizer (XLM model) + - `ctrl`: CTRLTokenizer (Salesforce CTRL model) + - `electra`: ElectraTokenizer (Google ELECTRA model) This class cannot be instantiated using `__init__()` (throw an error). """ @@ -133,24 +130,25 @@ class AutoTokenizer: r""" Instantiate one of the tokenizer classes of the library from a pre-trained model vocabulary. - The tokenizer class to instantiate is selected as the first pattern matching - in the `pretrained_model_name_or_path` string (in the following order): - - contains `t5`: T5Tokenizer (T5 model) - - contains `distilbert`: DistilBertTokenizer (DistilBert model) - - contains `albert`: AlbertTokenizer (ALBERT model) - - contains `camembert`: CamembertTokenizer (CamemBERT model) - - contains `xlm-roberta`: XLMRobertaTokenizer (XLM-RoBERTa model) - - contains `longformer`: LongformerTokenizer (AllenAI Longformer model) - - contains `roberta`: RobertaTokenizer (RoBERTa model) - - contains `bert-base-japanese`: BertJapaneseTokenizer (Bert model) - - contains `bert`: BertTokenizer (Bert model) - - contains `openai-gpt`: OpenAIGPTTokenizer (OpenAI GPT model) - - contains `gpt2`: GPT2Tokenizer (OpenAI GPT-2 model) - - contains `transfo-xl`: TransfoXLTokenizer (Transformer-XL model) - - contains `xlnet`: XLNetTokenizer (XLNet model) - - contains `xlm`: XLMTokenizer (XLM model) - - contains `ctrl`: CTRLTokenizer (Salesforce CTRL model) - - contains `electra`: ElectraTokenizer (Google ELECTRA model) + The tokenizer class to instantiate is selected + based on the `model_type` property of the config object, or when it's missing, + falling back to using pattern matching on the `pretrained_model_name_or_path` string: + - `t5`: 
T5Tokenizer (T5 model) + - `distilbert`: DistilBertTokenizer (DistilBert model) + - `albert`: AlbertTokenizer (ALBERT model) + - `camembert`: CamembertTokenizer (CamemBERT model) + - `xlm-roberta`: XLMRobertaTokenizer (XLM-RoBERTa model) + - `longformer`: LongformerTokenizer (AllenAI Longformer model) + - `roberta`: RobertaTokenizer (RoBERTa model) + - `bert-base-japanese`: BertJapaneseTokenizer (Bert model) + - `bert`: BertTokenizer (Bert model) + - `openai-gpt`: OpenAIGPTTokenizer (OpenAI GPT model) + - `gpt2`: GPT2Tokenizer (OpenAI GPT-2 model) + - `transfo-xl`: TransfoXLTokenizer (Transformer-XL model) + - `xlnet`: XLNetTokenizer (XLNet model) + - `xlm`: XLMTokenizer (XLM model) + - `ctrl`: CTRLTokenizer (Salesforce CTRL model) + - `electra`: ElectraTokenizer (Google ELECTRA model) Params: pretrained_model_name_or_path: either: diff --git a/src/transformers/tokenization_bert.py b/src/transformers/tokenization_bert.py index 649c96d0a4..48ebf859a2 100644 --- a/src/transformers/tokenization_bert.py +++ b/src/transformers/tokenization_bert.py @@ -47,9 +47,9 @@ PRETRAINED_VOCAB_FILES_MAP = { "bert-base-cased-finetuned-mrpc": "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-cased-finetuned-mrpc-vocab.txt", "bert-base-german-dbmdz-cased": "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-german-dbmdz-cased-vocab.txt", "bert-base-german-dbmdz-uncased": "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-german-dbmdz-uncased-vocab.txt", - "bert-base-finnish-cased-v1": "https://s3.amazonaws.com/models.huggingface.co/bert/TurkuNLP/bert-base-finnish-cased-v1/vocab.txt", - "bert-base-finnish-uncased-v1": "https://s3.amazonaws.com/models.huggingface.co/bert/TurkuNLP/bert-base-finnish-uncased-v1/vocab.txt", - "bert-base-dutch-cased": "https://s3.amazonaws.com/models.huggingface.co/bert/wietsedv/bert-base-dutch-cased/vocab.txt", + "TurkuNLP/bert-base-finnish-cased-v1": 
"https://s3.amazonaws.com/models.huggingface.co/bert/TurkuNLP/bert-base-finnish-cased-v1/vocab.txt", + "TurkuNLP/bert-base-finnish-uncased-v1": "https://s3.amazonaws.com/models.huggingface.co/bert/TurkuNLP/bert-base-finnish-uncased-v1/vocab.txt", + "wietsedv/bert-base-dutch-cased": "https://s3.amazonaws.com/models.huggingface.co/bert/wietsedv/bert-base-dutch-cased/vocab.txt", } } @@ -69,9 +69,9 @@ PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES = { "bert-base-cased-finetuned-mrpc": 512, "bert-base-german-dbmdz-cased": 512, "bert-base-german-dbmdz-uncased": 512, - "bert-base-finnish-cased-v1": 512, - "bert-base-finnish-uncased-v1": 512, - "bert-base-dutch-cased": 512, + "TurkuNLP/bert-base-finnish-cased-v1": 512, + "TurkuNLP/bert-base-finnish-uncased-v1": 512, + "wietsedv/bert-base-dutch-cased": 512, } PRETRAINED_INIT_CONFIGURATION = { @@ -90,9 +90,9 @@ PRETRAINED_INIT_CONFIGURATION = { "bert-base-cased-finetuned-mrpc": {"do_lower_case": False}, "bert-base-german-dbmdz-cased": {"do_lower_case": False}, "bert-base-german-dbmdz-uncased": {"do_lower_case": True}, - "bert-base-finnish-cased-v1": {"do_lower_case": False}, - "bert-base-finnish-uncased-v1": {"do_lower_case": True}, - "bert-base-dutch-cased": {"do_lower_case": False}, + "TurkuNLP/bert-base-finnish-cased-v1": {"do_lower_case": False}, + "TurkuNLP/bert-base-finnish-uncased-v1": {"do_lower_case": True}, + "wietsedv/bert-base-dutch-cased": {"do_lower_case": False}, } diff --git a/src/transformers/tokenization_bert_japanese.py b/src/transformers/tokenization_bert_japanese.py index 531ae30f35..43ae8e1894 100644 --- a/src/transformers/tokenization_bert_japanese.py +++ b/src/transformers/tokenization_bert_japanese.py @@ -30,37 +30,37 @@ VOCAB_FILES_NAMES = {"vocab_file": "vocab.txt"} PRETRAINED_VOCAB_FILES_MAP = { "vocab_file": { - "bert-base-japanese": "https://s3.amazonaws.com/models.huggingface.co/bert/cl-tohoku/bert-base-japanese/vocab.txt", - "bert-base-japanese-whole-word-masking": 
"https://s3.amazonaws.com/models.huggingface.co/bert/cl-tohoku/bert-base-japanese-whole-word-masking/vocab.txt", - "bert-base-japanese-char": "https://s3.amazonaws.com/models.huggingface.co/bert/cl-tohoku/bert-base-japanese-char/vocab.txt", - "bert-base-japanese-char-whole-word-masking": "https://s3.amazonaws.com/models.huggingface.co/bert/cl-tohoku/bert-base-japanese-char-whole-word-masking/vocab.txt", + "cl-tohoku/bert-base-japanese": "https://s3.amazonaws.com/models.huggingface.co/bert/cl-tohoku/bert-base-japanese/vocab.txt", + "cl-tohoku/bert-base-japanese-whole-word-masking": "https://s3.amazonaws.com/models.huggingface.co/bert/cl-tohoku/bert-base-japanese-whole-word-masking/vocab.txt", + "cl-tohoku/bert-base-japanese-char": "https://s3.amazonaws.com/models.huggingface.co/bert/cl-tohoku/bert-base-japanese-char/vocab.txt", + "cl-tohoku/bert-base-japanese-char-whole-word-masking": "https://s3.amazonaws.com/models.huggingface.co/bert/cl-tohoku/bert-base-japanese-char-whole-word-masking/vocab.txt", } } PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES = { - "bert-base-japanese": 512, - "bert-base-japanese-whole-word-masking": 512, - "bert-base-japanese-char": 512, - "bert-base-japanese-char-whole-word-masking": 512, + "cl-tohoku/bert-base-japanese": 512, + "cl-tohoku/bert-base-japanese-whole-word-masking": 512, + "cl-tohoku/bert-base-japanese-char": 512, + "cl-tohoku/bert-base-japanese-char-whole-word-masking": 512, } PRETRAINED_INIT_CONFIGURATION = { - "bert-base-japanese": { + "cl-tohoku/bert-base-japanese": { "do_lower_case": False, "word_tokenizer_type": "mecab", "subword_tokenizer_type": "wordpiece", }, - "bert-base-japanese-whole-word-masking": { + "cl-tohoku/bert-base-japanese-whole-word-masking": { "do_lower_case": False, "word_tokenizer_type": "mecab", "subword_tokenizer_type": "wordpiece", }, - "bert-base-japanese-char": { + "cl-tohoku/bert-base-japanese-char": { "do_lower_case": False, "word_tokenizer_type": "mecab", "subword_tokenizer_type": "character", }, - 
"bert-base-japanese-char-whole-word-masking": { + "cl-tohoku/bert-base-japanese-char-whole-word-masking": { "do_lower_case": False, "word_tokenizer_type": "mecab", "subword_tokenizer_type": "character", diff --git a/src/transformers/tokenization_utils.py b/src/transformers/tokenization_utils.py index c3711e640d..d318ce690e 100644 --- a/src/transformers/tokenization_utils.py +++ b/src/transformers/tokenization_utils.py @@ -942,13 +942,11 @@ class PreTrainedTokenizer(SpecialTokensMixin): if os.path.isfile(pretrained_model_name_or_path) or is_remote_url(pretrained_model_name_or_path): if len(cls.vocab_files_names) > 1: raise ValueError( - "Calling {}.from_pretrained() with the path to a single file or url is not supported." - "Use a model identifier or the path to a directory instead.".format(cls.__name__) + f"Calling {cls.__name__}.from_pretrained() with the path to a single file or url is not supported." + "Use a model identifier or the path to a directory instead." ) logger.warning( - "Calling {}.from_pretrained() with the path to a single file or url is deprecated".format( - cls.__name__ - ) + f"Calling {cls.__name__}.from_pretrained() with the path to a single file or url is deprecated" ) file_id = list(cls.vocab_files_names.keys())[0] vocab_files[file_id] = pretrained_model_name_or_path diff --git a/templates/adding_a_new_example_script/run_xxx.py b/templates/adding_a_new_example_script/run_xxx.py index a4047c865a..6a86685f7d 100644 --- a/templates/adding_a_new_example_script/run_xxx.py +++ b/templates/adding_a_new_example_script/run_xxx.py @@ -63,8 +63,6 @@ logger = logging.getLogger(__name__) MODEL_CONFIG_CLASSES = list(MODEL_FOR_QUESTION_ANSWERING_MAPPING.keys()) MODEL_TYPES = tuple(conf.model_type for conf in MODEL_CONFIG_CLASSES) -ALL_MODELS = sum((tuple(conf.pretrained_config_archive_map.keys()) for conf in MODEL_CONFIG_CLASSES), (),) - def set_seed(args): random.seed(args.seed) @@ -411,7 +409,7 @@ def main(): default=None, type=str, required=True, - 
help="Path to pre-trained model or shortcut name selected in the list: " + ", ".join(ALL_MODELS), + help="Path to pretrained model or model identifier from huggingface.co/models", ) parser.add_argument( "--output_dir", diff --git a/templates/adding_a_new_model/configuration_xxx.py b/templates/adding_a_new_model/configuration_xxx.py index d23bce43d2..683b52628a 100644 --- a/templates/adding_a_new_model/configuration_xxx.py +++ b/templates/adding_a_new_model/configuration_xxx.py @@ -57,7 +57,6 @@ class XxxConfig(PretrainedConfig): initializing all weight matrices. layer_norm_eps: The epsilon used by LayerNorm. """ - pretrained_config_archive_map = XXX_PRETRAINED_CONFIG_ARCHIVE_MAP model_type = "xxx" def __init__( diff --git a/templates/adding_a_new_model/modeling_tf_xxx.py b/templates/adding_a_new_model/modeling_tf_xxx.py index 4974f36faf..757f13ee6d 100644 --- a/templates/adding_a_new_model/modeling_tf_xxx.py +++ b/templates/adding_a_new_model/modeling_tf_xxx.py @@ -32,13 +32,13 @@ from .modeling_tf_utils import TFPreTrainedModel, get_initializer, shape_list logger = logging.getLogger(__name__) #################################################### -# This dict contrains shortcut names and associated url -# for the pretrained weights provided with the models +# This list contains shortcut names for some of +# the pretrained weights provided with the models #################################################### -TF_XXX_PRETRAINED_MODEL_ARCHIVE_MAP = { - "xxx-base-uncased": "https://cdn.huggingface.co/xxx-base-uncased-tf_model.h5", - "xxx-large-uncased": "https://cdn.huggingface.co/xxx-large-uncased-tf_model.h5", -} +TF_XXX_PRETRAINED_MODEL_ARCHIVE_LIST = [ + "xxx-base-uncased", + "xxx-large-uncased", +] #################################################### @@ -180,7 +180,6 @@ class TFXxxPreTrainedModel(TFPreTrainedModel): """ config_class = XxxConfig - pretrained_model_archive_map = TF_XXX_PRETRAINED_MODEL_ARCHIVE_MAP base_model_prefix = "transformer" diff --git
a/templates/adding_a_new_model/modeling_xxx.py b/templates/adding_a_new_model/modeling_xxx.py index 1c62401746..486a7bfa58 100644 --- a/templates/adding_a_new_model/modeling_xxx.py +++ b/templates/adding_a_new_model/modeling_xxx.py @@ -34,13 +34,13 @@ from .modeling_utils import PreTrainedModel logger = logging.getLogger(__name__) #################################################### -# This dict contrains shortcut names and associated url -# for the pretrained weights provided with the models +# This list contains shortcut names for some of +# the pretrained weights provided with the models #################################################### -XXX_PRETRAINED_MODEL_ARCHIVE_MAP = { - "xxx-base-uncased": "https://cdn.huggingface.co/xxx-base-uncased-pytorch_model.bin", - "xxx-large-uncased": "https://cdn.huggingface.co/xxx-large-uncased-pytorch_model.bin", -} +XXX_PRETRAINED_MODEL_ARCHIVE_LIST = [ + "xxx-base-uncased", + "xxx-large-uncased", +] #################################################### @@ -180,7 +180,6 @@ class XxxPreTrainedModel(PreTrainedModel): """ config_class = XxxConfig - pretrained_model_archive_map = XXX_PRETRAINED_MODEL_ARCHIVE_MAP load_tf_weights = load_tf_weights_in_xxx base_model_prefix = "transformer" diff --git a/templates/adding_a_new_model/tests/test_modeling_xxx.py b/templates/adding_a_new_model/tests/test_modeling_xxx.py index 281a9226fc..2d5530407e 100644 --- a/templates/adding_a_new_model/tests/test_modeling_xxx.py +++ b/templates/adding_a_new_model/tests/test_modeling_xxx.py @@ -32,7 +32,7 @@ if is_torch_available(): XxxForSequenceClassification, XxxForTokenClassification, ) - from transformers.modeling_xxx import XXX_PRETRAINED_MODEL_ARCHIVE_MAP + from transformers.modeling_xxx import XXX_PRETRAINED_MODEL_ARCHIVE_LIST @require_torch @@ -269,6 +269,6 @@ class XxxModelTest(ModelTesterMixin, unittest.TestCase): @slow def test_model_from_pretrained(self): - for model_name in list(XXX_PRETRAINED_MODEL_ARCHIVE_MAP.keys())[:1]: + for
model_name in XXX_PRETRAINED_MODEL_ARCHIVE_LIST[:1]: model = XxxModel.from_pretrained(model_name, cache_dir=CACHE_DIR) self.assertIsNotNone(model) diff --git a/tests/test_modeling_albert.py b/tests/test_modeling_albert.py index 9ac64c56c6..f1352338b9 100644 --- a/tests/test_modeling_albert.py +++ b/tests/test_modeling_albert.py @@ -33,7 +33,7 @@ if is_torch_available(): AlbertForTokenClassification, AlbertForQuestionAnswering, ) - from transformers.modeling_albert import ALBERT_PRETRAINED_MODEL_ARCHIVE_MAP + from transformers.modeling_albert import ALBERT_PRETRAINED_MODEL_ARCHIVE_LIST @require_torch @@ -295,6 +295,6 @@ class AlbertModelTest(ModelTesterMixin, unittest.TestCase): @slow def test_model_from_pretrained(self): - for model_name in list(ALBERT_PRETRAINED_MODEL_ARCHIVE_MAP.keys())[:1]: + for model_name in ALBERT_PRETRAINED_MODEL_ARCHIVE_LIST[:1]: model = AlbertModel.from_pretrained(model_name) self.assertIsNotNone(model) diff --git a/tests/test_modeling_auto.py b/tests/test_modeling_auto.py index 4c7779344c..b933d6e5d3 100644 --- a/tests/test_modeling_auto.py +++ b/tests/test_modeling_auto.py @@ -40,7 +40,7 @@ if is_torch_available(): AutoModelForTokenClassification, BertForTokenClassification, ) - from transformers.modeling_bert import BERT_PRETRAINED_MODEL_ARCHIVE_MAP + from transformers.modeling_bert import BERT_PRETRAINED_MODEL_ARCHIVE_LIST from transformers.modeling_auto import ( MODEL_MAPPING, MODEL_FOR_PRETRAINING_MAPPING, @@ -56,7 +56,7 @@ class AutoModelTest(unittest.TestCase): @slow def test_model_from_pretrained(self): logging.basicConfig(level=logging.INFO) - for model_name in list(BERT_PRETRAINED_MODEL_ARCHIVE_MAP.keys())[:1]: + for model_name in BERT_PRETRAINED_MODEL_ARCHIVE_LIST[:1]: config = AutoConfig.from_pretrained(model_name) self.assertIsNotNone(config) self.assertIsInstance(config, BertConfig) @@ -71,7 +71,7 @@ class AutoModelTest(unittest.TestCase): @slow def test_model_for_pretraining_from_pretrained(self): 
logging.basicConfig(level=logging.INFO) - for model_name in list(BERT_PRETRAINED_MODEL_ARCHIVE_MAP.keys())[:1]: + for model_name in BERT_PRETRAINED_MODEL_ARCHIVE_LIST[:1]: config = AutoConfig.from_pretrained(model_name) self.assertIsNotNone(config) self.assertIsInstance(config, BertConfig) @@ -87,7 +87,7 @@ class AutoModelTest(unittest.TestCase): @slow def test_lmhead_model_from_pretrained(self): logging.basicConfig(level=logging.INFO) - for model_name in list(BERT_PRETRAINED_MODEL_ARCHIVE_MAP.keys())[:1]: + for model_name in BERT_PRETRAINED_MODEL_ARCHIVE_LIST[:1]: config = AutoConfig.from_pretrained(model_name) self.assertIsNotNone(config) self.assertIsInstance(config, BertConfig) @@ -100,7 +100,7 @@ class AutoModelTest(unittest.TestCase): @slow def test_sequence_classification_model_from_pretrained(self): logging.basicConfig(level=logging.INFO) - for model_name in list(BERT_PRETRAINED_MODEL_ARCHIVE_MAP.keys())[:1]: + for model_name in BERT_PRETRAINED_MODEL_ARCHIVE_LIST[:1]: config = AutoConfig.from_pretrained(model_name) self.assertIsNotNone(config) self.assertIsInstance(config, BertConfig) @@ -115,7 +115,7 @@ class AutoModelTest(unittest.TestCase): @slow def test_question_answering_model_from_pretrained(self): logging.basicConfig(level=logging.INFO) - for model_name in list(BERT_PRETRAINED_MODEL_ARCHIVE_MAP.keys())[:1]: + for model_name in BERT_PRETRAINED_MODEL_ARCHIVE_LIST[:1]: config = AutoConfig.from_pretrained(model_name) self.assertIsNotNone(config) self.assertIsInstance(config, BertConfig) @@ -128,7 +128,7 @@ class AutoModelTest(unittest.TestCase): @slow def test_token_classification_model_from_pretrained(self): logging.basicConfig(level=logging.INFO) - for model_name in list(BERT_PRETRAINED_MODEL_ARCHIVE_MAP.keys())[:1]: + for model_name in BERT_PRETRAINED_MODEL_ARCHIVE_LIST[:1]: config = AutoConfig.from_pretrained(model_name) self.assertIsNotNone(config) self.assertIsInstance(config, BertConfig) diff --git a/tests/test_modeling_bart.py 
b/tests/test_modeling_bart.py index 49989ed402..b33fd55088 100644 --- a/tests/test_modeling_bart.py +++ b/tests/test_modeling_bart.py @@ -39,7 +39,7 @@ if is_torch_available(): MBartTokenizer, ) from transformers.modeling_bart import ( - BART_PRETRAINED_MODEL_ARCHIVE_MAP, + BART_PRETRAINED_MODEL_ARCHIVE_LIST, shift_tokens_right, invert_mask, _prepare_bart_decoder_inputs, @@ -261,7 +261,7 @@ class BartTranslationTests(unittest.TestCase): self.assertEqual(expected_translation_romanian, decoded[0]) def test_mbart_enro_config(self): - mbart_models = ["mbart-large-en-ro"] + mbart_models = ["facebook/mbart-large-en-ro"] expected = {"scale_embedding": True, "output_past": True} for name in mbart_models: config = BartConfig.from_pretrained(name) @@ -561,7 +561,7 @@ class BartModelIntegrationTests(unittest.TestCase): @unittest.skip("This is just too slow") def test_model_from_pretrained(self): # Forces 1.6GB download from S3 for each model - for model_name in list(BART_PRETRAINED_MODEL_ARCHIVE_MAP.keys()): + for model_name in BART_PRETRAINED_MODEL_ARCHIVE_LIST: model = BartModel.from_pretrained(model_name) self.assertIsNotNone(model) @@ -593,7 +593,7 @@ class BartModelIntegrationTests(unittest.TestCase): self.assertEqual(EXPECTED_SUMMARY, decoded[0]) def test_xsum_config_generation_params(self): - config = BartConfig.from_pretrained("bart-large-xsum") + config = BartConfig.from_pretrained("facebook/bart-large-xsum") expected_params = dict(num_beams=6, do_sample=False, early_stopping=True, length_penalty=1.0) config_params = {k: getattr(config, k, "MISSING") for k, v in expected_params.items()} self.assertDictEqual(expected_params, config_params) diff --git a/tests/test_modeling_bert.py b/tests/test_modeling_bert.py index f45d786a9b..fc994c46be 100644 --- a/tests/test_modeling_bert.py +++ b/tests/test_modeling_bert.py @@ -35,7 +35,7 @@ if is_torch_available(): BertForTokenClassification, BertForMultipleChoice, ) - from transformers.modeling_bert import 
BERT_PRETRAINED_MODEL_ARCHIVE_MAP + from transformers.modeling_bert import BERT_PRETRAINED_MODEL_ARCHIVE_LIST class BertModelTester: @@ -494,6 +494,6 @@ class BertModelTest(ModelTesterMixin, unittest.TestCase): @slow def test_model_from_pretrained(self): - for model_name in list(BERT_PRETRAINED_MODEL_ARCHIVE_MAP.keys())[:1]: + for model_name in BERT_PRETRAINED_MODEL_ARCHIVE_LIST[:1]: model = BertModel.from_pretrained(model_name) self.assertIsNotNone(model) diff --git a/tests/test_modeling_common.py b/tests/test_modeling_common.py index 009350564a..ceca9d43ea 100644 --- a/tests/test_modeling_common.py +++ b/tests/test_modeling_common.py @@ -36,7 +36,7 @@ if is_torch_available(): PreTrainedModel, BertModel, BertConfig, - BERT_PRETRAINED_MODEL_ARCHIVE_MAP, + BERT_PRETRAINED_MODEL_ARCHIVE_LIST, top_k_top_p_filtering, ) @@ -824,7 +824,7 @@ class ModelUtilsTest(unittest.TestCase): @slow def test_model_from_pretrained(self): logging.basicConfig(level=logging.INFO) - for model_name in list(BERT_PRETRAINED_MODEL_ARCHIVE_MAP.keys())[:1]: + for model_name in BERT_PRETRAINED_MODEL_ARCHIVE_LIST[:1]: config = BertConfig.from_pretrained(model_name) self.assertIsNotNone(config) self.assertIsInstance(config, PretrainedConfig) diff --git a/tests/test_modeling_ctrl.py b/tests/test_modeling_ctrl.py index c29a0d3723..b825fc90dc 100644 --- a/tests/test_modeling_ctrl.py +++ b/tests/test_modeling_ctrl.py @@ -24,7 +24,7 @@ from .utils import require_torch, slow, torch_device if is_torch_available(): import torch - from transformers import CTRLConfig, CTRLModel, CTRL_PRETRAINED_MODEL_ARCHIVE_MAP, CTRLLMHeadModel + from transformers import CTRLConfig, CTRLModel, CTRL_PRETRAINED_MODEL_ARCHIVE_LIST, CTRLLMHeadModel @require_torch @@ -210,7 +210,7 @@ class CTRLModelTest(ModelTesterMixin, unittest.TestCase): @slow def test_model_from_pretrained(self): - for model_name in list(CTRL_PRETRAINED_MODEL_ARCHIVE_MAP.keys())[:1]: + for model_name in CTRL_PRETRAINED_MODEL_ARCHIVE_LIST[:1]: model = 
CTRLModel.from_pretrained(model_name) self.assertIsNotNone(model) diff --git a/tests/test_modeling_distilbert.py b/tests/test_modeling_distilbert.py index b059d2a6de..7b9f20fedf 100644 --- a/tests/test_modeling_distilbert.py +++ b/tests/test_modeling_distilbert.py @@ -247,6 +247,6 @@ class DistilBertModelTest(ModelTesterMixin, unittest.TestCase): # @slow # def test_model_from_pretrained(self): - # for model_name in list(DISTILBERT_PRETRAINED_MODEL_ARCHIVE_MAP.keys())[:1]: + # for model_name in DISTILBERT_PRETRAINED_MODEL_ARCHIVE_LIST[:1]: # model = DistilBertModel.from_pretrained(model_name) # self.assertIsNotNone(model) diff --git a/tests/test_modeling_electra.py b/tests/test_modeling_electra.py index 3df77a25da..2639676742 100644 --- a/tests/test_modeling_electra.py +++ b/tests/test_modeling_electra.py @@ -32,7 +32,7 @@ if is_torch_available(): ElectraForPreTraining, ElectraForSequenceClassification, ) - from transformers.modeling_electra import ELECTRA_PRETRAINED_MODEL_ARCHIVE_MAP + from transformers.modeling_electra import ELECTRA_PRETRAINED_MODEL_ARCHIVE_LIST @require_torch @@ -312,6 +312,6 @@ class ElectraModelTest(ModelTesterMixin, unittest.TestCase): @slow def test_model_from_pretrained(self): - for model_name in list(ELECTRA_PRETRAINED_MODEL_ARCHIVE_MAP.keys())[:1]: + for model_name in ELECTRA_PRETRAINED_MODEL_ARCHIVE_LIST[:1]: model = ElectraModel.from_pretrained(model_name) self.assertIsNotNone(model) diff --git a/tests/test_modeling_flaubert.py b/tests/test_modeling_flaubert.py index 15f4c49d5a..5dabea0c10 100644 --- a/tests/test_modeling_flaubert.py +++ b/tests/test_modeling_flaubert.py @@ -32,7 +32,7 @@ if is_torch_available(): FlaubertForQuestionAnsweringSimple, FlaubertForSequenceClassification, ) - from transformers.modeling_flaubert import FLAUBERT_PRETRAINED_MODEL_ARCHIVE_MAP + from transformers.modeling_flaubert import FLAUBERT_PRETRAINED_MODEL_ARCHIVE_LIST @require_torch @@ -387,6 +387,6 @@ class FlaubertModelTest(ModelTesterMixin, 
unittest.TestCase): @slow def test_model_from_pretrained(self): - for model_name in list(FLAUBERT_PRETRAINED_MODEL_ARCHIVE_MAP.keys())[:1]: + for model_name in FLAUBERT_PRETRAINED_MODEL_ARCHIVE_LIST[:1]: model = FlaubertModel.from_pretrained(model_name) self.assertIsNotNone(model) diff --git a/tests/test_modeling_gpt2.py b/tests/test_modeling_gpt2.py index 6939e1fe56..4f4dedb32e 100644 --- a/tests/test_modeling_gpt2.py +++ b/tests/test_modeling_gpt2.py @@ -28,7 +28,7 @@ if is_torch_available(): from transformers import ( GPT2Config, GPT2Model, - GPT2_PRETRAINED_MODEL_ARCHIVE_MAP, + GPT2_PRETRAINED_MODEL_ARCHIVE_LIST, GPT2LMHeadModel, GPT2DoubleHeadsModel, ) @@ -334,7 +334,7 @@ class GPT2ModelTest(ModelTesterMixin, unittest.TestCase): @slow def test_model_from_pretrained(self): - for model_name in list(GPT2_PRETRAINED_MODEL_ARCHIVE_MAP.keys())[:1]: + for model_name in GPT2_PRETRAINED_MODEL_ARCHIVE_LIST[:1]: model = GPT2Model.from_pretrained(model_name) self.assertIsNotNone(model) diff --git a/tests/test_modeling_openai.py b/tests/test_modeling_openai.py index cb8c3dc819..4d4191b4a1 100644 --- a/tests/test_modeling_openai.py +++ b/tests/test_modeling_openai.py @@ -28,7 +28,7 @@ if is_torch_available(): from transformers import ( OpenAIGPTConfig, OpenAIGPTModel, - OPENAI_GPT_PRETRAINED_MODEL_ARCHIVE_MAP, + OPENAI_GPT_PRETRAINED_MODEL_ARCHIVE_LIST, OpenAIGPTLMHeadModel, OpenAIGPTDoubleHeadsModel, ) @@ -218,7 +218,7 @@ class OpenAIGPTModelTest(ModelTesterMixin, unittest.TestCase): @slow def test_model_from_pretrained(self): - for model_name in list(OPENAI_GPT_PRETRAINED_MODEL_ARCHIVE_MAP.keys())[:1]: + for model_name in OPENAI_GPT_PRETRAINED_MODEL_ARCHIVE_LIST[:1]: model = OpenAIGPTModel.from_pretrained(model_name) self.assertIsNotNone(model) diff --git a/tests/test_modeling_reformer.py b/tests/test_modeling_reformer.py index 0ffc21abcf..60ba91ada9 100644 --- a/tests/test_modeling_reformer.py +++ b/tests/test_modeling_reformer.py @@ -29,7 +29,7 @@ if 
is_torch_available(): ReformerModelWithLMHead, ReformerTokenizer, ReformerLayer, - REFORMER_PRETRAINED_MODEL_ARCHIVE_MAP, + REFORMER_PRETRAINED_MODEL_ARCHIVE_LIST, ) import torch @@ -503,7 +503,7 @@ class ReformerLocalAttnModelTest(ReformerTesterMixin, ModelTesterMixin, unittest @slow def test_model_from_pretrained(self): - for model_name in list(REFORMER_PRETRAINED_MODEL_ARCHIVE_MAP.keys())[:1]: + for model_name in REFORMER_PRETRAINED_MODEL_ARCHIVE_LIST[:1]: model = ReformerModelWithLMHead.from_pretrained(model_name) self.assertIsNotNone(model) diff --git a/tests/test_modeling_roberta.py b/tests/test_modeling_roberta.py index cb24039d68..d739cfda76 100644 --- a/tests/test_modeling_roberta.py +++ b/tests/test_modeling_roberta.py @@ -33,7 +33,7 @@ if is_torch_available(): RobertaForTokenClassification, ) from transformers.modeling_roberta import RobertaEmbeddings, RobertaForMultipleChoice, RobertaForQuestionAnswering - from transformers.modeling_roberta import ROBERTA_PRETRAINED_MODEL_ARCHIVE_MAP + from transformers.modeling_roberta import ROBERTA_PRETRAINED_MODEL_ARCHIVE_LIST from transformers.modeling_utils import create_position_ids_from_input_ids @@ -273,7 +273,7 @@ class RobertaModelTest(ModelTesterMixin, unittest.TestCase): @slow def test_model_from_pretrained(self): - for model_name in list(ROBERTA_PRETRAINED_MODEL_ARCHIVE_MAP.keys())[:1]: + for model_name in ROBERTA_PRETRAINED_MODEL_ARCHIVE_LIST[:1]: model = RobertaModel.from_pretrained(model_name) self.assertIsNotNone(model) diff --git a/tests/test_modeling_t5.py b/tests/test_modeling_t5.py index fb9f450aeb..5c7544f564 100644 --- a/tests/test_modeling_t5.py +++ b/tests/test_modeling_t5.py @@ -26,7 +26,7 @@ from .utils import require_torch, slow, torch_device if is_torch_available(): import torch from transformers import T5Config, T5Model, T5ForConditionalGeneration - from transformers.modeling_t5 import T5_PRETRAINED_MODEL_ARCHIVE_MAP + from transformers.modeling_t5 import T5_PRETRAINED_MODEL_ARCHIVE_LIST 
from transformers.tokenization_t5 import T5Tokenizer @@ -372,7 +372,7 @@ class T5ModelTest(ModelTesterMixin, unittest.TestCase): @slow def test_model_from_pretrained(self): - for model_name in list(T5_PRETRAINED_MODEL_ARCHIVE_MAP.keys())[:1]: + for model_name in T5_PRETRAINED_MODEL_ARCHIVE_LIST[:1]: model = T5Model.from_pretrained(model_name) self.assertIsNotNone(model) diff --git a/tests/test_modeling_tf_albert.py b/tests/test_modeling_tf_albert.py index 43beb4b709..04983ab5ac 100644 --- a/tests/test_modeling_tf_albert.py +++ b/tests/test_modeling_tf_albert.py @@ -30,7 +30,7 @@ if is_tf_available(): TFAlbertForMaskedLM, TFAlbertForSequenceClassification, TFAlbertForQuestionAnswering, - TF_ALBERT_PRETRAINED_MODEL_ARCHIVE_MAP, + TF_ALBERT_PRETRAINED_MODEL_ARCHIVE_LIST, ) @@ -257,6 +257,6 @@ class TFAlbertModelTest(TFModelTesterMixin, unittest.TestCase): @slow def test_model_from_pretrained(self): - for model_name in list(TF_ALBERT_PRETRAINED_MODEL_ARCHIVE_MAP.keys())[:1]: + for model_name in TF_ALBERT_PRETRAINED_MODEL_ARCHIVE_LIST[:1]: model = TFAlbertModel.from_pretrained(model_name) self.assertIsNotNone(model) diff --git a/tests/test_modeling_tf_auto.py b/tests/test_modeling_tf_auto.py index 6994f6eaa9..2aaec4b0b1 100644 --- a/tests/test_modeling_tf_auto.py +++ b/tests/test_modeling_tf_auto.py @@ -49,7 +49,7 @@ class TFAutoModelTest(unittest.TestCase): self.assertTrue(h5py.version.hdf5_version.startswith("1.10")) logging.basicConfig(level=logging.INFO) - # for model_name in list(TF_BERT_PRETRAINED_MODEL_ARCHIVE_MAP.keys())[:1]: + # for model_name in TF_BERT_PRETRAINED_MODEL_ARCHIVE_LIST[:1]: for model_name in ["bert-base-uncased"]: config = AutoConfig.from_pretrained(model_name) self.assertIsNotNone(config) @@ -66,7 +66,7 @@ class TFAutoModelTest(unittest.TestCase): self.assertTrue(h5py.version.hdf5_version.startswith("1.10")) logging.basicConfig(level=logging.INFO) - # for model_name in list(TF_BERT_PRETRAINED_MODEL_ARCHIVE_MAP.keys())[:1]: + # for model_name in 
TF_BERT_PRETRAINED_MODEL_ARCHIVE_LIST[:1]: for model_name in ["bert-base-uncased"]: config = AutoConfig.from_pretrained(model_name) self.assertIsNotNone(config) @@ -79,7 +79,7 @@ class TFAutoModelTest(unittest.TestCase): @slow def test_lmhead_model_from_pretrained(self): logging.basicConfig(level=logging.INFO) - # for model_name in list(TF_BERT_PRETRAINED_MODEL_ARCHIVE_MAP.keys())[:1]: + # for model_name in TF_BERT_PRETRAINED_MODEL_ARCHIVE_LIST[:1]: for model_name in ["bert-base-uncased"]: config = AutoConfig.from_pretrained(model_name) self.assertIsNotNone(config) @@ -92,7 +92,7 @@ class TFAutoModelTest(unittest.TestCase): @slow def test_sequence_classification_model_from_pretrained(self): logging.basicConfig(level=logging.INFO) - # for model_name in list(TF_BERT_PRETRAINED_MODEL_ARCHIVE_MAP.keys())[:1]: + # for model_name in TF_BERT_PRETRAINED_MODEL_ARCHIVE_LIST[:1]: for model_name in ["bert-base-uncased"]: config = AutoConfig.from_pretrained(model_name) self.assertIsNotNone(config) @@ -105,7 +105,7 @@ class TFAutoModelTest(unittest.TestCase): @slow def test_question_answering_model_from_pretrained(self): logging.basicConfig(level=logging.INFO) - # for model_name in list(TF_BERT_PRETRAINED_MODEL_ARCHIVE_MAP.keys())[:1]: + # for model_name in TF_BERT_PRETRAINED_MODEL_ARCHIVE_LIST[:1]: for model_name in ["bert-base-uncased"]: config = AutoConfig.from_pretrained(model_name) self.assertIsNotNone(config) diff --git a/tests/test_modeling_tf_bert.py b/tests/test_modeling_tf_bert.py index 6ab7c2b2ca..241def4c68 100644 --- a/tests/test_modeling_tf_bert.py +++ b/tests/test_modeling_tf_bert.py @@ -311,7 +311,7 @@ class TFBertModelTest(TFModelTesterMixin, unittest.TestCase): @slow def test_model_from_pretrained(self): - # for model_name in list(TF_BERT_PRETRAINED_MODEL_ARCHIVE_MAP.keys())[:1]: + # for model_name in TF_BERT_PRETRAINED_MODEL_ARCHIVE_LIST[:1]: for model_name in ["bert-base-uncased"]: model = TFBertModel.from_pretrained(model_name) self.assertIsNotNone(model) 
diff --git a/tests/test_modeling_tf_ctrl.py b/tests/test_modeling_tf_ctrl.py index a55a52365c..e375800396 100644 --- a/tests/test_modeling_tf_ctrl.py +++ b/tests/test_modeling_tf_ctrl.py @@ -25,7 +25,7 @@ from .utils import require_tf, slow if is_tf_available(): import tensorflow as tf - from transformers.modeling_tf_ctrl import TFCTRLModel, TFCTRLLMHeadModel, TF_CTRL_PRETRAINED_MODEL_ARCHIVE_MAP + from transformers.modeling_tf_ctrl import TFCTRLModel, TFCTRLLMHeadModel, TF_CTRL_PRETRAINED_MODEL_ARCHIVE_LIST @require_tf @@ -200,7 +200,7 @@ class TFCTRLModelTest(TFModelTesterMixin, unittest.TestCase): @slow def test_model_from_pretrained(self): - for model_name in list(TF_CTRL_PRETRAINED_MODEL_ARCHIVE_MAP.keys())[:1]: + for model_name in TF_CTRL_PRETRAINED_MODEL_ARCHIVE_LIST[:1]: model = TFCTRLModel.from_pretrained(model_name) self.assertIsNotNone(model) diff --git a/tests/test_modeling_tf_distilbert.py b/tests/test_modeling_tf_distilbert.py index 2a67ffbfc9..c513885f29 100644 --- a/tests/test_modeling_tf_distilbert.py +++ b/tests/test_modeling_tf_distilbert.py @@ -218,6 +218,6 @@ class TFDistilBertModelTest(TFModelTesterMixin, unittest.TestCase): # @slow # def test_model_from_pretrained(self): - # for model_name in list(DISTILBERT_PRETRAINED_MODEL_ARCHIVE_MAP.keys())[:1]: + # for model_name in DISTILBERT_PRETRAINED_MODEL_ARCHIVE_LIST[:1]: # model = DistilBertModesss.from_pretrained(model_name) # self.assertIsNotNone(model) diff --git a/tests/test_modeling_tf_electra.py b/tests/test_modeling_tf_electra.py index a480241fe2..3f79811b48 100644 --- a/tests/test_modeling_tf_electra.py +++ b/tests/test_modeling_tf_electra.py @@ -221,7 +221,7 @@ class TFElectraModelTest(TFModelTesterMixin, unittest.TestCase): @slow def test_model_from_pretrained(self): - # for model_name in list(TF_ELECTRA_PRETRAINED_MODEL_ARCHIVE_MAP.keys())[:1]: + # for model_name in TF_ELECTRA_PRETRAINED_MODEL_ARCHIVE_LIST[:1]: for model_name in ["google/electra-small-discriminator"]: model =
TFElectraModel.from_pretrained(model_name) self.assertIsNotNone(model) diff --git a/tests/test_modeling_tf_gpt2.py b/tests/test_modeling_tf_gpt2.py index df2f7c1880..8177789a2e 100644 --- a/tests/test_modeling_tf_gpt2.py +++ b/tests/test_modeling_tf_gpt2.py @@ -29,7 +29,7 @@ if is_tf_available(): TFGPT2Model, TFGPT2LMHeadModel, TFGPT2DoubleHeadsModel, - TF_GPT2_PRETRAINED_MODEL_ARCHIVE_MAP, + TF_GPT2_PRETRAINED_MODEL_ARCHIVE_LIST, shape_list, ) @@ -323,7 +323,7 @@ class TFGPT2ModelTest(TFModelTesterMixin, unittest.TestCase): @slow def test_model_from_pretrained(self): - for model_name in list(TF_GPT2_PRETRAINED_MODEL_ARCHIVE_MAP.keys())[:1]: + for model_name in TF_GPT2_PRETRAINED_MODEL_ARCHIVE_LIST[:1]: model = TFGPT2Model.from_pretrained(model_name) self.assertIsNotNone(model) diff --git a/tests/test_modeling_tf_openai_gpt.py b/tests/test_modeling_tf_openai_gpt.py index 8b91f753a0..a7573082cd 100644 --- a/tests/test_modeling_tf_openai_gpt.py +++ b/tests/test_modeling_tf_openai_gpt.py @@ -29,7 +29,7 @@ if is_tf_available(): TFOpenAIGPTModel, TFOpenAIGPTLMHeadModel, TFOpenAIGPTDoubleHeadsModel, - TF_OPENAI_GPT_PRETRAINED_MODEL_ARCHIVE_MAP, + TF_OPENAI_GPT_PRETRAINED_MODEL_ARCHIVE_LIST, ) @@ -235,7 +235,7 @@ class TFOpenAIGPTModelTest(TFModelTesterMixin, unittest.TestCase): @slow def test_model_from_pretrained(self): - for model_name in list(TF_OPENAI_GPT_PRETRAINED_MODEL_ARCHIVE_MAP.keys())[:1]: + for model_name in TF_OPENAI_GPT_PRETRAINED_MODEL_ARCHIVE_LIST[:1]: model = TFOpenAIGPTModel.from_pretrained(model_name) self.assertIsNotNone(model) diff --git a/tests/test_modeling_tf_roberta.py b/tests/test_modeling_tf_roberta.py index 7dd89f14d4..6fbb9a33f6 100644 --- a/tests/test_modeling_tf_roberta.py +++ b/tests/test_modeling_tf_roberta.py @@ -32,7 +32,7 @@ if is_tf_available(): TFRobertaForSequenceClassification, TFRobertaForTokenClassification, TFRobertaForQuestionAnswering, - TF_ROBERTA_PRETRAINED_MODEL_ARCHIVE_MAP, + TF_ROBERTA_PRETRAINED_MODEL_ARCHIVE_LIST, ) @@ 
-232,7 +232,7 @@ class TFRobertaModelTest(TFModelTesterMixin, unittest.TestCase): @slow def test_model_from_pretrained(self): - for model_name in list(TF_ROBERTA_PRETRAINED_MODEL_ARCHIVE_MAP.keys())[:1]: + for model_name in TF_ROBERTA_PRETRAINED_MODEL_ARCHIVE_LIST[:1]: model = TFRobertaModel.from_pretrained(model_name) self.assertIsNotNone(model) diff --git a/tests/test_modeling_tf_transfo_xl.py b/tests/test_modeling_tf_transfo_xl.py index fd5b71d593..f1a0107106 100644 --- a/tests/test_modeling_tf_transfo_xl.py +++ b/tests/test_modeling_tf_transfo_xl.py @@ -29,7 +29,7 @@ if is_tf_available(): from transformers import ( TFTransfoXLModel, TFTransfoXLLMHeadModel, - TF_TRANSFO_XL_PRETRAINED_MODEL_ARCHIVE_MAP, + TF_TRANSFO_XL_PRETRAINED_MODEL_ARCHIVE_LIST, ) @@ -209,7 +209,7 @@ class TFTransfoXLModelTest(TFModelTesterMixin, unittest.TestCase): @slow def test_model_from_pretrained(self): - for model_name in list(TF_TRANSFO_XL_PRETRAINED_MODEL_ARCHIVE_MAP.keys())[:1]: + for model_name in TF_TRANSFO_XL_PRETRAINED_MODEL_ARCHIVE_LIST[:1]: model = TFTransfoXLModel.from_pretrained(model_name) self.assertIsNotNone(model) diff --git a/tests/test_modeling_tf_xlm.py b/tests/test_modeling_tf_xlm.py index a959aae6bc..eff929858c 100644 --- a/tests/test_modeling_tf_xlm.py +++ b/tests/test_modeling_tf_xlm.py @@ -31,7 +31,7 @@ if is_tf_available(): TFXLMWithLMHeadModel, TFXLMForSequenceClassification, TFXLMForQuestionAnsweringSimple, - TF_XLM_PRETRAINED_MODEL_ARCHIVE_MAP, + TF_XLM_PRETRAINED_MODEL_ARCHIVE_LIST, ) @@ -308,7 +308,7 @@ class TFXLMModelTest(TFModelTesterMixin, unittest.TestCase): @slow def test_model_from_pretrained(self): - for model_name in list(TF_XLM_PRETRAINED_MODEL_ARCHIVE_MAP.keys())[:1]: + for model_name in TF_XLM_PRETRAINED_MODEL_ARCHIVE_LIST[:1]: model = TFXLMModel.from_pretrained(model_name) self.assertIsNotNone(model) diff --git a/tests/test_modeling_tf_xlnet.py b/tests/test_modeling_tf_xlnet.py index df19822085..762f0b49b3 100644 --- 
a/tests/test_modeling_tf_xlnet.py +++ b/tests/test_modeling_tf_xlnet.py @@ -33,7 +33,7 @@ if is_tf_available(): TFXLNetForSequenceClassification, TFXLNetForTokenClassification, TFXLNetForQuestionAnsweringSimple, - TF_XLNET_PRETRAINED_MODEL_ARCHIVE_MAP, + TF_XLNET_PRETRAINED_MODEL_ARCHIVE_LIST, ) @@ -410,7 +410,7 @@ class TFXLNetModelTest(TFModelTesterMixin, unittest.TestCase): @slow def test_model_from_pretrained(self): - for model_name in list(TF_XLNET_PRETRAINED_MODEL_ARCHIVE_MAP.keys())[:1]: + for model_name in TF_XLNET_PRETRAINED_MODEL_ARCHIVE_LIST[:1]: model = TFXLNetModel.from_pretrained(model_name) self.assertIsNotNone(model) diff --git a/tests/test_modeling_transfo_xl.py b/tests/test_modeling_transfo_xl.py index 343993f435..3c058ec34d 100644 --- a/tests/test_modeling_transfo_xl.py +++ b/tests/test_modeling_transfo_xl.py @@ -27,7 +27,7 @@ from .utils import require_multigpu, require_torch, slow, torch_device if is_torch_available(): import torch from transformers import TransfoXLConfig, TransfoXLModel, TransfoXLLMHeadModel - from transformers.modeling_transfo_xl import TRANSFO_XL_PRETRAINED_MODEL_ARCHIVE_MAP + from transformers.modeling_transfo_xl import TRANSFO_XL_PRETRAINED_MODEL_ARCHIVE_LIST @require_torch @@ -214,7 +214,7 @@ class TransfoXLModelTest(ModelTesterMixin, unittest.TestCase): @slow def test_model_from_pretrained(self): - for model_name in list(TRANSFO_XL_PRETRAINED_MODEL_ARCHIVE_MAP.keys())[:1]: + for model_name in TRANSFO_XL_PRETRAINED_MODEL_ARCHIVE_LIST[:1]: model = TransfoXLModel.from_pretrained(model_name) self.assertIsNotNone(model) diff --git a/tests/test_modeling_xlm.py b/tests/test_modeling_xlm.py index 3504345208..5d79ce3ae3 100644 --- a/tests/test_modeling_xlm.py +++ b/tests/test_modeling_xlm.py @@ -34,7 +34,7 @@ if is_torch_available(): XLMForSequenceClassification, XLMForQuestionAnsweringSimple, ) - from transformers.modeling_xlm import XLM_PRETRAINED_MODEL_ARCHIVE_MAP + from transformers.modeling_xlm import 
XLM_PRETRAINED_MODEL_ARCHIVE_LIST @require_torch @@ -425,7 +425,7 @@ class XLMModelTest(ModelTesterMixin, unittest.TestCase): @slow def test_model_from_pretrained(self): - for model_name in list(XLM_PRETRAINED_MODEL_ARCHIVE_MAP.keys())[:1]: + for model_name in XLM_PRETRAINED_MODEL_ARCHIVE_LIST[:1]: model = XLMModel.from_pretrained(model_name) self.assertIsNotNone(model) diff --git a/tests/test_modeling_xlnet.py b/tests/test_modeling_xlnet.py index ef557baf66..1487a720a1 100644 --- a/tests/test_modeling_xlnet.py +++ b/tests/test_modeling_xlnet.py @@ -35,7 +35,7 @@ if is_torch_available(): XLNetForTokenClassification, XLNetForQuestionAnswering, ) - from transformers.modeling_xlnet import XLNET_PRETRAINED_MODEL_ARCHIVE_MAP + from transformers.modeling_xlnet import XLNET_PRETRAINED_MODEL_ARCHIVE_LIST @require_torch @@ -508,7 +508,7 @@ class XLNetModelTest(ModelTesterMixin, unittest.TestCase): @slow def test_model_from_pretrained(self): - for model_name in list(XLNET_PRETRAINED_MODEL_ARCHIVE_MAP.keys())[:1]: + for model_name in XLNET_PRETRAINED_MODEL_ARCHIVE_LIST[:1]: model = XLNetModel.from_pretrained(model_name) self.assertIsNotNone(model) diff --git a/tests/test_tokenization_bert_japanese.py b/tests/test_tokenization_bert_japanese.py index 71af8a5513..7cc5db6374 100644 --- a/tests/test_tokenization_bert_japanese.py +++ b/tests/test_tokenization_bert_japanese.py @@ -127,7 +127,7 @@ class BertJapaneseTokenizationTest(TokenizerTesterMixin, unittest.TestCase): self.assertListEqual(tokenizer.tokenize("こんばんは こんばんにちは こんにちは"), ["こん", "##ばんは", "[UNK]", "こんにちは"]) def test_sequence_builders(self): - tokenizer = self.tokenizer_class.from_pretrained("bert-base-japanese") + tokenizer = self.tokenizer_class.from_pretrained("cl-tohoku/bert-base-japanese") text = tokenizer.encode("ありがとう。", add_special_tokens=False) text_2 = tokenizer.encode("どういたしまして。", add_special_tokens=False) @@ -188,7 +188,7 @@ class BertJapaneseCharacterTokenizationTest(TokenizerTesterMixin, unittest.TestC 
self.assertListEqual(tokenizer.tokenize("こんにちほ"), ["こ", "ん", "に", "ち", "[UNK]"]) def test_sequence_builders(self): - tokenizer = self.tokenizer_class.from_pretrained("bert-base-japanese-char") + tokenizer = self.tokenizer_class.from_pretrained("cl-tohoku/bert-base-japanese-char") text = tokenizer.encode("ありがとう。", add_special_tokens=False) text_2 = tokenizer.encode("どういたしまして。", add_special_tokens=False)