From f8823bad9a23f6623e91e71719e65342de877cb9 Mon Sep 17 00:00:00 2001 From: Julien Chaumond Date: Tue, 24 Mar 2020 17:46:25 -0400 Subject: [PATCH] Expose missing mappings (see #3415) --- examples/run_language_modeling.py | 12 ++++++++---- src/transformers/__init__.py | 4 ++-- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/examples/run_language_modeling.py b/examples/run_language_modeling.py index fa462c65ca..8e8694533c 100644 --- a/examples/run_language_modeling.py +++ b/examples/run_language_modeling.py @@ -38,7 +38,6 @@ from torch.utils.data.distributed import DistributedSampler from tqdm import tqdm, trange from transformers import ( - CONFIG_MAPPING, MODEL_WITH_LM_HEAD_MAPPING, WEIGHTS_NAME, AdamW, @@ -679,7 +678,12 @@ def main(): elif args.model_name_or_path: config = AutoConfig.from_pretrained(args.model_name_or_path, cache_dir=args.cache_dir) else: - config = CONFIG_MAPPING[args.model_type]() + # When we release a pip version exposing CONFIG_MAPPING, + # we can do `config = CONFIG_MAPPING[args.model_type]()`. + raise ValueError( + "You are instantiating a new config instance from scratch. This is not supported, but you can do it from another script, save it," + "and load it from here, using --config_name" + ) if args.tokenizer_name: tokenizer = AutoTokenizer.from_pretrained(args.tokenizer_name, cache_dir=args.cache_dir) @@ -687,8 +691,8 @@ def main(): tokenizer = AutoTokenizer.from_pretrained(args.model_name_or_path, cache_dir=args.cache_dir) else: raise ValueError( - "You are instantiating a new {} tokenizer. This is not supported, but you can do it from another script, save it," - "and load it from here, using --tokenizer_name".format(AutoTokenizer.__name__) + "You are instantiating a new tokenizer from scratch. This is not supported, but you can do it from another script, save it," + "and load it from here, using --tokenizer_name" ) if args.block_size <= 0: diff --git a/src/transformers/__init__.py b/src/transformers/__init__.py index d1b3c31542..c6a929cb88 100755 --- a/src/transformers/__init__.py +++ b/src/transformers/__init__.py @@ -32,7 +32,7 @@ from .benchmark_utils import ( stop_memory_tracing, ) from .configuration_albert import ALBERT_PRETRAINED_CONFIG_ARCHIVE_MAP, AlbertConfig -from .configuration_auto import ALL_PRETRAINED_CONFIG_ARCHIVE_MAP, AutoConfig +from .configuration_auto import ALL_PRETRAINED_CONFIG_ARCHIVE_MAP, CONFIG_MAPPING, AutoConfig from .configuration_bart import BartConfig from .configuration_bert import BERT_PRETRAINED_CONFIG_ARCHIVE_MAP, BertConfig from .configuration_camembert import CAMEMBERT_PRETRAINED_CONFIG_ARCHIVE_MAP, CamembertConfig @@ -119,7 +119,7 @@ from .pipelines import ( pipeline, ) from .tokenization_albert import AlbertTokenizer -from .tokenization_auto import AutoTokenizer +from .tokenization_auto import TOKENIZER_MAPPING, AutoTokenizer from .tokenization_bart import BartTokenizer from .tokenization_bert import BasicTokenizer, BertTokenizer, BertTokenizerFast, WordpieceTokenizer from .tokenization_bert_japanese import BertJapaneseTokenizer, CharacterTokenizer, MecabTokenizer