[WIP] Disentangle auto modules from other modeling files (#13023)

* Initial work

* All auto models

* All TF auto models

* All flax auto models

* Tokenizers

* Add feature extractors

* Fix typos

* Fix other typo

* Use the right config

* Remove old mapping names and update logic in AutoTokenizer

* Update check_table

* Fix copies and check_repo script

* Fix last test

* Add back name

* clean up

* Update template

* Update template

* Forgot a )

* Use alternative to fixup

* Fix TF model template

* Address review comments

* Address review comments

* Style
This commit is contained in:
Sylvain Gugger
2021-08-06 13:12:30 +02:00
committed by GitHub
parent 2e4082364e
commit 9870093f7b
26 changed files with 1338 additions and 2405 deletions

View File

@@ -1781,25 +1781,22 @@ class PreTrainedTokenizerBase(SpecialTokensMixin, PushToHubMixin):
if config_tokenizer_class is None:
# Third attempt. If we have not yet found the original type of the tokenizer
# we are loading, we see if we can infer it from the type of the configuration file.
from .models.auto.configuration_auto import CONFIG_MAPPING # tests_ignore
from .models.auto.tokenization_auto import TOKENIZER_MAPPING # tests_ignore
from .models.auto.tokenization_auto import TOKENIZER_MAPPING_NAMES # tests_ignore
if hasattr(config, "model_type"):
config_class = CONFIG_MAPPING.get(config.model_type)
model_type = config.model_type
else:
# Fallback: use pattern matching on the string.
config_class = None
for pattern, config_class_tmp in CONFIG_MAPPING.items():
model_type = None
for pattern in TOKENIZER_MAPPING_NAMES.keys():
if pattern in str(pretrained_model_name_or_path):
config_class = config_class_tmp
model_type = pattern
break
if config_class in TOKENIZER_MAPPING.keys():
config_tokenizer_class, config_tokenizer_class_fast = TOKENIZER_MAPPING[config_class]
if config_tokenizer_class is not None:
config_tokenizer_class = config_tokenizer_class.__name__
else:
config_tokenizer_class = config_tokenizer_class_fast.__name__
if model_type is not None:
config_tokenizer_class, config_tokenizer_class_fast = TOKENIZER_MAPPING_NAMES[model_type]
if config_tokenizer_class is None:
config_tokenizer_class = config_tokenizer_class_fast
if config_tokenizer_class is not None:
if cls.__name__.replace("Fast", "") != config_tokenizer_class.replace("Fast", ""):