[WIP] Disentangle auto modules from other modeling files (#13023)
* Initial work * All auto models * All tf auto models * All flax auto models * Tokenizers * Add feature extractors * Fix typos * Fix other typo * Use the right config * Remove old mapping names and update logic in AutoTokenizer * Update check_table * Fix copies and check_repo script * Fix last test * Add back name * clean up * Update template * Update template * Forgot a ) * Use alternative to fixup * Fix TF model template * Address review comments * Address review comments * Style
This commit is contained in:
@@ -306,17 +306,17 @@ def get_all_auto_configured_models():
|
||||
result = set() # To avoid duplicates we concatenate all model classes in a set.
|
||||
if is_torch_available():
|
||||
for attr_name in dir(transformers.models.auto.modeling_auto):
|
||||
if attr_name.startswith("MODEL_") and attr_name.endswith("MAPPING"):
|
||||
if attr_name.startswith("MODEL_") and attr_name.endswith("MAPPING_NAMES"):
|
||||
result = result | set(get_values(getattr(transformers.models.auto.modeling_auto, attr_name)))
|
||||
if is_tf_available():
|
||||
for attr_name in dir(transformers.models.auto.modeling_tf_auto):
|
||||
if attr_name.startswith("TF_MODEL_") and attr_name.endswith("MAPPING"):
|
||||
if attr_name.startswith("TF_MODEL_") and attr_name.endswith("MAPPING_NAMES"):
|
||||
result = result | set(get_values(getattr(transformers.models.auto.modeling_tf_auto, attr_name)))
|
||||
if is_flax_available():
|
||||
for attr_name in dir(transformers.models.auto.modeling_flax_auto):
|
||||
if attr_name.startswith("FLAX_MODEL_") and attr_name.endswith("MAPPING"):
|
||||
if attr_name.startswith("FLAX_MODEL_") and attr_name.endswith("MAPPING_NAMES"):
|
||||
result = result | set(get_values(getattr(transformers.models.auto.modeling_flax_auto, attr_name)))
|
||||
return [cls.__name__ for cls in result]
|
||||
return [cls for cls in result]
|
||||
|
||||
|
||||
def ignore_unautoclassed(model_name):
|
||||
|
||||
@@ -87,12 +87,13 @@ def get_model_table_from_auto_modules():
|
||||
transformers = spec.loader.load_module()
|
||||
|
||||
# Dictionary model names to config.
|
||||
config_maping_names = transformers.models.auto.configuration_auto.CONFIG_MAPPING_NAMES
|
||||
model_name_to_config = {
|
||||
name: transformers.CONFIG_MAPPING[code] for code, name in transformers.MODEL_NAMES_MAPPING.items()
|
||||
}
|
||||
model_name_to_prefix = {
|
||||
name: config.__name__.replace("Config", "") for name, config in model_name_to_config.items()
|
||||
name: config_maping_names[code]
|
||||
for code, name in transformers.MODEL_NAMES_MAPPING.items()
|
||||
if code in config_maping_names
|
||||
}
|
||||
model_name_to_prefix = {name: config.replace("Config", "") for name, config in model_name_to_config.items()}
|
||||
|
||||
# Dictionaries flagging if each model prefix has a slow/fast tokenizer, backend in PT/TF/Flax.
|
||||
slow_tokenizers = collections.defaultdict(bool)
|
||||
|
||||
@@ -1,106 +0,0 @@
|
||||
# coding=utf-8
|
||||
# Copyright 2020 The HuggingFace Inc. team.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
# this script remaps classes to class strings so that it's quick to load such maps and not require
|
||||
# loading all possible modeling files
|
||||
#
|
||||
# it can be extended to auto-generate other dicts that are needed at runtime
|
||||
|
||||
|
||||
import os
|
||||
import sys
|
||||
from os.path import abspath, dirname, join
|
||||
|
||||
|
||||
# Make the in-repo `src` directory importable: it is resolved from this
# script's parent directory and inserted at index 1 of sys.path.
git_repo_path = abspath(join(dirname(dirname(__file__)), "src"))
sys.path.insert(1, git_repo_path)

# `src` is the file whose modification time is checked; `dst` is the file this
# script regenerates. Both are relative paths, so they resolve against the
# current working directory.
src = "src/transformers/models/auto/modeling_auto.py"
dst = "src/transformers/utils/modeling_auto_mapping.py"

if os.path.exists(dst) and os.path.getmtime(src) < os.path.getmtime(dst):
    # speed things up by only running this script if the src is newer than dst
    sys.exit(0)
|
||||
|
||||
# only load if needed
|
||||
from transformers.models.auto.modeling_auto import ( # noqa
|
||||
MODEL_FOR_CAUSAL_LM_MAPPING,
|
||||
MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING,
|
||||
MODEL_FOR_MASKED_LM_MAPPING,
|
||||
MODEL_FOR_MULTIPLE_CHOICE_MAPPING,
|
||||
MODEL_FOR_NEXT_SENTENCE_PREDICTION_MAPPING,
|
||||
MODEL_FOR_OBJECT_DETECTION_MAPPING,
|
||||
MODEL_FOR_PRETRAINING_MAPPING,
|
||||
MODEL_FOR_QUESTION_ANSWERING_MAPPING,
|
||||
MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING,
|
||||
MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING,
|
||||
MODEL_FOR_TABLE_QUESTION_ANSWERING_MAPPING,
|
||||
MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING,
|
||||
MODEL_MAPPING,
|
||||
MODEL_WITH_LM_HEAD_MAPPING,
|
||||
)
|
||||
|
||||
|
||||
# Those constants don't have a name attribute, so we need to define it manually
|
||||
# Maps each mapping constant's name to the mapping object itself; the name is
# used later to build the `<name>_NAMES` variables of the generated file.
# Each key appears exactly once: a repeated key in a dict literal is silently
# collapsed, which previously hid the fact that MODEL_FOR_PRETRAINING_MAPPING
# (imported above) was never registered.
mappings = {
    "MODEL_FOR_QUESTION_ANSWERING_MAPPING": MODEL_FOR_QUESTION_ANSWERING_MAPPING,
    "MODEL_FOR_CAUSAL_LM_MAPPING": MODEL_FOR_CAUSAL_LM_MAPPING,
    "MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING": MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING,
    "MODEL_FOR_MASKED_LM_MAPPING": MODEL_FOR_MASKED_LM_MAPPING,
    "MODEL_FOR_MULTIPLE_CHOICE_MAPPING": MODEL_FOR_MULTIPLE_CHOICE_MAPPING,
    "MODEL_FOR_NEXT_SENTENCE_PREDICTION_MAPPING": MODEL_FOR_NEXT_SENTENCE_PREDICTION_MAPPING,
    "MODEL_FOR_OBJECT_DETECTION_MAPPING": MODEL_FOR_OBJECT_DETECTION_MAPPING,
    "MODEL_FOR_PRETRAINING_MAPPING": MODEL_FOR_PRETRAINING_MAPPING,
    "MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING": MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING,
    "MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING": MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING,
    "MODEL_FOR_TABLE_QUESTION_ANSWERING_MAPPING": MODEL_FOR_TABLE_QUESTION_ANSWERING_MAPPING,
    "MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING": MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING,
    "MODEL_MAPPING": MODEL_MAPPING,
    "MODEL_WITH_LM_HEAD_MAPPING": MODEL_WITH_LM_HEAD_MAPPING,
}
|
||||
|
||||
|
||||
def get_name(value):
    """Return the ``__name__`` of ``value``, recursing into tuples.

    Args:
        value: An object exposing ``__name__``, or a (possibly nested) tuple
            of such objects.

    Returns:
        The ``__name__`` string for a single object, or a tuple of the same
        shape as ``value`` holding the names of its elements.

    Raises:
        AttributeError: If a non-tuple value has no ``__name__`` attribute.
    """
    if isinstance(value, tuple):
        return tuple(get_name(o) for o in value)
    return value.__name__
|
||||
|
||||
|
||||
# Header lines of the generated module.
content = [
    "# THIS FILE HAS BEEN AUTOGENERATED. To update:",
    "# 1. modify: models/auto/modeling_auto.py",
    "# 2. run: python utils/class_mapping_update.py",
    "from collections import OrderedDict",
    "",
]

# Emit one `<name>_NAMES = OrderedDict([...])` assignment per registered
# mapping, with keys and values replaced by their names as strings.
for name, mapping in mappings.items():
    entries = "\n".join([f' ("{k.__name__}", "{get_name(v)}"),' for k, v in mapping.items()])

    content += [
        "",
        f"{name}_NAMES = OrderedDict(",
        " [",
        entries,
        " ]",
        ")",
        "",
    ]

print(f"Updating {dst}")
# newline="\n" keeps the written line endings as "\n" regardless of platform.
with open(dst, "w", encoding="utf-8", newline="\n") as f:
    f.write("\n".join(content))
|
||||
Reference in New Issue
Block a user