[WIP] Disentangle auto modules from other modeling files (#13023)

* Initial work
* All auto models
* All tf auto models
* All flax auto models
* Tokenizers
* Add feature extractors
* Fix typos
* Fix other typo
* Use the right config
* Remove old mapping names and update logic in AutoTokenizer
* Update check_table
* Fix copies and check_repo script
* Fix last test
* Add back name
* clean up
* Update template
* Update template
* Forgot a )
* Use alternative to fixup
* Fix TF model template
* Address review comments
* Address review comments
* Style
2021-08-06 13:12:30 +02:00
parent 2e4082364e
commit 9870093f7b
26 changed files with 1338 additions and 2405 deletions
--- a/utils/check_repo.py
+++ b/utils/check_repo.py
@@ -306,17 +306,17 @@ def get_all_auto_configured_models():
    result = set()  # To avoid duplicates we concatenate all model classes in a set.
    if is_torch_available():
        for attr_name in dir(transformers.models.auto.modeling_auto):
-            if attr_name.startswith("MODEL_") and attr_name.endswith("MAPPING"):
+            if attr_name.startswith("MODEL_") and attr_name.endswith("MAPPING_NAMES"):
                result = result | set(get_values(getattr(transformers.models.auto.modeling_auto, attr_name)))
    if is_tf_available():
        for attr_name in dir(transformers.models.auto.modeling_tf_auto):
-            if attr_name.startswith("TF_MODEL_") and attr_name.endswith("MAPPING"):
+            if attr_name.startswith("TF_MODEL_") and attr_name.endswith("MAPPING_NAMES"):
                result = result | set(get_values(getattr(transformers.models.auto.modeling_tf_auto, attr_name)))
    if is_flax_available():
        for attr_name in dir(transformers.models.auto.modeling_flax_auto):
-            if attr_name.startswith("FLAX_MODEL_") and attr_name.endswith("MAPPING"):
+            if attr_name.startswith("FLAX_MODEL_") and attr_name.endswith("MAPPING_NAMES"):
                result = result | set(get_values(getattr(transformers.models.auto.modeling_flax_auto, attr_name)))
-    return [cls.__name__ for cls in result]
+    return [cls for cls in result]


 def ignore_unautoclassed(model_name):
--- a/utils/check_table.py
+++ b/utils/check_table.py
@@ -87,12 +87,13 @@ def get_model_table_from_auto_modules():
    transformers = spec.loader.load_module()

    # Dictionary model names to config.
+    config_maping_names = transformers.models.auto.configuration_auto.CONFIG_MAPPING_NAMES
    model_name_to_config = {
-        name: transformers.CONFIG_MAPPING[code] for code, name in transformers.MODEL_NAMES_MAPPING.items()
-    }
-    model_name_to_prefix = {
-        name: config.__name__.replace("Config", "") for name, config in model_name_to_config.items()
+        name: config_maping_names[code]
+        for code, name in transformers.MODEL_NAMES_MAPPING.items()
+        if code in config_maping_names
    }
+    model_name_to_prefix = {name: config.replace("Config", "") for name, config in model_name_to_config.items()}

    # Dictionaries flagging if each model prefix has a slow/fast tokenizer, backend in PT/TF/Flax.
    slow_tokenizers = collections.defaultdict(bool)
--- a/utils/class_mapping_update.py
+++ b/utils/class_mapping_update.py
@@ -1,106 +0,0 @@
-# coding=utf-8
-# Copyright 2020 The HuggingFace Inc. team.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-# this script remaps classes to class strings so that it's quick to load such maps and not require
-# loading all possible modeling files
-#
-# it can be extended to auto-generate other dicts that are needed at runtime
-
-
-import os
-import sys
-from os.path import abspath, dirname, join
-
-
-git_repo_path = abspath(join(dirname(dirname(__file__)), "src"))
-sys.path.insert(1, git_repo_path)
-
-src = "src/transformers/models/auto/modeling_auto.py"
-dst = "src/transformers/utils/modeling_auto_mapping.py"
-
-
-if os.path.exists(dst) and os.path.getmtime(src) < os.path.getmtime(dst):
-    # speed things up by only running this script if the src is newer than dst
-    sys.exit(0)
-
-# only load if needed
-from transformers.models.auto.modeling_auto import (  # noqa
-    MODEL_FOR_CAUSAL_LM_MAPPING,
-    MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING,
-    MODEL_FOR_MASKED_LM_MAPPING,
-    MODEL_FOR_MULTIPLE_CHOICE_MAPPING,
-    MODEL_FOR_NEXT_SENTENCE_PREDICTION_MAPPING,
-    MODEL_FOR_OBJECT_DETECTION_MAPPING,
-    MODEL_FOR_PRETRAINING_MAPPING,
-    MODEL_FOR_QUESTION_ANSWERING_MAPPING,
-    MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING,
-    MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING,
-    MODEL_FOR_TABLE_QUESTION_ANSWERING_MAPPING,
-    MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING,
-    MODEL_MAPPING,
-    MODEL_WITH_LM_HEAD_MAPPING,
-)
-
-
-# Those constants don't have a name attribute, so we need to define it manually
-mappings = {
-    "MODEL_FOR_QUESTION_ANSWERING_MAPPING": MODEL_FOR_QUESTION_ANSWERING_MAPPING,
-    "MODEL_FOR_CAUSAL_LM_MAPPING": MODEL_FOR_CAUSAL_LM_MAPPING,
-    "MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING": MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING,
-    "MODEL_FOR_MASKED_LM_MAPPING": MODEL_FOR_MASKED_LM_MAPPING,
-    "MODEL_FOR_MULTIPLE_CHOICE_MAPPING": MODEL_FOR_MULTIPLE_CHOICE_MAPPING,
-    "MODEL_FOR_NEXT_SENTENCE_PREDICTION_MAPPING": MODEL_FOR_NEXT_SENTENCE_PREDICTION_MAPPING,
-    "MODEL_FOR_OBJECT_DETECTION_MAPPING": MODEL_FOR_OBJECT_DETECTION_MAPPING,
-    "MODEL_FOR_OBJECT_DETECTION_MAPPING": MODEL_FOR_OBJECT_DETECTION_MAPPING,
-    "MODEL_FOR_QUESTION_ANSWERING_MAPPING": MODEL_FOR_QUESTION_ANSWERING_MAPPING,
-    "MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING": MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING,
-    "MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING": MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING,
-    "MODEL_FOR_TABLE_QUESTION_ANSWERING_MAPPING": MODEL_FOR_TABLE_QUESTION_ANSWERING_MAPPING,
-    "MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING": MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING,
-    "MODEL_MAPPING": MODEL_MAPPING,
-    "MODEL_WITH_LM_HEAD_MAPPING": MODEL_WITH_LM_HEAD_MAPPING,
-}
-
-
-def get_name(value):
-    if isinstance(value, tuple):
-        return tuple(get_name(o) for o in value)
-    return value.__name__
-
-
-content = [
-    "# THIS FILE HAS BEEN AUTOGENERATED. To update:",
-    "# 1. modify: models/auto/modeling_auto.py",
-    "# 2. run: python utils/class_mapping_update.py",
-    "from collections import OrderedDict",
-    "",
-]
-
-for name, mapping in mappings.items():
-    entries = "\n".join([f'        ("{k.__name__}", "{get_name(v)}"),' for k, v in mapping.items()])
-
-    content += [
-        "",
-        f"{name}_NAMES = OrderedDict(",
-        "    [",
-        entries,
-        "    ]",
-        ")",
-        "",
-    ]
-
-print(f"Updating {dst}")
-with open(dst, "w", encoding="utf-8", newline="\n") as f:
-    f.write("\n".join(content))