From 02a77fa04c8ed497fd598af2b89799fcec5f12ae Mon Sep 17 00:00:00 2001 From: Yih-Dar <2521628+ydshieh@users.noreply.github.com> Date: Fri, 3 Mar 2023 14:43:44 +0100 Subject: [PATCH] Cleanup more auto mapping names (#21909) * fix auto 2 * fix auto 2 * fix task guide issue * fix --------- Co-authored-by: ydshieh --- src/transformers/models/auto/modeling_auto.py | 3 -- utils/check_repo.py | 31 +++++++++++-------- utils/check_task_guides.py | 14 +++++++-- 3 files changed, 30 insertions(+), 18 deletions(-) diff --git a/src/transformers/models/auto/modeling_auto.py b/src/transformers/models/auto/modeling_auto.py index be30298650..b697614834 100755 --- a/src/transformers/models/auto/modeling_auto.py +++ b/src/transformers/models/auto/modeling_auto.py @@ -303,7 +303,6 @@ MODEL_WITH_LM_HEAD_MAPPING_NAMES = OrderedDict( ("mpnet", "MPNetForMaskedLM"), ("mvp", "MvpForConditionalGeneration"), ("nezha", "NezhaForMaskedLM"), - ("nllb", "M2M100ForConditionalGeneration"), ("nystromformer", "NystromformerForMaskedLM"), ("openai-gpt", "OpenAIGPTLMHeadModel"), ("pegasus_x", "PegasusXForConditionalGeneration"), @@ -594,7 +593,6 @@ MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING_NAMES = OrderedDict( ("mbart", "MBartForConditionalGeneration"), ("mt5", "MT5ForConditionalGeneration"), ("mvp", "MvpForConditionalGeneration"), - ("nllb", "M2M100ForConditionalGeneration"), ("pegasus", "PegasusForConditionalGeneration"), ("pegasus_x", "PegasusXForConditionalGeneration"), ("plbart", "PLBartForConditionalGeneration"), @@ -938,7 +936,6 @@ MODEL_FOR_BACKBONE_MAPPING_NAMES = OrderedDict( ("bit", "BitBackbone"), ("convnext", "ConvNextBackbone"), ("dinat", "DinatBackbone"), - ("efficientnet", "EfficientNetBackbone"), ("maskformer-swin", "MaskFormerSwinBackbone"), ("nat", "NatBackbone"), ("resnet", "ResNetBackbone"), diff --git a/utils/check_repo.py b/utils/check_repo.py index 75e24cfa01..4f653097d9 100644 --- a/utils/check_repo.py +++ b/utils/check_repo.py @@ -26,9 +26,6 @@ from transformers.models.auto import get_values from transformers.models.auto.configuration_auto import CONFIG_MAPPING_NAMES from transformers.models.auto.feature_extraction_auto import FEATURE_EXTRACTOR_MAPPING_NAMES from transformers.models.auto.image_processing_auto import IMAGE_PROCESSOR_MAPPING_NAMES -from transformers.models.auto.modeling_auto import MODEL_MAPPING_NAMES -from transformers.models.auto.modeling_flax_auto import FLAX_MODEL_MAPPING_NAMES -from transformers.models.auto.modeling_tf_auto import TF_MODEL_MAPPING_NAMES from transformers.models.auto.processing_auto import PROCESSOR_MAPPING_NAMES from transformers.models.auto.tokenization_auto import TOKENIZER_MAPPING_NAMES from transformers.utils import ENV_VARS_TRUE_VALUES, direct_transformers_import @@ -617,17 +614,21 @@ def check_all_auto_object_names_being_defined(): """Check all names defined in auto (name) mappings exist in the library.""" failures = [] - mapping_to_check = { + mappings_to_check = { "TOKENIZER_MAPPING_NAMES": TOKENIZER_MAPPING_NAMES, "IMAGE_PROCESSOR_MAPPING_NAMES": IMAGE_PROCESSOR_MAPPING_NAMES, "FEATURE_EXTRACTOR_MAPPING_NAMES": FEATURE_EXTRACTOR_MAPPING_NAMES, "PROCESSOR_MAPPING_NAMES": PROCESSOR_MAPPING_NAMES, - "MODEL_MAPPING_NAMES": MODEL_MAPPING_NAMES, - "TF_MODEL_MAPPING_NAMES": TF_MODEL_MAPPING_NAMES, - "FLAX_MODEL_MAPPING_NAMES": FLAX_MODEL_MAPPING_NAMES, } - for name, mapping in mapping_to_check.items(): + # Each auto modeling files contains multiple mappings. Let's get them in a dynamic way. + for module_name in ["modeling_auto", "modeling_tf_auto", "modeling_flax_auto"]: + module = getattr(transformers.models.auto, module_name) + # all mappings in a single auto modeling file + mapping_names = [x for x in dir(module) if x.endswith("_MAPPING_NAMES")] + mappings_to_check.update({name: getattr(module, name) for name in mapping_names}) + + for name, mapping in mappings_to_check.items(): for model_type, class_names in mapping.items(): if not isinstance(class_names, tuple): class_names = (class_names,) @@ -652,16 +653,20 @@ def check_all_auto_mapping_names_in_config_mapping_names(): failures = [] # `TOKENIZER_PROCESSOR_MAPPING_NAMES` and `AutoTokenizer` is special, and don't need to follow the rule. - mapping_to_check = { + mappings_to_check = { "IMAGE_PROCESSOR_MAPPING_NAMES": IMAGE_PROCESSOR_MAPPING_NAMES, "FEATURE_EXTRACTOR_MAPPING_NAMES": FEATURE_EXTRACTOR_MAPPING_NAMES, "PROCESSOR_MAPPING_NAMES": PROCESSOR_MAPPING_NAMES, - "MODEL_MAPPING_NAMES": MODEL_MAPPING_NAMES, - "TF_MODEL_MAPPING_NAMES": TF_MODEL_MAPPING_NAMES, - "FLAX_MODEL_MAPPING_NAMES": FLAX_MODEL_MAPPING_NAMES, } - for name, mapping in mapping_to_check.items(): + # Each auto modeling files contains multiple mappings. Let's get them in a dynamic way. + for module_name in ["modeling_auto", "modeling_tf_auto", "modeling_flax_auto"]: + module = getattr(transformers.models.auto, module_name) + # all mappings in a single auto modeling file + mapping_names = [x for x in dir(module) if x.endswith("_MAPPING_NAMES")] + mappings_to_check.update({name: getattr(module, name) for name in mapping_names}) + + for name, mapping in mappings_to_check.items(): for model_type, class_names in mapping.items(): if model_type not in CONFIG_MAPPING_NAMES: failures.append( diff --git a/utils/check_task_guides.py b/utils/check_task_guides.py index c48b7ca16c..3800123416 100644 --- a/utils/check_task_guides.py +++ b/utils/check_task_guides.py @@ -72,14 +72,24 @@ TASK_GUIDE_TO_MODELS = { "document_question_answering.mdx": transformers_module.models.auto.modeling_auto.MODEL_FOR_DOCUMENT_QUESTION_ANSWERING_MAPPING_NAMES, } +# This list contains model types used in some task guides that are not in `CONFIG_MAPPING_NAMES` (therefore not in any +# `MODEL_MAPPING_NAMES` or any `MODEL_FOR_XXX_MAPPING_NAMES`). +SPECIAL_TASK_GUIDE_TO_MODEL_TYPES = { + "summarization.mdx": ("nllb",), + "translation.mdx": ("nllb",), +} + def get_model_list_for_task(task_guide): """ Return the list of models supporting given task. """ - config_maping_names = TASK_GUIDE_TO_MODELS[task_guide] + model_maping_names = TASK_GUIDE_TO_MODELS[task_guide] + special_model_types = SPECIAL_TASK_GUIDE_TO_MODEL_TYPES.get(task_guide, set()) model_names = { - code: name for code, name in transformers_module.MODEL_NAMES_MAPPING.items() if code in config_maping_names + code: name + for code, name in transformers_module.MODEL_NAMES_MAPPING.items() + if (code in model_maping_names or code in special_model_types) } return ", ".join([f"[{name}](../model_doc/{code})" for code, name in model_names.items()]) + "\n"