Add feature extraction mapping for automatic metadata update (#28944)
* add feature extraction mapping
* added prefix
* ruff check
* minor fix
* Update modeling_auto.py
* fix typo
* remove prefix to make variable public/importable
* Update src/transformers/models/auto/modeling_auto.py

Co-authored-by: amyeroberts <22614925+amyeroberts@users.noreply.github.com>

* fixes
* addressed comments
* nit
* fix-copies
* remove from tests
* this should fix
* Update tests/models/convnextv2/test_modeling_convnextv2.py

Co-authored-by: amyeroberts <22614925+amyeroberts@users.noreply.github.com>

* nits

---------

Co-authored-by: amyeroberts <22614925+amyeroberts@users.noreply.github.com>
This commit is contained in:
@@ -1460,6 +1460,7 @@ else:
|
|||||||
"MODEL_FOR_DEPTH_ESTIMATION_MAPPING",
|
"MODEL_FOR_DEPTH_ESTIMATION_MAPPING",
|
||||||
"MODEL_FOR_DOCUMENT_QUESTION_ANSWERING_MAPPING",
|
"MODEL_FOR_DOCUMENT_QUESTION_ANSWERING_MAPPING",
|
||||||
"MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING",
|
"MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING",
|
||||||
|
"MODEL_FOR_IMAGE_MAPPING",
|
||||||
"MODEL_FOR_IMAGE_SEGMENTATION_MAPPING",
|
"MODEL_FOR_IMAGE_SEGMENTATION_MAPPING",
|
||||||
"MODEL_FOR_IMAGE_TO_IMAGE_MAPPING",
|
"MODEL_FOR_IMAGE_TO_IMAGE_MAPPING",
|
||||||
"MODEL_FOR_INSTANCE_SEGMENTATION_MAPPING",
|
"MODEL_FOR_INSTANCE_SEGMENTATION_MAPPING",
|
||||||
@@ -6203,6 +6204,7 @@ if TYPE_CHECKING:
|
|||||||
MODEL_FOR_DEPTH_ESTIMATION_MAPPING,
|
MODEL_FOR_DEPTH_ESTIMATION_MAPPING,
|
||||||
MODEL_FOR_DOCUMENT_QUESTION_ANSWERING_MAPPING,
|
MODEL_FOR_DOCUMENT_QUESTION_ANSWERING_MAPPING,
|
||||||
MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING,
|
MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING,
|
||||||
|
MODEL_FOR_IMAGE_MAPPING,
|
||||||
MODEL_FOR_IMAGE_SEGMENTATION_MAPPING,
|
MODEL_FOR_IMAGE_SEGMENTATION_MAPPING,
|
||||||
MODEL_FOR_IMAGE_TO_IMAGE_MAPPING,
|
MODEL_FOR_IMAGE_TO_IMAGE_MAPPING,
|
||||||
MODEL_FOR_INSTANCE_SEGMENTATION_MAPPING,
|
MODEL_FOR_INSTANCE_SEGMENTATION_MAPPING,
|
||||||
|
|||||||
@@ -49,6 +49,7 @@ else:
|
|||||||
"MODEL_FOR_DOCUMENT_QUESTION_ANSWERING_MAPPING",
|
"MODEL_FOR_DOCUMENT_QUESTION_ANSWERING_MAPPING",
|
||||||
"MODEL_FOR_DEPTH_ESTIMATION_MAPPING",
|
"MODEL_FOR_DEPTH_ESTIMATION_MAPPING",
|
||||||
"MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING",
|
"MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING",
|
||||||
|
"MODEL_FOR_IMAGE_MAPPING",
|
||||||
"MODEL_FOR_IMAGE_SEGMENTATION_MAPPING",
|
"MODEL_FOR_IMAGE_SEGMENTATION_MAPPING",
|
||||||
"MODEL_FOR_IMAGE_TO_IMAGE_MAPPING",
|
"MODEL_FOR_IMAGE_TO_IMAGE_MAPPING",
|
||||||
"MODEL_FOR_INSTANCE_SEGMENTATION_MAPPING",
|
"MODEL_FOR_INSTANCE_SEGMENTATION_MAPPING",
|
||||||
@@ -233,6 +234,7 @@ if TYPE_CHECKING:
|
|||||||
MODEL_FOR_DEPTH_ESTIMATION_MAPPING,
|
MODEL_FOR_DEPTH_ESTIMATION_MAPPING,
|
||||||
MODEL_FOR_DOCUMENT_QUESTION_ANSWERING_MAPPING,
|
MODEL_FOR_DOCUMENT_QUESTION_ANSWERING_MAPPING,
|
||||||
MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING,
|
MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING,
|
||||||
|
MODEL_FOR_IMAGE_MAPPING,
|
||||||
MODEL_FOR_IMAGE_SEGMENTATION_MAPPING,
|
MODEL_FOR_IMAGE_SEGMENTATION_MAPPING,
|
||||||
MODEL_FOR_IMAGE_TO_IMAGE_MAPPING,
|
MODEL_FOR_IMAGE_TO_IMAGE_MAPPING,
|
||||||
MODEL_FOR_INSTANCE_SEGMENTATION_MAPPING,
|
MODEL_FOR_INSTANCE_SEGMENTATION_MAPPING,
|
||||||
|
|||||||
@@ -29,7 +29,6 @@ from .configuration_auto import CONFIG_MAPPING_NAMES
|
|||||||
|
|
||||||
logger = logging.get_logger(__name__)
|
logger = logging.get_logger(__name__)
|
||||||
|
|
||||||
|
|
||||||
MODEL_MAPPING_NAMES = OrderedDict(
|
MODEL_MAPPING_NAMES = OrderedDict(
|
||||||
[
|
[
|
||||||
# Base model mapping
|
# Base model mapping
|
||||||
@@ -478,6 +477,58 @@ MODEL_FOR_CAUSAL_LM_MAPPING_NAMES = OrderedDict(
|
|||||||
]
|
]
|
||||||
)
|
)
|
||||||
|
|
||||||
|
MODEL_FOR_IMAGE_MAPPING_NAMES = OrderedDict(
|
||||||
|
[
|
||||||
|
# Model for Image mapping
|
||||||
|
("beit", "BeitModel"),
|
||||||
|
("bit", "BitModel"),
|
||||||
|
("conditional_detr", "ConditionalDetrModel"),
|
||||||
|
("convnext", "ConvNextModel"),
|
||||||
|
("convnextv2", "ConvNextV2Model"),
|
||||||
|
("data2vec-vision", "Data2VecVisionModel"),
|
||||||
|
("deformable_detr", "DeformableDetrModel"),
|
||||||
|
("deit", "DeiTModel"),
|
||||||
|
("deta", "DetaModel"),
|
||||||
|
("detr", "DetrModel"),
|
||||||
|
("dinat", "DinatModel"),
|
||||||
|
("dinov2", "Dinov2Model"),
|
||||||
|
("dpt", "DPTModel"),
|
||||||
|
("efficientformer", "EfficientFormerModel"),
|
||||||
|
("efficientnet", "EfficientNetModel"),
|
||||||
|
("focalnet", "FocalNetModel"),
|
||||||
|
("glpn", "GLPNModel"),
|
||||||
|
("imagegpt", "ImageGPTModel"),
|
||||||
|
("levit", "LevitModel"),
|
||||||
|
("mobilenet_v1", "MobileNetV1Model"),
|
||||||
|
("mobilenet_v2", "MobileNetV2Model"),
|
||||||
|
("mobilevit", "MobileViTModel"),
|
||||||
|
("mobilevitv2", "MobileViTV2Model"),
|
||||||
|
("nat", "NatModel"),
|
||||||
|
("poolformer", "PoolFormerModel"),
|
||||||
|
("pvt", "PvtModel"),
|
||||||
|
("regnet", "RegNetModel"),
|
||||||
|
("resnet", "ResNetModel"),
|
||||||
|
("segformer", "SegformerModel"),
|
||||||
|
("siglip_vision_model", "SiglipVisionModel"),
|
||||||
|
("swiftformer", "SwiftFormerModel"),
|
||||||
|
("swin", "SwinModel"),
|
||||||
|
("swin2sr", "Swin2SRModel"),
|
||||||
|
("swinv2", "Swinv2Model"),
|
||||||
|
("table-transformer", "TableTransformerModel"),
|
||||||
|
("timesformer", "TimesformerModel"),
|
||||||
|
("timm_backbone", "TimmBackbone"),
|
||||||
|
("van", "VanModel"),
|
||||||
|
("videomae", "VideoMAEModel"),
|
||||||
|
("vit", "ViTModel"),
|
||||||
|
("vit_hybrid", "ViTHybridModel"),
|
||||||
|
("vit_mae", "ViTMAEModel"),
|
||||||
|
("vit_msn", "ViTMSNModel"),
|
||||||
|
("vitdet", "VitDetModel"),
|
||||||
|
("vivit", "VivitModel"),
|
||||||
|
("yolos", "YolosModel"),
|
||||||
|
]
|
||||||
|
)
|
||||||
|
|
||||||
MODEL_FOR_MASKED_IMAGE_MODELING_MAPPING_NAMES = OrderedDict(
|
MODEL_FOR_MASKED_IMAGE_MODELING_MAPPING_NAMES = OrderedDict(
|
||||||
[
|
[
|
||||||
("deit", "DeiTForMaskedImageModeling"),
|
("deit", "DeiTForMaskedImageModeling"),
|
||||||
@@ -1243,6 +1294,7 @@ MODEL_FOR_DOCUMENT_QUESTION_ANSWERING_MAPPING = _LazyAutoMapping(
|
|||||||
CONFIG_MAPPING_NAMES, MODEL_FOR_DOCUMENT_QUESTION_ANSWERING_MAPPING_NAMES
|
CONFIG_MAPPING_NAMES, MODEL_FOR_DOCUMENT_QUESTION_ANSWERING_MAPPING_NAMES
|
||||||
)
|
)
|
||||||
MODEL_FOR_MASKED_LM_MAPPING = _LazyAutoMapping(CONFIG_MAPPING_NAMES, MODEL_FOR_MASKED_LM_MAPPING_NAMES)
|
MODEL_FOR_MASKED_LM_MAPPING = _LazyAutoMapping(CONFIG_MAPPING_NAMES, MODEL_FOR_MASKED_LM_MAPPING_NAMES)
|
||||||
|
MODEL_FOR_IMAGE_MAPPING = _LazyAutoMapping(CONFIG_MAPPING_NAMES, MODEL_FOR_IMAGE_MAPPING_NAMES)
|
||||||
MODEL_FOR_MASKED_IMAGE_MODELING_MAPPING = _LazyAutoMapping(
|
MODEL_FOR_MASKED_IMAGE_MODELING_MAPPING = _LazyAutoMapping(
|
||||||
CONFIG_MAPPING_NAMES, MODEL_FOR_MASKED_IMAGE_MODELING_MAPPING_NAMES
|
CONFIG_MAPPING_NAMES, MODEL_FOR_MASKED_IMAGE_MODELING_MAPPING_NAMES
|
||||||
)
|
)
|
||||||
|
|||||||
@@ -63,7 +63,10 @@ from .integrations.deepspeed import deepspeed_init, deepspeed_load_checkpoint, i
|
|||||||
from .integrations.tpu import tpu_spmd_dataloader
|
from .integrations.tpu import tpu_spmd_dataloader
|
||||||
from .modelcard import TrainingSummary
|
from .modelcard import TrainingSummary
|
||||||
from .modeling_utils import PreTrainedModel, load_sharded_checkpoint, unwrap_model
|
from .modeling_utils import PreTrainedModel, load_sharded_checkpoint, unwrap_model
|
||||||
from .models.auto.modeling_auto import MODEL_FOR_CAUSAL_LM_MAPPING_NAMES, MODEL_MAPPING_NAMES
|
from .models.auto.modeling_auto import (
|
||||||
|
MODEL_FOR_CAUSAL_LM_MAPPING_NAMES,
|
||||||
|
MODEL_MAPPING_NAMES,
|
||||||
|
)
|
||||||
from .optimization import Adafactor, get_scheduler
|
from .optimization import Adafactor, get_scheduler
|
||||||
from .pytorch_utils import ALL_LAYERNORM_LAYERS, is_torch_greater_or_equal_than_1_13
|
from .pytorch_utils import ALL_LAYERNORM_LAYERS, is_torch_greater_or_equal_than_1_13
|
||||||
from .tokenization_utils_base import PreTrainedTokenizerBase
|
from .tokenization_utils_base import PreTrainedTokenizerBase
|
||||||
|
|||||||
@@ -598,6 +598,9 @@ MODEL_FOR_DOCUMENT_QUESTION_ANSWERING_MAPPING = None
|
|||||||
MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING = None
|
MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING = None
|
||||||
|
|
||||||
|
|
||||||
|
MODEL_FOR_IMAGE_MAPPING = None
|
||||||
|
|
||||||
|
|
||||||
MODEL_FOR_IMAGE_SEGMENTATION_MAPPING = None
|
MODEL_FOR_IMAGE_SEGMENTATION_MAPPING = None
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -39,6 +39,7 @@ from ..models.auto.modeling_auto import (
|
|||||||
MODEL_FOR_CTC_MAPPING_NAMES,
|
MODEL_FOR_CTC_MAPPING_NAMES,
|
||||||
MODEL_FOR_DOCUMENT_QUESTION_ANSWERING_MAPPING_NAMES,
|
MODEL_FOR_DOCUMENT_QUESTION_ANSWERING_MAPPING_NAMES,
|
||||||
MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING_NAMES,
|
MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING_NAMES,
|
||||||
|
MODEL_FOR_IMAGE_MAPPING_NAMES,
|
||||||
MODEL_FOR_MASKED_IMAGE_MODELING_MAPPING_NAMES,
|
MODEL_FOR_MASKED_IMAGE_MODELING_MAPPING_NAMES,
|
||||||
MODEL_FOR_MASKED_LM_MAPPING_NAMES,
|
MODEL_FOR_MASKED_LM_MAPPING_NAMES,
|
||||||
MODEL_FOR_MULTIPLE_CHOICE_MAPPING_NAMES,
|
MODEL_FOR_MULTIPLE_CHOICE_MAPPING_NAMES,
|
||||||
@@ -95,6 +96,7 @@ def _generate_supported_model_class_names(
|
|||||||
"audio-classification": MODEL_FOR_AUDIO_CLASSIFICATION_MAPPING_NAMES,
|
"audio-classification": MODEL_FOR_AUDIO_CLASSIFICATION_MAPPING_NAMES,
|
||||||
"semantic-segmentation": MODEL_FOR_SEMANTIC_SEGMENTATION_MAPPING_NAMES,
|
"semantic-segmentation": MODEL_FOR_SEMANTIC_SEGMENTATION_MAPPING_NAMES,
|
||||||
"backbone": MODEL_FOR_BACKBONE_MAPPING_NAMES,
|
"backbone": MODEL_FOR_BACKBONE_MAPPING_NAMES,
|
||||||
|
"image-feature-extraction": MODEL_FOR_IMAGE_MAPPING_NAMES,
|
||||||
}
|
}
|
||||||
|
|
||||||
if supported_tasks is None:
|
if supported_tasks is None:
|
||||||
|
|||||||
@@ -700,7 +700,10 @@ class ModelTesterMixin:
|
|||||||
for model_class in self.all_model_classes:
|
for model_class in self.all_model_classes:
|
||||||
if (
|
if (
|
||||||
model_class.__name__
|
model_class.__name__
|
||||||
in [*get_values(MODEL_MAPPING_NAMES), *get_values(MODEL_FOR_BACKBONE_MAPPING_NAMES)]
|
in [
|
||||||
|
*get_values(MODEL_MAPPING_NAMES),
|
||||||
|
*get_values(MODEL_FOR_BACKBONE_MAPPING_NAMES),
|
||||||
|
]
|
||||||
or not model_class.supports_gradient_checkpointing
|
or not model_class.supports_gradient_checkpointing
|
||||||
):
|
):
|
||||||
continue
|
continue
|
||||||
|
|||||||
@@ -732,6 +732,8 @@ def check_all_auto_object_names_being_defined():
|
|||||||
# module, if it's a private model defined in this file.
|
# module, if it's a private model defined in this file.
|
||||||
if name.endswith("MODEL_MAPPING_NAMES") and is_a_private_model(class_name):
|
if name.endswith("MODEL_MAPPING_NAMES") and is_a_private_model(class_name):
|
||||||
continue
|
continue
|
||||||
|
if name.endswith("MODEL_FOR_IMAGE_MAPPING_NAMES") and is_a_private_model(class_name):
|
||||||
|
continue
|
||||||
failures.append(
|
failures.append(
|
||||||
f"`{class_name}` appears in the mapping `{name}` but it is not defined in the library."
|
f"`{class_name}` appears in the mapping `{name}` but it is not defined in the library."
|
||||||
)
|
)
|
||||||
|
|||||||
1
utils/update_metadata.py
Normal file → Executable file
1
utils/update_metadata.py
Normal file → Executable file
@@ -62,6 +62,7 @@ _re_pt_models = re.compile(r"(.*)(?:Model|Encoder|Decoder|ForConditionalGenerati
|
|||||||
PIPELINE_TAGS_AND_AUTO_MODELS = [
|
PIPELINE_TAGS_AND_AUTO_MODELS = [
|
||||||
("pretraining", "MODEL_FOR_PRETRAINING_MAPPING_NAMES", "AutoModelForPreTraining"),
|
("pretraining", "MODEL_FOR_PRETRAINING_MAPPING_NAMES", "AutoModelForPreTraining"),
|
||||||
("feature-extraction", "MODEL_MAPPING_NAMES", "AutoModel"),
|
("feature-extraction", "MODEL_MAPPING_NAMES", "AutoModel"),
|
||||||
|
("image-feature-extraction", "MODEL_FOR_IMAGE_MAPPING_NAMES", "AutoModel"),
|
||||||
("audio-classification", "MODEL_FOR_AUDIO_CLASSIFICATION_MAPPING_NAMES", "AutoModelForAudioClassification"),
|
("audio-classification", "MODEL_FOR_AUDIO_CLASSIFICATION_MAPPING_NAMES", "AutoModelForAudioClassification"),
|
||||||
("text-generation", "MODEL_FOR_CAUSAL_LM_MAPPING_NAMES", "AutoModelForCausalLM"),
|
("text-generation", "MODEL_FOR_CAUSAL_LM_MAPPING_NAMES", "AutoModelForCausalLM"),
|
||||||
("automatic-speech-recognition", "MODEL_FOR_CTC_MAPPING_NAMES", "AutoModelForCTC"),
|
("automatic-speech-recognition", "MODEL_FOR_CTC_MAPPING_NAMES", "AutoModelForCTC"),
|
||||||
|
|||||||
Reference in New Issue
Block a user