From dde718e7a62bf8caa6623b5635ba02d6cb758c75 Mon Sep 17 00:00:00 2001 From: NielsRogge <48327001+NielsRogge@users.noreply.github.com> Date: Tue, 7 Mar 2023 21:19:39 +0100 Subject: [PATCH] [DETR and friends] Remove is_timm_available (#21814) * First draft * Fix to_dict * Improve conversion script * Update config * Remove timm dependency * Fix dummies * Fix typo, add integration test * Upload 101 model as well * Remove timm dummies * Fix style --------- Co-authored-by: Niels Rogge --- src/transformers/__init__.py | 139 ++++++++---------- .../models/conditional_detr/__init__.py | 6 +- .../image_processing_conditional_detr.py | 4 +- .../models/deformable_detr/__init__.py | 6 +- .../image_processing_deformable_detr.py | 4 +- src/transformers/models/detr/__init__.py | 6 +- .../models/detr/configuration_detr.py | 27 +++- .../models/detr/convert_detr_to_pytorch.py | 42 ++++-- .../models/detr/image_processing_detr.py | 4 +- .../models/table_transformer/__init__.py | 6 +- .../configuration_table_transformer.py | 2 + src/transformers/utils/dummy_pt_objects.py | 110 ++++++++++++++ .../utils/dummy_timm_and_vision_objects.py | 112 -------------- tests/models/detr/test_modeling_detr.py | 34 ++++- 14 files changed, 275 insertions(+), 227 deletions(-) delete mode 100644 src/transformers/utils/dummy_timm_and_vision_objects.py diff --git a/src/transformers/__init__.py b/src/transformers/__init__.py index e721e01016..c75c9b195f 100644 --- a/src/transformers/__init__.py +++ b/src/transformers/__init__.py @@ -866,52 +866,6 @@ else: _import_structure["models.vit_hybrid"].extend(["ViTHybridImageProcessor"]) _import_structure["models.yolos"].extend(["YolosFeatureExtractor", "YolosImageProcessor"]) -# Timm-backed objects -try: - if not (is_timm_available() and is_vision_available()): - raise OptionalDependencyNotAvailable() -except OptionalDependencyNotAvailable: - from .utils import dummy_timm_and_vision_objects - - _import_structure["utils.dummy_timm_and_vision_objects"] = [ - name for name in dir(dummy_timm_and_vision_objects) if not name.startswith("_") - ] -else: - _import_structure["models.deformable_detr"].extend( - [ - "DEFORMABLE_DETR_PRETRAINED_MODEL_ARCHIVE_LIST", - "DeformableDetrForObjectDetection", - "DeformableDetrModel", - "DeformableDetrPreTrainedModel", - ] - ) - _import_structure["models.detr"].extend( - [ - "DETR_PRETRAINED_MODEL_ARCHIVE_LIST", - "DetrForObjectDetection", - "DetrForSegmentation", - "DetrModel", - "DetrPreTrainedModel", - ] - ) - _import_structure["models.table_transformer"].extend( - [ - "TABLE_TRANSFORMER_PRETRAINED_MODEL_ARCHIVE_LIST", - "TableTransformerForObjectDetection", - "TableTransformerModel", - "TableTransformerPreTrainedModel", - ] - ) - _import_structure["models.conditional_detr"].extend( - [ - "CONDITIONAL_DETR_PRETRAINED_MODEL_ARCHIVE_LIST", - "ConditionalDetrForObjectDetection", - "ConditionalDetrForSegmentation", - "ConditionalDetrModel", - "ConditionalDetrPreTrainedModel", - ] - ) - # PyTorch-backed objects try: @@ -1309,6 +1263,15 @@ else: "CodeGenPreTrainedModel", ] ) + _import_structure["models.conditional_detr"].extend( + [ + "CONDITIONAL_DETR_PRETRAINED_MODEL_ARCHIVE_LIST", + "ConditionalDetrForObjectDetection", + "ConditionalDetrForSegmentation", + "ConditionalDetrModel", + "ConditionalDetrPreTrainedModel", + ] + ) _import_structure["models.convbert"].extend( [ "CONVBERT_PRETRAINED_MODEL_ARCHIVE_LIST", @@ -1406,6 +1369,14 @@ else: "DecisionTransformerPreTrainedModel", ] ) + _import_structure["models.deformable_detr"].extend( + [ + "DEFORMABLE_DETR_PRETRAINED_MODEL_ARCHIVE_LIST", + "DeformableDetrForObjectDetection", + "DeformableDetrModel", + "DeformableDetrPreTrainedModel", + ] + ) _import_structure["models.deit"].extend( [ "DEIT_PRETRAINED_MODEL_ARCHIVE_LIST", @@ -1424,6 +1395,15 @@ else: "DetaPreTrainedModel", ] ) + _import_structure["models.detr"].extend( + [ + "DETR_PRETRAINED_MODEL_ARCHIVE_LIST", + "DetrForObjectDetection", + "DetrForSegmentation", + "DetrModel", + "DetrPreTrainedModel", + ] + ) _import_structure["models.dinat"].extend( [ "DINAT_PRETRAINED_MODEL_ARCHIVE_LIST", @@ -2372,6 +2352,14 @@ else: "load_tf_weights_in_t5", ] ) + _import_structure["models.table_transformer"].extend( + [ + "TABLE_TRANSFORMER_PRETRAINED_MODEL_ARCHIVE_LIST", + "TableTransformerForObjectDetection", + "TableTransformerModel", + "TableTransformerPreTrainedModel", + ] + ) _import_structure["models.tapas"].extend( [ "TAPAS_PRETRAINED_MODEL_ARCHIVE_LIST", @@ -4398,39 +4386,6 @@ if TYPE_CHECKING: from .models.yolos import YolosFeatureExtractor, YolosImageProcessor # Modeling - try: - if not (is_timm_available() and is_vision_available()): - raise OptionalDependencyNotAvailable() - except OptionalDependencyNotAvailable: - from .utils.dummy_timm_and_vision_objects import * - else: - from .models.conditional_detr import ( - CONDITIONAL_DETR_PRETRAINED_MODEL_ARCHIVE_LIST, - ConditionalDetrForObjectDetection, - ConditionalDetrForSegmentation, - ConditionalDetrModel, - ConditionalDetrPreTrainedModel, - ) - from .models.deformable_detr import ( - DEFORMABLE_DETR_PRETRAINED_MODEL_ARCHIVE_LIST, - DeformableDetrForObjectDetection, - DeformableDetrModel, - DeformableDetrPreTrainedModel, - ) - from .models.detr import ( - DETR_PRETRAINED_MODEL_ARCHIVE_LIST, - DetrForObjectDetection, - DetrForSegmentation, - DetrModel, - DetrPreTrainedModel, - ) - from .models.table_transformer import ( - TABLE_TRANSFORMER_PRETRAINED_MODEL_ARCHIVE_LIST, - TableTransformerForObjectDetection, - TableTransformerModel, - TableTransformerPreTrainedModel, - ) - try: if not is_torch_available(): raise OptionalDependencyNotAvailable() @@ -4767,6 +4722,13 @@ if TYPE_CHECKING: CodeGenModel, CodeGenPreTrainedModel, ) + from .models.conditional_detr import ( + CONDITIONAL_DETR_PRETRAINED_MODEL_ARCHIVE_LIST, + ConditionalDetrForObjectDetection, + ConditionalDetrForSegmentation, + ConditionalDetrModel, + ConditionalDetrPreTrainedModel, + ) from .models.convbert import ( CONVBERT_PRETRAINED_MODEL_ARCHIVE_LIST, ConvBertForMaskedLM, @@ -4848,6 +4810,12 @@ if TYPE_CHECKING: DecisionTransformerModel, DecisionTransformerPreTrainedModel, ) + from .models.deformable_detr import ( + DEFORMABLE_DETR_PRETRAINED_MODEL_ARCHIVE_LIST, + DeformableDetrForObjectDetection, + DeformableDetrModel, + DeformableDetrPreTrainedModel, + ) from .models.deit import ( DEIT_PRETRAINED_MODEL_ARCHIVE_LIST, DeiTForImageClassification, @@ -4862,6 +4830,13 @@ if TYPE_CHECKING: DetaModel, DetaPreTrainedModel, ) + from .models.detr import ( + DETR_PRETRAINED_MODEL_ARCHIVE_LIST, + DetrForObjectDetection, + DetrForSegmentation, + DetrModel, + DetrPreTrainedModel, + ) from .models.dinat import ( DINAT_PRETRAINED_MODEL_ARCHIVE_LIST, DinatBackbone, @@ -5626,6 +5601,12 @@ if TYPE_CHECKING: T5PreTrainedModel, load_tf_weights_in_t5, ) + from .models.table_transformer import ( + TABLE_TRANSFORMER_PRETRAINED_MODEL_ARCHIVE_LIST, + TableTransformerForObjectDetection, + TableTransformerModel, + TableTransformerPreTrainedModel, + ) from .models.tapas import ( TAPAS_PRETRAINED_MODEL_ARCHIVE_LIST, TapasForMaskedLM, diff --git a/src/transformers/models/conditional_detr/__init__.py b/src/transformers/models/conditional_detr/__init__.py index da5ed8f2f8..5653233211 100644 --- a/src/transformers/models/conditional_detr/__init__.py +++ b/src/transformers/models/conditional_detr/__init__.py @@ -14,7 +14,7 @@ from typing import TYPE_CHECKING -from ...utils import OptionalDependencyNotAvailable, _LazyModule, is_timm_available, is_vision_available +from ...utils import OptionalDependencyNotAvailable, _LazyModule, is_torch_available, is_vision_available _import_structure = { @@ -35,7 +35,7 @@ else: _import_structure["image_processing_conditional_detr"] = ["ConditionalDetrImageProcessor"] try: - if not is_timm_available(): + if not is_torch_available(): raise OptionalDependencyNotAvailable() except OptionalDependencyNotAvailable: pass @@ -66,7 +66,7 @@ if TYPE_CHECKING: from .image_processing_conditional_detr import ConditionalDetrImageProcessor try: - if not is_timm_available(): + if not is_torch_available(): raise OptionalDependencyNotAvailable() except OptionalDependencyNotAvailable: pass diff --git a/src/transformers/models/conditional_detr/image_processing_conditional_detr.py b/src/transformers/models/conditional_detr/image_processing_conditional_detr.py index cc4a24cb05..a496f7787f 100644 --- a/src/transformers/models/conditional_detr/image_processing_conditional_detr.py +++ b/src/transformers/models/conditional_detr/image_processing_conditional_detr.py @@ -1101,12 +1101,12 @@ class ConditionalDetrImageProcessor(BaseImageProcessor): images (`ImageInput`): Image or batch of images to preprocess. annotations (`AnnotationType` or `List[AnnotationType]`, *optional*): - List of annotations associated with the image or batch of images. If annotionation is for object + List of annotations associated with the image or batch of images. If annotation is for object detection, the annotations should be a dictionary with the following keys: - "image_id" (`int`): The image id. - "annotations" (`List[Dict]`): List of annotations for an image. Each annotation should be a dictionary. An image can have no annotations, in which case the list should be empty. - If annotionation is for segmentation, the annotations should be a dictionary with the following keys: + If annotation is for segmentation, the annotations should be a dictionary with the following keys: - "image_id" (`int`): The image id. - "segments_info" (`List[Dict]`): List of segments for an image. Each segment should be a dictionary. An image can have no segments, in which case the list should be empty. diff --git a/src/transformers/models/deformable_detr/__init__.py b/src/transformers/models/deformable_detr/__init__.py index 6614bc5f92..a560265f4b 100644 --- a/src/transformers/models/deformable_detr/__init__.py +++ b/src/transformers/models/deformable_detr/__init__.py @@ -14,7 +14,7 @@ from typing import TYPE_CHECKING -from ...utils import OptionalDependencyNotAvailable, _LazyModule, is_timm_available, is_vision_available +from ...utils import OptionalDependencyNotAvailable, _LazyModule, is_torch_available, is_vision_available _import_structure = { @@ -31,7 +31,7 @@ else: _import_structure["image_processing_deformable_detr"] = ["DeformableDetrImageProcessor"] try: - if not is_timm_available(): + if not is_torch_available(): raise OptionalDependencyNotAvailable() except OptionalDependencyNotAvailable: pass @@ -57,7 +57,7 @@ if TYPE_CHECKING: from .image_processing_deformable_detr import DeformableDetrImageProcessor try: - if not is_timm_available(): + if not is_torch_available(): raise OptionalDependencyNotAvailable() except OptionalDependencyNotAvailable: pass diff --git a/src/transformers/models/deformable_detr/image_processing_deformable_detr.py b/src/transformers/models/deformable_detr/image_processing_deformable_detr.py index 87e6ef508c..01ff544719 100644 --- a/src/transformers/models/deformable_detr/image_processing_deformable_detr.py +++ b/src/transformers/models/deformable_detr/image_processing_deformable_detr.py @@ -1099,12 +1099,12 @@ class DeformableDetrImageProcessor(BaseImageProcessor): images (`ImageInput`): Image or batch of images to preprocess. annotations (`AnnotationType` or `List[AnnotationType]`, *optional*): - List of annotations associated with the image or batch of images. If annotionation is for object + List of annotations associated with the image or batch of images. If annotation is for object detection, the annotations should be a dictionary with the following keys: - "image_id" (`int`): The image id. - "annotations" (`List[Dict]`): List of annotations for an image. Each annotation should be a dictionary. An image can have no annotations, in which case the list should be empty. - If annotionation is for segmentation, the annotations should be a dictionary with the following keys: + If annotation is for segmentation, the annotations should be a dictionary with the following keys: - "image_id" (`int`): The image id. - "segments_info" (`List[Dict]`): List of segments for an image. Each segment should be a dictionary. An image can have no segments, in which case the list should be empty. diff --git a/src/transformers/models/detr/__init__.py b/src/transformers/models/detr/__init__.py index 1dcda4cc17..9cbaca9a54 100644 --- a/src/transformers/models/detr/__init__.py +++ b/src/transformers/models/detr/__init__.py @@ -14,7 +14,7 @@ from typing import TYPE_CHECKING -from ...utils import OptionalDependencyNotAvailable, _LazyModule, is_timm_available, is_vision_available +from ...utils import OptionalDependencyNotAvailable, _LazyModule, is_torch_available, is_vision_available _import_structure = {"configuration_detr": ["DETR_PRETRAINED_CONFIG_ARCHIVE_MAP", "DetrConfig", "DetrOnnxConfig"]} @@ -29,7 +29,7 @@ else: _import_structure["image_processing_detr"] = ["DetrImageProcessor"] try: - if not is_timm_available(): + if not is_torch_available(): raise OptionalDependencyNotAvailable() except OptionalDependencyNotAvailable: pass @@ -56,7 +56,7 @@ if TYPE_CHECKING: from .image_processing_detr import DetrImageProcessor try: - if not is_timm_available(): + if not is_torch_available(): raise OptionalDependencyNotAvailable() except OptionalDependencyNotAvailable: pass diff --git a/src/transformers/models/detr/configuration_detr.py b/src/transformers/models/detr/configuration_detr.py index 430efc913b..b3da5f86b0 100644 --- a/src/transformers/models/detr/configuration_detr.py +++ b/src/transformers/models/detr/configuration_detr.py @@ -14,8 +14,9 @@ # limitations under the License. """ DETR model configuration""" +import copy from collections import OrderedDict -from typing import Mapping +from typing import Dict, Mapping from packaging import version @@ -187,6 +188,8 @@ class DetrConfig(PretrainedConfig): backbone_model_type = backbone_config.get("model_type") config_class = CONFIG_MAPPING[backbone_model_type] backbone_config = config_class.from_dict(backbone_config) + # set timm attributes to None + dilation, backbone, use_pretrained_backbone = None, None, None self.use_timm_backbone = use_timm_backbone self.backbone_config = backbone_config @@ -233,6 +236,28 @@ class DetrConfig(PretrainedConfig): def hidden_size(self) -> int: return self.d_model + @classmethod + def from_backbone_config(cls, backbone_config: PretrainedConfig, **kwargs): + """Instantiate a [`DetrConfig`] (or a derived class) from a pre-trained backbone model configuration. + Args: + backbone_config ([`PretrainedConfig`]): + The backbone configuration. + Returns: + [`DetrConfig`]: An instance of a configuration object + """ + return cls(backbone_config=backbone_config, **kwargs) + + def to_dict(self) -> Dict[str, any]: + """ + Serializes this instance to a Python dictionary. Override the default [`~PretrainedConfig.to_dict`]. Returns: + `Dict[str, any]`: Dictionary of all the attributes that make up this configuration instance, + """ + output = copy.deepcopy(self.__dict__) + if output["backbone_config"] is not None: + output["backbone_config"] = self.backbone_config.to_dict() + output["model_type"] = self.__class__.model_type + return output + class DetrOnnxConfig(OnnxConfig): torch_onnx_minimum_version = version.parse("1.11") diff --git a/src/transformers/models/detr/convert_detr_to_pytorch.py b/src/transformers/models/detr/convert_detr_to_pytorch.py index 3ff2e38ac3..a52e592b94 100644 --- a/src/transformers/models/detr/convert_detr_to_pytorch.py +++ b/src/transformers/models/detr/convert_detr_to_pytorch.py @@ -1,5 +1,5 @@ # coding=utf-8 -# Copyright 2022 The HuggingFace Inc. team. +# Copyright 2023 The HuggingFace Inc. team. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -33,16 +33,16 @@ logger = logging.get_logger(__name__) def get_detr_config(model_name): - config = DetrConfig(use_timm_backbone=False) - - # set backbone attributes - if "resnet50" in model_name: - pass - elif "resnet101" in model_name: - config.backbone_config = ResNetConfig.from_pretrained("microsoft/resnet-101") + # initialize config + if "resnet-50" in model_name: + backbone_config = ResNetConfig.from_pretrained("microsoft/resnet-50") + elif "resnet-101" in model_name: + backbone_config = ResNetConfig.from_pretrained("microsoft/resnet-101") else: raise ValueError("Model name should include either resnet50 or resnet101") + config = DetrConfig(use_timm_backbone=False, backbone_config=backbone_config) + # set label attributes is_panoptic = "panoptic" in model_name if is_panoptic: @@ -286,7 +286,7 @@ def prepare_img(): @torch.no_grad() -def convert_detr_checkpoint(model_name, pytorch_dump_folder_path): +def convert_detr_checkpoint(model_name, pytorch_dump_folder_path=None, push_to_hub=False): """ Copy/paste/tweak model's weights to our DETR structure. """ @@ -295,8 +295,12 @@ def convert_detr_checkpoint(model_name, pytorch_dump_folder_path): config, is_panoptic = get_detr_config(model_name) # load original model from torch hub + model_name_to_original_name = { + "detr-resnet-50": "detr_resnet50", + "detr-resnet-101": "detr_resnet101", + } logger.info(f"Converting model {model_name}...") - detr = torch.hub.load("facebookresearch/detr", model_name, pretrained=True).eval() + detr = torch.hub.load("facebookresearch/detr", model_name_to_original_name[model_name], pretrained=True).eval() state_dict = detr.state_dict() # rename keys for src, dest in create_rename_keys(config): @@ -344,9 +348,6 @@ def convert_detr_checkpoint(model_name, pytorch_dump_folder_path): original_outputs = detr(pixel_values) outputs = model(pixel_values) - print("Logits:", outputs.logits[0, :3, :3]) - print("Original logits:", original_outputs["pred_logits"][0, :3, :3]) - assert torch.allclose(outputs.logits, original_outputs["pred_logits"], atol=1e-3) assert torch.allclose(outputs.pred_boxes, original_outputs["pred_boxes"], atol=1e-3) if is_panoptic: @@ -360,15 +361,26 @@ def convert_detr_checkpoint(model_name, pytorch_dump_folder_path): model.save_pretrained(pytorch_dump_folder_path) processor.save_pretrained(pytorch_dump_folder_path) + if push_to_hub: + # Upload model and image processor to the hub + logger.info("Uploading PyTorch model and image processor to the hub...") + model.push_to_hub(f"nielsr/{model_name}") + processor.push_to_hub(f"nielsr/{model_name}") + if __name__ == "__main__": parser = argparse.ArgumentParser() parser.add_argument( - "--model_name", default="detr_resnet50", type=str, help="Name of the DETR model you'd like to convert." + "--model_name", + default="detr-resnet-50", + type=str, + choices=["detr-resnet-50", "detr-resnet-101"], + help="Name of the DETR model you'd like to convert.", ) parser.add_argument( "--pytorch_dump_folder_path", default=None, type=str, help="Path to the folder to output PyTorch model." ) + parser.add_argument("--push_to_hub", action="store_true", help="Whether to push the model to the hub or not.") args = parser.parse_args() - convert_detr_checkpoint(args.model_name, args.pytorch_dump_folder_path) + convert_detr_checkpoint(args.model_name, args.pytorch_dump_folder_path, args.push_to_hub) diff --git a/src/transformers/models/detr/image_processing_detr.py b/src/transformers/models/detr/image_processing_detr.py index 6a0ace45b8..eaeae66c96 100644 --- a/src/transformers/models/detr/image_processing_detr.py +++ b/src/transformers/models/detr/image_processing_detr.py @@ -1065,12 +1065,12 @@ class DetrImageProcessor(BaseImageProcessor): images (`ImageInput`): Image or batch of images to preprocess. annotations (`AnnotationType` or `List[AnnotationType]`, *optional*): - List of annotations associated with the image or batch of images. If annotionation is for object + List of annotations associated with the image or batch of images. If annotation is for object detection, the annotations should be a dictionary with the following keys: - "image_id" (`int`): The image id. - "annotations" (`List[Dict]`): List of annotations for an image. Each annotation should be a dictionary. An image can have no annotations, in which case the list should be empty. - If annotionation is for segmentation, the annotations should be a dictionary with the following keys: + If annotation is for segmentation, the annotations should be a dictionary with the following keys: - "image_id" (`int`): The image id. - "segments_info" (`List[Dict]`): List of segments for an image. Each segment should be a dictionary. An image can have no segments, in which case the list should be empty. diff --git a/src/transformers/models/table_transformer/__init__.py b/src/transformers/models/table_transformer/__init__.py index bacb6a810a..346bc9ef9c 100644 --- a/src/transformers/models/table_transformer/__init__.py +++ b/src/transformers/models/table_transformer/__init__.py @@ -14,7 +14,7 @@ from typing import TYPE_CHECKING -from ...utils import OptionalDependencyNotAvailable, _LazyModule, is_timm_available +from ...utils import OptionalDependencyNotAvailable, _LazyModule, is_torch_available _import_structure = { @@ -26,7 +26,7 @@ _import_structure = { } try: - if not is_timm_available(): + if not is_torch_available(): raise OptionalDependencyNotAvailable() except OptionalDependencyNotAvailable: pass @@ -47,7 +47,7 @@ if TYPE_CHECKING: ) try: - if not is_timm_available(): + if not is_torch_available(): raise OptionalDependencyNotAvailable() except OptionalDependencyNotAvailable: pass diff --git a/src/transformers/models/table_transformer/configuration_table_transformer.py b/src/transformers/models/table_transformer/configuration_table_transformer.py index d74424ce69..94213008c6 100644 --- a/src/transformers/models/table_transformer/configuration_table_transformer.py +++ b/src/transformers/models/table_transformer/configuration_table_transformer.py @@ -189,6 +189,8 @@ class TableTransformerConfig(PretrainedConfig): backbone_model_type = backbone_config.get("model_type") config_class = CONFIG_MAPPING[backbone_model_type] backbone_config = config_class.from_dict(backbone_config) + # set timm attributes to None + dilation, backbone, use_pretrained_backbone = None, None, None self.use_timm_backbone = use_timm_backbone self.backbone_config = backbone_config diff --git a/src/transformers/utils/dummy_pt_objects.py b/src/transformers/utils/dummy_pt_objects.py index d06bf1aea0..5212ea250a 100644 --- a/src/transformers/utils/dummy_pt_objects.py +++ b/src/transformers/utils/dummy_pt_objects.py @@ -1661,6 +1661,37 @@ class CodeGenPreTrainedModel(metaclass=DummyObject): requires_backends(self, ["torch"]) +CONDITIONAL_DETR_PRETRAINED_MODEL_ARCHIVE_LIST = None + + +class ConditionalDetrForObjectDetection(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + +class ConditionalDetrForSegmentation(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + +class ConditionalDetrModel(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + +class ConditionalDetrPreTrainedModel(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + CONVBERT_PRETRAINED_MODEL_ARCHIVE_LIST = None @@ -2073,6 +2104,30 @@ class DecisionTransformerPreTrainedModel(metaclass=DummyObject): requires_backends(self, ["torch"]) +DEFORMABLE_DETR_PRETRAINED_MODEL_ARCHIVE_LIST = None + + +class DeformableDetrForObjectDetection(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + +class DeformableDetrModel(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + +class DeformableDetrPreTrainedModel(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + DEIT_PRETRAINED_MODEL_ARCHIVE_LIST = None @@ -2135,6 +2190,37 @@ class DetaPreTrainedModel(metaclass=DummyObject): requires_backends(self, ["torch"]) +DETR_PRETRAINED_MODEL_ARCHIVE_LIST = None + + +class DetrForObjectDetection(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + +class DetrForSegmentation(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + +class DetrModel(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + +class DetrPreTrainedModel(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + DINAT_PRETRAINED_MODEL_ARCHIVE_LIST = None @@ -6040,6 +6126,30 @@ def load_tf_weights_in_t5(*args, **kwargs): requires_backends(load_tf_weights_in_t5, ["torch"]) +TABLE_TRANSFORMER_PRETRAINED_MODEL_ARCHIVE_LIST = None + + +class TableTransformerForObjectDetection(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + +class TableTransformerModel(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + +class TableTransformerPreTrainedModel(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + TAPAS_PRETRAINED_MODEL_ARCHIVE_LIST = None diff --git a/src/transformers/utils/dummy_timm_and_vision_objects.py b/src/transformers/utils/dummy_timm_and_vision_objects.py deleted file mode 100644 index b4a28babfc..0000000000 --- a/src/transformers/utils/dummy_timm_and_vision_objects.py +++ /dev/null @@ -1,112 +0,0 @@ -# This file is autogenerated by the command `make fix-copies`, do not edit. -from ..utils import DummyObject, requires_backends - - -CONDITIONAL_DETR_PRETRAINED_MODEL_ARCHIVE_LIST = None - - -class ConditionalDetrForObjectDetection(metaclass=DummyObject): - _backends = ["timm", "vision"] - - def __init__(self, *args, **kwargs): - requires_backends(self, ["timm", "vision"]) - - -class ConditionalDetrForSegmentation(metaclass=DummyObject): - _backends = ["timm", "vision"] - - def __init__(self, *args, **kwargs): - requires_backends(self, ["timm", "vision"]) - - -class ConditionalDetrModel(metaclass=DummyObject): - _backends = ["timm", "vision"] - - def __init__(self, *args, **kwargs): - requires_backends(self, ["timm", "vision"]) - - -class ConditionalDetrPreTrainedModel(metaclass=DummyObject): - _backends = ["timm", "vision"] - - def __init__(self, *args, **kwargs): - requires_backends(self, ["timm", "vision"]) - - -DEFORMABLE_DETR_PRETRAINED_MODEL_ARCHIVE_LIST = None - - -class DeformableDetrForObjectDetection(metaclass=DummyObject): - _backends = ["timm", "vision"] - - def __init__(self, *args, **kwargs): - requires_backends(self, ["timm", "vision"]) - - -class DeformableDetrModel(metaclass=DummyObject): - _backends = ["timm", "vision"] - - def __init__(self, *args, **kwargs): - requires_backends(self, ["timm", "vision"]) - - -class DeformableDetrPreTrainedModel(metaclass=DummyObject): - _backends = ["timm", "vision"] - - def __init__(self, *args, **kwargs): - requires_backends(self, ["timm", "vision"]) - - -DETR_PRETRAINED_MODEL_ARCHIVE_LIST = None - - -class DetrForObjectDetection(metaclass=DummyObject): - _backends = ["timm", "vision"] - - def __init__(self, *args, **kwargs): - requires_backends(self, ["timm", "vision"]) - - -class DetrForSegmentation(metaclass=DummyObject): - _backends = ["timm", "vision"] - - def __init__(self, *args, **kwargs): - requires_backends(self, ["timm", "vision"]) - - -class DetrModel(metaclass=DummyObject): - _backends = ["timm", "vision"] - - def __init__(self, *args, **kwargs): - requires_backends(self, ["timm", "vision"]) - - -class DetrPreTrainedModel(metaclass=DummyObject): - _backends = ["timm", "vision"] - - def __init__(self, *args, **kwargs): - requires_backends(self, ["timm", "vision"]) - - -TABLE_TRANSFORMER_PRETRAINED_MODEL_ARCHIVE_LIST = None - - -class TableTransformerForObjectDetection(metaclass=DummyObject): - _backends = ["timm", "vision"] - - def __init__(self, *args, **kwargs): - requires_backends(self, ["timm", "vision"]) - - -class TableTransformerModel(metaclass=DummyObject): - _backends = ["timm", "vision"] - - def __init__(self, *args, **kwargs): - requires_backends(self, ["timm", "vision"]) - - -class TableTransformerPreTrainedModel(metaclass=DummyObject): - _backends = ["timm", "vision"] - - def __init__(self, *args, **kwargs): - requires_backends(self, ["timm", "vision"]) diff --git a/tests/models/detr/test_modeling_detr.py b/tests/models/detr/test_modeling_detr.py index 6c7c512cc8..31d3db444c 100644 --- a/tests/models/detr/test_modeling_detr.py +++ b/tests/models/detr/test_modeling_detr.py @@ -20,7 +20,7 @@ import math import unittest from transformers import DetrConfig, is_timm_available, is_vision_available -from transformers.testing_utils import require_timm, require_vision, slow, torch_device +from transformers.testing_utils import require_timm, require_torch, require_vision, slow, torch_device from transformers.utils import cached_property from ...generation.test_utils import GenerationTesterMixin @@ -510,7 +510,7 @@ def prepare_img(): @require_timm @require_vision @slow -class DetrModelIntegrationTests(unittest.TestCase): +class DetrModelIntegrationTestsTimmBackbone(unittest.TestCase): @cached_property def default_feature_extractor(self): return DetrFeatureExtractor.from_pretrained("facebook/detr-resnet-50") if is_vision_available() else None @@ -626,3 +626,33 @@ class DetrModelIntegrationTests(unittest.TestCase): self.assertTrue(torch.allclose(results["segmentation"][:3, :3], expected_slice_segmentation, atol=1e-4)) self.assertTrue(len(results["segments_info"]), expected_number_of_segments) self.assertDictEqual(results["segments_info"][0], expected_first_segment) + + +@require_vision +@require_torch +@slow +class DetrModelIntegrationTests(unittest.TestCase): + @cached_property + def default_feature_extractor(self): + return ( + DetrFeatureExtractor.from_pretrained("facebook/detr-resnet-50", revision="no_timm") + if is_vision_available() + else None + ) + + def test_inference_no_head(self): + model = DetrModel.from_pretrained("facebook/detr-resnet-50", revision="no_timm").to(torch_device) + + feature_extractor = self.default_feature_extractor + image = prepare_img() + encoding = feature_extractor(images=image, return_tensors="pt").to(torch_device) + + with torch.no_grad(): + outputs = model(**encoding) + + expected_shape = torch.Size((1, 100, 256)) + assert outputs.last_hidden_state.shape == expected_shape + expected_slice = torch.tensor( + [[0.0616, -0.5146, -0.4032], [-0.7629, -0.4934, -1.7153], [-0.4768, -0.6403, -0.7826]] + ).to(torch_device) + self.assertTrue(torch.allclose(outputs.last_hidden_state[0, :3, :3], expected_slice, atol=1e-4))