SuperPointModel -> SuperPointForKeypointDetection (#29757)

2024-03-20 15:41:03 +00:00
parent 1248f09252
commit 3c17c529cc
11 changed files with 63 additions and 30 deletions
--- a/docs/source/en/model_doc/auto.md
+++ b/docs/source/en/model_doc/auto.md
@@ -250,6 +250,10 @@ The following auto classes are available for the following computer vision tasks

 [[autodoc]] AutoModelForVideoClassification

+### AutoModelForKeypointDetection
+
+[[autodoc]] AutoModelForKeypointDetection
+
 ### AutoModelForMaskedImageModeling

 [[autodoc]] AutoModelForMaskedImageModeling
--- a/docs/source/en/model_doc/superpoint.md
+++ b/docs/source/en/model_doc/superpoint.md
@@ -1,7 +1,7 @@
 <!--Copyright 2024 The HuggingFace Team. All rights reserved.

 Licensed under the MIT License; you may not use this file except in compliance with
-the License. 
+the License.

 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
 an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
@@ -113,10 +113,8 @@ The original code can be found [here](https://github.com/magicleap/SuperPointPre

 - preprocess

-## SuperPointModel
+## SuperPointForKeypointDetection

-[[autodoc]] SuperPointModel
+[[autodoc]] SuperPointForKeypointDetection

 - forward
-
-
--- a/src/transformers/init.py
+++ b/src/transformers/init.py
@@ -1487,6 +1487,7 @@ else:
            "MODEL_FOR_IMAGE_SEGMENTATION_MAPPING",
            "MODEL_FOR_IMAGE_TO_IMAGE_MAPPING",
            "MODEL_FOR_INSTANCE_SEGMENTATION_MAPPING",
+            "MODEL_FOR_KEYPOINT_DETECTION_MAPPING",
            "MODEL_FOR_MASKED_IMAGE_MODELING_MAPPING",
            "MODEL_FOR_MASKED_LM_MAPPING",
            "MODEL_FOR_MASK_GENERATION_MAPPING",
@@ -1527,6 +1528,7 @@ else:
            "AutoModelForImageSegmentation",
            "AutoModelForImageToImage",
            "AutoModelForInstanceSegmentation",
+            "AutoModelForKeypointDetection",
            "AutoModelForMaskedImageModeling",
            "AutoModelForMaskedLM",
            "AutoModelForMaskGeneration",
@@ -3341,7 +3343,7 @@ else:
    _import_structure["models.superpoint"].extend(
        [
            "SUPERPOINT_PRETRAINED_MODEL_ARCHIVE_LIST",
-            "SuperPointModel",
+            "SuperPointForKeypointDetection",
            "SuperPointPreTrainedModel",
        ]
    )
@@ -6319,6 +6321,7 @@ if TYPE_CHECKING:
            MODEL_FOR_IMAGE_SEGMENTATION_MAPPING,
            MODEL_FOR_IMAGE_TO_IMAGE_MAPPING,
            MODEL_FOR_INSTANCE_SEGMENTATION_MAPPING,
+            MODEL_FOR_KEYPOINT_DETECTION_MAPPING,
            MODEL_FOR_MASK_GENERATION_MAPPING,
            MODEL_FOR_MASKED_IMAGE_MODELING_MAPPING,
            MODEL_FOR_MASKED_LM_MAPPING,
@@ -6359,6 +6362,7 @@ if TYPE_CHECKING:
            AutoModelForImageSegmentation,
            AutoModelForImageToImage,
            AutoModelForInstanceSegmentation,
+            AutoModelForKeypointDetection,
            AutoModelForMaskedImageModeling,
            AutoModelForMaskedLM,
            AutoModelForMaskGeneration,
@@ -7852,7 +7856,7 @@ if TYPE_CHECKING:
        )
        from .models.superpoint import (
            SUPERPOINT_PRETRAINED_MODEL_ARCHIVE_LIST,
-            SuperPointModel,
+            SuperPointForKeypointDetection,
            SuperPointPreTrainedModel,
        )
        from .models.swiftformer import (
--- a/src/transformers/models/auto/init.py
+++ b/src/transformers/models/auto/init.py
@@ -52,6 +52,7 @@ else:
        "MODEL_FOR_IMAGE_MAPPING",
        "MODEL_FOR_IMAGE_SEGMENTATION_MAPPING",
        "MODEL_FOR_IMAGE_TO_IMAGE_MAPPING",
+        "MODEL_FOR_KEYPOINT_DETECTION_MAPPING",
        "MODEL_FOR_INSTANCE_SEGMENTATION_MAPPING",
        "MODEL_FOR_MASKED_IMAGE_MODELING_MAPPING",
        "MODEL_FOR_MASKED_LM_MAPPING",
@@ -92,6 +93,7 @@ else:
        "AutoModelForImageSegmentation",
        "AutoModelForImageToImage",
        "AutoModelForInstanceSegmentation",
+        "AutoModelForKeypointDetection",
        "AutoModelForMaskGeneration",
        "AutoModelForTextEncoding",
        "AutoModelForMaskedImageModeling",
@@ -117,7 +119,6 @@ else:
        "AutoModelWithLMHead",
        "AutoModelForZeroShotImageClassification",
        "AutoModelForZeroShotObjectDetection",
-        "AutoModelForKeypointDetection",
    ]

 try:
@@ -239,6 +240,7 @@ if TYPE_CHECKING:
            MODEL_FOR_IMAGE_SEGMENTATION_MAPPING,
            MODEL_FOR_IMAGE_TO_IMAGE_MAPPING,
            MODEL_FOR_INSTANCE_SEGMENTATION_MAPPING,
+            MODEL_FOR_KEYPOINT_DETECTION_MAPPING,
            MODEL_FOR_MASK_GENERATION_MAPPING,
            MODEL_FOR_MASKED_IMAGE_MODELING_MAPPING,
            MODEL_FOR_MASKED_LM_MAPPING,
--- a/src/transformers/models/auto/modeling_auto.py
+++ b/src/transformers/models/auto/modeling_auto.py
@@ -207,7 +207,6 @@ MODEL_MAPPING_NAMES = OrderedDict(
        ("squeezebert", "SqueezeBertModel"),
        ("stablelm", "StableLmModel"),
        ("starcoder2", "Starcoder2Model"),
-        ("superpoint", "SuperPointModel"),
        ("swiftformer", "SwiftFormerModel"),
        ("swin", "SwinModel"),
        ("swin2sr", "Swin2SRModel"),
@@ -1225,6 +1224,14 @@ MODEL_FOR_MASK_GENERATION_MAPPING_NAMES = OrderedDict(
    ]
 )

+
+MODEL_FOR_KEYPOINT_DETECTION_MAPPING_NAMES = OrderedDict(
+    [
+        ("superpoint", "SuperPointForKeypointDetection"),
+    ]
+)
+
+
 MODEL_FOR_TEXT_ENCODING_MAPPING_NAMES = OrderedDict(
    [
        ("albert", "AlbertModel"),
@@ -1360,6 +1367,10 @@ MODEL_FOR_BACKBONE_MAPPING = _LazyAutoMapping(CONFIG_MAPPING_NAMES, MODEL_FOR_BA

 MODEL_FOR_MASK_GENERATION_MAPPING = _LazyAutoMapping(CONFIG_MAPPING_NAMES, MODEL_FOR_MASK_GENERATION_MAPPING_NAMES)

+MODEL_FOR_KEYPOINT_DETECTION_MAPPING = _LazyAutoMapping(
+    CONFIG_MAPPING_NAMES, MODEL_FOR_KEYPOINT_DETECTION_MAPPING_NAMES
+)
+
 MODEL_FOR_TEXT_ENCODING_MAPPING = _LazyAutoMapping(CONFIG_MAPPING_NAMES, MODEL_FOR_TEXT_ENCODING_MAPPING_NAMES)

 MODEL_FOR_TIME_SERIES_CLASSIFICATION_MAPPING = _LazyAutoMapping(
@@ -1377,6 +1388,10 @@ class AutoModelForMaskGeneration(_BaseAutoModelClass):
    _model_mapping = MODEL_FOR_MASK_GENERATION_MAPPING


+class AutoModelForKeypointDetection(_BaseAutoModelClass):
+    _model_mapping = MODEL_FOR_KEYPOINT_DETECTION_MAPPING
+
+
 class AutoModelForTextEncoding(_BaseAutoModelClass):
    _model_mapping = MODEL_FOR_TEXT_ENCODING_MAPPING

--- a/src/transformers/models/superpoint/init.py
+++ b/src/transformers/models/superpoint/init.py
@@ -40,7 +40,7 @@ except OptionalDependencyNotAvailable:
 else:
    _import_structure["modeling_superpoint"] = [
        "SUPERPOINT_PRETRAINED_MODEL_ARCHIVE_LIST",
-        "SuperPointModel",
+        "SuperPointForKeypointDetection",
        "SuperPointPreTrainedModel",
    ]

@@ -67,7 +67,7 @@ if TYPE_CHECKING:
    else:
        from .modeling_superpoint import (
            SUPERPOINT_PRETRAINED_MODEL_ARCHIVE_LIST,
-            SuperPointModel,
+            SuperPointForKeypointDetection,
            SuperPointPreTrainedModel,
        )

--- a/src/transformers/models/superpoint/configuration_superpoint.py
+++ b/src/transformers/models/superpoint/configuration_superpoint.py
@@ -26,7 +26,7 @@ SUPERPOINT_PRETRAINED_CONFIG_ARCHIVE_MAP = {

 class SuperPointConfig(PretrainedConfig):
    r"""
-    This is the configuration class to store the configuration of a [`SuperPointModel`]. It is used to instantiate a
+    This is the configuration class to store the configuration of a [`SuperPointForKeypointDetection`]. It is used to instantiate a
    SuperPoint model according to the specified arguments, defining the model architecture. Instantiating a
    configuration with the defaults will yield a similar configuration to that of the SuperPoint
    [magic-leap-community/superpoint](https://huggingface.co/magic-leap-community/superpoint) architecture.
@@ -53,12 +53,12 @@ class SuperPointConfig(PretrainedConfig):

    Example:
    ```python
-    >>> from transformers import SuperPointConfig, SuperPointModel
+    >>> from transformers import SuperPointConfig, SuperPointForKeypointDetection

    >>> # Initializing a SuperPoint superpoint style configuration
    >>> configuration = SuperPointConfig()
    >>> # Initializing a model from the superpoint style configuration
-    >>> model = SuperPointModel(configuration)
+    >>> model = SuperPointForKeypointDetection(configuration)
    >>> # Accessing the model configuration
    >>> configuration = model.config
    ```"""
--- a/src/transformers/models/superpoint/convert_superpoint_to_pytorch.py
+++ b/src/transformers/models/superpoint/convert_superpoint_to_pytorch.py
@@ -18,7 +18,7 @@ import requests
 import torch
 from PIL import Image

-from transformers import SuperPointConfig, SuperPointImageProcessor, SuperPointModel
+from transformers import SuperPointConfig, SuperPointForKeypointDetection, SuperPointImageProcessor


 def get_superpoint_config():
@@ -106,7 +106,7 @@ def convert_superpoint_checkpoint(checkpoint_url, pytorch_dump_folder_path, save
        rename_key(new_state_dict, src, dest)

    # Load HuggingFace model
-    model = SuperPointModel(config)
+    model = SuperPointForKeypointDetection(config)
    model.load_state_dict(new_state_dict)
    model.eval()
    print("Successfully loaded weights in the model")
--- a/src/transformers/models/superpoint/modeling_superpoint.py
+++ b/src/transformers/models/superpoint/modeling_superpoint.py
@@ -390,7 +390,7 @@ Args:
    "SuperPoint model outputting keypoints and descriptors.",
    SUPERPOINT_START_DOCSTRING,
 )
-class SuperPointModel(SuperPointPreTrainedModel):
+class SuperPointForKeypointDetection(SuperPointPreTrainedModel):
    """
    SuperPoint model. It consists of a SuperPointEncoder, a SuperPointInterestPointDecoder and a
    SuperPointDescriptorDecoder. SuperPoint was proposed in `SuperPoint: Self-Supervised Interest Point Detection and
--- a/src/transformers/utils/dummy_pt_objects.py
+++ b/src/transformers/utils/dummy_pt_objects.py
@@ -606,6 +606,9 @@ MODEL_FOR_IMAGE_TO_IMAGE_MAPPING = None
 MODEL_FOR_INSTANCE_SEGMENTATION_MAPPING = None


+MODEL_FOR_KEYPOINT_DETECTION_MAPPING = None
+
+
 MODEL_FOR_MASK_GENERATION_MAPPING = None


@@ -778,6 +781,13 @@ class AutoModelForInstanceSegmentation(metaclass=DummyObject):
        requires_backends(self, ["torch"])


+class AutoModelForKeypointDetection(metaclass=DummyObject):
+    _backends = ["torch"]
+
+    def __init__(self, *args, **kwargs):
+        requires_backends(self, ["torch"])
+
+
 class AutoModelForMaskedImageModeling(metaclass=DummyObject):
    _backends = ["torch"]

@@ -8029,7 +8039,7 @@ class Starcoder2PreTrainedModel(metaclass=DummyObject):
 SUPERPOINT_PRETRAINED_MODEL_ARCHIVE_LIST = None


-class SuperPointModel(metaclass=DummyObject):
+class SuperPointForKeypointDetection(metaclass=DummyObject):
    _backends = ["torch"]

    def __init__(self, *args, **kwargs):
--- a/tests/models/superpoint/test_modeling_superpoint.py
+++ b/tests/models/superpoint/test_modeling_superpoint.py
@@ -28,7 +28,7 @@ if is_torch_available():

    from transformers import (
        SUPERPOINT_PRETRAINED_MODEL_ARCHIVE_LIST,
-        SuperPointModel,
+        SuperPointForKeypointDetection,
    )

 if is_vision_available():
@@ -86,7 +86,7 @@ class SuperPointModelTester:
        )

    def create_and_check_model(self, config, pixel_values):
-        model = SuperPointModel(config=config)
+        model = SuperPointForKeypointDetection(config=config)
        model.to(torch_device)
        model.eval()
        result = model(pixel_values)
@@ -109,7 +109,7 @@ class SuperPointModelTester:

@require_torch
 class SuperPointModelTest(ModelTesterMixin, unittest.TestCase):
-    all_model_classes = (SuperPointModel,) if is_torch_available() else ()
+    all_model_classes = (SuperPointForKeypointDetection,) if is_torch_available() else ()
    all_generative_model_classes = () if is_torch_available() else ()

    fx_compatible = False
@@ -134,31 +134,31 @@ class SuperPointModelTest(ModelTesterMixin, unittest.TestCase):
    def create_and_test_config_common_properties(self):
        return

-    @unittest.skip(reason="SuperPointModel does not use inputs_embeds")
+    @unittest.skip(reason="SuperPointForKeypointDetection does not use inputs_embeds")
    def test_inputs_embeds(self):
        pass

-    @unittest.skip(reason="SuperPointModel does not support input and output embeddings")
+    @unittest.skip(reason="SuperPointForKeypointDetection does not support input and output embeddings")
    def test_model_common_attributes(self):
        pass

-    @unittest.skip(reason="SuperPointModel does not use feedforward chunking")
+    @unittest.skip(reason="SuperPointForKeypointDetection does not use feedforward chunking")
    def test_feed_forward_chunking(self):
        pass

-    @unittest.skip(reason="SuperPointModel is not trainable")
+    @unittest.skip(reason="SuperPointForKeypointDetection is not trainable")
    def test_training(self):
        pass

-    @unittest.skip(reason="SuperPointModel is not trainable")
+    @unittest.skip(reason="SuperPointForKeypointDetection is not trainable")
    def test_training_gradient_checkpointing(self):
        pass

-    @unittest.skip(reason="SuperPointModel is not trainable")
+    @unittest.skip(reason="SuperPointForKeypointDetection is not trainable")
    def test_training_gradient_checkpointing_use_reentrant(self):
        pass

-    @unittest.skip(reason="SuperPointModel is not trainable")
+    @unittest.skip(reason="SuperPointForKeypointDetection is not trainable")
    def test_training_gradient_checkpointing_use_reentrant_false(self):
        pass

@@ -219,7 +219,7 @@ class SuperPointModelTest(ModelTesterMixin, unittest.TestCase):
    @slow
    def test_model_from_pretrained(self):
        for model_name in SUPERPOINT_PRETRAINED_MODEL_ARCHIVE_LIST[:1]:
-            model = SuperPointModel.from_pretrained(model_name)
+            model = SuperPointForKeypointDetection.from_pretrained(model_name)
            self.assertIsNotNone(model)

    def test_forward_labels_should_be_none(self):
@@ -254,7 +254,7 @@ class SuperPointModelIntegrationTest(unittest.TestCase):

    @slow
    def test_inference(self):
-        model = SuperPointModel.from_pretrained("magic-leap-community/superpoint").to(torch_device)
+        model = SuperPointForKeypointDetection.from_pretrained("magic-leap-community/superpoint").to(torch_device)
        preprocessor = self.default_image_processor
        images = prepare_imgs()
        inputs = preprocessor(images=images, return_tensors="pt").to(torch_device)