From 89c7d9cfba2ec988289ba17360b6cf8c8d240be3 Mon Sep 17 00:00:00 2001
From: Nicolas Patry <patry.nicolas@protonmail.com>
Date: Fri, 4 Mar 2022 13:56:15 +0100
Subject: [PATCH] Making MaskFormerForInstanceSegmentation. (#15934)

Small adjustments.

Adding in type hint.

Last fix ?

Only include the default dict thing, not the pipelines.
---
 docs/source/model_doc/auto.mdx                |  4 ++++
 src/transformers/__init__.py                  |  4 ++++
 src/transformers/models/auto/__init__.py      |  4 ++++
 .../models/auto/feature_extraction_auto.py    |  1 +
 src/transformers/models/auto/modeling_auto.py | 19 +++++++++++++++++++
 src/transformers/utils/dummy_pt_objects.py    | 10 ++++++++++
 6 files changed, 42 insertions(+)

diff --git a/docs/source/model_doc/auto.mdx b/docs/source/model_doc/auto.mdx
index 20a73c7e4c..d941b00318 100644
--- a/docs/source/model_doc/auto.mdx
+++ b/docs/source/model_doc/auto.mdx
@@ -158,6 +158,10 @@ Likewise, if your `NewModel` is a subclass of [`PreTrainedModel`], make sure its
 
 [[autodoc]] AutoModelForSemanticSegmentation
 
+## AutoModelForInstanceSegmentation
+
+[[autodoc]] AutoModelForInstanceSegmentation
+
 ## TFAutoModel
 
 [[autodoc]] TFAutoModel
diff --git a/src/transformers/__init__.py b/src/transformers/__init__.py
index 9eac410873..f0b1c1a42a 100755
--- a/src/transformers/__init__.py
+++ b/src/transformers/__init__.py
@@ -677,6 +677,7 @@ if is_torch_available():
             "MODEL_FOR_CTC_MAPPING",
             "MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING",
             "MODEL_FOR_IMAGE_SEGMENTATION_MAPPING",
+            "MODEL_FOR_INSTANCE_SEGMENTATION_MAPPING",
             "MODEL_FOR_MASKED_IMAGE_MODELING_MAPPING",
             "MODEL_FOR_MASKED_LM_MAPPING",
             "MODEL_FOR_MULTIPLE_CHOICE_MAPPING",
@@ -701,6 +702,7 @@ if is_torch_available():
             "AutoModelForCTC",
             "AutoModelForImageClassification",
             "AutoModelForImageSegmentation",
+            "AutoModelForInstanceSegmentation",
             "AutoModelForMaskedImageModeling",
             "AutoModelForMaskedLM",
             "AutoModelForMultipleChoice",
@@ -2892,6 +2894,7 @@ if TYPE_CHECKING:
             MODEL_FOR_CTC_MAPPING,
             MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING,
             MODEL_FOR_IMAGE_SEGMENTATION_MAPPING,
+            MODEL_FOR_INSTANCE_SEGMENTATION_MAPPING,
             MODEL_FOR_MASKED_IMAGE_MODELING_MAPPING,
             MODEL_FOR_MASKED_LM_MAPPING,
             MODEL_FOR_MULTIPLE_CHOICE_MAPPING,
@@ -2916,6 +2919,7 @@ if TYPE_CHECKING:
             AutoModelForCTC,
             AutoModelForImageClassification,
             AutoModelForImageSegmentation,
+            AutoModelForInstanceSegmentation,
             AutoModelForMaskedImageModeling,
             AutoModelForMaskedLM,
             AutoModelForMultipleChoice,
diff --git a/src/transformers/models/auto/__init__.py b/src/transformers/models/auto/__init__.py
index 9a0e73fc24..d0e23beabb 100644
--- a/src/transformers/models/auto/__init__.py
+++ b/src/transformers/models/auto/__init__.py
@@ -38,6 +38,7 @@ if is_torch_available():
         "MODEL_FOR_CTC_MAPPING",
         "MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING",
         "MODEL_FOR_IMAGE_SEGMENTATION_MAPPING",
+        "MODEL_FOR_INSTANCE_SEGMENTATION_MAPPING",
         "MODEL_FOR_MASKED_IMAGE_MODELING_MAPPING",
         "MODEL_FOR_MASKED_LM_MAPPING",
         "MODEL_FOR_MULTIPLE_CHOICE_MAPPING",
@@ -62,6 +63,7 @@ if is_torch_available():
         "AutoModelForCTC",
         "AutoModelForImageClassification",
         "AutoModelForImageSegmentation",
+        "AutoModelForInstanceSegmentation",
         "AutoModelForMaskedImageModeling",
         "AutoModelForMaskedLM",
         "AutoModelForMultipleChoice",
@@ -157,6 +159,7 @@ if TYPE_CHECKING:
             MODEL_FOR_CTC_MAPPING,
             MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING,
             MODEL_FOR_IMAGE_SEGMENTATION_MAPPING,
+            MODEL_FOR_INSTANCE_SEGMENTATION_MAPPING,
             MODEL_FOR_MASKED_IMAGE_MODELING_MAPPING,
             MODEL_FOR_MASKED_LM_MAPPING,
             MODEL_FOR_MULTIPLE_CHOICE_MAPPING,
@@ -181,6 +184,7 @@ if TYPE_CHECKING:
             AutoModelForCTC,
             AutoModelForImageClassification,
             AutoModelForImageSegmentation,
+            AutoModelForInstanceSegmentation,
             AutoModelForMaskedImageModeling,
             AutoModelForMaskedLM,
             AutoModelForMultipleChoice,
diff --git a/src/transformers/models/auto/feature_extraction_auto.py b/src/transformers/models/auto/feature_extraction_auto.py
index ed0e58e008..7f1a448200 100644
--- a/src/transformers/models/auto/feature_extraction_auto.py
+++ b/src/transformers/models/auto/feature_extraction_auto.py
@@ -54,6 +54,7 @@ FEATURE_EXTRACTOR_MAPPING_NAMES = OrderedDict(
         ("segformer", "SegformerFeatureExtractor"),
         ("convnext", "ConvNextFeatureExtractor"),
         ("poolformer", "PoolFormerFeatureExtractor"),
+        ("maskformer", "MaskFormerFeatureExtractor"),
     ]
 )
 
diff --git a/src/transformers/models/auto/modeling_auto.py b/src/transformers/models/auto/modeling_auto.py
index d1e4727c0f..92f1a1f8a3 100644
--- a/src/transformers/models/auto/modeling_auto.py
+++ b/src/transformers/models/auto/modeling_auto.py
@@ -314,6 +314,13 @@ MODEL_FOR_SEMANTIC_SEGMENTATION_MAPPING_NAMES = OrderedDict(
     ]
 )
 
+MODEL_FOR_INSTANCE_SEGMENTATION_MAPPING_NAMES = OrderedDict(
+    [
+        # Model for Instance Segmentation mapping
+        ("maskformer", "MaskFormerForInstanceSegmentation"),
+    ]
+)
+
 MODEL_FOR_VISION_2_SEQ_MAPPING_NAMES = OrderedDict(
     [
         ("vision-encoder-decoder", "VisionEncoderDecoderModel"),
@@ -648,6 +655,9 @@ MODEL_FOR_IMAGE_SEGMENTATION_MAPPING = _LazyAutoMapping(
 MODEL_FOR_SEMANTIC_SEGMENTATION_MAPPING = _LazyAutoMapping(
     CONFIG_MAPPING_NAMES, MODEL_FOR_SEMANTIC_SEGMENTATION_MAPPING_NAMES
 )
+MODEL_FOR_INSTANCE_SEGMENTATION_MAPPING = _LazyAutoMapping(
+    CONFIG_MAPPING_NAMES, MODEL_FOR_INSTANCE_SEGMENTATION_MAPPING_NAMES
+)
 MODEL_FOR_VISION_2_SEQ_MAPPING = _LazyAutoMapping(CONFIG_MAPPING_NAMES, MODEL_FOR_VISION_2_SEQ_MAPPING_NAMES)
 MODEL_FOR_MASKED_LM_MAPPING = _LazyAutoMapping(CONFIG_MAPPING_NAMES, MODEL_FOR_MASKED_LM_MAPPING_NAMES)
 MODEL_FOR_MASKED_IMAGE_MODELING_MAPPING = _LazyAutoMapping(
@@ -802,6 +812,15 @@ AutoModelForSemanticSegmentation = auto_class_update(
 )
 
 
+class AutoModelForInstanceSegmentation(_BaseAutoModelClass):
+    _model_mapping = MODEL_FOR_INSTANCE_SEGMENTATION_MAPPING
+
+
+AutoModelForInstanceSegmentation = auto_class_update(
+    AutoModelForInstanceSegmentation, head_doc="instance segmentation"
+)
+
+
 class AutoModelForObjectDetection(_BaseAutoModelClass):
     _model_mapping = MODEL_FOR_OBJECT_DETECTION_MAPPING
 
diff --git a/src/transformers/utils/dummy_pt_objects.py b/src/transformers/utils/dummy_pt_objects.py
index 335a4fe471..e222a5c15d 100644
--- a/src/transformers/utils/dummy_pt_objects.py
+++ b/src/transformers/utils/dummy_pt_objects.py
@@ -365,6 +365,9 @@ MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING = None
 MODEL_FOR_IMAGE_SEGMENTATION_MAPPING = None
 
 
+MODEL_FOR_INSTANCE_SEGMENTATION_MAPPING = None
+
+
 MODEL_FOR_MASKED_IMAGE_MODELING_MAPPING = None
 
 
@@ -469,6 +472,13 @@ class AutoModelForImageSegmentation(metaclass=DummyObject):
         requires_backends(self, ["torch"])
 
 
+class AutoModelForInstanceSegmentation(metaclass=DummyObject):
+    _backends = ["torch"]
+
+    def __init__(self, *args, **kwargs):
+        requires_backends(self, ["torch"])
+
+
 class AutoModelForMaskedImageModeling(metaclass=DummyObject):
     _backends = ["torch"]