From 576cd45a57501856bf719f2eb6186325fb6d2f88 Mon Sep 17 00:00:00 2001
From: LeviVasconcelos <leviovasconcelos@gmail.com>
Date: Fri, 22 Sep 2023 13:53:55 -0300
Subject: [PATCH] Add image to image pipeline (#25393)

* Add image to image pipeline

Add image to image pipeline

* remove swin2sr from tf auto

* make ImageToImage importable

* make style

make style

make style

make style

* remove tf support

* remove nonused imports

* fix postprocessing

* add important comments; add unit tests

* add documentation

* remove support for TF

* make fixup

* fix typehint Image.Image

* fix documentation code

* address review request; fix unittest type checking

* address review request; fix unittest type checking

* make fixup

* address reviews

* Update src/transformers/pipelines/image_to_image.py

Co-authored-by: NielsRogge <48327001+NielsRogge@users.noreply.github.com>

* enhance docs

* make style

* make style

* improve docetest time

* improve docetest time

* Update tests/pipelines/test_pipelines_image_to_image.py

Co-authored-by: Nicolas Patry <patry.nicolas@protonmail.com>

* Update tests/pipelines/test_pipelines_image_to_image.py

Co-authored-by: Nicolas Patry <patry.nicolas@protonmail.com>

* make fixup

* undo faulty merge

* undo faulty merge

* add image-to-image to test pipeline mixin

* Update src/transformers/pipelines/image_to_image.py

Co-authored-by: Arthur <48595927+ArthurZucker@users.noreply.github.com>

* Update tests/pipelines/test_pipelines_image_to_image.py

Co-authored-by: Arthur <48595927+ArthurZucker@users.noreply.github.com>

* improve docs

---------

Co-authored-by: NielsRogge <48327001+NielsRogge@users.noreply.github.com>
Co-authored-by: Nicolas Patry <patry.nicolas@protonmail.com>
Co-authored-by: Arthur <48595927+ArthurZucker@users.noreply.github.com>
---
 docs/source/en/main_classes/pipelines.md      |   6 +
 docs/source/en/model_doc/auto.md              |   4 +
 src/transformers/__init__.py                  |   6 +
 src/transformers/models/auto/__init__.py      |   4 +
 src/transformers/models/auto/modeling_auto.py |  12 ++
 src/transformers/pipelines/__init__.py        |  12 +-
 src/transformers/pipelines/image_to_image.py  | 134 ++++++++++++++++++
 src/transformers/utils/dummy_pt_objects.py    |  10 ++
 .../test_pipelines_image_to_image.py          |  85 +++++++++++
 tests/test_pipeline_mixin.py                  |   2 +
 utils/update_metadata.py                      |   1 +
 11 files changed, 275 insertions(+), 1 deletion(-)
 create mode 100644 src/transformers/pipelines/image_to_image.py
 create mode 100644 tests/pipelines/test_pipelines_image_to_image.py

diff --git a/docs/source/en/main_classes/pipelines.md b/docs/source/en/main_classes/pipelines.md
index a3bf567809..6a1175d1f6 100644
--- a/docs/source/en/main_classes/pipelines.md
+++ b/docs/source/en/main_classes/pipelines.md
@@ -352,6 +352,12 @@ Pipelines available for computer vision tasks include the following.
     - __call__
     - all
 
+### ImageToImagePipeline
+
+[[autodoc]] ImageToImagePipeline
+    - __call__
+    - all
+
 ### ObjectDetectionPipeline
 
 [[autodoc]] ObjectDetectionPipeline
diff --git a/docs/source/en/model_doc/auto.md b/docs/source/en/model_doc/auto.md
index 9390b96fc5..75d162bbcf 100644
--- a/docs/source/en/model_doc/auto.md
+++ b/docs/source/en/model_doc/auto.md
@@ -266,6 +266,10 @@ The following auto classes are available for the following computer vision tasks
 
 [[autodoc]] AutoModelForImageSegmentation
 
+### AutoModelForImageToImage
+
+[[autodoc]] AutoModelForImageToImage
+
 ### AutoModelForSemanticSegmentation
 
 [[autodoc]] AutoModelForSemanticSegmentation
diff --git a/src/transformers/__init__.py b/src/transformers/__init__.py
index cd06fd001f..db097b0ba6 100644
--- a/src/transformers/__init__.py
+++ b/src/transformers/__init__.py
@@ -653,6 +653,7 @@ _import_structure = {
         "FillMaskPipeline",
         "ImageClassificationPipeline",
         "ImageSegmentationPipeline",
+        "ImageToImagePipeline",
         "ImageToTextPipeline",
         "JsonPipelineDataFormat",
         "NerPipeline",
@@ -1120,6 +1121,7 @@ else:
             "MODEL_FOR_DOCUMENT_QUESTION_ANSWERING_MAPPING",
             "MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING",
             "MODEL_FOR_IMAGE_SEGMENTATION_MAPPING",
+            "MODEL_FOR_IMAGE_TO_IMAGE_MAPPING",
             "MODEL_FOR_INSTANCE_SEGMENTATION_MAPPING",
             "MODEL_FOR_MASKED_IMAGE_MODELING_MAPPING",
             "MODEL_FOR_MASKED_LM_MAPPING",
@@ -1157,6 +1159,7 @@ else:
             "AutoModelForDocumentQuestionAnswering",
             "AutoModelForImageClassification",
             "AutoModelForImageSegmentation",
+            "AutoModelForImageToImage",
             "AutoModelForInstanceSegmentation",
             "AutoModelForMaskedImageModeling",
             "AutoModelForMaskedLM",
@@ -4740,6 +4743,7 @@ if TYPE_CHECKING:
         FillMaskPipeline,
         ImageClassificationPipeline,
         ImageSegmentationPipeline,
+        ImageToImagePipeline,
         ImageToTextPipeline,
         JsonPipelineDataFormat,
         NerPipeline,
@@ -5157,6 +5161,7 @@ if TYPE_CHECKING:
             MODEL_FOR_DOCUMENT_QUESTION_ANSWERING_MAPPING,
             MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING,
             MODEL_FOR_IMAGE_SEGMENTATION_MAPPING,
+            MODEL_FOR_IMAGE_TO_IMAGE_MAPPING,
             MODEL_FOR_INSTANCE_SEGMENTATION_MAPPING,
             MODEL_FOR_MASK_GENERATION_MAPPING,
             MODEL_FOR_MASKED_IMAGE_MODELING_MAPPING,
@@ -5194,6 +5199,7 @@ if TYPE_CHECKING:
             AutoModelForDocumentQuestionAnswering,
             AutoModelForImageClassification,
             AutoModelForImageSegmentation,
+            AutoModelForImageToImage,
             AutoModelForInstanceSegmentation,
             AutoModelForMaskedImageModeling,
             AutoModelForMaskedLM,
diff --git a/src/transformers/models/auto/__init__.py b/src/transformers/models/auto/__init__.py
index 12d79822fd..dc01c93406 100644
--- a/src/transformers/models/auto/__init__.py
+++ b/src/transformers/models/auto/__init__.py
@@ -50,6 +50,7 @@ else:
         "MODEL_FOR_DEPTH_ESTIMATION_MAPPING",
         "MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING",
         "MODEL_FOR_IMAGE_SEGMENTATION_MAPPING",
+        "MODEL_FOR_IMAGE_TO_IMAGE_MAPPING",
         "MODEL_FOR_INSTANCE_SEGMENTATION_MAPPING",
         "MODEL_FOR_MASKED_IMAGE_MODELING_MAPPING",
         "MODEL_FOR_MASKED_LM_MAPPING",
@@ -86,6 +87,7 @@ else:
         "AutoModelForDepthEstimation",
         "AutoModelForImageClassification",
         "AutoModelForImageSegmentation",
+        "AutoModelForImageToImage",
         "AutoModelForInstanceSegmentation",
         "AutoModelForMaskGeneration",
         "AutoModelForTextEncoding",
@@ -230,6 +232,7 @@ if TYPE_CHECKING:
             MODEL_FOR_DOCUMENT_QUESTION_ANSWERING_MAPPING,
             MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING,
             MODEL_FOR_IMAGE_SEGMENTATION_MAPPING,
+            MODEL_FOR_IMAGE_TO_IMAGE_MAPPING,
             MODEL_FOR_INSTANCE_SEGMENTATION_MAPPING,
             MODEL_FOR_MASK_GENERATION_MAPPING,
             MODEL_FOR_MASKED_IMAGE_MODELING_MAPPING,
@@ -267,6 +270,7 @@ if TYPE_CHECKING:
             AutoModelForDocumentQuestionAnswering,
             AutoModelForImageClassification,
             AutoModelForImageSegmentation,
+            AutoModelForImageToImage,
             AutoModelForInstanceSegmentation,
             AutoModelForMaskedImageModeling,
             AutoModelForMaskedLM,
diff --git a/src/transformers/models/auto/modeling_auto.py b/src/transformers/models/auto/modeling_auto.py
index 5945c30acd..c21a3d27b4 100755
--- a/src/transformers/models/auto/modeling_auto.py
+++ b/src/transformers/models/auto/modeling_auto.py
@@ -1112,6 +1112,12 @@ MODEL_FOR_TEXT_ENCODING_MAPPING_NAMES = OrderedDict(
     ]
 )
 
+MODEL_FOR_IMAGE_TO_IMAGE_MAPPING_NAMES = OrderedDict(
+    [
+        ("swin2sr", "Swin2SRForImageSuperResolution"),
+    ]
+)
+
 MODEL_MAPPING = _LazyAutoMapping(CONFIG_MAPPING_NAMES, MODEL_MAPPING_NAMES)
 MODEL_FOR_PRETRAINING_MAPPING = _LazyAutoMapping(CONFIG_MAPPING_NAMES, MODEL_FOR_PRETRAINING_MAPPING_NAMES)
 MODEL_WITH_LM_HEAD_MAPPING = _LazyAutoMapping(CONFIG_MAPPING_NAMES, MODEL_WITH_LM_HEAD_MAPPING_NAMES)
@@ -1197,6 +1203,8 @@ MODEL_FOR_MASK_GENERATION_MAPPING = _LazyAutoMapping(CONFIG_MAPPING_NAMES, MODEL
 
 MODEL_FOR_TEXT_ENCODING_MAPPING = _LazyAutoMapping(CONFIG_MAPPING_NAMES, MODEL_FOR_TEXT_ENCODING_MAPPING_NAMES)
 
+MODEL_FOR_IMAGE_TO_IMAGE_MAPPING = _LazyAutoMapping(CONFIG_MAPPING_NAMES, MODEL_FOR_IMAGE_TO_IMAGE_MAPPING_NAMES)
+
 
 class AutoModelForMaskGeneration(_BaseAutoModelClass):
     _model_mapping = MODEL_FOR_MASK_GENERATION_MAPPING
@@ -1206,6 +1214,10 @@ class AutoModelForTextEncoding(_BaseAutoModelClass):
     _model_mapping = MODEL_FOR_TEXT_ENCODING_MAPPING
 
 
+class AutoModelForImageToImage(_BaseAutoModelClass):
+    _model_mapping = MODEL_FOR_IMAGE_TO_IMAGE_MAPPING
+
+
 class AutoModel(_BaseAutoModelClass):
     _model_mapping = MODEL_MAPPING
 
diff --git a/src/transformers/pipelines/__init__.py b/src/transformers/pipelines/__init__.py
index 746089b4e5..ae6d20265a 100755
--- a/src/transformers/pipelines/__init__.py
+++ b/src/transformers/pipelines/__init__.py
@@ -29,7 +29,7 @@ from ..image_processing_utils import BaseImageProcessor
 from ..models.auto.configuration_auto import AutoConfig
 from ..models.auto.feature_extraction_auto import FEATURE_EXTRACTOR_MAPPING, AutoFeatureExtractor
 from ..models.auto.image_processing_auto import IMAGE_PROCESSOR_MAPPING, AutoImageProcessor
-from ..models.auto.modeling_auto import AutoModelForDepthEstimation
+from ..models.auto.modeling_auto import AutoModelForDepthEstimation, AutoModelForImageToImage
 from ..models.auto.tokenization_auto import TOKENIZER_MAPPING, AutoTokenizer
 from ..tokenization_utils import PreTrainedTokenizer
 from ..utils import (
@@ -64,6 +64,7 @@ from .feature_extraction import FeatureExtractionPipeline
 from .fill_mask import FillMaskPipeline
 from .image_classification import ImageClassificationPipeline
 from .image_segmentation import ImageSegmentationPipeline
+from .image_to_image import ImageToImagePipeline
 from .image_to_text import ImageToTextPipeline
 from .mask_generation import MaskGenerationPipeline
 from .object_detection import ObjectDetectionPipeline
@@ -394,6 +395,13 @@ SUPPORTED_TASKS = {
         "default": {"model": {"pt": ("facebook/sam-vit-huge", "997b15")}},
         "type": "multimodal",
     },
+    "image-to-image": {
+        "impl": ImageToImagePipeline,
+        "tf": (),
+        "pt": (AutoModelForImageToImage,) if is_torch_available() else (),
+        "default": {"model": {"pt": ("caidas/swin2SR-classical-sr-x2-64", "4aaedcb")}},
+        "type": "image",
+    },
 }
 
 NO_FEATURE_EXTRACTOR_TASKS = set()
@@ -472,6 +480,7 @@ def check_task(task: str) -> Tuple[str, Dict, Any]:
             - `"image-classification"`
             - `"image-segmentation"`
             - `"image-to-text"`
+            - `"image-to-image"`
             - `"object-detection"`
             - `"question-answering"`
             - `"summarization"`
@@ -556,6 +565,7 @@ def pipeline(
             - `"fill-mask"`: will return a [`FillMaskPipeline`]:.
             - `"image-classification"`: will return a [`ImageClassificationPipeline`].
             - `"image-segmentation"`: will return a [`ImageSegmentationPipeline`].
+            - `"image-to-image"`: will return a [`ImageToImagePipeline`].
             - `"image-to-text"`: will return a [`ImageToTextPipeline`].
             - `"mask-generation"`: will return a [`MaskGenerationPipeline`].
             - `"object-detection"`: will return a [`ObjectDetectionPipeline`].
diff --git a/src/transformers/pipelines/image_to_image.py b/src/transformers/pipelines/image_to_image.py
new file mode 100644
index 0000000000..dbd88deb1e
--- /dev/null
+++ b/src/transformers/pipelines/image_to_image.py
@@ -0,0 +1,134 @@
+# Copyright 2023 The HuggingFace Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from typing import List, Union
+
+import numpy as np
+
+from ..utils import (
+    add_end_docstrings,
+    is_torch_available,
+    is_vision_available,
+    logging,
+    requires_backends,
+)
+from .base import PIPELINE_INIT_ARGS, Pipeline
+
+
+if is_vision_available():
+    from PIL import Image
+
+    from ..image_utils import load_image
+
+if is_torch_available():
+    from ..models.auto.modeling_auto import MODEL_FOR_IMAGE_TO_IMAGE_MAPPING_NAMES
+
+logger = logging.get_logger(__name__)
+
+
+@add_end_docstrings(PIPELINE_INIT_ARGS)
+class ImageToImagePipeline(Pipeline):
+    """
+    Image to Image pipeline using any `AutoModelForImageToImage`. This pipeline generates an image based on a previous
+    image input.
+
+    Example:
+
+    ```python
+    >>> from PIL import Image
+    >>> import requests
+
+    >>> from transformers import pipeline
+
+    >>> upscaler = pipeline("image-to-image", model="caidas/swin2SR-classical-sr-x2-64")
+    >>> img = Image.open(requests.get("http://images.cocodataset.org/val2017/000000039769.jpg", stream=True).raw)
+    >>> img = img.resize((64, 64))
+    >>> upscaled_img = upscaler(img)
+    >>> img.size
+    (64, 64)
+
+    >>> upscaled_img.size
+    (144, 144)
+    ```
+
+    This image to image pipeline can currently be loaded from [`pipeline`] using the following task identifier:
+    `"image-to-image"`.
+
+    See the list of available models on [huggingface.co/models](https://huggingface.co/models?filter=image-to-image).
+    """
+
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        requires_backends(self, "vision")
+        self.check_model_type(MODEL_FOR_IMAGE_TO_IMAGE_MAPPING_NAMES)
+
+    def _sanitize_parameters(self, **kwargs):
+        preprocess_params = {}
+        postprocess_params = {}
+        forward_params = {}
+
+        if "timeout" in kwargs:
+            preprocess_params["timeout"] = kwargs["timeout"]
+        if "head_mask" in kwargs:
+            forward_params["head_mask"] = kwargs["head_mask"]
+
+        return preprocess_params, forward_params, postprocess_params
+
+    def __call__(
+        self, images: Union[str, List[str], "Image.Image", List["Image.Image"]], **kwargs
+    ) -> Union["Image.Image", List["Image.Image"]]:
+        """
+        Transform the image(s) passed as inputs.
+
+        Args:
+            images (`str`, `List[str]`, `PIL.Image` or `List[PIL.Image]`):
+                The pipeline handles three types of images:
+
+                - A string containing a http link pointing to an image
+                - A string containing a local path to an image
+                - An image loaded in PIL directly
+
+                The pipeline accepts either a single image or a batch of images, which must then be passed as a string.
+                Images in a batch must all be in the same format: all as http links, all as local paths, or all as PIL
+                images.
+            timeout (`float`, *optional*, defaults to None):
+                The maximum time in seconds to wait for fetching images from the web. If None, no timeout is used and
+                the call may block forever.
+
+        Return:
+            An image (Image.Image) or a list of images (List["Image.Image"]) containing result(s). If the input is a
+            single image, the return will be also a single image, if the input is a list of several images, it will
+            return a list of transformed images.
+        """
+        return super().__call__(images, **kwargs)
+
+    def _forward(self, model_inputs):
+        model_outputs = self.model(**model_inputs)
+        return model_outputs
+
+    def preprocess(self, image, timeout=None):
+        image = load_image(image, timeout=timeout)
+        inputs = self.image_processor(images=[image], return_tensors="pt")
+        return inputs
+
+    def postprocess(self, model_outputs):
+        images = []
+        if "reconstruction" in model_outputs.keys():
+            outputs = model_outputs.reconstruction
+        for output in outputs:
+            output = output.data.squeeze().float().cpu().clamp_(0, 1).numpy()
+            output = np.moveaxis(output, source=0, destination=-1)
+            output = (output * 255.0).round().astype(np.uint8)  # float32 to uint8
+            images.append(Image.fromarray(output))
+
+        return images if len(images) > 1 else images[0]
diff --git a/src/transformers/utils/dummy_pt_objects.py b/src/transformers/utils/dummy_pt_objects.py
index 215ba5647b..f30b04e0e7 100644
--- a/src/transformers/utils/dummy_pt_objects.py
+++ b/src/transformers/utils/dummy_pt_objects.py
@@ -573,6 +573,9 @@ MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING = None
 MODEL_FOR_IMAGE_SEGMENTATION_MAPPING = None
 
 
+MODEL_FOR_IMAGE_TO_IMAGE_MAPPING = None
+
+
 MODEL_FOR_INSTANCE_SEGMENTATION_MAPPING = None
 
 
@@ -728,6 +731,13 @@ class AutoModelForImageSegmentation(metaclass=DummyObject):
         requires_backends(self, ["torch"])
 
 
+class AutoModelForImageToImage(metaclass=DummyObject):
+    _backends = ["torch"]
+
+    def __init__(self, *args, **kwargs):
+        requires_backends(self, ["torch"])
+
+
 class AutoModelForInstanceSegmentation(metaclass=DummyObject):
     _backends = ["torch"]
 
diff --git a/tests/pipelines/test_pipelines_image_to_image.py b/tests/pipelines/test_pipelines_image_to_image.py
new file mode 100644
index 0000000000..e9110bb692
--- /dev/null
+++ b/tests/pipelines/test_pipelines_image_to_image.py
@@ -0,0 +1,85 @@
+# Copyright 2023 The HuggingFace Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import unittest
+
+from transformers import (
+    MODEL_FOR_IMAGE_TO_IMAGE_MAPPING,
+    AutoImageProcessor,
+    AutoModelForImageToImage,
+    ImageToImagePipeline,
+    is_vision_available,
+    pipeline,
+)
+from transformers.testing_utils import (
+    is_pipeline_test,
+    require_torch,
+    require_vision,
+    slow,
+)
+
+
+if is_vision_available():
+    from PIL import Image
+
+else:
+
+    class Image:
+        @staticmethod
+        def open(*args, **kwargs):
+            pass
+
+
+@is_pipeline_test
+@require_torch
+@require_vision
+class ImageToImagePipelineTests(unittest.TestCase):
+    model_mapping = MODEL_FOR_IMAGE_TO_IMAGE_MAPPING
+    examples = [
+        Image.open("./tests/fixtures/tests_samples/COCO/000000039769.png"),
+        "http://images.cocodataset.org/val2017/000000039769.jpg",
+    ]
+
+    @require_torch
+    @require_vision
+    @slow
+    def test_pipeline(self):
+        model_id = "caidas/swin2SR-classical-sr-x2-64"
+        upscaler = pipeline("image-to-image", model=model_id)
+        upscaled_list = upscaler(self.examples)
+
+        self.assertEqual(len(upscaled_list), len(self.examples))
+        for output in upscaled_list:
+            self.assertIsInstance(output, Image.Image)
+
+        self.assertEqual(upscaled_list[0].size, (1296, 976))
+        self.assertEqual(upscaled_list[1].size, (1296, 976))
+
+    @require_torch
+    @require_vision
+    @slow
+    def test_pipeline_model_processor(self):
+        model_id = "caidas/swin2SR-classical-sr-x2-64"
+        model = AutoModelForImageToImage.from_pretrained(model_id)
+        image_processor = AutoImageProcessor.from_pretrained(model_id)
+
+        upscaler = ImageToImagePipeline(model=model, image_processor=image_processor)
+        upscaled_list = upscaler(self.examples)
+
+        self.assertEqual(len(upscaled_list), len(self.examples))
+        for output in upscaled_list:
+            self.assertIsInstance(output, Image.Image)
+
+        self.assertEqual(upscaled_list[0].size, (1296, 976))
+        self.assertEqual(upscaled_list[1].size, (1296, 976))
diff --git a/tests/test_pipeline_mixin.py b/tests/test_pipeline_mixin.py
index cf37ec5ad2..eebf4eaa25 100644
--- a/tests/test_pipeline_mixin.py
+++ b/tests/test_pipeline_mixin.py
@@ -40,6 +40,7 @@ from .pipelines.test_pipelines_feature_extraction import FeatureExtractionPipeli
 from .pipelines.test_pipelines_fill_mask import FillMaskPipelineTests
 from .pipelines.test_pipelines_image_classification import ImageClassificationPipelineTests
 from .pipelines.test_pipelines_image_segmentation import ImageSegmentationPipelineTests
+from .pipelines.test_pipelines_image_to_image import ImageToImagePipelineTests
 from .pipelines.test_pipelines_image_to_text import ImageToTextPipelineTests
 from .pipelines.test_pipelines_mask_generation import MaskGenerationPipelineTests
 from .pipelines.test_pipelines_object_detection import ObjectDetectionPipelineTests
@@ -70,6 +71,7 @@ pipeline_test_mapping = {
     "fill-mask": {"test": FillMaskPipelineTests},
     "image-classification": {"test": ImageClassificationPipelineTests},
     "image-segmentation": {"test": ImageSegmentationPipelineTests},
+    "image-to-image": {"test": ImageToImagePipelineTests},
     "image-to-text": {"test": ImageToTextPipelineTests},
     "mask-generation": {"test": MaskGenerationPipelineTests},
     "object-detection": {"test": ObjectDetectionPipelineTests},
diff --git a/utils/update_metadata.py b/utils/update_metadata.py
index f90aae69d5..9a233a082f 100644
--- a/utils/update_metadata.py
+++ b/utils/update_metadata.py
@@ -67,6 +67,7 @@ PIPELINE_TAGS_AND_AUTO_MODELS = [
     ("automatic-speech-recognition", "MODEL_FOR_CTC_MAPPING_NAMES", "AutoModelForCTC"),
     ("image-classification", "MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING_NAMES", "AutoModelForImageClassification"),
     ("image-segmentation", "MODEL_FOR_IMAGE_SEGMENTATION_MAPPING_NAMES", "AutoModelForImageSegmentation"),
+    ("image-to-image", "MODEL_FOR_IMAGE_TO_IMAGE_MAPPING_NAMES", "AutoModelForImageToImage"),
     ("fill-mask", "MODEL_FOR_MASKED_LM_MAPPING_NAMES", "AutoModelForMaskedLM"),
     ("object-detection", "MODEL_FOR_OBJECT_DETECTION_MAPPING_NAMES", "AutoModelForObjectDetection"),
     (