CLIPFeatureExtractor should resize images with kept aspect ratio (#11994)

* Resize with kept aspect ratio * Fixed failed test * Overload center_crop and resize methods instead * resize should handle non-PIL images * update slow test * Tensor => tensor Co-authored-by: patil-suraj <surajp815@gmail.com>
2021-06-10 15:10:41 +02:00
parent 472a867626
commit 9d2cee8b48
2 changed files with 56 additions and 2 deletions
--- a/src/transformers/models/clip/feature_extraction_clip.py
+++ b/src/transformers/models/clip/feature_extraction_clip.py
@@ -154,3 +154,56 @@ class CLIPFeatureExtractor(FeatureExtractionMixin, ImageFeatureExtractionMixin):
        encoded_inputs = BatchFeature(data=data, tensor_type=return_tensors)

        return encoded_inputs
+
+    def center_crop(self, image, size):
+        """
+        Crops :obj:`image` to the given size using a center crop. Note that if the image is too small to be cropped to
+        the given size, it will be padded (so the returned result has the size asked). Non-PIL input becomes PIL.
+
+        Args:
+            image (:obj:`PIL.Image.Image` or :obj:`np.ndarray` or :obj:`torch.Tensor`):
+                The image to crop.
+            size (:obj:`int` or :obj:`Tuple[int, int]`):
+                The size to crop the image to. An :obj:`int` means a square crop; a tuple is :obj:`(height, width)`.
+        """
+        self._ensure_format_supported(image)
+        if not isinstance(size, tuple):
+            size = (size, size)
+
+        if not isinstance(image, Image.Image):
+            image = self.to_pil_image(image)
+
+        image_width, image_height = image.size  # PIL reports size as (width, height)
+        crop_height, crop_width = size  # whereas `size` is unpacked as (height, width)
+
+        crop_top = int((image_height - crop_height + 1) * 0.5)  # negative when the crop is taller than the image
+        crop_left = int((image_width - crop_width + 1) * 0.5)  # negative when the crop is wider than the image
+
+        return image.crop((crop_left, crop_top, crop_left + crop_width, crop_top + crop_height))
+
+    def resize(self, image, size, resample=Image.BICUBIC):
+        """
+        Resizes :obj:`image`. Note that this will trigger a conversion of :obj:`image` to a PIL Image.
+
+        Args:
+            image (:obj:`PIL.Image.Image` or :obj:`np.ndarray` or :obj:`torch.Tensor`):
+                The image to resize.
+            size (:obj:`int` or :obj:`Tuple[int, int]`):
+                Target size. An :obj:`int` sets the shorter side (aspect ratio kept); a tuple is :obj:`(width, height)`.
+            resample (:obj:`int`, `optional`, defaults to :obj:`PIL.Image.BICUBIC`):
+                The filter to use for resampling.
+        """
+        self._ensure_format_supported(image)
+
+        if not isinstance(image, Image.Image):
+            image = self.to_pil_image(image)
+        if isinstance(size, tuple):
+            new_w, new_h = size  # explicit target, passed to PIL as (width, height)
+        else:
+            width, height = image.size
+            short, long = (width, height) if width <= height else (height, width)
+            if short == size:  # shorter side already matches: the image is returned as is
+                return image
+            new_short, new_long = size, int(size * long / short)  # int() truncates the scaled longer side
+            new_w, new_h = (new_short, new_long) if width <= height else (new_long, new_short)
+        return image.resize((new_w, new_h), resample)
--- a/tests/test_modeling_clip.py
+++ b/tests/test_modeling_clip.py
@@ -544,6 +544,7 @@ class CLIPModelIntegrationTest(unittest.TestCase):
        ).to(torch_device)

        # forward pass
+        with torch.no_grad():
            outputs = model(**inputs)

        # verify the logits
@@ -556,6 +557,6 @@ class CLIPModelIntegrationTest(unittest.TestCase):
            torch.Size((inputs.input_ids.shape[0], inputs.pixel_values.shape[0])),
        )

-        expected_logits = torch.tensor([[24.5056, 18.8076]], device=torch_device)
+        expected_logits = torch.tensor([[24.5701, 19.3049]], device=torch_device)

        self.assertTrue(torch.allclose(outputs.logits_per_image, expected_logits, atol=1e-3))