From 767e3518403c9e9eeb84cbbaaad168bf2a64c548 Mon Sep 17 00:00:00 2001
From: Pavel Iakubovskii <qubvel@gmail.com>
Date: Wed, 24 Apr 2024 09:50:17 +0100
Subject: [PATCH] Fix YOLOS image processor resizing (#30436)

* Add test for square image that fails

* Fix for square images

* Extend test cases

* Fix resizing in tests

* Style fixup
---
 .../models/yolos/image_processing_yolos.py    |  2 +-
 .../yolos/test_image_processing_yolos.py      | 29 +++++++++++++++----
 2 files changed, 24 insertions(+), 7 deletions(-)

diff --git a/src/transformers/models/yolos/image_processing_yolos.py b/src/transformers/models/yolos/image_processing_yolos.py
index c4e44854a0..b74819c7a1 100644
--- a/src/transformers/models/yolos/image_processing_yolos.py
+++ b/src/transformers/models/yolos/image_processing_yolos.py
@@ -120,7 +120,7 @@ def get_size_with_aspect_ratio(image_size, size, max_size=None) -> Tuple[int, in
         if max_original_size / min_original_size * size > max_size:
             size = int(round(max_size * min_original_size / max_original_size))
 
-    if width < height and width != size:
+    if width <= height and width != size:
         height = int(size * height / width)
         width = size
     elif height < width and height != size:
diff --git a/tests/models/yolos/test_image_processing_yolos.py b/tests/models/yolos/test_image_processing_yolos.py
index a1bc2ff172..f7465779b5 100644
--- a/tests/models/yolos/test_image_processing_yolos.py
+++ b/tests/models/yolos/test_image_processing_yolos.py
@@ -18,6 +18,8 @@ import json
 import pathlib
 import unittest
 
+from parameterized import parameterized
+
 from transformers.testing_utils import require_torch, require_vision, slow
 from transformers.utils import is_torch_available, is_vision_available
 
@@ -98,7 +100,7 @@ class YolosImageProcessingTester(unittest.TestCase):
                 if max_original_size / min_original_size * size > max_size:
                     size = int(round(max_size * min_original_size / max_original_size))
 
-            if width < height and width != size:
+            if width <= height and width != size:
                 height = int(size * height / width)
                 width = size
             elif height < width and height != size:
@@ -183,17 +185,32 @@ class YolosImageProcessingTest(AnnotationFormatTestMixin, ImageProcessingTestMix
             torch.allclose(encoded_images_with_method["pixel_values"], encoded_images["pixel_values"], atol=1e-4)
         )
 
-    def test_resize_max_size_respected(self):
+    @parameterized.expand(
+        [
+            ((3, 100, 1500), 1333, 800),
+            ((3, 400, 400), 1333, 800),
+            ((3, 1500, 1500), 1333, 800),
+            ((3, 800, 1333), 1333, 800),
+            ((3, 1333, 800), 1333, 800),
+            ((3, 800, 800), 400, 400),
+        ]
+    )
+    def test_resize_max_size_respected(self, image_size, longest_edge, shortest_edge):
         image_processor = self.image_processing_class(**self.image_processor_dict)
 
         # create torch tensors as image
-        image = torch.randint(0, 256, (3, 100, 1500), dtype=torch.uint8)
+        image = torch.randint(0, 256, image_size, dtype=torch.uint8)
         processed_image = image_processor(
-            image, size={"longest_edge": 1333, "shortest_edge": 800}, do_pad=False, return_tensors="pt"
+            image,
+            size={"longest_edge": longest_edge, "shortest_edge": shortest_edge},
+            do_pad=False,
+            return_tensors="pt",
         )["pixel_values"]
 
-        self.assertTrue(processed_image.shape[-1] <= 1333)
-        self.assertTrue(processed_image.shape[-2] <= 800)
+        shape = list(processed_image.shape[-2:])
+        max_size, min_size = max(shape), min(shape)
+        self.assertTrue(max_size <= 1333, f"Expected max_size <= 1333, got image shape {shape}")
+        self.assertTrue(min_size <= 800, f"Expected min_size <= 800, got image shape {shape}")
 
     @slow
     def test_call_pytorch_with_coco_detection_annotations(self):