From 767e3518403c9e9eeb84cbbaaad168bf2a64c548 Mon Sep 17 00:00:00 2001 From: Pavel Iakubovskii Date: Wed, 24 Apr 2024 09:50:17 +0100 Subject: [PATCH] Fix YOLOS image processor resizing (#30436) * Add test for square image that fails * Fix for square images * Extend test cases * Fix resizing in tests * Style fixup --- .../models/yolos/image_processing_yolos.py | 2 +- .../yolos/test_image_processing_yolos.py | 29 +++++++++++++++---- 2 files changed, 24 insertions(+), 7 deletions(-) diff --git a/src/transformers/models/yolos/image_processing_yolos.py b/src/transformers/models/yolos/image_processing_yolos.py index c4e44854a0..b74819c7a1 100644 --- a/src/transformers/models/yolos/image_processing_yolos.py +++ b/src/transformers/models/yolos/image_processing_yolos.py @@ -120,7 +120,7 @@ def get_size_with_aspect_ratio(image_size, size, max_size=None) -> Tuple[int, in if max_original_size / min_original_size * size > max_size: size = int(round(max_size * min_original_size / max_original_size)) - if width < height and width != size: + if width <= height and width != size: height = int(size * height / width) width = size elif height < width and height != size: diff --git a/tests/models/yolos/test_image_processing_yolos.py b/tests/models/yolos/test_image_processing_yolos.py index a1bc2ff172..f7465779b5 100644 --- a/tests/models/yolos/test_image_processing_yolos.py +++ b/tests/models/yolos/test_image_processing_yolos.py @@ -18,6 +18,8 @@ import json import pathlib import unittest +from parameterized import parameterized + from transformers.testing_utils import require_torch, require_vision, slow from transformers.utils import is_torch_available, is_vision_available @@ -98,7 +100,7 @@ class YolosImageProcessingTester(unittest.TestCase): if max_original_size / min_original_size * size > max_size: size = int(round(max_size * min_original_size / max_original_size)) - if width < height and width != size: + if width <= height and width != size: height = int(size * height / width) width = size elif height < width and height != size: @@ -183,17 +185,32 @@ class YolosImageProcessingTest(AnnotationFormatTestMixin, ImageProcessingTestMix torch.allclose(encoded_images_with_method["pixel_values"], encoded_images["pixel_values"], atol=1e-4) ) - def test_resize_max_size_respected(self): + @parameterized.expand( + [ + ((3, 100, 1500), 1333, 800), + ((3, 400, 400), 1333, 800), + ((3, 1500, 1500), 1333, 800), + ((3, 800, 1333), 1333, 800), + ((3, 1333, 800), 1333, 800), + ((3, 800, 800), 400, 400), + ] + ) + def test_resize_max_size_respected(self, image_size, longest_edge, shortest_edge): image_processor = self.image_processing_class(**self.image_processor_dict) # create torch tensors as image - image = torch.randint(0, 256, (3, 100, 1500), dtype=torch.uint8) + image = torch.randint(0, 256, image_size, dtype=torch.uint8) processed_image = image_processor( - image, size={"longest_edge": 1333, "shortest_edge": 800}, do_pad=False, return_tensors="pt" + image, + size={"longest_edge": longest_edge, "shortest_edge": shortest_edge}, + do_pad=False, + return_tensors="pt", )["pixel_values"] - self.assertTrue(processed_image.shape[-1] <= 1333) - self.assertTrue(processed_image.shape[-2] <= 800) + shape = list(processed_image.shape[-2:]) + max_size, min_size = max(shape), min(shape) + self.assertTrue(max_size <= 1333, f"Expected max_size <= 1333, got image shape {shape}") + self.assertTrue(min_size <= 800, f"Expected min_size <= 800, got image shape {shape}") @slow def test_call_pytorch_with_coco_detection_annotations(self):