Fix yolos resizing (#27663)

* Fix yolos resizing

* Update tests

* Add a test
This commit is contained in:
amyeroberts
2023-12-20 20:55:51 +00:00
committed by GitHub
parent 45b70384a7
commit 1d77735947
3 changed files with 45 additions and 22 deletions

View File

@@ -82,6 +82,7 @@ logger = logging.get_logger(__name__) # pylint: disable=invalid-name
SUPPORTED_ANNOTATION_FORMATS = (AnnotationFormat.COCO_DETECTION, AnnotationFormat.COCO_PANOPTIC) SUPPORTED_ANNOTATION_FORMATS = (AnnotationFormat.COCO_DETECTION, AnnotationFormat.COCO_PANOPTIC)
# From the original repo: https://github.com/facebookresearch/detr/blob/3af9fa878e73b6894ce3596450a8d9b89d918ca9/datasets/transforms.py#L76
def get_size_with_aspect_ratio(image_size, size, max_size=None) -> Tuple[int, int]: def get_size_with_aspect_ratio(image_size, size, max_size=None) -> Tuple[int, int]:
""" """
Computes the output image size given the input image size and the desired output size. Computes the output image size given the input image size and the desired output size.

View File

@@ -99,7 +99,6 @@ def get_max_height_width(
return (max_height, max_width) return (max_height, max_width)
# Copied from transformers.models.detr.image_processing_detr.get_size_with_aspect_ratio
def get_size_with_aspect_ratio(image_size, size, max_size=None) -> Tuple[int, int]: def get_size_with_aspect_ratio(image_size, size, max_size=None) -> Tuple[int, int]:
""" """
Computes the output image size given the input image size and the desired output size. Computes the output image size given the input image size and the desired output size.
@@ -119,16 +118,17 @@ def get_size_with_aspect_ratio(image_size, size, max_size=None) -> Tuple[int, in
if max_original_size / min_original_size * size > max_size: if max_original_size / min_original_size * size > max_size:
size = int(round(max_size * min_original_size / max_original_size)) size = int(round(max_size * min_original_size / max_original_size))
if (height <= width and height == size) or (width <= height and width == size): if width < height and width != size:
return height, width height = int(size * height / width)
width = size
if width < height: elif height < width and height != size:
ow = size width = int(size * width / height)
oh = int(size * height / width) height = size
else: width_mod = np.mod(width, 16)
oh = size height_mod = np.mod(height, 16)
ow = int(size * width / height) width = width - width_mod
return (oh, ow) height = height - height_mod
return (height, width)
# Copied from transformers.models.detr.image_processing_detr.get_resize_output_image_size # Copied from transformers.models.detr.image_processing_detr.get_resize_output_image_size

View File

@@ -86,18 +86,28 @@ class YolosImageProcessingTester(unittest.TestCase):
if not batched: if not batched:
image = image_inputs[0] image = image_inputs[0]
if isinstance(image, Image.Image): if isinstance(image, Image.Image):
w, h = image.size width, height = image.size
else: else:
h, w = image.shape[1], image.shape[2] height, width = image.shape[1], image.shape[2]
if w < h:
expected_height = int(self.size["shortest_edge"] * h / w) size = self.size["shortest_edge"]
expected_width = self.size["shortest_edge"] max_size = self.size.get("longest_edge", None)
elif w > h: if max_size is not None:
expected_height = self.size["shortest_edge"] min_original_size = float(min((height, width)))
expected_width = int(self.size["shortest_edge"] * w / h) max_original_size = float(max((height, width)))
else: if max_original_size / min_original_size * size > max_size:
expected_height = self.size["shortest_edge"] size = int(round(max_size * min_original_size / max_original_size))
expected_width = self.size["shortest_edge"]
if width < height and width != size:
height = int(size * height / width)
width = size
elif height < width and height != size:
width = int(size * width / height)
height = size
width_mod = width % 16
height_mod = height % 16
expected_width = width - width_mod
expected_height = height - height_mod
else: else:
expected_values = [] expected_values = []
@@ -173,6 +183,18 @@ class YolosImageProcessingTest(AnnotationFormatTestMixin, ImageProcessingTestMix
torch.allclose(encoded_images_with_method["pixel_values"], encoded_images["pixel_values"], atol=1e-4) torch.allclose(encoded_images_with_method["pixel_values"], encoded_images["pixel_values"], atol=1e-4)
) )
def test_resize_max_size_respected(self):
    """
    Check that the processed image stays within the requested `longest_edge` / `shortest_edge` bounds.

    A 3 x 100 x 1500 uint8 tensor is passed through the image processor with
    `size={"longest_edge": 1333, "shortest_edge": 800}` and `do_pad=False`, so padding cannot influence the
    shape of the returned `pixel_values`.
    """
    image_processor = self.image_processing_class(**self.image_processor_dict)

    # Extreme aspect ratio (short side 100, long side 1500): scaling the short side up to 800 without honouring
    # `longest_edge` would give a long side of 12000, far beyond 1333.
    image = torch.randint(0, 256, (3, 100, 1500), dtype=torch.uint8)
    processed_image = image_processor(
        image, size={"longest_edge": 1333, "shortest_edge": 800}, do_pad=False, return_tensors="pt"
    )["pixel_values"]

    # assertLessEqual prints both operands on failure, whereas assertTrue(a <= b) only reports "False is not true".
    self.assertLessEqual(processed_image.shape[-1], 1333)
    self.assertLessEqual(processed_image.shape[-2], 800)
@slow @slow
def test_call_pytorch_with_coco_detection_annotations(self): def test_call_pytorch_with_coco_detection_annotations(self):
# prepare image and target # prepare image and target