fix the get_size_with_aspect_ratio in max_size situation (#30902)

* fix the get_size_with_aspect_ratio in max_size situation

* make fix-up

* add more general solution

* consider when max_size is not defined

* fix typo

* fix typo

* simple fix

* fix error

* fix if else error

* fix error of size overwrite

* fix yolos image processing

* fix detr image processing

* make

* add longest related test script

* Update src/transformers/models/yolos/image_processing_yolos.py

Co-authored-by: amyeroberts <22614925+amyeroberts@users.noreply.github.com>

* add more test

* add test script about longest size

* remove deprecated

---------

Co-authored-by: amyeroberts <22614925+amyeroberts@users.noreply.github.com>
This commit is contained in:
Sangbum Daniel Choi
2024-06-04 00:12:08 +09:00
committed by GitHub
parent e4628434d8
commit 874ac129bb
10 changed files with 309 additions and 44 deletions

View File

@@ -100,21 +100,29 @@ def get_size_with_aspect_ratio(image_size, size, max_size=None) -> Tuple[int, in
The maximum allowed output size.
"""
height, width = image_size
raw_size = None
if max_size is not None:
min_original_size = float(min((height, width)))
max_original_size = float(max((height, width)))
if max_original_size / min_original_size * size > max_size:
size = int(round(max_size * min_original_size / max_original_size))
raw_size = max_size * min_original_size / max_original_size
size = int(round(raw_size))
if (height <= width and height == size) or (width <= height and width == size):
return height, width
if width < height:
oh, ow = height, width
elif width < height:
ow = size
if max_size is not None and raw_size is not None:
oh = int(raw_size * height / width)
else:
oh = int(size * height / width)
else:
oh = size
if max_size is not None and raw_size is not None:
ow = int(raw_size * width / height)
else:
ow = int(size * width / height)
return (oh, ow)

View File

@@ -98,21 +98,29 @@ def get_size_with_aspect_ratio(image_size, size, max_size=None) -> Tuple[int, in
The maximum allowed output size.
"""
height, width = image_size
raw_size = None
if max_size is not None:
min_original_size = float(min((height, width)))
max_original_size = float(max((height, width)))
if max_original_size / min_original_size * size > max_size:
size = int(round(max_size * min_original_size / max_original_size))
raw_size = max_size * min_original_size / max_original_size
size = int(round(raw_size))
if (height <= width and height == size) or (width <= height and width == size):
return height, width
if width < height:
oh, ow = height, width
elif width < height:
ow = size
if max_size is not None and raw_size is not None:
oh = int(raw_size * height / width)
else:
oh = int(size * height / width)
else:
oh = size
if max_size is not None and raw_size is not None:
ow = int(raw_size * width / height)
else:
ow = int(size * width / height)
return (oh, ow)

View File

@@ -91,21 +91,29 @@ def get_size_with_aspect_ratio(image_size, size, max_size=None) -> Tuple[int, in
The maximum allowed output size.
"""
height, width = image_size
raw_size = None
if max_size is not None:
min_original_size = float(min((height, width)))
max_original_size = float(max((height, width)))
if max_original_size / min_original_size * size > max_size:
size = int(round(max_size * min_original_size / max_original_size))
raw_size = max_size * min_original_size / max_original_size
size = int(round(raw_size))
if (height <= width and height == size) or (width <= height and width == size):
return height, width
if width < height:
oh, ow = height, width
elif width < height:
ow = size
if max_size is not None and raw_size is not None:
oh = int(raw_size * height / width)
else:
oh = int(size * height / width)
else:
oh = size
if max_size is not None and raw_size is not None:
ow = int(raw_size * width / height)
else:
ow = int(size * width / height)
return (oh, ow)

View File

@@ -98,21 +98,29 @@ def get_size_with_aspect_ratio(image_size, size, max_size=None) -> Tuple[int, in
The maximum allowed output size.
"""
height, width = image_size
raw_size = None
if max_size is not None:
min_original_size = float(min((height, width)))
max_original_size = float(max((height, width)))
if max_original_size / min_original_size * size > max_size:
size = int(round(max_size * min_original_size / max_original_size))
raw_size = max_size * min_original_size / max_original_size
size = int(round(raw_size))
if (height <= width and height == size) or (width <= height and width == size):
return height, width
if width < height:
oh, ow = height, width
elif width < height:
ow = size
if max_size is not None and raw_size is not None:
oh = int(raw_size * height / width)
else:
oh = int(size * height / width)
else:
oh = size
if max_size is not None and raw_size is not None:
ow = int(raw_size * width / height)
else:
ow = int(size * width / height)
return (oh, ow)

View File

@@ -105,21 +105,29 @@ def get_size_with_aspect_ratio(image_size, size, max_size=None) -> Tuple[int, in
The maximum allowed output size.
"""
height, width = image_size
raw_size = None
if max_size is not None:
min_original_size = float(min((height, width)))
max_original_size = float(max((height, width)))
if max_original_size / min_original_size * size > max_size:
size = int(round(max_size * min_original_size / max_original_size))
raw_size = max_size * min_original_size / max_original_size
size = int(round(raw_size))
if (height <= width and height == size) or (width <= height and width == size):
return height, width
if width < height:
oh, ow = height, width
elif width < height:
ow = size
if max_size is not None and raw_size is not None:
oh = int(raw_size * height / width)
else:
oh = int(size * height / width)
else:
oh = size
if max_size is not None and raw_size is not None:
ow = int(raw_size * width / height)
else:
ow = int(size * width / height)
return (oh, ow)

View File

@@ -101,9 +101,11 @@ def get_max_height_width(
return (max_height, max_width)
def get_size_with_aspect_ratio(image_size, size, max_size=None) -> Tuple[int, int]:
def get_size_with_aspect_ratio(
image_size: Tuple[int, int], size: int, max_size: Optional[int] = None, mod_size: int = 16
) -> Tuple[int, int]:
"""
Computes the output image size given the input image size and the desired output size.
Computes the output image size given the input image size and the desired output size with multiple of divisible_size.
Args:
image_size (`Tuple[int, int]`):
@@ -112,25 +114,40 @@ def get_size_with_aspect_ratio(image_size, size, max_size=None) -> Tuple[int, in
The desired output size.
max_size (`int`, *optional*):
The maximum allowed output size.
mod_size (`int`, *optional*):
The size to make multiple of mod_size.
"""
height, width = image_size
raw_size = None
if max_size is not None:
min_original_size = float(min((height, width)))
max_original_size = float(max((height, width)))
if max_original_size / min_original_size * size > max_size:
size = int(round(max_size * min_original_size / max_original_size))
raw_size = max_size * min_original_size / max_original_size
size = int(round(raw_size))
if width <= height and width != size:
height = int(size * height / width)
width = size
elif height < width and height != size:
width = int(size * width / height)
height = size
width_mod = np.mod(width, 16)
height_mod = np.mod(height, 16)
width = width - width_mod
height = height - height_mod
return (height, width)
if width < height:
ow = size
if max_size is not None and raw_size is not None:
oh = int(raw_size * height / width)
else:
oh = int(size * height / width)
elif (height <= width and height == size) or (width <= height and width == size):
oh, ow = height, width
else:
oh = size
if max_size is not None and raw_size is not None:
ow = int(raw_size * width / height)
else:
ow = int(size * width / height)
if mod_size is not None:
ow_mod = np.mod(ow, mod_size)
oh_mod = np.mod(oh, mod_size)
ow = ow - ow_mod
oh = oh - oh_mod
return (oh, ow)
# Copied from transformers.models.detr.image_processing_detr.get_image_size_for_max_height_width

View File

@@ -537,3 +537,55 @@ class ConditionalDetrImageProcessingTest(AnnotationFormatTestMixin, ImageProcess
)
inputs = image_processor(images=[image_1, image_2], return_tensors="pt")
self.assertEqual(inputs["pixel_values"].shape, torch.Size([2, 3, 150, 100]))
def test_longest_edge_shortest_edge_resizing_strategy(self):
image_1 = torch.ones([958, 653, 3], dtype=torch.uint8)
# max size is set; width < height;
# do_pad=False, longest_edge=640, shortest_edge=640, image=958x653 -> 640x436
image_processor = ConditionalDetrImageProcessor(
size={"longest_edge": 640, "shortest_edge": 640},
do_pad=False,
)
inputs = image_processor(images=[image_1], return_tensors="pt")
self.assertEqual(inputs["pixel_values"].shape, torch.Size([1, 3, 640, 436]))
image_2 = torch.ones([653, 958, 3], dtype=torch.uint8)
# max size is set; height < width;
# do_pad=False, longest_edge=640, shortest_edge=640, image=653x958 -> 436x640
image_processor = ConditionalDetrImageProcessor(
size={"longest_edge": 640, "shortest_edge": 640},
do_pad=False,
)
inputs = image_processor(images=[image_2], return_tensors="pt")
self.assertEqual(inputs["pixel_values"].shape, torch.Size([1, 3, 436, 640]))
image_3 = torch.ones([100, 120, 3], dtype=torch.uint8)
# max size is set; width == size; height > max_size;
# do_pad=False, longest_edge=118, shortest_edge=100, image=120x100 -> 118x98
image_processor = ConditionalDetrImageProcessor(
size={"longest_edge": 118, "shortest_edge": 100},
do_pad=False,
)
inputs = image_processor(images=[image_3], return_tensors="pt")
self.assertEqual(inputs["pixel_values"].shape, torch.Size([1, 3, 98, 118]))
image_4 = torch.ones([128, 50, 3], dtype=torch.uint8)
# max size is set; height == size; width < max_size;
# do_pad=False, longest_edge=256, shortest_edge=50, image=50x128 -> 50x128
image_processor = ConditionalDetrImageProcessor(
size={"longest_edge": 256, "shortest_edge": 50},
do_pad=False,
)
inputs = image_processor(images=[image_4], return_tensors="pt")
self.assertEqual(inputs["pixel_values"].shape, torch.Size([1, 3, 128, 50]))
image_5 = torch.ones([50, 50, 3], dtype=torch.uint8)
# max size is set; height == width; width < max_size;
# do_pad=False, longest_edge=117, shortest_edge=50, image=50x50 -> 50x50
image_processor = ConditionalDetrImageProcessor(
size={"longest_edge": 117, "shortest_edge": 50},
do_pad=False,
)
inputs = image_processor(images=[image_5], return_tensors="pt")
self.assertEqual(inputs["pixel_values"].shape, torch.Size([1, 3, 50, 50]))

View File

@@ -539,3 +539,55 @@ class DeformableDetrImageProcessingTest(AnnotationFormatTestMixin, ImageProcessi
)
inputs = image_processor(images=[image_1, image_2], return_tensors="pt")
self.assertEqual(inputs["pixel_values"].shape, torch.Size([2, 3, 150, 100]))
def test_longest_edge_shortest_edge_resizing_strategy(self):
image_1 = torch.ones([958, 653, 3], dtype=torch.uint8)
# max size is set; width < height;
# do_pad=False, longest_edge=640, shortest_edge=640, image=958x653 -> 640x436
image_processor = DeformableDetrImageProcessor(
size={"longest_edge": 640, "shortest_edge": 640},
do_pad=False,
)
inputs = image_processor(images=[image_1], return_tensors="pt")
self.assertEqual(inputs["pixel_values"].shape, torch.Size([1, 3, 640, 436]))
image_2 = torch.ones([653, 958, 3], dtype=torch.uint8)
# max size is set; height < width;
# do_pad=False, longest_edge=640, shortest_edge=640, image=653x958 -> 436x640
image_processor = DeformableDetrImageProcessor(
size={"longest_edge": 640, "shortest_edge": 640},
do_pad=False,
)
inputs = image_processor(images=[image_2], return_tensors="pt")
self.assertEqual(inputs["pixel_values"].shape, torch.Size([1, 3, 436, 640]))
image_3 = torch.ones([100, 120, 3], dtype=torch.uint8)
# max size is set; width == size; height > max_size;
# do_pad=False, longest_edge=118, shortest_edge=100, image=120x100 -> 118x98
image_processor = DeformableDetrImageProcessor(
size={"longest_edge": 118, "shortest_edge": 100},
do_pad=False,
)
inputs = image_processor(images=[image_3], return_tensors="pt")
self.assertEqual(inputs["pixel_values"].shape, torch.Size([1, 3, 98, 118]))
image_4 = torch.ones([128, 50, 3], dtype=torch.uint8)
# max size is set; height == size; width < max_size;
# do_pad=False, longest_edge=256, shortest_edge=50, image=50x128 -> 50x128
image_processor = DeformableDetrImageProcessor(
size={"longest_edge": 256, "shortest_edge": 50},
do_pad=False,
)
inputs = image_processor(images=[image_4], return_tensors="pt")
self.assertEqual(inputs["pixel_values"].shape, torch.Size([1, 3, 128, 50]))
image_5 = torch.ones([50, 50, 3], dtype=torch.uint8)
# max size is set; height == width; width < max_size;
# do_pad=False, longest_edge=117, shortest_edge=50, image=50x50 -> 50x50
image_processor = DeformableDetrImageProcessor(
size={"longest_edge": 117, "shortest_edge": 50},
do_pad=False,
)
inputs = image_processor(images=[image_5], return_tensors="pt")
self.assertEqual(inputs["pixel_values"].shape, torch.Size([1, 3, 50, 50]))

View File

@@ -593,3 +593,55 @@ class DetrImageProcessingTest(AnnotationFormatTestMixin, ImageProcessingTestMixi
)
inputs = image_processor(images=[image_1, image_2], return_tensors="pt")
self.assertEqual(inputs["pixel_values"].shape, torch.Size([2, 3, 150, 100]))
def test_longest_edge_shortest_edge_resizing_strategy(self):
image_1 = torch.ones([958, 653, 3], dtype=torch.uint8)
# max size is set; width < height;
# do_pad=False, longest_edge=640, shortest_edge=640, image=958x653 -> 640x436
image_processor = DetrImageProcessor(
size={"longest_edge": 640, "shortest_edge": 640},
do_pad=False,
)
inputs = image_processor(images=[image_1], return_tensors="pt")
self.assertEqual(inputs["pixel_values"].shape, torch.Size([1, 3, 640, 436]))
image_2 = torch.ones([653, 958, 3], dtype=torch.uint8)
# max size is set; height < width;
# do_pad=False, longest_edge=640, shortest_edge=640, image=653x958 -> 436x640
image_processor = DetrImageProcessor(
size={"longest_edge": 640, "shortest_edge": 640},
do_pad=False,
)
inputs = image_processor(images=[image_2], return_tensors="pt")
self.assertEqual(inputs["pixel_values"].shape, torch.Size([1, 3, 436, 640]))
image_3 = torch.ones([100, 120, 3], dtype=torch.uint8)
# max size is set; width == size; height > max_size;
# do_pad=False, longest_edge=118, shortest_edge=100, image=120x100 -> 118x98
image_processor = DetrImageProcessor(
size={"longest_edge": 118, "shortest_edge": 100},
do_pad=False,
)
inputs = image_processor(images=[image_3], return_tensors="pt")
self.assertEqual(inputs["pixel_values"].shape, torch.Size([1, 3, 98, 118]))
image_4 = torch.ones([128, 50, 3], dtype=torch.uint8)
# max size is set; height == size; width < max_size;
# do_pad=False, longest_edge=256, shortest_edge=50, image=50x128 -> 50x128
image_processor = DetrImageProcessor(
size={"longest_edge": 256, "shortest_edge": 50},
do_pad=False,
)
inputs = image_processor(images=[image_4], return_tensors="pt")
self.assertEqual(inputs["pixel_values"].shape, torch.Size([1, 3, 128, 50]))
image_5 = torch.ones([50, 50, 3], dtype=torch.uint8)
# max size is set; height == width; width < max_size;
# do_pad=False, longest_edge=117, shortest_edge=50, image=50x50 -> 50x50
image_processor = DetrImageProcessor(
size={"longest_edge": 117, "shortest_edge": 50},
do_pad=False,
)
inputs = image_processor(images=[image_5], return_tensors="pt")
self.assertEqual(inputs["pixel_values"].shape, torch.Size([1, 3, 50, 50]))

View File

@@ -575,3 +575,55 @@ class GroundingDinoImageProcessingTest(AnnotationFormatTestMixin, ImageProcessin
)
inputs = image_processor(images=[image_1, image_2], return_tensors="pt")
self.assertEqual(inputs["pixel_values"].shape, torch.Size([2, 3, 150, 100]))
def test_longest_edge_shortest_edge_resizing_strategy(self):
image_1 = torch.ones([958, 653, 3], dtype=torch.uint8)
# max size is set; width < height;
# do_pad=False, longest_edge=640, shortest_edge=640, image=958x653 -> 640x436
image_processor = GroundingDinoImageProcessor(
size={"longest_edge": 640, "shortest_edge": 640},
do_pad=False,
)
inputs = image_processor(images=[image_1], return_tensors="pt")
self.assertEqual(inputs["pixel_values"].shape, torch.Size([1, 3, 640, 436]))
image_2 = torch.ones([653, 958, 3], dtype=torch.uint8)
# max size is set; height < width;
# do_pad=False, longest_edge=640, shortest_edge=640, image=653x958 -> 436x640
image_processor = GroundingDinoImageProcessor(
size={"longest_edge": 640, "shortest_edge": 640},
do_pad=False,
)
inputs = image_processor(images=[image_2], return_tensors="pt")
self.assertEqual(inputs["pixel_values"].shape, torch.Size([1, 3, 436, 640]))
image_3 = torch.ones([100, 120, 3], dtype=torch.uint8)
# max size is set; width == size; height > max_size;
# do_pad=False, longest_edge=118, shortest_edge=100, image=120x100 -> 118x98
image_processor = GroundingDinoImageProcessor(
size={"longest_edge": 118, "shortest_edge": 100},
do_pad=False,
)
inputs = image_processor(images=[image_3], return_tensors="pt")
self.assertEqual(inputs["pixel_values"].shape, torch.Size([1, 3, 98, 118]))
image_4 = torch.ones([128, 50, 3], dtype=torch.uint8)
# max size is set; height == size; width < max_size;
# do_pad=False, longest_edge=256, shortest_edge=50, image=50x128 -> 50x128
image_processor = GroundingDinoImageProcessor(
size={"longest_edge": 256, "shortest_edge": 50},
do_pad=False,
)
inputs = image_processor(images=[image_4], return_tensors="pt")
self.assertEqual(inputs["pixel_values"].shape, torch.Size([1, 3, 128, 50]))
image_5 = torch.ones([50, 50, 3], dtype=torch.uint8)
# max size is set; height == width; width < max_size;
# do_pad=False, longest_edge=117, shortest_edge=50, image=50x50 -> 50x50
image_processor = GroundingDinoImageProcessor(
size={"longest_edge": 117, "shortest_edge": 50},
do_pad=False,
)
inputs = image_processor(images=[image_5], return_tensors="pt")
self.assertEqual(inputs["pixel_values"].shape, torch.Size([1, 3, 50, 50]))