fix the get_size_with_aspect_ratio in max_size situation (#30902)
* fix the get_size_with_aspect_ratio in max_size situation
* make fix-up
* add more general solution
* consider when max_size is not defined
* fix typo
* fix typo
* simple fix
* fix error
* fix if else error
* fix error of size overwrite
* fix yolos image processing
* fix detr image processing
* make
* add longest related test script
* Update src/transformers/models/yolos/image_processing_yolos.py
  Co-authored-by: amyeroberts <22614925+amyeroberts@users.noreply.github.com>
* add more test
* add test script about longest size
* remove deprecated
---------
Co-authored-by: amyeroberts <22614925+amyeroberts@users.noreply.github.com>
This commit is contained in:
committed by
GitHub
parent
e4628434d8
commit
874ac129bb
@@ -100,21 +100,29 @@ def get_size_with_aspect_ratio(image_size, size, max_size=None) -> Tuple[int, in
|
|||||||
The maximum allowed output size.
|
The maximum allowed output size.
|
||||||
"""
|
"""
|
||||||
height, width = image_size
|
height, width = image_size
|
||||||
|
raw_size = None
|
||||||
if max_size is not None:
|
if max_size is not None:
|
||||||
min_original_size = float(min((height, width)))
|
min_original_size = float(min((height, width)))
|
||||||
max_original_size = float(max((height, width)))
|
max_original_size = float(max((height, width)))
|
||||||
if max_original_size / min_original_size * size > max_size:
|
if max_original_size / min_original_size * size > max_size:
|
||||||
size = int(round(max_size * min_original_size / max_original_size))
|
raw_size = max_size * min_original_size / max_original_size
|
||||||
|
size = int(round(raw_size))
|
||||||
|
|
||||||
if (height <= width and height == size) or (width <= height and width == size):
|
if (height <= width and height == size) or (width <= height and width == size):
|
||||||
return height, width
|
oh, ow = height, width
|
||||||
|
elif width < height:
|
||||||
if width < height:
|
|
||||||
ow = size
|
ow = size
|
||||||
oh = int(size * height / width)
|
if max_size is not None and raw_size is not None:
|
||||||
|
oh = int(raw_size * height / width)
|
||||||
|
else:
|
||||||
|
oh = int(size * height / width)
|
||||||
else:
|
else:
|
||||||
oh = size
|
oh = size
|
||||||
ow = int(size * width / height)
|
if max_size is not None and raw_size is not None:
|
||||||
|
ow = int(raw_size * width / height)
|
||||||
|
else:
|
||||||
|
ow = int(size * width / height)
|
||||||
|
|
||||||
return (oh, ow)
|
return (oh, ow)
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -98,21 +98,29 @@ def get_size_with_aspect_ratio(image_size, size, max_size=None) -> Tuple[int, in
|
|||||||
The maximum allowed output size.
|
The maximum allowed output size.
|
||||||
"""
|
"""
|
||||||
height, width = image_size
|
height, width = image_size
|
||||||
|
raw_size = None
|
||||||
if max_size is not None:
|
if max_size is not None:
|
||||||
min_original_size = float(min((height, width)))
|
min_original_size = float(min((height, width)))
|
||||||
max_original_size = float(max((height, width)))
|
max_original_size = float(max((height, width)))
|
||||||
if max_original_size / min_original_size * size > max_size:
|
if max_original_size / min_original_size * size > max_size:
|
||||||
size = int(round(max_size * min_original_size / max_original_size))
|
raw_size = max_size * min_original_size / max_original_size
|
||||||
|
size = int(round(raw_size))
|
||||||
|
|
||||||
if (height <= width and height == size) or (width <= height and width == size):
|
if (height <= width and height == size) or (width <= height and width == size):
|
||||||
return height, width
|
oh, ow = height, width
|
||||||
|
elif width < height:
|
||||||
if width < height:
|
|
||||||
ow = size
|
ow = size
|
||||||
oh = int(size * height / width)
|
if max_size is not None and raw_size is not None:
|
||||||
|
oh = int(raw_size * height / width)
|
||||||
|
else:
|
||||||
|
oh = int(size * height / width)
|
||||||
else:
|
else:
|
||||||
oh = size
|
oh = size
|
||||||
ow = int(size * width / height)
|
if max_size is not None and raw_size is not None:
|
||||||
|
ow = int(raw_size * width / height)
|
||||||
|
else:
|
||||||
|
ow = int(size * width / height)
|
||||||
|
|
||||||
return (oh, ow)
|
return (oh, ow)
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -91,21 +91,29 @@ def get_size_with_aspect_ratio(image_size, size, max_size=None) -> Tuple[int, in
|
|||||||
The maximum allowed output size.
|
The maximum allowed output size.
|
||||||
"""
|
"""
|
||||||
height, width = image_size
|
height, width = image_size
|
||||||
|
raw_size = None
|
||||||
if max_size is not None:
|
if max_size is not None:
|
||||||
min_original_size = float(min((height, width)))
|
min_original_size = float(min((height, width)))
|
||||||
max_original_size = float(max((height, width)))
|
max_original_size = float(max((height, width)))
|
||||||
if max_original_size / min_original_size * size > max_size:
|
if max_original_size / min_original_size * size > max_size:
|
||||||
size = int(round(max_size * min_original_size / max_original_size))
|
raw_size = max_size * min_original_size / max_original_size
|
||||||
|
size = int(round(raw_size))
|
||||||
|
|
||||||
if (height <= width and height == size) or (width <= height and width == size):
|
if (height <= width and height == size) or (width <= height and width == size):
|
||||||
return height, width
|
oh, ow = height, width
|
||||||
|
elif width < height:
|
||||||
if width < height:
|
|
||||||
ow = size
|
ow = size
|
||||||
oh = int(size * height / width)
|
if max_size is not None and raw_size is not None:
|
||||||
|
oh = int(raw_size * height / width)
|
||||||
|
else:
|
||||||
|
oh = int(size * height / width)
|
||||||
else:
|
else:
|
||||||
oh = size
|
oh = size
|
||||||
ow = int(size * width / height)
|
if max_size is not None and raw_size is not None:
|
||||||
|
ow = int(raw_size * width / height)
|
||||||
|
else:
|
||||||
|
ow = int(size * width / height)
|
||||||
|
|
||||||
return (oh, ow)
|
return (oh, ow)
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -98,21 +98,29 @@ def get_size_with_aspect_ratio(image_size, size, max_size=None) -> Tuple[int, in
|
|||||||
The maximum allowed output size.
|
The maximum allowed output size.
|
||||||
"""
|
"""
|
||||||
height, width = image_size
|
height, width = image_size
|
||||||
|
raw_size = None
|
||||||
if max_size is not None:
|
if max_size is not None:
|
||||||
min_original_size = float(min((height, width)))
|
min_original_size = float(min((height, width)))
|
||||||
max_original_size = float(max((height, width)))
|
max_original_size = float(max((height, width)))
|
||||||
if max_original_size / min_original_size * size > max_size:
|
if max_original_size / min_original_size * size > max_size:
|
||||||
size = int(round(max_size * min_original_size / max_original_size))
|
raw_size = max_size * min_original_size / max_original_size
|
||||||
|
size = int(round(raw_size))
|
||||||
|
|
||||||
if (height <= width and height == size) or (width <= height and width == size):
|
if (height <= width and height == size) or (width <= height and width == size):
|
||||||
return height, width
|
oh, ow = height, width
|
||||||
|
elif width < height:
|
||||||
if width < height:
|
|
||||||
ow = size
|
ow = size
|
||||||
oh = int(size * height / width)
|
if max_size is not None and raw_size is not None:
|
||||||
|
oh = int(raw_size * height / width)
|
||||||
|
else:
|
||||||
|
oh = int(size * height / width)
|
||||||
else:
|
else:
|
||||||
oh = size
|
oh = size
|
||||||
ow = int(size * width / height)
|
if max_size is not None and raw_size is not None:
|
||||||
|
ow = int(raw_size * width / height)
|
||||||
|
else:
|
||||||
|
ow = int(size * width / height)
|
||||||
|
|
||||||
return (oh, ow)
|
return (oh, ow)
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -105,21 +105,29 @@ def get_size_with_aspect_ratio(image_size, size, max_size=None) -> Tuple[int, in
|
|||||||
The maximum allowed output size.
|
The maximum allowed output size.
|
||||||
"""
|
"""
|
||||||
height, width = image_size
|
height, width = image_size
|
||||||
|
raw_size = None
|
||||||
if max_size is not None:
|
if max_size is not None:
|
||||||
min_original_size = float(min((height, width)))
|
min_original_size = float(min((height, width)))
|
||||||
max_original_size = float(max((height, width)))
|
max_original_size = float(max((height, width)))
|
||||||
if max_original_size / min_original_size * size > max_size:
|
if max_original_size / min_original_size * size > max_size:
|
||||||
size = int(round(max_size * min_original_size / max_original_size))
|
raw_size = max_size * min_original_size / max_original_size
|
||||||
|
size = int(round(raw_size))
|
||||||
|
|
||||||
if (height <= width and height == size) or (width <= height and width == size):
|
if (height <= width and height == size) or (width <= height and width == size):
|
||||||
return height, width
|
oh, ow = height, width
|
||||||
|
elif width < height:
|
||||||
if width < height:
|
|
||||||
ow = size
|
ow = size
|
||||||
oh = int(size * height / width)
|
if max_size is not None and raw_size is not None:
|
||||||
|
oh = int(raw_size * height / width)
|
||||||
|
else:
|
||||||
|
oh = int(size * height / width)
|
||||||
else:
|
else:
|
||||||
oh = size
|
oh = size
|
||||||
ow = int(size * width / height)
|
if max_size is not None and raw_size is not None:
|
||||||
|
ow = int(raw_size * width / height)
|
||||||
|
else:
|
||||||
|
ow = int(size * width / height)
|
||||||
|
|
||||||
return (oh, ow)
|
return (oh, ow)
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -101,9 +101,11 @@ def get_max_height_width(
|
|||||||
return (max_height, max_width)
|
return (max_height, max_width)
|
||||||
|
|
||||||
|
|
||||||
def get_size_with_aspect_ratio(image_size, size, max_size=None) -> Tuple[int, int]:
|
def get_size_with_aspect_ratio(
|
||||||
|
image_size: Tuple[int, int], size: int, max_size: Optional[int] = None, mod_size: int = 16
|
||||||
|
) -> Tuple[int, int]:
|
||||||
"""
|
"""
|
||||||
Computes the output image size given the input image size and the desired output size.
|
Computes the output image size given the input image size and the desired output size, with height and width rounded down to a multiple of `mod_size`.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
image_size (`Tuple[int, int]`):
|
image_size (`Tuple[int, int]`):
|
||||||
@@ -112,25 +114,40 @@ def get_size_with_aspect_ratio(image_size, size, max_size=None) -> Tuple[int, in
|
|||||||
The desired output size.
|
The desired output size.
|
||||||
max_size (`int`, *optional*):
|
max_size (`int`, *optional*):
|
||||||
The maximum allowed output size.
|
The maximum allowed output size.
|
||||||
|
mod_size (`int`, *optional*):
|
||||||
|
If set, the output height and width are each rounded down to a multiple of this value.
|
||||||
"""
|
"""
|
||||||
height, width = image_size
|
height, width = image_size
|
||||||
|
raw_size = None
|
||||||
if max_size is not None:
|
if max_size is not None:
|
||||||
min_original_size = float(min((height, width)))
|
min_original_size = float(min((height, width)))
|
||||||
max_original_size = float(max((height, width)))
|
max_original_size = float(max((height, width)))
|
||||||
if max_original_size / min_original_size * size > max_size:
|
if max_original_size / min_original_size * size > max_size:
|
||||||
size = int(round(max_size * min_original_size / max_original_size))
|
raw_size = max_size * min_original_size / max_original_size
|
||||||
|
size = int(round(raw_size))
|
||||||
|
|
||||||
if width <= height and width != size:
|
if width < height:
|
||||||
height = int(size * height / width)
|
ow = size
|
||||||
width = size
|
if max_size is not None and raw_size is not None:
|
||||||
elif height < width and height != size:
|
oh = int(raw_size * height / width)
|
||||||
width = int(size * width / height)
|
else:
|
||||||
height = size
|
oh = int(size * height / width)
|
||||||
width_mod = np.mod(width, 16)
|
elif (height <= width and height == size) or (width <= height and width == size):
|
||||||
height_mod = np.mod(height, 16)
|
oh, ow = height, width
|
||||||
width = width - width_mod
|
else:
|
||||||
height = height - height_mod
|
oh = size
|
||||||
return (height, width)
|
if max_size is not None and raw_size is not None:
|
||||||
|
ow = int(raw_size * width / height)
|
||||||
|
else:
|
||||||
|
ow = int(size * width / height)
|
||||||
|
|
||||||
|
if mod_size is not None:
|
||||||
|
ow_mod = np.mod(ow, mod_size)
|
||||||
|
oh_mod = np.mod(oh, mod_size)
|
||||||
|
ow = ow - ow_mod
|
||||||
|
oh = oh - oh_mod
|
||||||
|
|
||||||
|
return (oh, ow)
|
||||||
|
|
||||||
|
|
||||||
# Copied from transformers.models.detr.image_processing_detr.get_image_size_for_max_height_width
|
# Copied from transformers.models.detr.image_processing_detr.get_image_size_for_max_height_width
|
||||||
|
|||||||
@@ -537,3 +537,55 @@ class ConditionalDetrImageProcessingTest(AnnotationFormatTestMixin, ImageProcess
|
|||||||
)
|
)
|
||||||
inputs = image_processor(images=[image_1, image_2], return_tensors="pt")
|
inputs = image_processor(images=[image_1, image_2], return_tensors="pt")
|
||||||
self.assertEqual(inputs["pixel_values"].shape, torch.Size([2, 3, 150, 100]))
|
self.assertEqual(inputs["pixel_values"].shape, torch.Size([2, 3, 150, 100]))
|
||||||
|
|
||||||
|
def test_longest_edge_shortest_edge_resizing_strategy(self):
|
||||||
|
image_1 = torch.ones([958, 653, 3], dtype=torch.uint8)
|
||||||
|
|
||||||
|
# max size is set; width < height;
|
||||||
|
# do_pad=False, longest_edge=640, shortest_edge=640, image=958x653 -> 640x436
|
||||||
|
image_processor = ConditionalDetrImageProcessor(
|
||||||
|
size={"longest_edge": 640, "shortest_edge": 640},
|
||||||
|
do_pad=False,
|
||||||
|
)
|
||||||
|
inputs = image_processor(images=[image_1], return_tensors="pt")
|
||||||
|
self.assertEqual(inputs["pixel_values"].shape, torch.Size([1, 3, 640, 436]))
|
||||||
|
|
||||||
|
image_2 = torch.ones([653, 958, 3], dtype=torch.uint8)
|
||||||
|
# max size is set; height < width;
|
||||||
|
# do_pad=False, longest_edge=640, shortest_edge=640, image=653x958 -> 436x640
|
||||||
|
image_processor = ConditionalDetrImageProcessor(
|
||||||
|
size={"longest_edge": 640, "shortest_edge": 640},
|
||||||
|
do_pad=False,
|
||||||
|
)
|
||||||
|
inputs = image_processor(images=[image_2], return_tensors="pt")
|
||||||
|
self.assertEqual(inputs["pixel_values"].shape, torch.Size([1, 3, 436, 640]))
|
||||||
|
|
||||||
|
image_3 = torch.ones([100, 120, 3], dtype=torch.uint8)
|
||||||
|
# max size is set; height == size; width > max_size;
|
||||||
|
# do_pad=False, longest_edge=118, shortest_edge=100, image=100x120 -> 98x118
|
||||||
|
image_processor = ConditionalDetrImageProcessor(
|
||||||
|
size={"longest_edge": 118, "shortest_edge": 100},
|
||||||
|
do_pad=False,
|
||||||
|
)
|
||||||
|
inputs = image_processor(images=[image_3], return_tensors="pt")
|
||||||
|
self.assertEqual(inputs["pixel_values"].shape, torch.Size([1, 3, 98, 118]))
|
||||||
|
|
||||||
|
image_4 = torch.ones([128, 50, 3], dtype=torch.uint8)
|
||||||
|
# max size is set; width == size; height < max_size;
|
||||||
|
# do_pad=False, longest_edge=256, shortest_edge=50, image=128x50 -> 128x50
|
||||||
|
image_processor = ConditionalDetrImageProcessor(
|
||||||
|
size={"longest_edge": 256, "shortest_edge": 50},
|
||||||
|
do_pad=False,
|
||||||
|
)
|
||||||
|
inputs = image_processor(images=[image_4], return_tensors="pt")
|
||||||
|
self.assertEqual(inputs["pixel_values"].shape, torch.Size([1, 3, 128, 50]))
|
||||||
|
|
||||||
|
image_5 = torch.ones([50, 50, 3], dtype=torch.uint8)
|
||||||
|
# max size is set; height == width; width < max_size;
|
||||||
|
# do_pad=False, longest_edge=117, shortest_edge=50, image=50x50 -> 50x50
|
||||||
|
image_processor = ConditionalDetrImageProcessor(
|
||||||
|
size={"longest_edge": 117, "shortest_edge": 50},
|
||||||
|
do_pad=False,
|
||||||
|
)
|
||||||
|
inputs = image_processor(images=[image_5], return_tensors="pt")
|
||||||
|
self.assertEqual(inputs["pixel_values"].shape, torch.Size([1, 3, 50, 50]))
|
||||||
|
|||||||
@@ -539,3 +539,55 @@ class DeformableDetrImageProcessingTest(AnnotationFormatTestMixin, ImageProcessi
|
|||||||
)
|
)
|
||||||
inputs = image_processor(images=[image_1, image_2], return_tensors="pt")
|
inputs = image_processor(images=[image_1, image_2], return_tensors="pt")
|
||||||
self.assertEqual(inputs["pixel_values"].shape, torch.Size([2, 3, 150, 100]))
|
self.assertEqual(inputs["pixel_values"].shape, torch.Size([2, 3, 150, 100]))
|
||||||
|
|
||||||
|
def test_longest_edge_shortest_edge_resizing_strategy(self):
|
||||||
|
image_1 = torch.ones([958, 653, 3], dtype=torch.uint8)
|
||||||
|
|
||||||
|
# max size is set; width < height;
|
||||||
|
# do_pad=False, longest_edge=640, shortest_edge=640, image=958x653 -> 640x436
|
||||||
|
image_processor = DeformableDetrImageProcessor(
|
||||||
|
size={"longest_edge": 640, "shortest_edge": 640},
|
||||||
|
do_pad=False,
|
||||||
|
)
|
||||||
|
inputs = image_processor(images=[image_1], return_tensors="pt")
|
||||||
|
self.assertEqual(inputs["pixel_values"].shape, torch.Size([1, 3, 640, 436]))
|
||||||
|
|
||||||
|
image_2 = torch.ones([653, 958, 3], dtype=torch.uint8)
|
||||||
|
# max size is set; height < width;
|
||||||
|
# do_pad=False, longest_edge=640, shortest_edge=640, image=653x958 -> 436x640
|
||||||
|
image_processor = DeformableDetrImageProcessor(
|
||||||
|
size={"longest_edge": 640, "shortest_edge": 640},
|
||||||
|
do_pad=False,
|
||||||
|
)
|
||||||
|
inputs = image_processor(images=[image_2], return_tensors="pt")
|
||||||
|
self.assertEqual(inputs["pixel_values"].shape, torch.Size([1, 3, 436, 640]))
|
||||||
|
|
||||||
|
image_3 = torch.ones([100, 120, 3], dtype=torch.uint8)
|
||||||
|
# max size is set; height == size; width > max_size;
|
||||||
|
# do_pad=False, longest_edge=118, shortest_edge=100, image=100x120 -> 98x118
|
||||||
|
image_processor = DeformableDetrImageProcessor(
|
||||||
|
size={"longest_edge": 118, "shortest_edge": 100},
|
||||||
|
do_pad=False,
|
||||||
|
)
|
||||||
|
inputs = image_processor(images=[image_3], return_tensors="pt")
|
||||||
|
self.assertEqual(inputs["pixel_values"].shape, torch.Size([1, 3, 98, 118]))
|
||||||
|
|
||||||
|
image_4 = torch.ones([128, 50, 3], dtype=torch.uint8)
|
||||||
|
# max size is set; width == size; height < max_size;
|
||||||
|
# do_pad=False, longest_edge=256, shortest_edge=50, image=128x50 -> 128x50
|
||||||
|
image_processor = DeformableDetrImageProcessor(
|
||||||
|
size={"longest_edge": 256, "shortest_edge": 50},
|
||||||
|
do_pad=False,
|
||||||
|
)
|
||||||
|
inputs = image_processor(images=[image_4], return_tensors="pt")
|
||||||
|
self.assertEqual(inputs["pixel_values"].shape, torch.Size([1, 3, 128, 50]))
|
||||||
|
|
||||||
|
image_5 = torch.ones([50, 50, 3], dtype=torch.uint8)
|
||||||
|
# max size is set; height == width; width < max_size;
|
||||||
|
# do_pad=False, longest_edge=117, shortest_edge=50, image=50x50 -> 50x50
|
||||||
|
image_processor = DeformableDetrImageProcessor(
|
||||||
|
size={"longest_edge": 117, "shortest_edge": 50},
|
||||||
|
do_pad=False,
|
||||||
|
)
|
||||||
|
inputs = image_processor(images=[image_5], return_tensors="pt")
|
||||||
|
self.assertEqual(inputs["pixel_values"].shape, torch.Size([1, 3, 50, 50]))
|
||||||
|
|||||||
@@ -593,3 +593,55 @@ class DetrImageProcessingTest(AnnotationFormatTestMixin, ImageProcessingTestMixi
|
|||||||
)
|
)
|
||||||
inputs = image_processor(images=[image_1, image_2], return_tensors="pt")
|
inputs = image_processor(images=[image_1, image_2], return_tensors="pt")
|
||||||
self.assertEqual(inputs["pixel_values"].shape, torch.Size([2, 3, 150, 100]))
|
self.assertEqual(inputs["pixel_values"].shape, torch.Size([2, 3, 150, 100]))
|
||||||
|
|
||||||
|
def test_longest_edge_shortest_edge_resizing_strategy(self):
|
||||||
|
image_1 = torch.ones([958, 653, 3], dtype=torch.uint8)
|
||||||
|
|
||||||
|
# max size is set; width < height;
|
||||||
|
# do_pad=False, longest_edge=640, shortest_edge=640, image=958x653 -> 640x436
|
||||||
|
image_processor = DetrImageProcessor(
|
||||||
|
size={"longest_edge": 640, "shortest_edge": 640},
|
||||||
|
do_pad=False,
|
||||||
|
)
|
||||||
|
inputs = image_processor(images=[image_1], return_tensors="pt")
|
||||||
|
self.assertEqual(inputs["pixel_values"].shape, torch.Size([1, 3, 640, 436]))
|
||||||
|
|
||||||
|
image_2 = torch.ones([653, 958, 3], dtype=torch.uint8)
|
||||||
|
# max size is set; height < width;
|
||||||
|
# do_pad=False, longest_edge=640, shortest_edge=640, image=653x958 -> 436x640
|
||||||
|
image_processor = DetrImageProcessor(
|
||||||
|
size={"longest_edge": 640, "shortest_edge": 640},
|
||||||
|
do_pad=False,
|
||||||
|
)
|
||||||
|
inputs = image_processor(images=[image_2], return_tensors="pt")
|
||||||
|
self.assertEqual(inputs["pixel_values"].shape, torch.Size([1, 3, 436, 640]))
|
||||||
|
|
||||||
|
image_3 = torch.ones([100, 120, 3], dtype=torch.uint8)
|
||||||
|
# max size is set; height == size; width > max_size;
|
||||||
|
# do_pad=False, longest_edge=118, shortest_edge=100, image=100x120 -> 98x118
|
||||||
|
image_processor = DetrImageProcessor(
|
||||||
|
size={"longest_edge": 118, "shortest_edge": 100},
|
||||||
|
do_pad=False,
|
||||||
|
)
|
||||||
|
inputs = image_processor(images=[image_3], return_tensors="pt")
|
||||||
|
self.assertEqual(inputs["pixel_values"].shape, torch.Size([1, 3, 98, 118]))
|
||||||
|
|
||||||
|
image_4 = torch.ones([128, 50, 3], dtype=torch.uint8)
|
||||||
|
# max size is set; width == size; height < max_size;
|
||||||
|
# do_pad=False, longest_edge=256, shortest_edge=50, image=128x50 -> 128x50
|
||||||
|
image_processor = DetrImageProcessor(
|
||||||
|
size={"longest_edge": 256, "shortest_edge": 50},
|
||||||
|
do_pad=False,
|
||||||
|
)
|
||||||
|
inputs = image_processor(images=[image_4], return_tensors="pt")
|
||||||
|
self.assertEqual(inputs["pixel_values"].shape, torch.Size([1, 3, 128, 50]))
|
||||||
|
|
||||||
|
image_5 = torch.ones([50, 50, 3], dtype=torch.uint8)
|
||||||
|
# max size is set; height == width; width < max_size;
|
||||||
|
# do_pad=False, longest_edge=117, shortest_edge=50, image=50x50 -> 50x50
|
||||||
|
image_processor = DetrImageProcessor(
|
||||||
|
size={"longest_edge": 117, "shortest_edge": 50},
|
||||||
|
do_pad=False,
|
||||||
|
)
|
||||||
|
inputs = image_processor(images=[image_5], return_tensors="pt")
|
||||||
|
self.assertEqual(inputs["pixel_values"].shape, torch.Size([1, 3, 50, 50]))
|
||||||
|
|||||||
@@ -575,3 +575,55 @@ class GroundingDinoImageProcessingTest(AnnotationFormatTestMixin, ImageProcessin
|
|||||||
)
|
)
|
||||||
inputs = image_processor(images=[image_1, image_2], return_tensors="pt")
|
inputs = image_processor(images=[image_1, image_2], return_tensors="pt")
|
||||||
self.assertEqual(inputs["pixel_values"].shape, torch.Size([2, 3, 150, 100]))
|
self.assertEqual(inputs["pixel_values"].shape, torch.Size([2, 3, 150, 100]))
|
||||||
|
|
||||||
|
def test_longest_edge_shortest_edge_resizing_strategy(self):
|
||||||
|
image_1 = torch.ones([958, 653, 3], dtype=torch.uint8)
|
||||||
|
|
||||||
|
# max size is set; width < height;
|
||||||
|
# do_pad=False, longest_edge=640, shortest_edge=640, image=958x653 -> 640x436
|
||||||
|
image_processor = GroundingDinoImageProcessor(
|
||||||
|
size={"longest_edge": 640, "shortest_edge": 640},
|
||||||
|
do_pad=False,
|
||||||
|
)
|
||||||
|
inputs = image_processor(images=[image_1], return_tensors="pt")
|
||||||
|
self.assertEqual(inputs["pixel_values"].shape, torch.Size([1, 3, 640, 436]))
|
||||||
|
|
||||||
|
image_2 = torch.ones([653, 958, 3], dtype=torch.uint8)
|
||||||
|
# max size is set; height < width;
|
||||||
|
# do_pad=False, longest_edge=640, shortest_edge=640, image=653x958 -> 436x640
|
||||||
|
image_processor = GroundingDinoImageProcessor(
|
||||||
|
size={"longest_edge": 640, "shortest_edge": 640},
|
||||||
|
do_pad=False,
|
||||||
|
)
|
||||||
|
inputs = image_processor(images=[image_2], return_tensors="pt")
|
||||||
|
self.assertEqual(inputs["pixel_values"].shape, torch.Size([1, 3, 436, 640]))
|
||||||
|
|
||||||
|
image_3 = torch.ones([100, 120, 3], dtype=torch.uint8)
|
||||||
|
# max size is set; height == size; width > max_size;
|
||||||
|
# do_pad=False, longest_edge=118, shortest_edge=100, image=100x120 -> 98x118
|
||||||
|
image_processor = GroundingDinoImageProcessor(
|
||||||
|
size={"longest_edge": 118, "shortest_edge": 100},
|
||||||
|
do_pad=False,
|
||||||
|
)
|
||||||
|
inputs = image_processor(images=[image_3], return_tensors="pt")
|
||||||
|
self.assertEqual(inputs["pixel_values"].shape, torch.Size([1, 3, 98, 118]))
|
||||||
|
|
||||||
|
image_4 = torch.ones([128, 50, 3], dtype=torch.uint8)
|
||||||
|
# max size is set; width == size; height < max_size;
|
||||||
|
# do_pad=False, longest_edge=256, shortest_edge=50, image=128x50 -> 128x50
|
||||||
|
image_processor = GroundingDinoImageProcessor(
|
||||||
|
size={"longest_edge": 256, "shortest_edge": 50},
|
||||||
|
do_pad=False,
|
||||||
|
)
|
||||||
|
inputs = image_processor(images=[image_4], return_tensors="pt")
|
||||||
|
self.assertEqual(inputs["pixel_values"].shape, torch.Size([1, 3, 128, 50]))
|
||||||
|
|
||||||
|
image_5 = torch.ones([50, 50, 3], dtype=torch.uint8)
|
||||||
|
# max size is set; height == width; width < max_size;
|
||||||
|
# do_pad=False, longest_edge=117, shortest_edge=50, image=50x50 -> 50x50
|
||||||
|
image_processor = GroundingDinoImageProcessor(
|
||||||
|
size={"longest_edge": 117, "shortest_edge": 50},
|
||||||
|
do_pad=False,
|
||||||
|
)
|
||||||
|
inputs = image_processor(images=[image_5], return_tensors="pt")
|
||||||
|
self.assertEqual(inputs["pixel_values"].shape, torch.Size([1, 3, 50, 50]))
|
||||||
|
|||||||
Reference in New Issue
Block a user