Fix yolos resizing (#27663)
* Fix yolos resizing
* Update tests
* Add a test
This commit is contained in:
@@ -82,6 +82,7 @@ logger = logging.get_logger(__name__) # pylint: disable=invalid-name
|
|||||||
SUPPORTED_ANNOTATION_FORMATS = (AnnotationFormat.COCO_DETECTION, AnnotationFormat.COCO_PANOPTIC)
|
SUPPORTED_ANNOTATION_FORMATS = (AnnotationFormat.COCO_DETECTION, AnnotationFormat.COCO_PANOPTIC)
|
||||||
|
|
||||||
|
|
||||||
|
# From the original repo: https://github.com/facebookresearch/detr/blob/3af9fa878e73b6894ce3596450a8d9b89d918ca9/datasets/transforms.py#L76
|
||||||
def get_size_with_aspect_ratio(image_size, size, max_size=None) -> Tuple[int, int]:
|
def get_size_with_aspect_ratio(image_size, size, max_size=None) -> Tuple[int, int]:
|
||||||
"""
|
"""
|
||||||
Computes the output image size given the input image size and the desired output size.
|
Computes the output image size given the input image size and the desired output size.
|
||||||
|
|||||||
@@ -99,7 +99,6 @@ def get_max_height_width(
|
|||||||
return (max_height, max_width)
|
return (max_height, max_width)
|
||||||
|
|
||||||
|
|
||||||
# Copied from transformers.models.detr.image_processing_detr.get_size_with_aspect_ratio
|
|
||||||
def get_size_with_aspect_ratio(image_size, size, max_size=None) -> Tuple[int, int]:
|
def get_size_with_aspect_ratio(image_size, size, max_size=None) -> Tuple[int, int]:
|
||||||
"""
|
"""
|
||||||
Computes the output image size given the input image size and the desired output size.
|
Computes the output image size given the input image size and the desired output size.
|
||||||
@@ -119,16 +118,17 @@ def get_size_with_aspect_ratio(image_size, size, max_size=None) -> Tuple[int, in
|
|||||||
if max_original_size / min_original_size * size > max_size:
|
if max_original_size / min_original_size * size > max_size:
|
||||||
size = int(round(max_size * min_original_size / max_original_size))
|
size = int(round(max_size * min_original_size / max_original_size))
|
||||||
|
|
||||||
if (height <= width and height == size) or (width <= height and width == size):
|
if width < height and width != size:
|
||||||
return height, width
|
height = int(size * height / width)
|
||||||
|
width = size
|
||||||
if width < height:
|
elif height < width and height != size:
|
||||||
ow = size
|
width = int(size * width / height)
|
||||||
oh = int(size * height / width)
|
height = size
|
||||||
else:
|
width_mod = np.mod(width, 16)
|
||||||
oh = size
|
height_mod = np.mod(height, 16)
|
||||||
ow = int(size * width / height)
|
width = width - width_mod
|
||||||
return (oh, ow)
|
height = height - height_mod
|
||||||
|
return (height, width)
|
||||||
|
|
||||||
|
|
||||||
# Copied from transformers.models.detr.image_processing_detr.get_resize_output_image_size
|
# Copied from transformers.models.detr.image_processing_detr.get_resize_output_image_size
|
||||||
|
|||||||
@@ -86,18 +86,28 @@ class YolosImageProcessingTester(unittest.TestCase):
|
|||||||
if not batched:
|
if not batched:
|
||||||
image = image_inputs[0]
|
image = image_inputs[0]
|
||||||
if isinstance(image, Image.Image):
|
if isinstance(image, Image.Image):
|
||||||
w, h = image.size
|
width, height = image.size
|
||||||
else:
|
else:
|
||||||
h, w = image.shape[1], image.shape[2]
|
height, width = image.shape[1], image.shape[2]
|
||||||
if w < h:
|
|
||||||
expected_height = int(self.size["shortest_edge"] * h / w)
|
size = self.size["shortest_edge"]
|
||||||
expected_width = self.size["shortest_edge"]
|
max_size = self.size.get("longest_edge", None)
|
||||||
elif w > h:
|
if max_size is not None:
|
||||||
expected_height = self.size["shortest_edge"]
|
min_original_size = float(min((height, width)))
|
||||||
expected_width = int(self.size["shortest_edge"] * w / h)
|
max_original_size = float(max((height, width)))
|
||||||
else:
|
if max_original_size / min_original_size * size > max_size:
|
||||||
expected_height = self.size["shortest_edge"]
|
size = int(round(max_size * min_original_size / max_original_size))
|
||||||
expected_width = self.size["shortest_edge"]
|
|
||||||
|
if width < height and width != size:
|
||||||
|
height = int(size * height / width)
|
||||||
|
width = size
|
||||||
|
elif height < width and height != size:
|
||||||
|
width = int(size * width / height)
|
||||||
|
height = size
|
||||||
|
width_mod = width % 16
|
||||||
|
height_mod = height % 16
|
||||||
|
expected_width = width - width_mod
|
||||||
|
expected_height = height - height_mod
|
||||||
|
|
||||||
else:
|
else:
|
||||||
expected_values = []
|
expected_values = []
|
||||||
@@ -173,6 +183,18 @@ class YolosImageProcessingTest(AnnotationFormatTestMixin, ImageProcessingTestMix
|
|||||||
torch.allclose(encoded_images_with_method["pixel_values"], encoded_images["pixel_values"], atol=1e-4)
|
torch.allclose(encoded_images_with_method["pixel_values"], encoded_images["pixel_values"], atol=1e-4)
|
||||||
)
|
)
|
||||||
|
|
||||||
|
def test_resize_max_size_respected(self):
|
||||||
|
image_processor = self.image_processing_class(**self.image_processor_dict)
|
||||||
|
|
||||||
|
# create torch tensors as image
|
||||||
|
image = torch.randint(0, 256, (3, 100, 1500), dtype=torch.uint8)
|
||||||
|
processed_image = image_processor(
|
||||||
|
image, size={"longest_edge": 1333, "shortest_edge": 800}, do_pad=False, return_tensors="pt"
|
||||||
|
)["pixel_values"]
|
||||||
|
|
||||||
|
self.assertTrue(processed_image.shape[-1] <= 1333)
|
||||||
|
self.assertTrue(processed_image.shape[-2] <= 800)
|
||||||
|
|
||||||
@slow
|
@slow
|
||||||
def test_call_pytorch_with_coco_detection_annotations(self):
|
def test_call_pytorch_with_coco_detection_annotations(self):
|
||||||
# prepare image and target
|
# prepare image and target
|
||||||
|
|||||||
Reference in New Issue
Block a user