Add fixed resize and pad strategy for object detection (#30742)
* Add resize and pad strategy * Merge get_size functions * Add pad_size + tests to object detection models * Fixup * Update docstrings * Fixup
This commit is contained in:
committed by
GitHub
parent
e9a8041d1c
commit
bf646fbf2d
@@ -490,3 +490,50 @@ class ConditionalDetrImageProcessingTest(AnnotationFormatTestMixin, ImageProcess
|
||||
).T
|
||||
self.assertTrue(torch.allclose(encoding["labels"][0]["boxes"], expected_boxes_0, rtol=1))
|
||||
self.assertTrue(torch.allclose(encoding["labels"][1]["boxes"], expected_boxes_1, rtol=1))
|
||||
|
||||
# Copied from tests.models.detr.test_image_processing_detr.DetrImageProcessingTest.test_max_width_max_height_resizing_and_pad_strategy with Detr->ConditionalDetr
|
||||
def test_max_width_max_height_resizing_and_pad_strategy(self):
    """Check `pixel_values` shapes for `max_height`/`max_width` sizes, with and without `pad_size`."""
    image_1 = torch.ones([200, 100, 3], dtype=torch.uint8)

    # do_pad=False, max_height=100, max_width=100, image=200x100 -> 100x50
    image_processor = ConditionalDetrImageProcessor(
        size={"max_height": 100, "max_width": 100},
        do_pad=False,
    )
    inputs = image_processor(images=[image_1], return_tensors="pt")
    self.assertEqual(inputs["pixel_values"].shape, torch.Size([1, 3, 100, 50]))

    # do_pad=False, max_height=300, max_width=100, image=200x100 -> 200x100
    image_processor = ConditionalDetrImageProcessor(
        size={"max_height": 300, "max_width": 100},
        do_pad=False,
    )
    inputs = image_processor(images=[image_1], return_tensors="pt")
    # This case previously computed `inputs` without checking it.
    self.assertEqual(inputs["pixel_values"].shape, torch.Size([1, 3, 200, 100]))

    # do_pad=True, max_height=100, max_width=100, image=200x100 -> 100x100
    image_processor = ConditionalDetrImageProcessor(
        size={"max_height": 100, "max_width": 100}, do_pad=True, pad_size={"height": 100, "width": 100}
    )
    inputs = image_processor(images=[image_1], return_tensors="pt")
    self.assertEqual(inputs["pixel_values"].shape, torch.Size([1, 3, 100, 100]))

    # do_pad=True, max_height=300, max_width=100, pad_size=301x101, image=200x100 -> 301x101
    image_processor = ConditionalDetrImageProcessor(
        size={"max_height": 300, "max_width": 100},
        do_pad=True,
        pad_size={"height": 301, "width": 101},
    )
    inputs = image_processor(images=[image_1], return_tensors="pt")
    self.assertEqual(inputs["pixel_values"].shape, torch.Size([1, 3, 301, 101]))

    ### Check for batch
    image_2 = torch.ones([100, 150, 3], dtype=torch.uint8)

    # do_pad=True, max_height=150, max_width=100, images=[200x100, 100x150] -> 150x100
    image_processor = ConditionalDetrImageProcessor(
        size={"max_height": 150, "max_width": 100},
        do_pad=True,
        pad_size={"height": 150, "width": 100},
    )
    inputs = image_processor(images=[image_1, image_2], return_tensors="pt")
    self.assertEqual(inputs["pixel_values"].shape, torch.Size([2, 3, 150, 100]))
|
||||
|
||||
@@ -492,3 +492,50 @@ class DeformableDetrImageProcessingTest(AnnotationFormatTestMixin, ImageProcessi
|
||||
).T
|
||||
self.assertTrue(torch.allclose(encoding["labels"][0]["boxes"], expected_boxes_0, rtol=1))
|
||||
self.assertTrue(torch.allclose(encoding["labels"][1]["boxes"], expected_boxes_1, rtol=1))
|
||||
|
||||
# Copied from tests.models.detr.test_image_processing_detr.DetrImageProcessingTest.test_max_width_max_height_resizing_and_pad_strategy with Detr->DeformableDetr
|
||||
def test_max_width_max_height_resizing_and_pad_strategy(self):
    """Check `pixel_values` shapes for `max_height`/`max_width` sizes, with and without `pad_size`."""
    image_1 = torch.ones([200, 100, 3], dtype=torch.uint8)

    # do_pad=False, max_height=100, max_width=100, image=200x100 -> 100x50
    image_processor = DeformableDetrImageProcessor(
        size={"max_height": 100, "max_width": 100},
        do_pad=False,
    )
    inputs = image_processor(images=[image_1], return_tensors="pt")
    self.assertEqual(inputs["pixel_values"].shape, torch.Size([1, 3, 100, 50]))

    # do_pad=False, max_height=300, max_width=100, image=200x100 -> 200x100
    image_processor = DeformableDetrImageProcessor(
        size={"max_height": 300, "max_width": 100},
        do_pad=False,
    )
    inputs = image_processor(images=[image_1], return_tensors="pt")
    # This case previously computed `inputs` without checking it.
    self.assertEqual(inputs["pixel_values"].shape, torch.Size([1, 3, 200, 100]))

    # do_pad=True, max_height=100, max_width=100, image=200x100 -> 100x100
    image_processor = DeformableDetrImageProcessor(
        size={"max_height": 100, "max_width": 100}, do_pad=True, pad_size={"height": 100, "width": 100}
    )
    inputs = image_processor(images=[image_1], return_tensors="pt")
    self.assertEqual(inputs["pixel_values"].shape, torch.Size([1, 3, 100, 100]))

    # do_pad=True, max_height=300, max_width=100, pad_size=301x101, image=200x100 -> 301x101
    image_processor = DeformableDetrImageProcessor(
        size={"max_height": 300, "max_width": 100},
        do_pad=True,
        pad_size={"height": 301, "width": 101},
    )
    inputs = image_processor(images=[image_1], return_tensors="pt")
    self.assertEqual(inputs["pixel_values"].shape, torch.Size([1, 3, 301, 101]))

    ### Check for batch
    image_2 = torch.ones([100, 150, 3], dtype=torch.uint8)

    # do_pad=True, max_height=150, max_width=100, images=[200x100, 100x150] -> 150x100
    image_processor = DeformableDetrImageProcessor(
        size={"max_height": 150, "max_width": 100},
        do_pad=True,
        pad_size={"height": 150, "width": 100},
    )
    inputs = image_processor(images=[image_1, image_2], return_tensors="pt")
    self.assertEqual(inputs["pixel_values"].shape, torch.Size([2, 3, 150, 100]))
|
||||
|
||||
@@ -486,3 +486,50 @@ class DetaImageProcessingTest(AnnotationFormatTestMixin, ImageProcessingTestMixi
|
||||
).T
|
||||
self.assertTrue(torch.allclose(encoding["labels"][0]["boxes"], expected_boxes_0, rtol=1))
|
||||
self.assertTrue(torch.allclose(encoding["labels"][1]["boxes"], expected_boxes_1, rtol=1))
|
||||
|
||||
# Copied from tests.models.detr.test_image_processing_detr.DetrImageProcessingTest.test_max_width_max_height_resizing_and_pad_strategy with Detr->Deta
|
||||
def test_max_width_max_height_resizing_and_pad_strategy(self):
    """Check `pixel_values` shapes for `max_height`/`max_width` sizes, with and without `pad_size`."""
    image_1 = torch.ones([200, 100, 3], dtype=torch.uint8)

    # do_pad=False, max_height=100, max_width=100, image=200x100 -> 100x50
    image_processor = DetaImageProcessor(
        size={"max_height": 100, "max_width": 100},
        do_pad=False,
    )
    inputs = image_processor(images=[image_1], return_tensors="pt")
    self.assertEqual(inputs["pixel_values"].shape, torch.Size([1, 3, 100, 50]))

    # do_pad=False, max_height=300, max_width=100, image=200x100 -> 200x100
    image_processor = DetaImageProcessor(
        size={"max_height": 300, "max_width": 100},
        do_pad=False,
    )
    inputs = image_processor(images=[image_1], return_tensors="pt")
    # This case previously computed `inputs` without checking it.
    self.assertEqual(inputs["pixel_values"].shape, torch.Size([1, 3, 200, 100]))

    # do_pad=True, max_height=100, max_width=100, image=200x100 -> 100x100
    image_processor = DetaImageProcessor(
        size={"max_height": 100, "max_width": 100}, do_pad=True, pad_size={"height": 100, "width": 100}
    )
    inputs = image_processor(images=[image_1], return_tensors="pt")
    self.assertEqual(inputs["pixel_values"].shape, torch.Size([1, 3, 100, 100]))

    # do_pad=True, max_height=300, max_width=100, pad_size=301x101, image=200x100 -> 301x101
    image_processor = DetaImageProcessor(
        size={"max_height": 300, "max_width": 100},
        do_pad=True,
        pad_size={"height": 301, "width": 101},
    )
    inputs = image_processor(images=[image_1], return_tensors="pt")
    self.assertEqual(inputs["pixel_values"].shape, torch.Size([1, 3, 301, 101]))

    ### Check for batch
    image_2 = torch.ones([100, 150, 3], dtype=torch.uint8)

    # do_pad=True, max_height=150, max_width=100, images=[200x100, 100x150] -> 150x100
    image_processor = DetaImageProcessor(
        size={"max_height": 150, "max_width": 100},
        do_pad=True,
        pad_size={"height": 150, "width": 100},
    )
    inputs = image_processor(images=[image_1, image_2], return_tensors="pt")
    self.assertEqual(inputs["pixel_values"].shape, torch.Size([2, 3, 150, 100]))
|
||||
|
||||
@@ -547,3 +547,49 @@ class DetrImageProcessingTest(AnnotationFormatTestMixin, ImageProcessingTestMixi
|
||||
).T
|
||||
self.assertTrue(torch.allclose(encoding["labels"][0]["boxes"], expected_boxes_0, rtol=1))
|
||||
self.assertTrue(torch.allclose(encoding["labels"][1]["boxes"], expected_boxes_1, rtol=1))
|
||||
|
||||
def test_max_width_max_height_resizing_and_pad_strategy(self):
    """Check `pixel_values` shapes for `max_height`/`max_width` sizes, with and without `pad_size`."""
    image_1 = torch.ones([200, 100, 3], dtype=torch.uint8)

    # do_pad=False, max_height=100, max_width=100, image=200x100 -> 100x50
    image_processor = DetrImageProcessor(
        size={"max_height": 100, "max_width": 100},
        do_pad=False,
    )
    inputs = image_processor(images=[image_1], return_tensors="pt")
    self.assertEqual(inputs["pixel_values"].shape, torch.Size([1, 3, 100, 50]))

    # do_pad=False, max_height=300, max_width=100, image=200x100 -> 200x100
    image_processor = DetrImageProcessor(
        size={"max_height": 300, "max_width": 100},
        do_pad=False,
    )
    inputs = image_processor(images=[image_1], return_tensors="pt")
    # This case previously computed `inputs` without checking it.
    self.assertEqual(inputs["pixel_values"].shape, torch.Size([1, 3, 200, 100]))

    # do_pad=True, max_height=100, max_width=100, image=200x100 -> 100x100
    image_processor = DetrImageProcessor(
        size={"max_height": 100, "max_width": 100}, do_pad=True, pad_size={"height": 100, "width": 100}
    )
    inputs = image_processor(images=[image_1], return_tensors="pt")
    self.assertEqual(inputs["pixel_values"].shape, torch.Size([1, 3, 100, 100]))

    # do_pad=True, max_height=300, max_width=100, pad_size=301x101, image=200x100 -> 301x101
    image_processor = DetrImageProcessor(
        size={"max_height": 300, "max_width": 100},
        do_pad=True,
        pad_size={"height": 301, "width": 101},
    )
    inputs = image_processor(images=[image_1], return_tensors="pt")
    self.assertEqual(inputs["pixel_values"].shape, torch.Size([1, 3, 301, 101]))

    ### Check for batch
    image_2 = torch.ones([100, 150, 3], dtype=torch.uint8)

    # do_pad=True, max_height=150, max_width=100, images=[200x100, 100x150] -> 150x100
    image_processor = DetrImageProcessor(
        size={"max_height": 150, "max_width": 100},
        do_pad=True,
        pad_size={"height": 150, "width": 100},
    )
    inputs = image_processor(images=[image_1, image_2], return_tensors="pt")
    self.assertEqual(inputs["pixel_values"].shape, torch.Size([2, 3, 150, 100]))
|
||||
|
||||
@@ -528,3 +528,50 @@ class GroundingDinoImageProcessingTest(AnnotationFormatTestMixin, ImageProcessin
|
||||
).T
|
||||
self.assertTrue(torch.allclose(encoding["labels"][0]["boxes"], expected_boxes_0, rtol=1))
|
||||
self.assertTrue(torch.allclose(encoding["labels"][1]["boxes"], expected_boxes_1, rtol=1))
|
||||
|
||||
# Copied from tests.models.detr.test_image_processing_detr.DetrImageProcessingTest.test_max_width_max_height_resizing_and_pad_strategy with Detr->GroundingDino
|
||||
def test_max_width_max_height_resizing_and_pad_strategy(self):
    """Check `pixel_values` shapes for `max_height`/`max_width` sizes, with and without `pad_size`."""
    image_1 = torch.ones([200, 100, 3], dtype=torch.uint8)

    # do_pad=False, max_height=100, max_width=100, image=200x100 -> 100x50
    image_processor = GroundingDinoImageProcessor(
        size={"max_height": 100, "max_width": 100},
        do_pad=False,
    )
    inputs = image_processor(images=[image_1], return_tensors="pt")
    self.assertEqual(inputs["pixel_values"].shape, torch.Size([1, 3, 100, 50]))

    # do_pad=False, max_height=300, max_width=100, image=200x100 -> 200x100
    image_processor = GroundingDinoImageProcessor(
        size={"max_height": 300, "max_width": 100},
        do_pad=False,
    )
    inputs = image_processor(images=[image_1], return_tensors="pt")
    # This case previously computed `inputs` without checking it.
    self.assertEqual(inputs["pixel_values"].shape, torch.Size([1, 3, 200, 100]))

    # do_pad=True, max_height=100, max_width=100, image=200x100 -> 100x100
    image_processor = GroundingDinoImageProcessor(
        size={"max_height": 100, "max_width": 100}, do_pad=True, pad_size={"height": 100, "width": 100}
    )
    inputs = image_processor(images=[image_1], return_tensors="pt")
    self.assertEqual(inputs["pixel_values"].shape, torch.Size([1, 3, 100, 100]))

    # do_pad=True, max_height=300, max_width=100, pad_size=301x101, image=200x100 -> 301x101
    image_processor = GroundingDinoImageProcessor(
        size={"max_height": 300, "max_width": 100},
        do_pad=True,
        pad_size={"height": 301, "width": 101},
    )
    inputs = image_processor(images=[image_1], return_tensors="pt")
    self.assertEqual(inputs["pixel_values"].shape, torch.Size([1, 3, 301, 101]))

    ### Check for batch
    image_2 = torch.ones([100, 150, 3], dtype=torch.uint8)

    # do_pad=True, max_height=150, max_width=100, images=[200x100, 100x150] -> 150x100
    image_processor = GroundingDinoImageProcessor(
        size={"max_height": 150, "max_width": 100},
        do_pad=True,
        pad_size={"height": 150, "width": 100},
    )
    inputs = image_processor(images=[image_1, image_2], return_tensors="pt")
    self.assertEqual(inputs["pixel_values"].shape, torch.Size([2, 3, 150, 100]))
|
||||
|
||||
@@ -546,3 +546,50 @@ class YolosImageProcessingTest(AnnotationFormatTestMixin, ImageProcessingTestMix
|
||||
).T
|
||||
self.assertTrue(torch.allclose(encoding["labels"][0]["boxes"], expected_boxes_0, rtol=1))
|
||||
self.assertTrue(torch.allclose(encoding["labels"][1]["boxes"], expected_boxes_1, rtol=1))
|
||||
|
||||
# Copied from tests.models.detr.test_image_processing_detr.DetrImageProcessingTest.test_max_width_max_height_resizing_and_pad_strategy with Detr->Yolos
|
||||
def test_max_width_max_height_resizing_and_pad_strategy(self):
    """Check `pixel_values` shapes for `max_height`/`max_width` sizes, with and without `pad_size`."""
    image_1 = torch.ones([200, 100, 3], dtype=torch.uint8)

    # do_pad=False, max_height=100, max_width=100, image=200x100 -> 100x50
    image_processor = YolosImageProcessor(
        size={"max_height": 100, "max_width": 100},
        do_pad=False,
    )
    inputs = image_processor(images=[image_1], return_tensors="pt")
    self.assertEqual(inputs["pixel_values"].shape, torch.Size([1, 3, 100, 50]))

    # do_pad=False, max_height=300, max_width=100, image=200x100 -> 200x100
    image_processor = YolosImageProcessor(
        size={"max_height": 300, "max_width": 100},
        do_pad=False,
    )
    inputs = image_processor(images=[image_1], return_tensors="pt")
    # This case previously computed `inputs` without checking it.
    self.assertEqual(inputs["pixel_values"].shape, torch.Size([1, 3, 200, 100]))

    # do_pad=True, max_height=100, max_width=100, image=200x100 -> 100x100
    image_processor = YolosImageProcessor(
        size={"max_height": 100, "max_width": 100}, do_pad=True, pad_size={"height": 100, "width": 100}
    )
    inputs = image_processor(images=[image_1], return_tensors="pt")
    self.assertEqual(inputs["pixel_values"].shape, torch.Size([1, 3, 100, 100]))

    # do_pad=True, max_height=300, max_width=100, pad_size=301x101, image=200x100 -> 301x101
    image_processor = YolosImageProcessor(
        size={"max_height": 300, "max_width": 100},
        do_pad=True,
        pad_size={"height": 301, "width": 101},
    )
    inputs = image_processor(images=[image_1], return_tensors="pt")
    self.assertEqual(inputs["pixel_values"].shape, torch.Size([1, 3, 301, 101]))

    ### Check for batch
    image_2 = torch.ones([100, 150, 3], dtype=torch.uint8)

    # do_pad=True, max_height=150, max_width=100, images=[200x100, 100x150] -> 150x100
    image_processor = YolosImageProcessor(
        size={"max_height": 150, "max_width": 100},
        do_pad=True,
        pad_size={"height": 150, "width": 100},
    )
    inputs = image_processor(images=[image_1, image_2], return_tensors="pt")
    self.assertEqual(inputs["pixel_values"].shape, torch.Size([2, 3, 150, 100]))
|
||||
|
||||
Reference in New Issue
Block a user