Fuyu: improve image processing (#27007)

* Fix Fuyu image scaling bug
  It could produce negative padding and hence inference errors for certain image sizes.
* initial rework commit
* add batching capabilities, refactor image processing
* add functional batching for a list of images and texts
* make args explicit
* Fuyu processing update (#27133)
* Add file headers
* Add file headers
* First pass - preprocess method with standard args
* First pass image processor rework
* Small tweaks
* More args and docstrings
* Tidying iterating over batch
* Tidying up
* Modify to have quick tests (for now)
* Fix up
* BatchFeature
* Passing tests
* Add tests for processor
* Sense check when patchifying
* Add some tests
* FuyuBatchFeature
* Post-process box coordinates
* Update to `size` in processor
* Remove unused and duplicate constants
* Store unpadded dims after resize
* Fix up
* Return FuyuBatchFeature
* Get unpadded sizes after resize
* Update exception
* Fix return
* Convert input `<box>` coordinates to model format.
* Post-process point coords, support multiple boxes/points in a single sequence
* Replace constants
* Update src/transformers/models/fuyu/image_processing_fuyu.py
  Co-authored-by: Pedro Cuenca <pedro@huggingface.co>
* Preprocess List[List[image]]
* Update src/transformers/models/fuyu/image_processing_fuyu.py
  Co-authored-by: Pedro Cuenca <pedro@huggingface.co>
* Update to Amy's latest state.
* post-processing returns a list of tensors
* Fix error when target_sizes is None
  Co-authored-by: Pablo Montalvo <pablo.montalvo.leroux@gmail.com>
* Update src/transformers/models/fuyu/image_processing_fuyu.py
  Co-authored-by: Pedro Cuenca <pedro@huggingface.co>
* Update src/transformers/models/fuyu/image_processing_fuyu.py
  Co-authored-by: Pedro Cuenca <pedro@huggingface.co>
* Update src/transformers/models/fuyu/image_processing_fuyu.py
  Co-authored-by: Pedro Cuenca <pedro@huggingface.co>
* Update src/transformers/models/fuyu/image_processing_fuyu.py
  Co-authored-by: Pedro Cuenca <pedro@huggingface.co>
* Review comments
* Update src/transformers/models/fuyu/image_processing_fuyu.py
  Co-authored-by: Pedro Cuenca <pedro@huggingface.co>
* Fix up
* Fix up
---------
Co-authored-by: Ubuntu <ubuntu@ip-172-31-72-126.ec2.internal>
Co-authored-by: Pedro Cuenca <pedro@huggingface.co>
Co-authored-by: Pablo Montalvo <pablo.montalvo.leroux@gmail.com>
* Fix conflicts in fuyu_follow_up_image_processing (#27228)
  fixing conflicts and updating on main
* Revert "Fix conflicts in fuyu_follow_up_image_processing" (#27232)
  Revert "Fix conflicts in fuyu_follow_up_image_processing (#27228)"
  This reverts commit acce10b6c653dc7041fb9d18cfed55775afd6207.
---------
Co-authored-by: Pedro Cuenca <pedro@huggingface.co>
Co-authored-by: amyeroberts <22614925+amyeroberts@users.noreply.github.com>
Co-authored-by: Ubuntu <ubuntu@ip-172-31-72-126.ec2.internal>
2023-11-02 12:25:41 +01:00
parent 9b25c164bd
commit 8a312956fd
7 changed files with 1175 additions and 476 deletions
--- a/tests/models/fuyu/test_image_processing_fuyu.py
+++ b/tests/models/fuyu/test_image_processing_fuyu.py
@@ -24,7 +24,8 @@ if is_vision_available():
@require_torchvision
 class TestFuyuImageProcessor(unittest.TestCase):
    def setUp(self):
-        self.processor = FuyuImageProcessor(target_height=160, target_width=320, padding_value=1.0)
+        self.size = {"height": 160, "width": 320}
+        self.processor = FuyuImageProcessor(size=self.size, padding_value=1.0)
        self.batch_size = 3
        self.channels = 3
        self.height = 300
@@ -38,29 +39,25 @@ class TestFuyuImageProcessor(unittest.TestCase):
        self.sample_image_pil = Image.fromarray(self.sample_image)

    def test_patches(self):
-        expected_num_patches = self.processor.get_num_patches(
-            img_h=self.height, img_w=self.width, patch_dim_h=self.image_patch_dim_h, patch_dim_w=self.image_patch_dim_w
-        )
+        expected_num_patches = self.processor.get_num_patches(image_height=self.height, image_width=self.width)

-        patches_final = self.processor.patchify_image(
-            image=self.image_input, patch_dim_h=self.image_patch_dim_h, patch_dim_w=self.image_patch_dim_w
-        )
+        patches_final = self.processor.patchify_image(image=self.image_input)
        assert (
            patches_final.shape[1] == expected_num_patches
        ), f"Expected {expected_num_patches} patches, got {patches_final.shape[1]}."

    def test_scale_to_target_aspect_ratio(self):
        # (h:450, w:210) fitting (160, 320) -> (160, 210*160/450)
-        scaled_image = self.processor._scale_to_target_aspect_ratio(self.sample_image)
+        scaled_image = self.processor.resize(self.sample_image, size=self.size)
        self.assertEqual(scaled_image.shape[0], 160)
        self.assertEqual(scaled_image.shape[1], 74)

    def test_apply_transformation_numpy(self):
-        transformed_image = self.processor.apply_transformation(self.sample_image)
-        self.assertEqual(transformed_image.shape[0], 160)
-        self.assertEqual(transformed_image.shape[1], 320)
+        transformed_image = self.processor.preprocess(self.sample_image).images[0][0]
+        self.assertEqual(transformed_image.shape[1], 160)
+        self.assertEqual(transformed_image.shape[2], 320)

    def test_apply_transformation_pil(self):
-        transformed_image = self.processor.apply_transformation(self.sample_image_pil)
-        self.assertEqual(transformed_image.shape[0], 160)
-        self.assertEqual(transformed_image.shape[1], 320)
+        transformed_image = self.processor.preprocess(self.sample_image_pil).images[0][0]
+        self.assertEqual(transformed_image.shape[1], 160)
+        self.assertEqual(transformed_image.shape[2], 320)