Fuyu: improve image processing (#27007)

* Fix Fuyu image scaling bug

The previous scaling logic could produce negative padding, and hence
inference errors, for certain image sizes.

* initial rework commit

* add batching capabilities, refactor image processing

* add functional batching for a list of images and texts

* make args explicit

* Fuyu processing update (#27133)

* Add file headers

* Add file headers

* First pass - preprocess method with standard args

* First pass image processor rework

* Small tweaks

* More args and docstrings

* Tidying iterating over batch

* Tidying up

* Modify to have quick tests (for now)

* Fix up

* BatchFeature

* Passing tests

* Add tests for processor

* Sense check when patchifying

* Add some tests

* FuyuBatchFeature

* Post-process box coordinates

* Update to `size` in processor

* Remove unused and duplicate constants

* Store unpadded dims after resize

* Fix up

* Return FuyuBatchFeature

* Get unpadded sizes after resize

* Update exception

* Fix return

* Convert input `<box>` coordinates to model format.

* Post-process point coords, support multiple boxes/points in a single
sequence

* Replace constants

* Update src/transformers/models/fuyu/image_processing_fuyu.py

Co-authored-by: Pedro Cuenca <pedro@huggingface.co>

* Preprocess List[List[image]]

* Update src/transformers/models/fuyu/image_processing_fuyu.py

Co-authored-by: Pedro Cuenca <pedro@huggingface.co>

* Update to Amy's latest state.

* post-processing returns a list of tensors

* Fix error when target_sizes is None

Co-authored-by: Pablo Montalvo <pablo.montalvo.leroux@gmail.com>

* Update src/transformers/models/fuyu/image_processing_fuyu.py

Co-authored-by: Pedro Cuenca <pedro@huggingface.co>

* Update src/transformers/models/fuyu/image_processing_fuyu.py

Co-authored-by: Pedro Cuenca <pedro@huggingface.co>

* Update src/transformers/models/fuyu/image_processing_fuyu.py

Co-authored-by: Pedro Cuenca <pedro@huggingface.co>

* Update src/transformers/models/fuyu/image_processing_fuyu.py

Co-authored-by: Pedro Cuenca <pedro@huggingface.co>

* Review comments

* Update src/transformers/models/fuyu/image_processing_fuyu.py

Co-authored-by: Pedro Cuenca <pedro@huggingface.co>

* Fix up

* Fix up

---------

Co-authored-by: Ubuntu <ubuntu@ip-172-31-72-126.ec2.internal>
Co-authored-by: Pedro Cuenca <pedro@huggingface.co>
Co-authored-by: Pablo Montalvo <pablo.montalvo.leroux@gmail.com>

* Fix conflicts in fuyu_follow_up_image_processing (#27228)

Fix conflicts and bring the branch up to date with main.

* Revert "Fix conflicts in fuyu_follow_up_image_processing" (#27232)

Revert "Fix conflicts in fuyu_follow_up_image_processing (#27228)"

This reverts commit acce10b6c653dc7041fb9d18cfed55775afd6207.

---------

Co-authored-by: Pedro Cuenca <pedro@huggingface.co>
Co-authored-by: amyeroberts <22614925+amyeroberts@users.noreply.github.com>
Co-authored-by: Ubuntu <ubuntu@ip-172-31-72-126.ec2.internal>
This commit is contained in:
Pablo Montalvo
2023-11-02 12:25:41 +01:00
committed by GitHub
parent 9b25c164bd
commit 8a312956fd
7 changed files with 1175 additions and 476 deletions

View File

@@ -24,7 +24,8 @@ if is_vision_available():
@require_torchvision
class TestFuyuImageProcessor(unittest.TestCase):
def setUp(self):
self.processor = FuyuImageProcessor(target_height=160, target_width=320, padding_value=1.0)
self.size = {"height": 160, "width": 320}
self.processor = FuyuImageProcessor(size=self.size, padding_value=1.0)
self.batch_size = 3
self.channels = 3
self.height = 300
@@ -38,29 +39,25 @@ class TestFuyuImageProcessor(unittest.TestCase):
self.sample_image_pil = Image.fromarray(self.sample_image)
def test_patches(self):
expected_num_patches = self.processor.get_num_patches(
img_h=self.height, img_w=self.width, patch_dim_h=self.image_patch_dim_h, patch_dim_w=self.image_patch_dim_w
)
expected_num_patches = self.processor.get_num_patches(image_height=self.height, image_width=self.width)
patches_final = self.processor.patchify_image(
image=self.image_input, patch_dim_h=self.image_patch_dim_h, patch_dim_w=self.image_patch_dim_w
)
patches_final = self.processor.patchify_image(image=self.image_input)
assert (
patches_final.shape[1] == expected_num_patches
), f"Expected {expected_num_patches} patches, got {patches_final.shape[1]}."
def test_scale_to_target_aspect_ratio(self):
# (h:450, w:210) fitting (160, 320) -> (160, 210*160/450)
scaled_image = self.processor._scale_to_target_aspect_ratio(self.sample_image)
scaled_image = self.processor.resize(self.sample_image, size=self.size)
self.assertEqual(scaled_image.shape[0], 160)
self.assertEqual(scaled_image.shape[1], 74)
def test_apply_transformation_numpy(self):
transformed_image = self.processor.apply_transformation(self.sample_image)
self.assertEqual(transformed_image.shape[0], 160)
self.assertEqual(transformed_image.shape[1], 320)
transformed_image = self.processor.preprocess(self.sample_image).images[0][0]
self.assertEqual(transformed_image.shape[1], 160)
self.assertEqual(transformed_image.shape[2], 320)
def test_apply_transformation_pil(self):
transformed_image = self.processor.apply_transformation(self.sample_image_pil)
self.assertEqual(transformed_image.shape[0], 160)
self.assertEqual(transformed_image.shape[1], 320)
transformed_image = self.processor.preprocess(self.sample_image_pil).images[0][0]
self.assertEqual(transformed_image.shape[1], 160)
self.assertEqual(transformed_image.shape[2], 320)