Add Idefics2/3 and SmolVLM Fast image processors + improvements for fast image processors (#38157)

* add working idefics2 fast and improvements for fast nested images processing

* add fast image processors idefics 3 and smolvlm

* cleanup tests

* fic doc idefics2

* PR review and fix issues after merge

* Force providing disable_grouping to group_images_by_shape

* simplify group_images_by_shape

* fix modular

* Fix nits after review
This commit is contained in:
Yoni Gozlan
2025-06-23 10:17:25 -04:00
committed by GitHub
parent 1a96127e46
commit d29482cc91
61 changed files with 2023 additions and 425 deletions

View File

@@ -312,10 +312,7 @@ class VitMatteImageProcessingTest(ImageProcessingTestMixin, unittest.TestCase):
encoding_slow = image_processor_slow(dummy_image, trimaps=dummy_trimap, return_tensors="pt")
encoding_fast = image_processor_fast(dummy_image, trimaps=dummy_trimap, return_tensors="pt")
self.assertTrue(torch.allclose(encoding_slow.pixel_values, encoding_fast.pixel_values, atol=1e-1))
self.assertLessEqual(
torch.mean(torch.abs(encoding_slow.pixel_values - encoding_fast.pixel_values)).item(), 1e-3
)
self._assert_slow_fast_tensors_equivalence(encoding_slow.pixel_values, encoding_fast.pixel_values)
def test_slow_fast_equivalence_batched(self):
# this only checks on equal resolution, since the slow processor doesn't work otherwise
@@ -338,10 +335,7 @@ class VitMatteImageProcessingTest(ImageProcessingTestMixin, unittest.TestCase):
encoding_slow = image_processor_slow(dummy_images, trimaps=dummy_trimaps, return_tensors="pt")
encoding_fast = image_processor_fast(dummy_images, trimaps=dummy_trimaps, return_tensors="pt")
self.assertTrue(torch.allclose(encoding_slow.pixel_values, encoding_fast.pixel_values, atol=1e-1))
self.assertLessEqual(
torch.mean(torch.abs(encoding_slow.pixel_values - encoding_fast.pixel_values)).item(), 1e-3
)
self._assert_slow_fast_tensors_equivalence(encoding_slow.pixel_values, encoding_fast.pixel_values)
@slow
@require_torch_accelerator