Add Idefics2/3 and SmolVLM Fast image processors + improvements for fast image processors (#38157)
* add working idefics2 fast and improvements for fast nested images processing * add fast image processors idefics 3 and smolvlm * cleanup tests * fic doc idefics2 * PR review and fix issues after merge * Force providing disable_grouping to group_images_by_shape * simplify group_images_by_shape * fix modular * Fix nits after review
This commit is contained in:
@@ -162,6 +162,10 @@ class ImageProcessingTestMixin:
|
||||
|
||||
self.image_processor_list = image_processor_list
|
||||
|
||||
def _assert_slow_fast_tensors_equivalence(self, slow_tensor, fast_tensor, atol=1e-1, rtol=1e-3, mean_atol=5e-3):
|
||||
torch.testing.assert_close(slow_tensor, fast_tensor, atol=atol, rtol=rtol)
|
||||
self.assertLessEqual(torch.mean(torch.abs(slow_tensor - fast_tensor)).item(), mean_atol)
|
||||
|
||||
@require_vision
|
||||
@require_torch
|
||||
def test_slow_fast_equivalence(self):
|
||||
@@ -179,10 +183,7 @@ class ImageProcessingTestMixin:
|
||||
|
||||
encoding_slow = image_processor_slow(dummy_image, return_tensors="pt")
|
||||
encoding_fast = image_processor_fast(dummy_image, return_tensors="pt")
|
||||
torch.testing.assert_close(encoding_slow.pixel_values, encoding_fast.pixel_values, atol=1e-1, rtol=1e-3)
|
||||
self.assertLessEqual(
|
||||
torch.mean(torch.abs(encoding_slow.pixel_values - encoding_fast.pixel_values)).item(), 5e-3
|
||||
)
|
||||
self._assert_slow_fast_tensors_equivalence(encoding_slow.pixel_values, encoding_fast.pixel_values)
|
||||
|
||||
@require_vision
|
||||
@require_torch
|
||||
@@ -205,10 +206,7 @@ class ImageProcessingTestMixin:
|
||||
encoding_slow = image_processor_slow(dummy_images, return_tensors="pt")
|
||||
encoding_fast = image_processor_fast(dummy_images, return_tensors="pt")
|
||||
|
||||
torch.testing.assert_close(encoding_slow.pixel_values, encoding_fast.pixel_values, atol=1e-1, rtol=1e-3)
|
||||
self.assertLessEqual(
|
||||
torch.mean(torch.abs(encoding_slow.pixel_values - encoding_fast.pixel_values)).item(), 5e-3
|
||||
)
|
||||
self._assert_slow_fast_tensors_equivalence(encoding_slow.pixel_values, encoding_fast.pixel_values)
|
||||
|
||||
@require_vision
|
||||
@require_torch
|
||||
@@ -577,8 +575,10 @@ class ImageProcessingTestMixin:
|
||||
|
||||
image_processor = torch.compile(image_processor, mode="reduce-overhead")
|
||||
output_compiled = image_processor(input_image, device=torch_device, return_tensors="pt")
|
||||
|
||||
torch.testing.assert_close(output_eager.pixel_values, output_compiled.pixel_values, rtol=1e-4, atol=1e-4)
|
||||
print(output_eager.pixel_values.dtype, output_compiled.pixel_values.dtype)
|
||||
self._assert_slow_fast_tensors_equivalence(
|
||||
output_eager.pixel_values, output_compiled.pixel_values, atol=1e-4, rtol=1e-4, mean_atol=1e-5
|
||||
)
|
||||
|
||||
|
||||
class AnnotationFormatTestMixin:
|
||||
|
||||
Reference in New Issue
Block a user