Fix bugs in mllama image processing (#36156)

* fix: handle input_channel_dim == channels_last
  Signed-off-by: Travis Johnson <tsjohnso@us.ibm.com>
* fix: default PIL images to channels_last
  Signed-off-by: Travis Johnson <tsjohnso@us.ibm.com>
* Apply suggestions from code review
  Co-authored-by: Pavel Iakubovskii <qubvel@gmail.com>
* fixup from review batch
  Signed-off-by: Travis Johnson <tsjohnso@us.ibm.com>
* test: add 1x1 PIL image to ambiguous channel test
  Signed-off-by: Travis Johnson <tsjohnso@us.ibm.com>
* fix(mllama): avoid 0 dimension for image with impractical aspect ratio
  Signed-off-by: Travis Johnson <tsjohnso@us.ibm.com>
---------
Signed-off-by: Travis Johnson <tsjohnso@us.ibm.com>
Co-authored-by: Pavel Iakubovskii <qubvel@gmail.com>
2025-03-11 03:22:48 -06:00
parent 1c4b62b219
commit d8663cb8c5
2 changed files with 44 additions and 7 deletions
--- a/tests/models/mllama/test_image_processing_mllama.py
+++ b/tests/models/mllama/test_image_processing_mllama.py
@@ -224,6 +224,36 @@ class MllamaImageProcessingTest(ImageProcessingTestMixin, unittest.TestCase):
            tuple(encoded_images.shape), (self.image_processor_tester.batch_size, *expected_output_image_shape)
        )

+    def test_call_channels_last(self):
+        # Build the processor under test from the tester's keyword arguments
+        image_processing = self.image_processing_class(**self.image_processor_dict)
+
+        # One sample holding one 1x1 RGB pixel of shape (1, 1, 3), every value 1.0 (white if read as [0, 1] floats)
+        image_inputs = [[np.full(shape=(1, 1, 3), fill_value=1.0, dtype=float)]]
+        encoded_images = image_processing(
+            image_inputs, return_tensors="pt", input_data_format="channels_last"
+        ).pixel_values
+        expected_output_image_shape = self.image_processor_tester.expected_output_image_shape(image_inputs)
+        self.assertEqual(tuple(encoded_images.shape), (1, *expected_output_image_shape))
+
+    def test_ambiguous_channel_pil_image(self):
+        # Build the processor under test; no input_data_format is passed in the call below
+        image_processing = self.image_processing_class(**self.image_processor_dict)
+
+        image_inputs = [[Image.new("RGB", (1, 1))], [Image.new("RGB", (100, 1))]]  # two samples, each with a 1-pixel side
+        encoded_images = image_processing(image_inputs, return_tensors="pt").pixel_values
+        expected_output_image_shape = self.image_processor_tester.expected_output_image_shape(image_inputs)
+        self.assertEqual(tuple(encoded_images.shape), (2, *expected_output_image_shape))
+
+    def test_resize_impractical_aspect_ratio(self):
+        # Build the processor under test from the tester's keyword arguments
+        image_processing = self.image_processing_class(**self.image_processor_dict)
+        # A 9999999x1 image must process without error (commit message: avoid a 0 dimension for such aspect ratios)
+        image_inputs = [[Image.new("RGB", (9999999, 1))]]
+        encoded_images = image_processing(image_inputs, return_tensors="pt").pixel_values
+        expected_output_image_shape = self.image_processor_tester.expected_output_image_shape(image_inputs)
+        self.assertEqual(tuple(encoded_images.shape), (1, *expected_output_image_shape))
+
    def test_call_pytorch(self):
        # Initialize image_processing
        image_processing = self.image_processing_class(**self.image_processor_dict)