Fix bugs in mllama image processing (#36156)

* fix: handle input_channel_dim == channels_last

Signed-off-by: Travis Johnson <tsjohnso@us.ibm.com>

* fix: default PIL images to channels_last

Signed-off-by: Travis Johnson <tsjohnso@us.ibm.com>

* Apply suggestions from code review

Co-authored-by: Pavel Iakubovskii <qubvel@gmail.com>

* fixup from review batch

Signed-off-by: Travis Johnson <tsjohnso@us.ibm.com>

* test: add 1x1 PIL image to ambiguous channel test

Signed-off-by: Travis Johnson <tsjohnso@us.ibm.com>

* fix(mllama): avoid 0 dimension for image with impractical aspect ratio

Signed-off-by: Travis Johnson <tsjohnso@us.ibm.com>

---------

Signed-off-by: Travis Johnson <tsjohnso@us.ibm.com>
Co-authored-by: Pavel Iakubovskii <qubvel@gmail.com>
This commit is contained in:
Travis Johnson
2025-03-11 03:22:48 -06:00
committed by GitHub
parent 1c4b62b219
commit d8663cb8c5
2 changed files with 44 additions and 7 deletions

View File

@@ -224,6 +224,36 @@ class MllamaImageProcessingTest(ImageProcessingTestMixin, unittest.TestCase):
tuple(encoded_images.shape), (self.image_processor_tester.batch_size, *expected_output_image_shape)
)
def test_call_channels_last(self):
    # Build the processor under test from the shared test configuration
    processor = self.image_processing_class(**self.image_processor_dict)

    # One white 1x1 RGB pixel, stored with the channel axis last
    white_pixel = np.full(shape=(1, 1, 3), fill_value=1.0, dtype=float)
    image_inputs = [[white_pixel]]

    # The data format is passed explicitly instead of being inferred
    pixel_values = processor(
        image_inputs,
        return_tensors="pt",
        input_data_format="channels_last",
    ).pixel_values

    # A single sample is expected, followed by the tester's per-sample shape
    expected_shape = (1, *self.image_processor_tester.expected_output_image_shape(image_inputs))
    self.assertEqual(tuple(pixel_values.shape), expected_shape)
def test_ambiguous_channel_pil_image(self):
    # Build the processor under test from the shared test configuration
    processor = self.image_processing_class(**self.image_processor_dict)

    # Two samples, each holding one tiny RGB PIL image (sizes 1x1 and 100x1).
    # NOTE(review): presumably these sizes are chosen because a dimension of 1
    # makes channel-axis inference ambiguous — confirm against the processor.
    single_pixel = Image.new("RGB", (1, 1))
    thin_strip = Image.new("RGB", (100, 1))
    image_inputs = [[single_pixel], [thin_strip]]

    # No input_data_format is given, so the processor has to pick one itself
    pixel_values = processor(image_inputs, return_tensors="pt").pixel_values

    # Two samples are expected, followed by the tester's per-sample shape
    expected_shape = (2, *self.image_processor_tester.expected_output_image_shape(image_inputs))
    self.assertEqual(tuple(pixel_values.shape), expected_shape)
def test_resize_impractical_aspect_ratio(self):
    # Build the processor under test from the shared test configuration
    processor = self.image_processing_class(**self.image_processor_dict)

    # An extremely elongated RGB image (size 9999999x1): processing must
    # complete without raising even though the aspect ratio is impractical
    extreme_image = Image.new("RGB", (9999999, 1))
    image_inputs = [[extreme_image]]

    pixel_values = processor(image_inputs, return_tensors="pt").pixel_values

    # A single sample is expected, followed by the tester's per-sample shape
    expected_shape = (1, *self.image_processor_tester.expected_output_image_shape(image_inputs))
    self.assertEqual(tuple(pixel_values.shape), expected_shape)
def test_call_pytorch(self):
# Initialize image_processing
image_processing = self.image_processing_class(**self.image_processor_dict)