fix pixtral processor (#34486)

* fix pixtral processor

* test out full length batches + remove undue ValueError

* fix up processing

* fix tests

* fix

* last fixup

* style

* [run-slow] pixtral

* [run-slow] pixtral

* fix config key

* skip torchscript tests

* [run-slow] pixtral

* add missing key

* [run-slow] pixtral

* fix docs

* [run-slow] pixtral

* fix wrong url for integration test

* [run-slow] pixtral

* pixtralVisionModel does not have a lm head

* [run-slow] pixtral
This commit is contained in:
Pablo Montalvo
2024-10-30 14:17:20 +01:00
committed by GitHub
parent 8a734ea2c3
commit 241d79026f
5 changed files with 35 additions and 48 deletions

View File

@@ -171,7 +171,7 @@ class PixtralProcessorTest(ProcessorTesterMixin, unittest.TestCase):
input_ids[0].tolist(),
# Equivalent to ["USER: [IMG][IMG][IMG_BREAK][IMG][IMG][IMG_END][IMG][IMG][IMG_BREAK][IMG][IMG][IMG_END]\nWhat's the difference between these two images? ASSISTANT:"]
[21510, 1058, 1032, 10, 10, 12, 10, 10, 13, 10, 10, 12, 10, 10, 13, 1010, 7493, 1681, 1278, 6592, 2396, 2576, 2295, 8061, 1063, 1349, 4290, 16002, 41150, 1058]
)
)
# fmt: on
# Test passing in a url
@@ -246,6 +246,25 @@ class PixtralProcessorTest(ProcessorTesterMixin, unittest.TestCase):
)
# fmt: on
def test_processor_returns_full_length_batches(self):
# to avoid https://github.com/huggingface/transformers/issues/34204
processor = self.processor_class.from_pretrained(self.tmpdirname)
prompt_string = [
"USER: [IMG]\nWhat's the content of the image? ASSISTANT:",
] * 5
processor.tokenizer.pad_token = "</s>"
image_inputs = [self.image_0] * 5
# Make small for checking image token expansion
processor.image_processor.size = {"longest_edge": 30}
processor.image_processor.patch_size = {"height": 2, "width": 2}
# Test passing in an image
inputs_image = processor(text=prompt_string, images=image_inputs, return_tensors="pt", padding=True)
self.assertIn("input_ids", inputs_image)
self.assertTrue(len(inputs_image["input_ids"]) == 5)
self.assertTrue(len(inputs_image["pixel_values"]) == 5)
# Override as PixtralProcessor needs nested images to work properly with batched inputs
@require_vision
def prepare_image_inputs(self, batch_size: Optional[int] = None):