Use non nested images and batched text Idefics2/3 (#34222)
* add support for non nested images and add tests * add tests error scenario * fix style * added single and no image to error tests
This commit is contained in:
@@ -226,6 +226,73 @@ class Idefics2ProcessorTest(ProcessorTesterMixin, unittest.TestCase):
|
||||
self.assertEqual(inputs["input_ids"], expected_input_ids)
|
||||
# fmt: on
|
||||
|
||||
def test_non_nested_images_with_batched_text(self):
    """Pass a flat (non-nested) list of three images together with two batched prompts.

    The first prompt carries one ``<image>`` token and the second carries two.
    Image splitting is switched off before the call, and only the shapes of the
    returned ``pixel_values`` and ``pixel_attention_mask`` are checked.
    """
    processor = self.get_processor()
    processor.image_processor.do_image_splitting = False

    image_token = "<image>"
    prompts = [
        f"{image_token}In this image, we see",
        f"bla, bla{image_token}{image_token}",
    ]
    flat_images = [self.image1, self.image2, self.image3]

    outputs = processor(text=prompts, images=flat_images, padding=True)

    # NOTE(review): the leading (2, 2) presumably means two prompts padded to
    # two image slots each -- confirm against the processor implementation.
    expected_shapes = {
        "pixel_values": (2, 2, 3, 767, 980),
        "pixel_attention_mask": (2, 2, 767, 980),
    }
    for key, expected_shape in expected_shapes.items():
        self.assertEqual(outputs[key].shape, expected_shape)
|
||||
|
||||
def test_process_interleaved_images_prompts_image_error(self):
    """Check that the processor raises ``ValueError`` for each listed text/images pairing.

    Three prompt batches are used (no ``<image>`` token, one token per prompt,
    and a token only in the second prompt), each paired with several nested or
    flat image layouts. Every pairing runs in its own ``subTest`` so that one
    failing case does not hide the remaining ones.

    NOTE(review): a few pairings (e.g. ``[[], [image2]]`` with a token only in
    the second prompt) look count-consistent per sample; presumably they are
    rejected for a different reason than a token/image count mismatch -- confirm.
    """
    processor = self.get_processor()

    text_no_tokens = [
        "This is a test sentence.",
        "In this other sentence we try some good things",
    ]
    text_token_in_each = [
        "This is a test sentence.<image>",
        "In this other sentence we try some good things<image>",
    ]
    text_token_in_second = [
        "This is a test sentence.",
        "In this other sentence we try some good things<image>",
    ]

    # (text, images) pairings, in the same order as the original hand-written checks.
    failing_cases = [
        (text_no_tokens, [[self.image1], [self.image2]]),
        (text_no_tokens, [[self.image1], []]),
        (text_token_in_each, [[self.image1], [self.image2, self.image3]]),
        (text_token_in_each, [[], [self.image2]]),
        (text_token_in_each, [self.image1, self.image2, self.image3]),
        (text_token_in_each, [self.image1]),
        (text_token_in_second, [[self.image1], []]),
        (text_token_in_second, [[], [self.image2]]),
        (text_token_in_second, [self.image1, self.image2]),
        (text_token_in_second, [self.image1]),
    ]

    for case_index, (text, images) in enumerate(failing_cases):
        with self.subTest(case=case_index, text=text):
            with self.assertRaises(ValueError):
                processor(text=text, images=images, padding=True)
|
||||
|
||||
def test_apply_chat_template(self):
|
||||
# Message contains content which is a mix of lists with images and image urls and string
|
||||
messages = [
|
||||
@@ -275,13 +342,3 @@ class Idefics2ProcessorTest(ProcessorTesterMixin, unittest.TestCase):
|
||||
return ["lower newer <image>", "<image> upper older longer string"] + ["<image> lower newer"] * (
|
||||
batch_size - 2
|
||||
)
|
||||
|
||||
# Override as Idefics2Processor needs nested images to work properly with batched inputs
|
||||
@require_vision
def prepare_image_inputs(self, batch_size: Optional[int] = None):
    """Prepare PIL image inputs for testing, optionally nested per batch entry.

    Args:
        batch_size: When ``None``, the parent implementation's result is
            returned unchanged. Otherwise the parent's result is wrapped in
            one single-element list per batch entry.

    Returns:
        The parent's result, or a list of ``batch_size`` single-element lists
        that each hold that same result.

    Raises:
        ValueError: If ``batch_size`` is less than 1.
    """
    if batch_size is None:
        return super().prepare_image_inputs()
    if batch_size < 1:
        raise ValueError("batch_size must be greater than 0")
    image = super().prepare_image_inputs()
    # Build a distinct inner list per sample: ``[[image]] * batch_size`` would
    # repeat one shared list object, so mutating one entry would change all.
    return [[image] for _ in range(batch_size)]
|
||||
|
||||
@@ -250,6 +250,74 @@ class Idefics3ProcessorTest(ProcessorTesterMixin, unittest.TestCase):
|
||||
self.assertEqual(inputs["input_ids"], expected_input_ids)
|
||||
# fmt: on
|
||||
|
||||
def test_non_nested_images_with_batched_text(self):
    """Pass a flat (non-nested) list of three images together with two batched prompts.

    The first prompt starts with one ``<image>`` token and the second with two.
    Image splitting is switched off before the call, and only the shapes of the
    returned ``pixel_values`` and ``pixel_attention_mask`` (converted to NumPy
    arrays) are checked.
    """
    processor = self.get_processor()
    processor.image_processor.do_image_splitting = False

    image_token = "<image>"
    caption = "In this image, we see"
    prompts = [
        f"{image_token}{caption}",
        f"{image_token}{image_token}{caption}",
    ]
    flat_images = [self.image1, self.image2, self.image3]

    outputs = processor(text=prompts, images=flat_images, padding=True)

    # NOTE(review): the leading (2, 2) presumably means two prompts padded to
    # two image slots each -- confirm against the processor implementation.
    expected_shapes = {
        "pixel_values": (2, 2, 3, 364, 364),
        "pixel_attention_mask": (2, 2, 364, 364),
    }
    for key, expected_shape in expected_shapes.items():
        self.assertEqual(np.array(outputs[key]).shape, expected_shape)
|
||||
|
||||
# Copied from tests.models.idefics2.test_processor_idefics2.Idefics2ProcessorTest.test_process_interleaved_images_prompts_image_error
|
||||
def test_process_interleaved_images_prompts_image_error(self):
    """Check that the processor raises ``ValueError`` for each listed text/images pairing.

    Three prompt batches are used (no ``<image>`` token, one token per prompt,
    and a token only in the second prompt), each paired with several nested or
    flat image layouts. Every pairing runs in its own ``subTest`` so that one
    failing case does not hide the remaining ones.

    NOTE(review): a few pairings (e.g. ``[[], [image2]]`` with a token only in
    the second prompt) look count-consistent per sample; presumably they are
    rejected for a different reason than a token/image count mismatch -- confirm.
    """
    processor = self.get_processor()

    text_no_tokens = [
        "This is a test sentence.",
        "In this other sentence we try some good things",
    ]
    text_token_in_each = [
        "This is a test sentence.<image>",
        "In this other sentence we try some good things<image>",
    ]
    text_token_in_second = [
        "This is a test sentence.",
        "In this other sentence we try some good things<image>",
    ]

    # (text, images) pairings, in the same order as the original hand-written checks.
    failing_cases = [
        (text_no_tokens, [[self.image1], [self.image2]]),
        (text_no_tokens, [[self.image1], []]),
        (text_token_in_each, [[self.image1], [self.image2, self.image3]]),
        (text_token_in_each, [[], [self.image2]]),
        (text_token_in_each, [self.image1, self.image2, self.image3]),
        (text_token_in_each, [self.image1]),
        (text_token_in_second, [[self.image1], []]),
        (text_token_in_second, [[], [self.image2]]),
        (text_token_in_second, [self.image1, self.image2]),
        (text_token_in_second, [self.image1]),
    ]

    for case_index, (text, images) in enumerate(failing_cases):
        with self.subTest(case=case_index, text=text):
            with self.assertRaises(ValueError):
                processor(text=text, images=images, padding=True)
|
||||
|
||||
def test_apply_chat_template(self):
|
||||
# Message contains content which is a mix of lists with images and image urls and string
|
||||
messages = [
|
||||
@@ -299,16 +367,7 @@ class Idefics3ProcessorTest(ProcessorTesterMixin, unittest.TestCase):
|
||||
batch_size - 2
|
||||
)
|
||||
|
||||
# Override as Idefics3Processor needs nested images to work properly with batched inputs
|
||||
@require_vision
def prepare_image_inputs(self, batch_size: Optional[int] = None):
    """Prepare PIL image inputs for testing, optionally nested per batch entry.

    Args:
        batch_size: When ``None``, the parent implementation's result is
            returned unchanged. Otherwise the parent's result is wrapped in
            one single-element list per batch entry.

    Returns:
        The parent's result, or a list of ``batch_size`` single-element lists
        that each hold that same result.

    Raises:
        ValueError: If ``batch_size`` is less than 1.
    """
    if batch_size is None:
        return super().prepare_image_inputs()
    if batch_size < 1:
        raise ValueError("batch_size must be greater than 0")
    image = super().prepare_image_inputs()
    # Build a distinct inner list per sample: ``[[image]] * batch_size`` would
    # repeat one shared list object, so mutating one entry would change all.
    return [[image] for _ in range(batch_size)]
|
||||
|
||||
# Override tests as the input_ids padded dimension is the second one, not the last one
|
||||
@require_vision
|
||||
@require_torch
|
||||
def test_kwargs_overrides_default_tokenizer_kwargs(self):
|
||||
|
||||
Reference in New Issue
Block a user