Add support for custom inputs and batched inputs in ProcessorTesterMixin (#33711)

* add support for custom inputs and batched inputs in ProcessorTesterMixin

* Fix batch_size behavior in ProcessorTesterMixin

* Change format of batched prepare_inputs

* Remove test override for Pixtral processor

* Remove unnecessary tests and clean up after new prepare_inputs functions

* Fix instructBlipVideo image processor
This commit is contained in:
Yoni Gozlan
2024-10-01 23:52:03 +02:00
committed by GitHub
parent 1baa08897d
commit 61ac161a9d
8 changed files with 95 additions and 269 deletions

View File

@@ -57,8 +57,11 @@ def make_batched_videos(videos) -> List[VideoInput]:
elif len(videos[0].shape) == 4: elif len(videos[0].shape) == 4:
return [list(video) for video in videos] return [list(video) for video in videos]
elif is_valid_image(videos) and len(videos.shape) == 4: elif is_valid_image(videos):
return [list(videos)] if isinstance(videos, PIL.Image.Image):
return [[videos]]
elif len(videos.shape) == 4:
return [list(videos)]
raise ValueError(f"Could not make batched video from {videos}") raise ValueError(f"Could not make batched video from {videos}")

View File

@@ -190,7 +190,7 @@ class FuyuProcessingTest(ProcessorTesterMixin, unittest.TestCase):
processor = self.processor_class(tokenizer=tokenizer, image_processor=image_processor) processor = self.processor_class(tokenizer=tokenizer, image_processor=image_processor)
self.skip_processor_without_typed_kwargs(processor) self.skip_processor_without_typed_kwargs(processor)
input_str = "lower newer" input_str = self.prepare_text_inputs()
# Fuyu uses tokenizer kwargs only when image is None. # Fuyu uses tokenizer kwargs only when image is None.
image_input = None image_input = None
@@ -218,7 +218,7 @@ class FuyuProcessingTest(ProcessorTesterMixin, unittest.TestCase):
processor = self.processor_class(tokenizer=tokenizer, image_processor=image_processor) processor = self.processor_class(tokenizer=tokenizer, image_processor=image_processor)
self.skip_processor_without_typed_kwargs(processor) self.skip_processor_without_typed_kwargs(processor)
input_str = "lower newer" input_str = self.prepare_text_inputs()
# Fuyu uses tokenizer kwargs only when image is None. # Fuyu uses tokenizer kwargs only when image is None.
image_input = None image_input = None
@@ -237,7 +237,7 @@ class FuyuProcessingTest(ProcessorTesterMixin, unittest.TestCase):
processor = self.processor_class(tokenizer=tokenizer, image_processor=image_processor) processor = self.processor_class(tokenizer=tokenizer, image_processor=image_processor)
self.skip_processor_without_typed_kwargs(processor) self.skip_processor_without_typed_kwargs(processor)
input_str = "lower newer" input_str = self.prepare_text_inputs()
# Fuyu uses tokenizer kwargs only when image is None. # Fuyu uses tokenizer kwargs only when image is None.
image_input = None image_input = None
@@ -264,7 +264,7 @@ class FuyuProcessingTest(ProcessorTesterMixin, unittest.TestCase):
processor = self.processor_class(tokenizer=tokenizer, image_processor=image_processor) processor = self.processor_class(tokenizer=tokenizer, image_processor=image_processor)
self.skip_processor_without_typed_kwargs(processor) self.skip_processor_without_typed_kwargs(processor)
input_str = "lower newer" input_str = self.prepare_text_inputs()
# Fuyu uses tokenizer kwargs only when image is None. # Fuyu uses tokenizer kwargs only when image is None.
image_input = None image_input = None
@@ -290,7 +290,7 @@ class FuyuProcessingTest(ProcessorTesterMixin, unittest.TestCase):
processor = self.processor_class(tokenizer=tokenizer, image_processor=image_processor) processor = self.processor_class(tokenizer=tokenizer, image_processor=image_processor)
self.skip_processor_without_typed_kwargs(processor) self.skip_processor_without_typed_kwargs(processor)
input_str = "lower newer" input_str = self.prepare_text_inputs()
# Fuyu uses tokenizer kwargs only when image is None. # Fuyu uses tokenizer kwargs only when image is None.
image_input = None image_input = None
inputs = processor( inputs = processor(
@@ -315,7 +315,7 @@ class FuyuProcessingTest(ProcessorTesterMixin, unittest.TestCase):
processor = self.processor_class(tokenizer=tokenizer, image_processor=image_processor) processor = self.processor_class(tokenizer=tokenizer, image_processor=image_processor)
self.skip_processor_without_typed_kwargs(processor) self.skip_processor_without_typed_kwargs(processor)
input_str = ["lower newer", "upper older longer string"] input_str = self.prepare_text_inputs(batch_size=2)
# Fuyu uses tokenizer kwargs only when image is None. # Fuyu uses tokenizer kwargs only when image is None.
image_input = None image_input = None
inputs = processor( inputs = processor(

View File

@@ -17,6 +17,7 @@ import shutil
import tempfile import tempfile
import unittest import unittest
from io import BytesIO from io import BytesIO
from typing import Optional
import numpy as np import numpy as np
import requests import requests
@@ -284,44 +285,29 @@ class Idefics3ProcessorTest(ProcessorTesterMixin, unittest.TestCase):
) )
self.assertEqual(rendered, expected_rendered) self.assertEqual(rendered, expected_rendered)
@require_torch # Override as Idefics3Processor needs image tokens in prompts
@require_vision def prepare_text_inputs(self, batch_size: Optional[int] = None):
def test_image_processor_defaults_preserved_by_image_kwargs(self): if batch_size is None:
if "image_processor" not in self.processor_class.attributes: return "lower newer <image>"
self.skipTest(f"image_processor attribute not present in {self.processor_class}")
image_processor = self.get_component("image_processor")
tokenizer = self.get_component("tokenizer", max_length=117)
processor = self.processor_class(tokenizer=tokenizer, image_processor=image_processor) if batch_size < 1:
self.skip_processor_without_typed_kwargs(processor) raise ValueError("batch_size must be greater than 0")
input_str = "lower newer <image>" if batch_size == 1:
image_input = self.prepare_image_inputs() return ["lower newer <image>"]
return ["lower newer <image>", "<image> upper older longer string"] + ["<image> lower newer"] * (
inputs = processor(text=input_str, images=image_input) batch_size - 2
self.assertEqual(len(inputs["pixel_values"][0][0]), 3)
self.assertEqual(len(inputs["pixel_values"][0][0][0]), 364) # crop size doesn't affect our image processor
@require_torch
@require_vision
def test_kwargs_overrides_default_image_processor_kwargs(self):
if "image_processor" not in self.processor_class.attributes:
self.skipTest(f"image_processor attribute not present in {self.processor_class}")
image_processor = self.get_component(
"image_processor", max_image_size={"longest_edge": 32}, size={"longest_edge": 32}
) )
tokenizer = self.get_component("tokenizer", max_length=117, padding="max_length")
processor = self.processor_class(tokenizer=tokenizer, image_processor=image_processor, image_seq_len=2) # Override as Idefics3Processor needs nested images to work properly with batched inputs
self.skip_processor_without_typed_kwargs(processor) @require_vision
def prepare_image_inputs(self, batch_size: Optional[int] = None):
input_str = "lower newer <image>" """This function prepares a list of PIL images for testing"""
image_input = self.prepare_image_inputs() if batch_size is None:
return super().prepare_image_inputs()
inputs = processor(text=input_str, images=image_input) if batch_size < 1:
self.assertEqual(len(inputs["pixel_values"][0][0]), 3) raise ValueError("batch_size must be greater than 0")
self.assertEqual(len(inputs["pixel_values"][0][0][0]), 32) return [[super().prepare_image_inputs()]] * batch_size
self.assertEqual(len(inputs["input_ids"][0]), 117)
@require_vision @require_vision
@require_torch @require_torch
@@ -333,7 +319,7 @@ class Idefics3ProcessorTest(ProcessorTesterMixin, unittest.TestCase):
processor = self.processor_class(tokenizer=tokenizer, image_processor=image_processor) processor = self.processor_class(tokenizer=tokenizer, image_processor=image_processor)
self.skip_processor_without_typed_kwargs(processor) self.skip_processor_without_typed_kwargs(processor)
input_str = "lower newer<image>" input_str = self.prepare_text_inputs()
image_input = self.prepare_image_inputs() image_input = self.prepare_image_inputs()
inputs = processor(text=input_str, images=image_input, return_tensors="pt", max_length=30) inputs = processor(text=input_str, images=image_input, return_tensors="pt", max_length=30)
@@ -350,7 +336,7 @@ class Idefics3ProcessorTest(ProcessorTesterMixin, unittest.TestCase):
processor = self.processor_class(tokenizer=tokenizer, image_processor=image_processor) processor = self.processor_class(tokenizer=tokenizer, image_processor=image_processor)
self.skip_processor_without_typed_kwargs(processor) self.skip_processor_without_typed_kwargs(processor)
input_str = "lower newer<image>" input_str = self.prepare_text_inputs()
image_input = self.prepare_image_inputs() image_input = self.prepare_image_inputs()
# Define the kwargs for each modality # Define the kwargs for each modality
@@ -378,7 +364,7 @@ class Idefics3ProcessorTest(ProcessorTesterMixin, unittest.TestCase):
processor = self.processor_class(tokenizer=tokenizer, image_processor=image_processor) processor = self.processor_class(tokenizer=tokenizer, image_processor=image_processor)
self.skip_processor_without_typed_kwargs(processor) self.skip_processor_without_typed_kwargs(processor)
input_str = "lower newer<image>" input_str = self.prepare_text_inputs()
image_input = self.prepare_image_inputs() image_input = self.prepare_image_inputs()
# Define the kwargs for each modality # Define the kwargs for each modality
@@ -402,7 +388,7 @@ class Idefics3ProcessorTest(ProcessorTesterMixin, unittest.TestCase):
processor = self.processor_class(tokenizer=tokenizer, image_processor=image_processor) processor = self.processor_class(tokenizer=tokenizer, image_processor=image_processor)
self.skip_processor_without_typed_kwargs(processor) self.skip_processor_without_typed_kwargs(processor)
input_str = "lower newer<image>" input_str = self.prepare_text_inputs()
image_input = self.prepare_image_inputs() image_input = self.prepare_image_inputs()
inputs = processor(text=input_str, images=image_input, return_tensors="pt") inputs = processor(text=input_str, images=image_input, return_tensors="pt")
@@ -419,11 +405,11 @@ class Idefics3ProcessorTest(ProcessorTesterMixin, unittest.TestCase):
processor = self.processor_class(tokenizer=tokenizer, image_processor=image_processor) processor = self.processor_class(tokenizer=tokenizer, image_processor=image_processor)
self.skip_processor_without_typed_kwargs(processor) self.skip_processor_without_typed_kwargs(processor)
input_str = ["<image>lower newer", "<image>upper older longer string"] input_str = self.prepare_text_inputs(batch_size=2)
image_input = self.prepare_image_inputs() image_input = self.prepare_image_inputs(batch_size=2)
inputs = processor( inputs = processor(
text=input_str, text=input_str,
images=[image_input, image_input], images=image_input,
return_tensors="pt", return_tensors="pt",
padding="longest", padding="longest",
max_length=76, max_length=76,
@@ -446,7 +432,7 @@ class Idefics3ProcessorTest(ProcessorTesterMixin, unittest.TestCase):
processor = self.processor_class(tokenizer=tokenizer, image_processor=image_processor) processor = self.processor_class(tokenizer=tokenizer, image_processor=image_processor)
self.skip_processor_without_typed_kwargs(processor) self.skip_processor_without_typed_kwargs(processor)
input_str = "lower newer<image>" input_str = self.prepare_text_inputs()
image_input = self.prepare_image_inputs() image_input = self.prepare_image_inputs()
inputs = processor( inputs = processor(
text=input_str, text=input_str,

View File

@@ -499,7 +499,7 @@ class Kosmos2ProcessorTest(ProcessorTesterMixin, unittest.TestCase):
processor = self.processor_class(tokenizer=tokenizer, image_processor=image_processor) processor = self.processor_class(tokenizer=tokenizer, image_processor=image_processor)
self.skip_processor_without_typed_kwargs(processor) self.skip_processor_without_typed_kwargs(processor)
input_str = "lower newer" input_str = self.prepare_text_inputs()
# set image input to None # set image input to None
image_input = None image_input = None
@@ -525,7 +525,7 @@ class Kosmos2ProcessorTest(ProcessorTesterMixin, unittest.TestCase):
processor = self.processor_class(tokenizer=tokenizer, image_processor=image_processor) processor = self.processor_class(tokenizer=tokenizer, image_processor=image_processor)
self.skip_processor_without_typed_kwargs(processor) self.skip_processor_without_typed_kwargs(processor)
input_str = "lower newer" input_str = self.prepare_text_inputs()
image_input = self.prepare_image_inputs() image_input = self.prepare_image_inputs()
# Define the kwargs for each modality # Define the kwargs for each modality
@@ -551,7 +551,7 @@ class Kosmos2ProcessorTest(ProcessorTesterMixin, unittest.TestCase):
processor = self.processor_class(tokenizer=tokenizer, image_processor=image_processor) processor = self.processor_class(tokenizer=tokenizer, image_processor=image_processor)
self.skip_processor_without_typed_kwargs(processor) self.skip_processor_without_typed_kwargs(processor)
input_str = "lower newer" input_str = self.prepare_text_inputs()
image_input = self.prepare_image_inputs() image_input = self.prepare_image_inputs()
# Define the kwargs for each modality # Define the kwargs for each modality
@@ -574,7 +574,7 @@ class Kosmos2ProcessorTest(ProcessorTesterMixin, unittest.TestCase):
processor = self.processor_class(tokenizer=tokenizer, image_processor=image_processor) processor = self.processor_class(tokenizer=tokenizer, image_processor=image_processor)
self.skip_processor_without_typed_kwargs(processor) self.skip_processor_without_typed_kwargs(processor)
input_str = "lower newer" input_str = self.prepare_text_inputs()
# set image input to None # set image input to None
image_input = None image_input = None
@@ -593,7 +593,7 @@ class Kosmos2ProcessorTest(ProcessorTesterMixin, unittest.TestCase):
processor = self.processor_class(tokenizer=tokenizer, image_processor=image_processor) processor = self.processor_class(tokenizer=tokenizer, image_processor=image_processor)
self.skip_processor_without_typed_kwargs(processor) self.skip_processor_without_typed_kwargs(processor)
input_str = "lower newer" input_str = self.prepare_text_inputs()
# set image input to None # set image input to None
image_input = None image_input = None
inputs = processor( inputs = processor(
@@ -618,7 +618,7 @@ class Kosmos2ProcessorTest(ProcessorTesterMixin, unittest.TestCase):
processor = self.processor_class(tokenizer=tokenizer, image_processor=image_processor) processor = self.processor_class(tokenizer=tokenizer, image_processor=image_processor)
self.skip_processor_without_typed_kwargs(processor) self.skip_processor_without_typed_kwargs(processor)
input_str = ["lower newer", "upper older longer string"] input_str = self.prepare_text_inputs(batch_size=2)
# set image input to None # set image input to None
image_input = None image_input = None
inputs = processor( inputs = processor(

View File

@@ -17,7 +17,6 @@ import shutil
import tempfile import tempfile
import unittest import unittest
import numpy as np
import pytest import pytest
from transformers import AutoProcessor, CLIPTokenizerFast, OmDetTurboProcessor from transformers import AutoProcessor, CLIPTokenizerFast, OmDetTurboProcessor
@@ -36,8 +35,6 @@ if is_torch_available():
from transformers.models.omdet_turbo.modeling_omdet_turbo import OmDetTurboObjectDetectionOutput from transformers.models.omdet_turbo.modeling_omdet_turbo import OmDetTurboObjectDetectionOutput
if is_vision_available(): if is_vision_available():
from PIL import Image
from transformers import DetrImageProcessor from transformers import DetrImageProcessor
@@ -45,6 +42,7 @@ if is_vision_available():
@require_vision @require_vision
class OmDetTurboProcessorTest(ProcessorTesterMixin, unittest.TestCase): class OmDetTurboProcessorTest(ProcessorTesterMixin, unittest.TestCase):
processor_class = OmDetTurboProcessor processor_class = OmDetTurboProcessor
text_input_name = "classes_input_ids"
def setUp(self): def setUp(self):
self.tmpdirname = tempfile.mkdtemp() self.tmpdirname = tempfile.mkdtemp()
@@ -77,17 +75,6 @@ class OmDetTurboProcessorTest(ProcessorTesterMixin, unittest.TestCase):
def tearDown(self): def tearDown(self):
shutil.rmtree(self.tmpdirname) shutil.rmtree(self.tmpdirname)
def prepare_image_inputs(self):
"""This function prepares a list of PIL images, or a list of numpy arrays if one specifies numpify=True,
or a list of PyTorch tensors if one specifies torchify=True.
"""
image_inputs = [np.random.randint(255, size=(3, 30, 400), dtype=np.uint8)]
image_inputs = [Image.fromarray(np.moveaxis(x, 0, -1)) for x in image_inputs]
return image_inputs
def get_fake_omdet_turbo_output(self): def get_fake_omdet_turbo_output(self):
torch.manual_seed(42) torch.manual_seed(42)
return OmDetTurboObjectDetectionOutput( return OmDetTurboObjectDetectionOutput(
@@ -210,154 +197,3 @@ class OmDetTurboProcessorTest(ProcessorTesterMixin, unittest.TestCase):
inputs = processor(images=image_input, text=input_classes, task=input_tasks, return_tensors="pt") inputs = processor(images=image_input, text=input_classes, task=input_tasks, return_tensors="pt")
self.assertListEqual(list(inputs.keys()), self.input_keys) self.assertListEqual(list(inputs.keys()), self.input_keys)
@require_vision
@require_torch
def test_tokenizer_defaults_preserved_by_kwargs(self):
# Rewrite as OmDet-Turbo processor outputs "input_ids" for both tasks and classes.
if "image_processor" not in self.processor_class.attributes:
self.skipTest(f"image_processor attribute not present in {self.processor_class}")
image_processor = self.get_component("image_processor")
tokenizer = self.get_component("tokenizer", max_length=117)
processor = self.processor_class(tokenizer=tokenizer, image_processor=image_processor)
self.skip_processor_without_typed_kwargs(processor)
input_str = "lower newer"
image_input = self.prepare_image_inputs()
inputs = processor(images=image_input, text=[input_str], task=input_str, return_tensors="pt")
self.assertEqual(len(inputs["tasks_input_ids"][0]), 117)
self.assertEqual(len(inputs["classes_input_ids"][0]), 117)
@require_vision
@require_torch
def test_kwargs_overrides_default_tokenizer_kwargs(self):
# Rewrite as OmDet-Turbo processor outputs "input_ids" for both tasks and classes.
if "image_processor" not in self.processor_class.attributes:
self.skipTest(f"image_processor attribute not present in {self.processor_class}")
image_processor = self.get_component("image_processor")
tokenizer = self.get_component("tokenizer", max_length=117)
processor = self.processor_class(tokenizer=tokenizer, image_processor=image_processor)
self.skip_processor_without_typed_kwargs(processor)
input_str = "lower newer"
image_input = self.prepare_image_inputs()
inputs = processor(images=image_input, text=[input_str], task=input_str, return_tensors="pt", max_length=112)
self.assertEqual(len(inputs["tasks_input_ids"][0]), 112)
self.assertEqual(len(inputs["classes_input_ids"][0]), 112)
@require_torch
@require_vision
def test_unstructured_kwargs(self):
# Rewrite as OmDet-Turbo processor outputs "input_ids" for both tasks and classes.
if "image_processor" not in self.processor_class.attributes:
self.skipTest(f"image_processor attribute not present in {self.processor_class}")
image_processor = self.get_component("image_processor")
tokenizer = self.get_component("tokenizer")
processor = self.processor_class(tokenizer=tokenizer, image_processor=image_processor)
self.skip_processor_without_typed_kwargs(processor)
input_str = "lower newer"
image_input = self.prepare_image_inputs()
inputs = processor(
images=image_input,
text=[input_str],
task=input_str,
return_tensors="pt",
size={"height": 214, "width": 214},
padding="max_length",
max_length=76,
)
self.assertEqual(inputs["pixel_values"].shape[2], 214)
self.assertEqual(len(inputs["tasks_input_ids"][0]), 76)
self.assertEqual(len(inputs["classes_input_ids"][0]), 76)
@require_torch
@require_vision
def test_unstructured_kwargs_batched(self):
# Rewrite as OmDet-Turbo processor outputs "input_ids" for both tasks and classes.
if "image_processor" not in self.processor_class.attributes:
self.skipTest(f"image_processor attribute not present in {self.processor_class}")
image_processor = self.get_component("image_processor")
tokenizer = self.get_component("tokenizer")
processor = self.processor_class(tokenizer=tokenizer, image_processor=image_processor)
self.skip_processor_without_typed_kwargs(processor)
input_str = ["lower newer", "upper older longer string"]
image_input = self.prepare_image_inputs() * 2
inputs = processor(
images=image_input,
text=[input_str],
task=input_str,
return_tensors="pt",
size={"height": 214, "width": 214},
padding="longest",
max_length=76,
)
self.assertEqual(inputs["pixel_values"].shape[2], 214)
self.assertEqual(len(inputs["tasks_input_ids"][0]), 6)
self.assertEqual(len(inputs["classes_input_ids"][0]), 6)
@require_torch
@require_vision
def test_structured_kwargs_nested(self):
# Rewrite as OmDet-Turbo processor outputs "input_ids" for both tasks and classes.
if "image_processor" not in self.processor_class.attributes:
self.skipTest(f"image_processor attribute not present in {self.processor_class}")
image_processor = self.get_component("image_processor")
tokenizer = self.get_component("tokenizer")
processor = self.processor_class(tokenizer=tokenizer, image_processor=image_processor)
self.skip_processor_without_typed_kwargs(processor)
input_str = "lower newer"
image_input = self.prepare_image_inputs()
# Define the kwargs for each modality
all_kwargs = {
"common_kwargs": {"return_tensors": "pt"},
"images_kwargs": {"size": {"height": 214, "width": 214}},
"text_kwargs": {"padding": "max_length", "max_length": 76, "task": input_str},
}
inputs = processor(images=image_input, text=[input_str], **all_kwargs)
self.skip_processor_without_typed_kwargs(processor)
self.assertEqual(inputs["pixel_values"].shape[2], 214)
self.assertEqual(len(inputs["tasks_input_ids"][0]), 76)
self.assertEqual(len(inputs["classes_input_ids"][0]), 76)
@require_torch
@require_vision
def test_structured_kwargs_nested_from_dict(self):
# Rewrite as OmDet-Turbo processor outputs "input_ids" for both tasks and classes.
if "image_processor" not in self.processor_class.attributes:
self.skipTest(f"image_processor attribute not present in {self.processor_class}")
image_processor = self.get_component("image_processor")
tokenizer = self.get_component("tokenizer")
processor = self.processor_class(tokenizer=tokenizer, image_processor=image_processor)
self.skip_processor_without_typed_kwargs(processor)
input_str = "lower newer"
image_input = self.prepare_image_inputs()
# Define the kwargs for each modality
all_kwargs = {
"common_kwargs": {"return_tensors": "pt"},
"images_kwargs": {"size": {"height": 214, "width": 214}},
"text_kwargs": {"padding": "max_length", "max_length": 76, "task": input_str},
}
inputs = processor(images=image_input, text=[input_str], **all_kwargs)
self.assertEqual(inputs["pixel_values"].shape[2], 214)
self.assertEqual(len(inputs["tasks_input_ids"][0]), 76)
self.assertEqual(len(inputs["classes_input_ids"][0]), 76)

View File

@@ -96,7 +96,7 @@ class Pix2StructProcessorTest(ProcessorTesterMixin, unittest.TestCase):
processor = Pix2StructProcessor(tokenizer=tokenizer, image_processor=image_processor) processor = Pix2StructProcessor(tokenizer=tokenizer, image_processor=image_processor)
input_str = "lower newer" input_str = self.prepare_text_inputs()
encoded_processor = processor(text=input_str) encoded_processor = processor(text=input_str)
@@ -111,7 +111,7 @@ class Pix2StructProcessorTest(ProcessorTesterMixin, unittest.TestCase):
processor = Pix2StructProcessor(tokenizer=tokenizer, image_processor=image_processor) processor = Pix2StructProcessor(tokenizer=tokenizer, image_processor=image_processor)
input_str = "lower newer" input_str = self.prepare_text_inputs()
image_input = self.prepare_image_inputs() image_input = self.prepare_image_inputs()
inputs = processor(text=input_str, images=image_input) inputs = processor(text=input_str, images=image_input)
@@ -130,7 +130,7 @@ class Pix2StructProcessorTest(ProcessorTesterMixin, unittest.TestCase):
processor = Pix2StructProcessor(tokenizer=tokenizer, image_processor=image_processor) processor = Pix2StructProcessor(tokenizer=tokenizer, image_processor=image_processor)
input_str = "lower newer" input_str = self.prepare_text_inputs()
image_input = self.prepare_image_inputs() image_input = self.prepare_image_inputs()
inputs = processor(text=input_str, images=image_input) inputs = processor(text=input_str, images=image_input)
@@ -168,7 +168,7 @@ class Pix2StructProcessorTest(ProcessorTesterMixin, unittest.TestCase):
processor = Pix2StructProcessor(tokenizer=tokenizer, image_processor=image_processor) processor = Pix2StructProcessor(tokenizer=tokenizer, image_processor=image_processor)
input_str = "lower newer" input_str = self.prepare_text_inputs()
image_input = self.prepare_image_inputs() image_input = self.prepare_image_inputs()
inputs = processor(text=input_str, images=image_input) inputs = processor(text=input_str, images=image_input)
@@ -195,7 +195,7 @@ class Pix2StructProcessorTest(ProcessorTesterMixin, unittest.TestCase):
processor = self.processor_class(tokenizer=tokenizer, image_processor=image_processor) processor = self.processor_class(tokenizer=tokenizer, image_processor=image_processor)
self.skip_processor_without_typed_kwargs(processor) self.skip_processor_without_typed_kwargs(processor)
input_str = "lower newer" input_str = self.prepare_text_inputs()
image_input = self.prepare_image_inputs() image_input = self.prepare_image_inputs()
inputs = processor(text=input_str, images=image_input) inputs = processor(text=input_str, images=image_input)
@@ -213,7 +213,7 @@ class Pix2StructProcessorTest(ProcessorTesterMixin, unittest.TestCase):
processor = self.processor_class(tokenizer=tokenizer, image_processor=image_processor) processor = self.processor_class(tokenizer=tokenizer, image_processor=image_processor)
self.skip_processor_without_typed_kwargs(processor) self.skip_processor_without_typed_kwargs(processor)
input_str = "lower newer" input_str = self.prepare_text_inputs()
image_input = self.prepare_image_inputs() image_input = self.prepare_image_inputs()
inputs = processor(text=input_str, images=image_input, max_patches=1024) inputs = processor(text=input_str, images=image_input, max_patches=1024)
@@ -231,7 +231,7 @@ class Pix2StructProcessorTest(ProcessorTesterMixin, unittest.TestCase):
processor = self.processor_class(tokenizer=tokenizer, image_processor=image_processor) processor = self.processor_class(tokenizer=tokenizer, image_processor=image_processor)
self.skip_processor_without_typed_kwargs(processor) self.skip_processor_without_typed_kwargs(processor)
input_str = "lower newer" input_str = self.prepare_text_inputs()
image_input = self.prepare_image_inputs() image_input = self.prepare_image_inputs()
inputs = processor( inputs = processor(
text=input_str, text=input_str,
@@ -257,8 +257,8 @@ class Pix2StructProcessorTest(ProcessorTesterMixin, unittest.TestCase):
processor = self.processor_class(tokenizer=tokenizer, image_processor=image_processor) processor = self.processor_class(tokenizer=tokenizer, image_processor=image_processor)
self.skip_processor_without_typed_kwargs(processor) self.skip_processor_without_typed_kwargs(processor)
input_str = ["lower newer", "upper older longer string"] input_str = self.prepare_text_inputs(batch_size=2)
image_input = self.prepare_image_inputs() * 2 image_input = self.prepare_image_inputs(batch_size=2)
inputs = processor( inputs = processor(
text=input_str, text=input_str,
images=image_input, images=image_input,
@@ -284,7 +284,7 @@ class Pix2StructProcessorTest(ProcessorTesterMixin, unittest.TestCase):
processor = self.processor_class(tokenizer=tokenizer, image_processor=image_processor) processor = self.processor_class(tokenizer=tokenizer, image_processor=image_processor)
self.skip_processor_without_typed_kwargs(processor) self.skip_processor_without_typed_kwargs(processor)
input_str = "lower newer" input_str = self.prepare_text_inputs()
image_input = self.prepare_image_inputs() image_input = self.prepare_image_inputs()
# Define the kwargs for each modality # Define the kwargs for each modality
@@ -313,7 +313,7 @@ class Pix2StructProcessorTest(ProcessorTesterMixin, unittest.TestCase):
processor = self.processor_class(tokenizer=tokenizer, image_processor=image_processor) processor = self.processor_class(tokenizer=tokenizer, image_processor=image_processor)
self.skip_processor_without_typed_kwargs(processor) self.skip_processor_without_typed_kwargs(processor)
input_str = "lower newer" input_str = self.prepare_text_inputs()
image_input = self.prepare_image_inputs() image_input = self.prepare_image_inputs()
# Define the kwargs for each modality # Define the kwargs for each modality

View File

@@ -14,6 +14,7 @@
import shutil import shutil
import tempfile import tempfile
import unittest import unittest
from typing import Optional
import requests import requests
import torch import torch
@@ -246,27 +247,11 @@ class PixtralProcessorTest(ProcessorTesterMixin, unittest.TestCase):
# fmt: on # fmt: on
# Override as PixtralProcessor needs nested images to work properly with batched inputs # Override as PixtralProcessor needs nested images to work properly with batched inputs
def test_unstructured_kwargs_batched(self): @require_vision
if "image_processor" not in self.processor_class.attributes: def prepare_image_inputs(self, batch_size: Optional[int] = None):
self.skipTest(f"image_processor attribute not present in {self.processor_class}") """This function prepares a list of PIL images for testing"""
processor_components = self.prepare_components() if batch_size is None:
processor = self.processor_class(**processor_components) return super().prepare_image_inputs()
self.skip_processor_without_typed_kwargs(processor) if batch_size < 1:
raise ValueError("batch_size must be greater than 0")
input_str = ["lower newer", "upper older longer string"] return [[super().prepare_image_inputs()]] * batch_size
image_input = [self.prepare_image_inputs()] * 2
inputs = processor(
text=input_str,
images=image_input,
return_tensors="pt",
do_rescale=True,
rescale_factor=-1,
padding="longest",
max_length=76,
)
self.assertLessEqual(inputs[self.images_input_name][0][0].mean(), 0)
self.assertTrue(
len(inputs[self.text_input_name][0]) == len(inputs[self.text_input_name][1])
and len(inputs[self.text_input_name][1]) < 76
)

View File

@@ -17,6 +17,7 @@
import inspect import inspect
import json import json
import tempfile import tempfile
from typing import Optional
import numpy as np import numpy as np
@@ -86,10 +87,25 @@ class ProcessorTesterMixin:
processor = self.processor_class(**components, **self.prepare_processor_dict()) processor = self.processor_class(**components, **self.prepare_processor_dict())
return processor return processor
# Build the text input used by the processor tests.
# - batch_size=None: returns the single (unbatched) string "lower newer".
# - batch_size=1: returns a one-element list containing that string.
# - batch_size>=2: returns a list of batch_size strings; the first two entries
#   have different lengths and any remaining entries repeat "lower newer".
# Raises ValueError when batch_size is less than 1.
def prepare_text_inputs(self, batch_size: Optional[int] = None):
if batch_size is None:
return "lower newer"
if batch_size < 1:
raise ValueError("batch_size must be greater than 0")
if batch_size == 1:
return ["lower newer"]
# For batch_size == 2 the repeated tail is empty, so only the two base strings are returned.
return ["lower newer", "upper older longer string"] + ["lower newer"] * (batch_size - 2)
@require_vision @require_vision
def prepare_image_inputs(self): def prepare_image_inputs(self, batch_size: Optional[int] = None):
"""This function prepares a list of PIL images for testing""" """This function prepares a list of PIL images for testing"""
return prepare_image_inputs() if batch_size is None:
return prepare_image_inputs()[0]
if batch_size < 1:
raise ValueError("batch_size must be greater than 0")
return prepare_image_inputs() * batch_size
@require_vision @require_vision
def prepare_video_inputs(self): def prepare_video_inputs(self):
@@ -148,7 +164,7 @@ class ProcessorTesterMixin:
processor = self.processor_class(**processor_components) processor = self.processor_class(**processor_components)
self.skip_processor_without_typed_kwargs(processor) self.skip_processor_without_typed_kwargs(processor)
input_str = "lower newer" input_str = self.prepare_text_inputs()
image_input = self.prepare_image_inputs() image_input = self.prepare_image_inputs()
inputs = processor(text=input_str, images=image_input, return_tensors="pt") inputs = processor(text=input_str, images=image_input, return_tensors="pt")
self.assertEqual(inputs[self.text_input_name].shape[-1], 117) self.assertEqual(inputs[self.text_input_name].shape[-1], 117)
@@ -170,7 +186,7 @@ class ProcessorTesterMixin:
processor = self.processor_class(**processor_components) processor = self.processor_class(**processor_components)
self.skip_processor_without_typed_kwargs(processor) self.skip_processor_without_typed_kwargs(processor)
input_str = "lower newer" input_str = self.prepare_text_inputs()
image_input = self.prepare_image_inputs() image_input = self.prepare_image_inputs()
inputs = processor(text=input_str, images=image_input, return_tensors="pt") inputs = processor(text=input_str, images=image_input, return_tensors="pt")
@@ -184,7 +200,7 @@ class ProcessorTesterMixin:
processor = self.processor_class(**processor_components) processor = self.processor_class(**processor_components)
self.skip_processor_without_typed_kwargs(processor) self.skip_processor_without_typed_kwargs(processor)
input_str = "lower newer" input_str = self.prepare_text_inputs()
image_input = self.prepare_image_inputs() image_input = self.prepare_image_inputs()
inputs = processor( inputs = processor(
text=input_str, images=image_input, return_tensors="pt", max_length=112, padding="max_length" text=input_str, images=image_input, return_tensors="pt", max_length=112, padding="max_length"
@@ -203,7 +219,7 @@ class ProcessorTesterMixin:
processor = self.processor_class(**processor_components) processor = self.processor_class(**processor_components)
self.skip_processor_without_typed_kwargs(processor) self.skip_processor_without_typed_kwargs(processor)
input_str = "lower newer" input_str = self.prepare_text_inputs()
image_input = self.prepare_image_inputs() image_input = self.prepare_image_inputs()
inputs = processor(text=input_str, images=image_input, do_rescale=True, rescale_factor=-1, return_tensors="pt") inputs = processor(text=input_str, images=image_input, do_rescale=True, rescale_factor=-1, return_tensors="pt")
@@ -216,7 +232,7 @@ class ProcessorTesterMixin:
processor = self.processor_class(**processor_components) processor = self.processor_class(**processor_components)
self.skip_processor_without_typed_kwargs(processor) self.skip_processor_without_typed_kwargs(processor)
input_str = "lower newer" input_str = self.prepare_text_inputs()
image_input = self.prepare_image_inputs() image_input = self.prepare_image_inputs()
inputs = processor( inputs = processor(
text=input_str, text=input_str,
@@ -238,8 +254,8 @@ class ProcessorTesterMixin:
processor = self.processor_class(**processor_components) processor = self.processor_class(**processor_components)
self.skip_processor_without_typed_kwargs(processor) self.skip_processor_without_typed_kwargs(processor)
input_str = ["lower newer", "upper older longer string"] input_str = self.prepare_text_inputs(batch_size=2)
image_input = self.prepare_image_inputs() * 2 image_input = self.prepare_image_inputs(batch_size=2)
inputs = processor( inputs = processor(
text=input_str, text=input_str,
images=image_input, images=image_input,
@@ -263,7 +279,7 @@ class ProcessorTesterMixin:
processor = self.processor_class(**processor_components) processor = self.processor_class(**processor_components)
self.skip_processor_without_typed_kwargs(processor) self.skip_processor_without_typed_kwargs(processor)
input_str = ["lower newer"] input_str = [self.prepare_text_inputs()]
image_input = self.prepare_image_inputs() image_input = self.prepare_image_inputs()
with self.assertRaises(ValueError): with self.assertRaises(ValueError):
_ = processor( _ = processor(
@@ -281,7 +297,7 @@ class ProcessorTesterMixin:
processor = self.processor_class(**processor_components) processor = self.processor_class(**processor_components)
self.skip_processor_without_typed_kwargs(processor) self.skip_processor_without_typed_kwargs(processor)
input_str = "lower newer" input_str = self.prepare_text_inputs()
image_input = self.prepare_image_inputs() image_input = self.prepare_image_inputs()
# Define the kwargs for each modality # Define the kwargs for each modality
@@ -303,7 +319,7 @@ class ProcessorTesterMixin:
processor_components = self.prepare_components() processor_components = self.prepare_components()
processor = self.processor_class(**processor_components) processor = self.processor_class(**processor_components)
self.skip_processor_without_typed_kwargs(processor) self.skip_processor_without_typed_kwargs(processor)
input_str = "lower newer" input_str = self.prepare_text_inputs()
image_input = self.prepare_image_inputs() image_input = self.prepare_image_inputs()
# Define the kwargs for each modality # Define the kwargs for each modality
@@ -326,7 +342,7 @@ class ProcessorTesterMixin:
processor = self.processor_class(**processor_components) processor = self.processor_class(**processor_components)
self.skip_processor_without_typed_kwargs(processor) self.skip_processor_without_typed_kwargs(processor)
input_str = "lower newer" input_str = self.prepare_text_inputs()
image_input = self.prepare_image_inputs() image_input = self.prepare_image_inputs()
with self.assertRaises(ValueError): with self.assertRaises(ValueError):