Add support for args to ProcessorMixin for backward compatibility (#33479)

* add check and prepare args for BC to ProcessorMixin, improve ProcessorTesterMixin

* change size and crop_size in processor kwargs tests to do_rescale and rescale_factor

* remove unnecessary llava processor kwargs test overwrite

* nit

* change data_arg_name to input_name

* Remove unnecessary test override

* Remove unnecessary tests Paligemma

* Move test_prepare_and_validate_optional_call_args to TesterMixin, add docstring
This commit is contained in:
Yoni Gozlan
2024-09-20 11:40:59 -04:00
committed by GitHub
parent 31caf0b95f
commit c0c6815dc9
10 changed files with 173 additions and 812 deletions

View File

@@ -18,12 +18,6 @@ import inspect
import json
import tempfile
try:
from typing import Unpack
except ImportError:
from typing_extensions import Unpack
import numpy as np
from transformers.models.auto.processing_auto import processor_class_from_name
@@ -35,6 +29,12 @@ from transformers.testing_utils import (
from transformers.utils import is_vision_available
try:
from typing import Unpack
except ImportError:
from typing_extensions import Unpack
if is_vision_available():
from PIL import Image
@@ -50,6 +50,9 @@ def prepare_image_inputs():
@require_vision
class ProcessorTesterMixin:
processor_class = None
text_input_name = "input_ids"
images_input_name = "pixel_values"
videos_input_name = "pixel_values_videos"
def prepare_processor_dict(self):
return {}
@@ -139,68 +142,77 @@ class ProcessorTesterMixin:
def test_tokenizer_defaults_preserved_by_kwargs(self):
if "image_processor" not in self.processor_class.attributes:
self.skipTest(f"image_processor attribute not present in {self.processor_class}")
image_processor = self.get_component("image_processor")
tokenizer = self.get_component("tokenizer", max_length=117, padding="max_length")
processor_components = self.prepare_components()
processor_components["tokenizer"] = self.get_component("tokenizer", max_length=117, padding="max_length")
processor = self.processor_class(tokenizer=tokenizer, image_processor=image_processor)
processor = self.processor_class(**processor_components)
self.skip_processor_without_typed_kwargs(processor)
input_str = "lower newer"
image_input = self.prepare_image_inputs()
inputs = processor(text=input_str, images=image_input, return_tensors="pt")
self.assertEqual(len(inputs["input_ids"][0]), 117)
self.assertEqual(inputs[self.text_input_name].shape[-1], 117)
def test_image_processor_defaults_preserved_by_image_kwargs(self):
"""
We use do_rescale=True, rescale_factor=-1 to ensure that image_processor kwargs are preserved in the processor.
We then check that the mean of the pixel_values is less than or equal to 0 after processing.
Since the original pixel_values are in [0, 255], this is a good indicator that the rescale_factor is indeed applied.
"""
if "image_processor" not in self.processor_class.attributes:
self.skipTest(f"image_processor attribute not present in {self.processor_class}")
image_processor = self.get_component("image_processor", size=(234, 234))
tokenizer = self.get_component("tokenizer", max_length=117, padding="max_length")
processor_components = self.prepare_components()
processor_components["image_processor"] = self.get_component(
"image_processor", do_rescale=True, rescale_factor=-1
)
processor_components["tokenizer"] = self.get_component("tokenizer", max_length=117, padding="max_length")
processor = self.processor_class(tokenizer=tokenizer, image_processor=image_processor)
processor = self.processor_class(**processor_components)
self.skip_processor_without_typed_kwargs(processor)
input_str = "lower newer"
image_input = self.prepare_image_inputs()
inputs = processor(text=input_str, images=image_input)
self.assertEqual(len(inputs["pixel_values"][0][0]), 234)
inputs = processor(text=input_str, images=image_input, return_tensors="pt")
self.assertLessEqual(inputs[self.images_input_name][0][0].mean(), 0)
def test_kwargs_overrides_default_tokenizer_kwargs(self):
if "image_processor" not in self.processor_class.attributes:
self.skipTest(f"image_processor attribute not present in {self.processor_class}")
image_processor = self.get_component("image_processor")
tokenizer = self.get_component("tokenizer", padding="longest")
processor_components = self.prepare_components()
processor_components["tokenizer"] = self.get_component("tokenizer", padding="longest")
processor = self.processor_class(tokenizer=tokenizer, image_processor=image_processor)
processor = self.processor_class(**processor_components)
self.skip_processor_without_typed_kwargs(processor)
input_str = "lower newer"
image_input = self.prepare_image_inputs()
inputs = processor(
text=input_str, images=image_input, return_tensors="pt", max_length=112, padding="max_length"
)
self.assertEqual(len(inputs["input_ids"][0]), 112)
self.assertEqual(inputs[self.text_input_name].shape[-1], 112)
def test_kwargs_overrides_default_image_processor_kwargs(self):
if "image_processor" not in self.processor_class.attributes:
self.skipTest(f"image_processor attribute not present in {self.processor_class}")
image_processor = self.get_component("image_processor", size=(234, 234))
tokenizer = self.get_component("tokenizer", max_length=117, padding="max_length")
processor_components = self.prepare_components()
processor_components["image_processor"] = self.get_component(
"image_processor", do_rescale=True, rescale_factor=1
)
processor_components["tokenizer"] = self.get_component("tokenizer", max_length=117, padding="max_length")
processor = self.processor_class(tokenizer=tokenizer, image_processor=image_processor)
processor = self.processor_class(**processor_components)
self.skip_processor_without_typed_kwargs(processor)
input_str = "lower newer"
image_input = self.prepare_image_inputs()
inputs = processor(text=input_str, images=image_input, size=[224, 224])
self.assertEqual(len(inputs["pixel_values"][0][0]), 224)
inputs = processor(text=input_str, images=image_input, do_rescale=True, rescale_factor=-1, return_tensors="pt")
self.assertLessEqual(inputs[self.images_input_name][0][0].mean(), 0)
def test_unstructured_kwargs(self):
if "image_processor" not in self.processor_class.attributes:
self.skipTest(f"image_processor attribute not present in {self.processor_class}")
image_processor = self.get_component("image_processor")
tokenizer = self.get_component("tokenizer")
processor = self.processor_class(tokenizer=tokenizer, image_processor=image_processor)
processor_components = self.prepare_components()
processor = self.processor_class(**processor_components)
self.skip_processor_without_typed_kwargs(processor)
input_str = "lower newer"
@@ -209,21 +221,20 @@ class ProcessorTesterMixin:
text=input_str,
images=image_input,
return_tensors="pt",
size={"height": 214, "width": 214},
do_rescale=True,
rescale_factor=-1,
padding="max_length",
max_length=76,
)
self.assertEqual(inputs["pixel_values"].shape[2], 214)
self.assertEqual(len(inputs["input_ids"][0]), 76)
self.assertLessEqual(inputs[self.images_input_name][0][0].mean(), 0)
self.assertEqual(inputs[self.text_input_name].shape[-1], 76)
def test_unstructured_kwargs_batched(self):
if "image_processor" not in self.processor_class.attributes:
self.skipTest(f"image_processor attribute not present in {self.processor_class}")
image_processor = self.get_component("image_processor")
tokenizer = self.get_component("tokenizer")
processor = self.processor_class(tokenizer=tokenizer, image_processor=image_processor)
processor_components = self.prepare_components()
processor = self.processor_class(**processor_components)
self.skip_processor_without_typed_kwargs(processor)
input_str = ["lower newer", "upper older longer string"]
@@ -232,21 +243,23 @@ class ProcessorTesterMixin:
text=input_str,
images=image_input,
return_tensors="pt",
size={"height": 214, "width": 214},
do_rescale=True,
rescale_factor=-1,
padding="longest",
max_length=76,
)
self.assertEqual(inputs["pixel_values"].shape[2], 214)
self.assertEqual(len(inputs["input_ids"][0]), 6)
self.assertLessEqual(inputs[self.images_input_name][0][0].mean(), 0)
self.assertTrue(
len(inputs[self.text_input_name][0]) == len(inputs[self.text_input_name][1])
and len(inputs[self.text_input_name][1]) < 76
)
def test_doubly_passed_kwargs(self):
if "image_processor" not in self.processor_class.attributes:
self.skipTest(f"image_processor attribute not present in {self.processor_class}")
image_processor = self.get_component("image_processor")
tokenizer = self.get_component("tokenizer")
processor = self.processor_class(tokenizer=tokenizer, image_processor=image_processor)
processor_components = self.prepare_components()
processor = self.processor_class(**processor_components)
self.skip_processor_without_typed_kwargs(processor)
input_str = ["lower newer"]
@@ -255,17 +268,16 @@ class ProcessorTesterMixin:
_ = processor(
text=input_str,
images=image_input,
images_kwargs={"size": {"height": 222, "width": 222}},
size={"height": 214, "width": 214},
images_kwargs={"do_rescale": True, "rescale_factor": -1},
do_rescale=True,
return_tensors="pt",
)
def test_structured_kwargs_nested(self):
if "image_processor" not in self.processor_class.attributes:
self.skipTest(f"image_processor attribute not present in {self.processor_class}")
image_processor = self.get_component("image_processor")
tokenizer = self.get_component("tokenizer")
processor = self.processor_class(tokenizer=tokenizer, image_processor=image_processor)
processor_components = self.prepare_components()
processor = self.processor_class(**processor_components)
self.skip_processor_without_typed_kwargs(processor)
input_str = "lower newer"
@@ -274,25 +286,21 @@ class ProcessorTesterMixin:
# Define the kwargs for each modality
all_kwargs = {
"common_kwargs": {"return_tensors": "pt"},
"images_kwargs": {"size": {"height": 214, "width": 214}},
"images_kwargs": {"do_rescale": True, "rescale_factor": -1},
"text_kwargs": {"padding": "max_length", "max_length": 76},
}
inputs = processor(text=input_str, images=image_input, **all_kwargs)
self.skip_processor_without_typed_kwargs(processor)
self.assertEqual(inputs["pixel_values"].shape[2], 214)
self.assertEqual(len(inputs["input_ids"][0]), 76)
self.assertLessEqual(inputs[self.images_input_name][0][0].mean(), 0)
self.assertEqual(inputs[self.text_input_name].shape[-1], 76)
def test_structured_kwargs_nested_from_dict(self):
if "image_processor" not in self.processor_class.attributes:
self.skipTest(f"image_processor attribute not present in {self.processor_class}")
image_processor = self.get_component("image_processor")
tokenizer = self.get_component("tokenizer")
processor = self.processor_class(tokenizer=tokenizer, image_processor=image_processor)
processor_components = self.prepare_components()
processor = self.processor_class(**processor_components)
self.skip_processor_without_typed_kwargs(processor)
input_str = "lower newer"
image_input = self.prepare_image_inputs()
@@ -300,14 +308,13 @@ class ProcessorTesterMixin:
# Define the kwargs for each modality
all_kwargs = {
"common_kwargs": {"return_tensors": "pt"},
"images_kwargs": {"size": {"height": 214, "width": 214}},
"images_kwargs": {"do_rescale": True, "rescale_factor": -1},
"text_kwargs": {"padding": "max_length", "max_length": 76},
}
inputs = processor(text=input_str, images=image_input, **all_kwargs)
self.assertEqual(inputs["pixel_values"].shape[2], 214)
self.assertEqual(len(inputs["input_ids"][0]), 76)
self.assertLessEqual(inputs[self.images_input_name][0][0].mean(), 0)
self.assertEqual(inputs[self.text_input_name].shape[-1], 76)
# TODO: the same test, but for audio + text processors that have strong overlap in kwargs
# TODO (molbap) use the same structure of attribute kwargs for other tests to avoid duplication
@@ -335,3 +342,28 @@ class ProcessorTesterMixin:
padding="max_length",
text_kwargs={"padding": "do_not_pad"},
)
def test_prepare_and_validate_optional_call_args(self):
processor = self.get_processor()
optional_call_args_name = getattr(processor, "optional_call_args", [])
num_optional_call_args = len(optional_call_args_name)
if num_optional_call_args == 0:
self.skipTest("No optional call args")
# test all optional call args are given
optional_call_args = processor.prepare_and_validate_optional_call_args(
*(f"optional_{i}" for i in range(num_optional_call_args))
)
self.assertEqual(
optional_call_args, {arg_name: f"optional_{i}" for i, arg_name in enumerate(optional_call_args_name)}
)
# test only one optional call arg is given
optional_call_args = processor.prepare_and_validate_optional_call_args("optional_1")
self.assertEqual(optional_call_args, {optional_call_args_name[0]: "optional_1"})
# test no optional call arg is given
optional_call_args = processor.prepare_and_validate_optional_call_args()
self.assertEqual(optional_call_args, {})
# test too many optional call args are given
with self.assertRaises(ValueError):
processor.prepare_and_validate_optional_call_args(
*(f"optional_{i}" for i in range(num_optional_call_args + 1))
)