Modify ProcessorTesterMixin for better generalization (#32637)
* Add padding="max_length" to the tokenizer kwargs and change crop_size to size in the image_processor kwargs
* Remove the crop_size argument in the Align processor tests to be consistent with the base tests
* Add a pad_token when loading the tokenizer if needed, change the tokenizer kwargs in the override test, and remove unnecessary test overrides in Grounding DINO
This commit is contained in:
@@ -66,8 +66,6 @@ class AlignProcessorTest(ProcessorTesterMixin, unittest.TestCase):
|
|||||||
image_processor_map = {
|
image_processor_map = {
|
||||||
"do_resize": True,
|
"do_resize": True,
|
||||||
"size": 20,
|
"size": 20,
|
||||||
"do_center_crop": True,
|
|
||||||
"crop_size": 18,
|
|
||||||
"do_normalize": True,
|
"do_normalize": True,
|
||||||
"image_mean": [0.48145466, 0.4578275, 0.40821073],
|
"image_mean": [0.48145466, 0.4578275, 0.40821073],
|
||||||
"image_std": [0.26862954, 0.26130258, 0.27577711],
|
"image_std": [0.26862954, 0.26130258, 0.27577711],
|
||||||
|
|||||||
@@ -263,177 +263,3 @@ class GroundingDinoProcessorTest(ProcessorTesterMixin, unittest.TestCase):
|
|||||||
inputs = processor(text=input_str, images=image_input)
|
inputs = processor(text=input_str, images=image_input)
|
||||||
|
|
||||||
self.assertListEqual(list(inputs.keys()), processor.model_input_names)
|
self.assertListEqual(list(inputs.keys()), processor.model_input_names)
|
||||||
|
|
||||||
@require_torch
|
|
||||||
@require_vision
|
|
||||||
def test_image_processor_defaults_preserved_by_image_kwargs(self):
|
|
||||||
if "image_processor" not in self.processor_class.attributes:
|
|
||||||
self.skipTest(f"image_processor attribute not present in {self.processor_class}")
|
|
||||||
image_processor = self.get_component("image_processor", size={"height": 234, "width": 234})
|
|
||||||
tokenizer = self.get_component("tokenizer", max_length=117)
|
|
||||||
|
|
||||||
processor = self.processor_class(tokenizer=tokenizer, image_processor=image_processor)
|
|
||||||
self.skip_processor_without_typed_kwargs(processor)
|
|
||||||
|
|
||||||
input_str = "lower newer"
|
|
||||||
image_input = self.prepare_image_inputs()
|
|
||||||
|
|
||||||
inputs = processor(text=input_str, images=image_input)
|
|
||||||
self.assertEqual(len(inputs["pixel_values"][0][0]), 234)
|
|
||||||
|
|
||||||
@require_vision
|
|
||||||
@require_torch
|
|
||||||
def test_kwargs_overrides_default_tokenizer_kwargs(self):
|
|
||||||
if "image_processor" not in self.processor_class.attributes:
|
|
||||||
self.skipTest(f"image_processor attribute not present in {self.processor_class}")
|
|
||||||
image_processor = self.get_component("image_processor")
|
|
||||||
tokenizer = self.get_component("tokenizer", max_length=117)
|
|
||||||
|
|
||||||
processor = self.processor_class(tokenizer=tokenizer, image_processor=image_processor)
|
|
||||||
self.skip_processor_without_typed_kwargs(processor)
|
|
||||||
input_str = "lower newer"
|
|
||||||
image_input = self.prepare_image_inputs()
|
|
||||||
|
|
||||||
inputs = processor(
|
|
||||||
text=input_str, images=image_input, return_tensors="pt", padding="max_length", max_length=112
|
|
||||||
)
|
|
||||||
self.assertEqual(len(inputs["input_ids"][0]), 112)
|
|
||||||
|
|
||||||
@require_vision
|
|
||||||
@require_torch
|
|
||||||
def test_tokenizer_defaults_preserved_by_kwargs(self):
|
|
||||||
if "image_processor" not in self.processor_class.attributes:
|
|
||||||
self.skipTest(f"image_processor attribute not present in {self.processor_class}")
|
|
||||||
image_processor = self.get_component("image_processor")
|
|
||||||
tokenizer = self.get_component("tokenizer", max_length=117)
|
|
||||||
|
|
||||||
processor = self.processor_class(tokenizer=tokenizer, image_processor=image_processor)
|
|
||||||
self.skip_processor_without_typed_kwargs(processor)
|
|
||||||
input_str = "lower newer"
|
|
||||||
image_input = self.prepare_image_inputs()
|
|
||||||
|
|
||||||
inputs = processor(text=input_str, images=image_input, return_tensors="pt", padding="max_length")
|
|
||||||
self.assertEqual(len(inputs["input_ids"][0]), 117)
|
|
||||||
|
|
||||||
@require_torch
|
|
||||||
@require_vision
|
|
||||||
def test_kwargs_overrides_default_image_processor_kwargs(self):
|
|
||||||
if "image_processor" not in self.processor_class.attributes:
|
|
||||||
self.skipTest(f"image_processor attribute not present in {self.processor_class}")
|
|
||||||
image_processor = self.get_component("image_processor", size=(234, 234))
|
|
||||||
tokenizer = self.get_component("tokenizer", max_length=117)
|
|
||||||
|
|
||||||
processor = self.processor_class(tokenizer=tokenizer, image_processor=image_processor)
|
|
||||||
self.skip_processor_without_typed_kwargs(processor)
|
|
||||||
|
|
||||||
input_str = "lower newer"
|
|
||||||
image_input = self.prepare_image_inputs()
|
|
||||||
|
|
||||||
inputs = processor(text=input_str, images=image_input, size=[224, 224])
|
|
||||||
self.assertEqual(len(inputs["pixel_values"][0][0]), 224)
|
|
||||||
|
|
||||||
@require_torch
|
|
||||||
@require_vision
|
|
||||||
def test_structured_kwargs_nested(self):
|
|
||||||
if "image_processor" not in self.processor_class.attributes:
|
|
||||||
self.skipTest(f"image_processor attribute not present in {self.processor_class}")
|
|
||||||
image_processor = self.get_component("image_processor")
|
|
||||||
tokenizer = self.get_component("tokenizer")
|
|
||||||
|
|
||||||
processor = self.processor_class(tokenizer=tokenizer, image_processor=image_processor)
|
|
||||||
self.skip_processor_without_typed_kwargs(processor)
|
|
||||||
|
|
||||||
input_str = "lower newer"
|
|
||||||
image_input = self.prepare_image_inputs()
|
|
||||||
|
|
||||||
# Define the kwargs for each modality
|
|
||||||
all_kwargs = {
|
|
||||||
"common_kwargs": {"return_tensors": "pt"},
|
|
||||||
"images_kwargs": {"size": {"height": 214, "width": 214}},
|
|
||||||
"text_kwargs": {"padding": "max_length", "max_length": 76},
|
|
||||||
}
|
|
||||||
|
|
||||||
inputs = processor(text=input_str, images=image_input, **all_kwargs)
|
|
||||||
self.skip_processor_without_typed_kwargs(processor)
|
|
||||||
|
|
||||||
self.assertEqual(inputs["pixel_values"].shape[2], 214)
|
|
||||||
|
|
||||||
self.assertEqual(len(inputs["input_ids"][0]), 76)
|
|
||||||
|
|
||||||
@require_torch
|
|
||||||
@require_vision
|
|
||||||
def test_structured_kwargs_nested_from_dict(self):
|
|
||||||
if "image_processor" not in self.processor_class.attributes:
|
|
||||||
self.skipTest(f"image_processor attribute not present in {self.processor_class}")
|
|
||||||
|
|
||||||
image_processor = self.get_component("image_processor")
|
|
||||||
tokenizer = self.get_component("tokenizer")
|
|
||||||
|
|
||||||
processor = self.processor_class(tokenizer=tokenizer, image_processor=image_processor)
|
|
||||||
self.skip_processor_without_typed_kwargs(processor)
|
|
||||||
input_str = "lower newer"
|
|
||||||
image_input = self.prepare_image_inputs()
|
|
||||||
|
|
||||||
# Define the kwargs for each modality
|
|
||||||
all_kwargs = {
|
|
||||||
"common_kwargs": {"return_tensors": "pt"},
|
|
||||||
"images_kwargs": {"size": {"height": 214, "width": 214}},
|
|
||||||
"text_kwargs": {"padding": "max_length", "max_length": 76},
|
|
||||||
}
|
|
||||||
|
|
||||||
inputs = processor(text=input_str, images=image_input, **all_kwargs)
|
|
||||||
self.assertEqual(inputs["pixel_values"].shape[2], 214)
|
|
||||||
|
|
||||||
self.assertEqual(len(inputs["input_ids"][0]), 76)
|
|
||||||
|
|
||||||
@require_torch
|
|
||||||
@require_vision
|
|
||||||
def test_unstructured_kwargs(self):
|
|
||||||
if "image_processor" not in self.processor_class.attributes:
|
|
||||||
self.skipTest(f"image_processor attribute not present in {self.processor_class}")
|
|
||||||
image_processor = self.get_component("image_processor")
|
|
||||||
tokenizer = self.get_component("tokenizer")
|
|
||||||
|
|
||||||
processor = self.processor_class(tokenizer=tokenizer, image_processor=image_processor)
|
|
||||||
self.skip_processor_without_typed_kwargs(processor)
|
|
||||||
|
|
||||||
input_str = "lower newer"
|
|
||||||
image_input = self.prepare_image_inputs()
|
|
||||||
inputs = processor(
|
|
||||||
text=input_str,
|
|
||||||
images=image_input,
|
|
||||||
return_tensors="pt",
|
|
||||||
size={"height": 214, "width": 214},
|
|
||||||
padding="max_length",
|
|
||||||
max_length=76,
|
|
||||||
)
|
|
||||||
|
|
||||||
self.assertEqual(inputs["pixel_values"].shape[2], 214)
|
|
||||||
self.assertEqual(len(inputs["input_ids"][0]), 76)
|
|
||||||
|
|
||||||
@require_torch
|
|
||||||
@require_vision
|
|
||||||
def test_unstructured_kwargs_batched(self):
|
|
||||||
if "image_processor" not in self.processor_class.attributes:
|
|
||||||
self.skipTest(f"image_processor attribute not present in {self.processor_class}")
|
|
||||||
image_processor = self.get_component("image_processor")
|
|
||||||
tokenizer = self.get_component("tokenizer")
|
|
||||||
if not tokenizer.pad_token:
|
|
||||||
tokenizer.pad_token = "[TEST_PAD]"
|
|
||||||
processor = self.processor_class(tokenizer=tokenizer, image_processor=image_processor)
|
|
||||||
self.skip_processor_without_typed_kwargs(processor)
|
|
||||||
|
|
||||||
input_str = ["lower newer", "upper older longer string"]
|
|
||||||
image_input = self.prepare_image_inputs() * 2
|
|
||||||
inputs = processor(
|
|
||||||
text=input_str,
|
|
||||||
images=image_input,
|
|
||||||
return_tensors="pt",
|
|
||||||
crop_size={"height": 214, "width": 214},
|
|
||||||
size={"height": 214, "width": 214},
|
|
||||||
padding="longest",
|
|
||||||
max_length=76,
|
|
||||||
)
|
|
||||||
self.assertEqual(inputs["pixel_values"].shape[2], 214)
|
|
||||||
|
|
||||||
self.assertEqual(len(inputs["input_ids"][0]), 6)
|
|
||||||
|
|||||||
@@ -61,6 +61,8 @@ class ProcessorTesterMixin:
|
|||||||
|
|
||||||
component_class = processor_class_from_name(component_class_name)
|
component_class = processor_class_from_name(component_class_name)
|
||||||
component = component_class.from_pretrained(self.tmpdirname, **kwargs) # noqa
|
component = component_class.from_pretrained(self.tmpdirname, **kwargs) # noqa
|
||||||
|
if attribute == "tokenizer" and not component.pad_token:
|
||||||
|
component.pad_token = "[TEST_PAD]"
|
||||||
|
|
||||||
return component
|
return component
|
||||||
|
|
||||||
@@ -126,7 +128,7 @@ class ProcessorTesterMixin:
|
|||||||
if "image_processor" not in self.processor_class.attributes:
|
if "image_processor" not in self.processor_class.attributes:
|
||||||
self.skipTest(f"image_processor attribute not present in {self.processor_class}")
|
self.skipTest(f"image_processor attribute not present in {self.processor_class}")
|
||||||
image_processor = self.get_component("image_processor")
|
image_processor = self.get_component("image_processor")
|
||||||
tokenizer = self.get_component("tokenizer", max_length=117)
|
tokenizer = self.get_component("tokenizer", max_length=117, padding="max_length")
|
||||||
|
|
||||||
processor = self.processor_class(tokenizer=tokenizer, image_processor=image_processor)
|
processor = self.processor_class(tokenizer=tokenizer, image_processor=image_processor)
|
||||||
self.skip_processor_without_typed_kwargs(processor)
|
self.skip_processor_without_typed_kwargs(processor)
|
||||||
@@ -141,8 +143,8 @@ class ProcessorTesterMixin:
|
|||||||
def test_image_processor_defaults_preserved_by_image_kwargs(self):
|
def test_image_processor_defaults_preserved_by_image_kwargs(self):
|
||||||
if "image_processor" not in self.processor_class.attributes:
|
if "image_processor" not in self.processor_class.attributes:
|
||||||
self.skipTest(f"image_processor attribute not present in {self.processor_class}")
|
self.skipTest(f"image_processor attribute not present in {self.processor_class}")
|
||||||
image_processor = self.get_component("image_processor", crop_size=(234, 234))
|
image_processor = self.get_component("image_processor", size=(234, 234))
|
||||||
tokenizer = self.get_component("tokenizer", max_length=117)
|
tokenizer = self.get_component("tokenizer", max_length=117, padding="max_length")
|
||||||
|
|
||||||
processor = self.processor_class(tokenizer=tokenizer, image_processor=image_processor)
|
processor = self.processor_class(tokenizer=tokenizer, image_processor=image_processor)
|
||||||
self.skip_processor_without_typed_kwargs(processor)
|
self.skip_processor_without_typed_kwargs(processor)
|
||||||
@@ -159,14 +161,16 @@ class ProcessorTesterMixin:
|
|||||||
if "image_processor" not in self.processor_class.attributes:
|
if "image_processor" not in self.processor_class.attributes:
|
||||||
self.skipTest(f"image_processor attribute not present in {self.processor_class}")
|
self.skipTest(f"image_processor attribute not present in {self.processor_class}")
|
||||||
image_processor = self.get_component("image_processor")
|
image_processor = self.get_component("image_processor")
|
||||||
tokenizer = self.get_component("tokenizer", max_length=117)
|
tokenizer = self.get_component("tokenizer", padding="longest")
|
||||||
|
|
||||||
processor = self.processor_class(tokenizer=tokenizer, image_processor=image_processor)
|
processor = self.processor_class(tokenizer=tokenizer, image_processor=image_processor)
|
||||||
self.skip_processor_without_typed_kwargs(processor)
|
self.skip_processor_without_typed_kwargs(processor)
|
||||||
input_str = "lower newer"
|
input_str = "lower newer"
|
||||||
image_input = self.prepare_image_inputs()
|
image_input = self.prepare_image_inputs()
|
||||||
|
|
||||||
inputs = processor(text=input_str, images=image_input, return_tensors="pt", max_length=112)
|
inputs = processor(
|
||||||
|
text=input_str, images=image_input, return_tensors="pt", max_length=112, padding="max_length"
|
||||||
|
)
|
||||||
self.assertEqual(len(inputs["input_ids"][0]), 112)
|
self.assertEqual(len(inputs["input_ids"][0]), 112)
|
||||||
|
|
||||||
@require_torch
|
@require_torch
|
||||||
@@ -174,8 +178,8 @@ class ProcessorTesterMixin:
|
|||||||
def test_kwargs_overrides_default_image_processor_kwargs(self):
|
def test_kwargs_overrides_default_image_processor_kwargs(self):
|
||||||
if "image_processor" not in self.processor_class.attributes:
|
if "image_processor" not in self.processor_class.attributes:
|
||||||
self.skipTest(f"image_processor attribute not present in {self.processor_class}")
|
self.skipTest(f"image_processor attribute not present in {self.processor_class}")
|
||||||
image_processor = self.get_component("image_processor", crop_size=(234, 234))
|
image_processor = self.get_component("image_processor", size=(234, 234))
|
||||||
tokenizer = self.get_component("tokenizer", max_length=117)
|
tokenizer = self.get_component("tokenizer", max_length=117, padding="max_length")
|
||||||
|
|
||||||
processor = self.processor_class(tokenizer=tokenizer, image_processor=image_processor)
|
processor = self.processor_class(tokenizer=tokenizer, image_processor=image_processor)
|
||||||
self.skip_processor_without_typed_kwargs(processor)
|
self.skip_processor_without_typed_kwargs(processor)
|
||||||
@@ -183,7 +187,7 @@ class ProcessorTesterMixin:
|
|||||||
input_str = "lower newer"
|
input_str = "lower newer"
|
||||||
image_input = self.prepare_image_inputs()
|
image_input = self.prepare_image_inputs()
|
||||||
|
|
||||||
inputs = processor(text=input_str, images=image_input, crop_size=[224, 224])
|
inputs = processor(text=input_str, images=image_input, size=[224, 224])
|
||||||
self.assertEqual(len(inputs["pixel_values"][0][0]), 224)
|
self.assertEqual(len(inputs["pixel_values"][0][0]), 224)
|
||||||
|
|
||||||
@require_torch
|
@require_torch
|
||||||
@@ -203,7 +207,7 @@ class ProcessorTesterMixin:
|
|||||||
text=input_str,
|
text=input_str,
|
||||||
images=image_input,
|
images=image_input,
|
||||||
return_tensors="pt",
|
return_tensors="pt",
|
||||||
crop_size={"height": 214, "width": 214},
|
size={"height": 214, "width": 214},
|
||||||
padding="max_length",
|
padding="max_length",
|
||||||
max_length=76,
|
max_length=76,
|
||||||
)
|
)
|
||||||
@@ -228,7 +232,7 @@ class ProcessorTesterMixin:
|
|||||||
text=input_str,
|
text=input_str,
|
||||||
images=image_input,
|
images=image_input,
|
||||||
return_tensors="pt",
|
return_tensors="pt",
|
||||||
crop_size={"height": 214, "width": 214},
|
size={"height": 214, "width": 214},
|
||||||
padding="longest",
|
padding="longest",
|
||||||
max_length=76,
|
max_length=76,
|
||||||
)
|
)
|
||||||
@@ -254,8 +258,8 @@ class ProcessorTesterMixin:
|
|||||||
_ = processor(
|
_ = processor(
|
||||||
text=input_str,
|
text=input_str,
|
||||||
images=image_input,
|
images=image_input,
|
||||||
images_kwargs={"crop_size": {"height": 222, "width": 222}},
|
images_kwargs={"size": {"height": 222, "width": 222}},
|
||||||
crop_size={"height": 214, "width": 214},
|
size={"height": 214, "width": 214},
|
||||||
)
|
)
|
||||||
|
|
||||||
@require_torch
|
@require_torch
|
||||||
@@ -275,7 +279,7 @@ class ProcessorTesterMixin:
|
|||||||
# Define the kwargs for each modality
|
# Define the kwargs for each modality
|
||||||
all_kwargs = {
|
all_kwargs = {
|
||||||
"common_kwargs": {"return_tensors": "pt"},
|
"common_kwargs": {"return_tensors": "pt"},
|
||||||
"images_kwargs": {"crop_size": {"height": 214, "width": 214}},
|
"images_kwargs": {"size": {"height": 214, "width": 214}},
|
||||||
"text_kwargs": {"padding": "max_length", "max_length": 76},
|
"text_kwargs": {"padding": "max_length", "max_length": 76},
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -303,7 +307,7 @@ class ProcessorTesterMixin:
|
|||||||
# Define the kwargs for each modality
|
# Define the kwargs for each modality
|
||||||
all_kwargs = {
|
all_kwargs = {
|
||||||
"common_kwargs": {"return_tensors": "pt"},
|
"common_kwargs": {"return_tensors": "pt"},
|
||||||
"images_kwargs": {"crop_size": {"height": 214, "width": 214}},
|
"images_kwargs": {"size": {"height": 214, "width": 214}},
|
||||||
"text_kwargs": {"padding": "max_length", "max_length": 76},
|
"text_kwargs": {"padding": "max_length", "max_length": 76},
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user