Modify ProcessorTesterMixin for better generalization (#32637)

* Add padding="max_length" to tokenizer kwargs and change crop_size to size for image_processor kwargs * remove crop_size argument in align processor tests to be coherent with base tests * Add pad_token when loading tokenizer if needed, change test override tokenizer kwargs, remove unnecessary test overwrites in grounding dino
2024-08-13 11:48:53 -04:00
parent c3cd9d807e
commit 5bcbdff159
3 changed files with 18 additions and 190 deletions
--- a/tests/models/align/test_processor_align.py
+++ b/tests/models/align/test_processor_align.py
@@ -66,8 +66,6 @@ class AlignProcessorTest(ProcessorTesterMixin, unittest.TestCase):
        image_processor_map = {
            "do_resize": True,
            "size": 20,
            "do_center_crop": True,
            "crop_size": 18,
            "do_normalize": True,
            "image_mean": [0.48145466, 0.4578275, 0.40821073],
            "image_std": [0.26862954, 0.26130258, 0.27577711],
--- a/tests/models/grounding_dino/test_processor_grounding_dino.py
+++ b/tests/models/grounding_dino/test_processor_grounding_dino.py
@@ -263,177 +263,3 @@ class GroundingDinoProcessorTest(ProcessorTesterMixin, unittest.TestCase):
        inputs = processor(text=input_str, images=image_input)
        self.assertListEqual(list(inputs.keys()), processor.model_input_names)
    @require_torch
    @require_vision
    def test_image_processor_defaults_preserved_by_image_kwargs(self):
        if "image_processor" not in self.processor_class.attributes:
            self.skipTest(f"image_processor attribute not present in {self.processor_class}")
        image_processor = self.get_component("image_processor", size={"height": 234, "width": 234})
        tokenizer = self.get_component("tokenizer", max_length=117)
        processor = self.processor_class(tokenizer=tokenizer, image_processor=image_processor)
        self.skip_processor_without_typed_kwargs(processor)
        input_str = "lower newer"
        image_input = self.prepare_image_inputs()
        inputs = processor(text=input_str, images=image_input)
        self.assertEqual(len(inputs["pixel_values"][0][0]), 234)
    @require_vision
    @require_torch
    def test_kwargs_overrides_default_tokenizer_kwargs(self):
        if "image_processor" not in self.processor_class.attributes:
            self.skipTest(f"image_processor attribute not present in {self.processor_class}")
        image_processor = self.get_component("image_processor")
        tokenizer = self.get_component("tokenizer", max_length=117)
        processor = self.processor_class(tokenizer=tokenizer, image_processor=image_processor)
        self.skip_processor_without_typed_kwargs(processor)
        input_str = "lower newer"
        image_input = self.prepare_image_inputs()
        inputs = processor(
            text=input_str, images=image_input, return_tensors="pt", padding="max_length", max_length=112
        )
        self.assertEqual(len(inputs["input_ids"][0]), 112)
    @require_vision
    @require_torch
    def test_tokenizer_defaults_preserved_by_kwargs(self):
        if "image_processor" not in self.processor_class.attributes:
            self.skipTest(f"image_processor attribute not present in {self.processor_class}")
        image_processor = self.get_component("image_processor")
        tokenizer = self.get_component("tokenizer", max_length=117)
        processor = self.processor_class(tokenizer=tokenizer, image_processor=image_processor)
        self.skip_processor_without_typed_kwargs(processor)
        input_str = "lower newer"
        image_input = self.prepare_image_inputs()
        inputs = processor(text=input_str, images=image_input, return_tensors="pt", padding="max_length")
        self.assertEqual(len(inputs["input_ids"][0]), 117)
    @require_torch
    @require_vision
    def test_kwargs_overrides_default_image_processor_kwargs(self):
        if "image_processor" not in self.processor_class.attributes:
            self.skipTest(f"image_processor attribute not present in {self.processor_class}")
        image_processor = self.get_component("image_processor", size=(234, 234))
        tokenizer = self.get_component("tokenizer", max_length=117)
        processor = self.processor_class(tokenizer=tokenizer, image_processor=image_processor)
        self.skip_processor_without_typed_kwargs(processor)
        input_str = "lower newer"
        image_input = self.prepare_image_inputs()
        inputs = processor(text=input_str, images=image_input, size=[224, 224])
        self.assertEqual(len(inputs["pixel_values"][0][0]), 224)
    @require_torch
    @require_vision
    def test_structured_kwargs_nested(self):
        if "image_processor" not in self.processor_class.attributes:
            self.skipTest(f"image_processor attribute not present in {self.processor_class}")
        image_processor = self.get_component("image_processor")
        tokenizer = self.get_component("tokenizer")
        processor = self.processor_class(tokenizer=tokenizer, image_processor=image_processor)
        self.skip_processor_without_typed_kwargs(processor)
        input_str = "lower newer"
        image_input = self.prepare_image_inputs()
        # Define the kwargs for each modality
        all_kwargs = {
            "common_kwargs": {"return_tensors": "pt"},
            "images_kwargs": {"size": {"height": 214, "width": 214}},
            "text_kwargs": {"padding": "max_length", "max_length": 76},
        }
        inputs = processor(text=input_str, images=image_input, **all_kwargs)
        self.skip_processor_without_typed_kwargs(processor)
        self.assertEqual(inputs["pixel_values"].shape[2], 214)
        self.assertEqual(len(inputs["input_ids"][0]), 76)
    @require_torch
    @require_vision
    def test_structured_kwargs_nested_from_dict(self):
        if "image_processor" not in self.processor_class.attributes:
            self.skipTest(f"image_processor attribute not present in {self.processor_class}")
        image_processor = self.get_component("image_processor")
        tokenizer = self.get_component("tokenizer")
        processor = self.processor_class(tokenizer=tokenizer, image_processor=image_processor)
        self.skip_processor_without_typed_kwargs(processor)
        input_str = "lower newer"
        image_input = self.prepare_image_inputs()
        # Define the kwargs for each modality
        all_kwargs = {
            "common_kwargs": {"return_tensors": "pt"},
            "images_kwargs": {"size": {"height": 214, "width": 214}},
            "text_kwargs": {"padding": "max_length", "max_length": 76},
        }
        inputs = processor(text=input_str, images=image_input, **all_kwargs)
        self.assertEqual(inputs["pixel_values"].shape[2], 214)
        self.assertEqual(len(inputs["input_ids"][0]), 76)
    @require_torch
    @require_vision
    def test_unstructured_kwargs(self):
        if "image_processor" not in self.processor_class.attributes:
            self.skipTest(f"image_processor attribute not present in {self.processor_class}")
        image_processor = self.get_component("image_processor")
        tokenizer = self.get_component("tokenizer")
        processor = self.processor_class(tokenizer=tokenizer, image_processor=image_processor)
        self.skip_processor_without_typed_kwargs(processor)
        input_str = "lower newer"
        image_input = self.prepare_image_inputs()
        inputs = processor(
            text=input_str,
            images=image_input,
            return_tensors="pt",
            size={"height": 214, "width": 214},
            padding="max_length",
            max_length=76,
        )
        self.assertEqual(inputs["pixel_values"].shape[2], 214)
        self.assertEqual(len(inputs["input_ids"][0]), 76)
    @require_torch
    @require_vision
    def test_unstructured_kwargs_batched(self):
        if "image_processor" not in self.processor_class.attributes:
            self.skipTest(f"image_processor attribute not present in {self.processor_class}")
        image_processor = self.get_component("image_processor")
        tokenizer = self.get_component("tokenizer")
        if not tokenizer.pad_token:
            tokenizer.pad_token = "[TEST_PAD]"
        processor = self.processor_class(tokenizer=tokenizer, image_processor=image_processor)
        self.skip_processor_without_typed_kwargs(processor)
        input_str = ["lower newer", "upper older longer string"]
        image_input = self.prepare_image_inputs() * 2
        inputs = processor(
            text=input_str,
            images=image_input,
            return_tensors="pt",
            crop_size={"height": 214, "width": 214},
            size={"height": 214, "width": 214},
            padding="longest",
            max_length=76,
        )
        self.assertEqual(inputs["pixel_values"].shape[2], 214)
        self.assertEqual(len(inputs["input_ids"][0]), 6)
--- a/tests/test_processing_common.py
+++ b/tests/test_processing_common.py
@@ -61,6 +61,8 @@ class ProcessorTesterMixin:
        component_class = processor_class_from_name(component_class_name)
        component = component_class.from_pretrained(self.tmpdirname, **kwargs)  # noqa
        if attribute == "tokenizer" and not component.pad_token:
            component.pad_token = "[TEST_PAD]"
        return component
@@ -126,7 +128,7 @@ class ProcessorTesterMixin:
        if "image_processor" not in self.processor_class.attributes:
            self.skipTest(f"image_processor attribute not present in {self.processor_class}")
        image_processor = self.get_component("image_processor")
-        tokenizer = self.get_component("tokenizer", max_length=117)
+        tokenizer = self.get_component("tokenizer", max_length=117, padding="max_length")
        processor = self.processor_class(tokenizer=tokenizer, image_processor=image_processor)
        self.skip_processor_without_typed_kwargs(processor)
@@ -141,8 +143,8 @@ class ProcessorTesterMixin:
    def test_image_processor_defaults_preserved_by_image_kwargs(self):
        if "image_processor" not in self.processor_class.attributes:
            self.skipTest(f"image_processor attribute not present in {self.processor_class}")
-        image_processor = self.get_component("image_processor", crop_size=(234, 234))
+        image_processor = self.get_component("image_processor", size=(234, 234))
-        tokenizer = self.get_component("tokenizer", max_length=117)
+        tokenizer = self.get_component("tokenizer", max_length=117, padding="max_length")
        processor = self.processor_class(tokenizer=tokenizer, image_processor=image_processor)
        self.skip_processor_without_typed_kwargs(processor)
@@ -159,14 +161,16 @@ class ProcessorTesterMixin:
        if "image_processor" not in self.processor_class.attributes:
            self.skipTest(f"image_processor attribute not present in {self.processor_class}")
        image_processor = self.get_component("image_processor")
-        tokenizer = self.get_component("tokenizer", max_length=117)
+        tokenizer = self.get_component("tokenizer", padding="longest")
        processor = self.processor_class(tokenizer=tokenizer, image_processor=image_processor)
        self.skip_processor_without_typed_kwargs(processor)
        input_str = "lower newer"
        image_input = self.prepare_image_inputs()
-        inputs = processor(text=input_str, images=image_input, return_tensors="pt", max_length=112)
+        inputs = processor(
            text=input_str, images=image_input, return_tensors="pt", max_length=112, padding="max_length"
        )
        self.assertEqual(len(inputs["input_ids"][0]), 112)
    @require_torch
@@ -174,8 +178,8 @@ class ProcessorTesterMixin:
    def test_kwargs_overrides_default_image_processor_kwargs(self):
        if "image_processor" not in self.processor_class.attributes:
            self.skipTest(f"image_processor attribute not present in {self.processor_class}")
-        image_processor = self.get_component("image_processor", crop_size=(234, 234))
+        image_processor = self.get_component("image_processor", size=(234, 234))
-        tokenizer = self.get_component("tokenizer", max_length=117)
+        tokenizer = self.get_component("tokenizer", max_length=117, padding="max_length")
        processor = self.processor_class(tokenizer=tokenizer, image_processor=image_processor)
        self.skip_processor_without_typed_kwargs(processor)
@@ -183,7 +187,7 @@ class ProcessorTesterMixin:
        input_str = "lower newer"
        image_input = self.prepare_image_inputs()
-        inputs = processor(text=input_str, images=image_input, crop_size=[224, 224])
+        inputs = processor(text=input_str, images=image_input, size=[224, 224])
        self.assertEqual(len(inputs["pixel_values"][0][0]), 224)
    @require_torch
@@ -203,7 +207,7 @@ class ProcessorTesterMixin:
            text=input_str,
            images=image_input,
            return_tensors="pt",
-            crop_size={"height": 214, "width": 214},
+            size={"height": 214, "width": 214},
            padding="max_length",
            max_length=76,
        )
@@ -228,7 +232,7 @@ class ProcessorTesterMixin:
            text=input_str,
            images=image_input,
            return_tensors="pt",
-            crop_size={"height": 214, "width": 214},
+            size={"height": 214, "width": 214},
            padding="longest",
            max_length=76,
        )
@@ -254,8 +258,8 @@ class ProcessorTesterMixin:
            _ = processor(
                text=input_str,
                images=image_input,
-                images_kwargs={"crop_size": {"height": 222, "width": 222}},
+                images_kwargs={"size": {"height": 222, "width": 222}},
-                crop_size={"height": 214, "width": 214},
+                size={"height": 214, "width": 214},
            )
    @require_torch
@@ -275,7 +279,7 @@ class ProcessorTesterMixin:
        # Define the kwargs for each modality
        all_kwargs = {
            "common_kwargs": {"return_tensors": "pt"},
-            "images_kwargs": {"crop_size": {"height": 214, "width": 214}},
+            "images_kwargs": {"size": {"height": 214, "width": 214}},
            "text_kwargs": {"padding": "max_length", "max_length": 76},
        }
@@ -303,7 +307,7 @@ class ProcessorTesterMixin:
        # Define the kwargs for each modality
        all_kwargs = {
            "common_kwargs": {"return_tensors": "pt"},
-            "images_kwargs": {"crop_size": {"height": 214, "width": 214}},
+            "images_kwargs": {"size": {"height": 214, "width": 214}},
            "text_kwargs": {"padding": "max_length", "max_length": 76},
        }