add uniform processors for altclip + chinese_clip (#31198)

* add initial design for uniform processors + align model * add uniform processors for altclip + chinese_clip * fix mutable default 👀 * add configuration test * handle structured kwargs w defaults + add test * protect torch-specific test * fix style * fix * rebase * update processor to generic kwargs + test * fix style * add sensible kwargs merge * update test * fix assertEqual * move kwargs merging to processing common * rework kwargs for type hinting * just get Unpack from extensions * run-slow[align] * handle kwargs passed as nested dict * add from_pretrained test for nested kwargs handling * [run-slow]align * update documentation + imports * update audio inputs * protect audio types, silly * try removing imports * make things simpler * simplerer * move out kwargs test to common mixin * [run-slow]align * skip tests for old processors * [run-slow]align, clip * !$#@!! protect imports, darn it * [run-slow]align, clip * [run-slow]align, clip * update common processor testing * add altclip * add chinese_clip * add pad_size * [run-slow]align, clip, chinese_clip, altclip * remove duplicated tests * fix * update doc * improve documentation for default values * add model_max_length testing This parameter depends on tokenizers received. * Raise if kwargs are specified in two places * fix * match defaults * force padding * fix tokenizer test * clean defaults * move tests to common * remove try/catch block * deprecate kwarg * format * add copyright + remove unused method * [run-slow]altclip, chinese_clip * clean imports * fix version * clean up deprecation * fix style * add corner case test on kwarg overlap * resume processing - add Unpack as importable * add tmpdirname * fix altclip * fix up * add back crop_size to specific tests * generalize tests to possible video_processor * add back crop_size arg * fixup overlapping kwargs test for qformer_tokenizer * remove copied from * fixup chinese_clip tests values * fixup tests - qformer tokenizers * [run-slow] altclip, chinese_clip * remove prepare_image_inputs
2024-09-19 17:21:54 +02:00
parent 4f0246e535
commit 413008c580
10 changed files with 463 additions and 52 deletions
--- a/tests/test_processing_common.py
+++ b/tests/test_processing_common.py
@@ -146,7 +146,6 @@ class ProcessorTesterMixin:
        self.skip_processor_without_typed_kwargs(processor)
        input_str = "lower newer"
        image_input = self.prepare_image_inputs()
-
        inputs = processor(text=input_str, images=image_input, return_tensors="pt")
        self.assertEqual(len(inputs["input_ids"][0]), 117)

@@ -175,7 +174,6 @@ class ProcessorTesterMixin:
        self.skip_processor_without_typed_kwargs(processor)
        input_str = "lower newer"
        image_input = self.prepare_image_inputs()
-
        inputs = processor(
            text=input_str, images=image_input, return_tensors="pt", max_length=112, padding="max_length"
        )
@@ -238,7 +236,6 @@ class ProcessorTesterMixin:
            padding="longest",
            max_length=76,
        )
-
        self.assertEqual(inputs["pixel_values"].shape[2], 214)

        self.assertEqual(len(inputs["input_ids"][0]), 6)
@@ -311,3 +308,30 @@ class ProcessorTesterMixin:
        self.assertEqual(inputs["pixel_values"].shape[2], 214)

        self.assertEqual(len(inputs["input_ids"][0]), 76)
+
+    # TODO: the same test, but for audio + text processors that have strong overlap in kwargs
+    # TODO (molbap) use the same structure of attribute kwargs for other tests to avoid duplication
+    def test_overlapping_text_kwargs_handling(self):
+        if "image_processor" not in self.processor_class.attributes:
+            self.skipTest(f"image_processor attribute not present in {self.processor_class}")
+        processor_kwargs = {}
+        processor_kwargs["image_processor"] = self.get_component("image_processor")
+        processor_kwargs["tokenizer"] = tokenizer = self.get_component("tokenizer")
+        if not tokenizer.pad_token:
+            tokenizer.pad_token = "[TEST_PAD]"
+        if "video_processor" in self.processor_class.attributes:
+            processor_kwargs["video_processor"] = self.get_component("video_processor")
+        processor = self.processor_class(**processor_kwargs)
+        self.skip_processor_without_typed_kwargs(processor)
+
+        input_str = "lower newer"
+        image_input = self.prepare_image_inputs()
+
+        with self.assertRaises(ValueError):
+            _ = processor(
+                text=input_str,
+                images=image_input,
+                return_tensors="pt",
+                padding="max_length",
+                text_kwargs={"padding": "do_not_pad"},
+            )