add uniform processors for altclip + chinese_clip (#31198)

* add initial design for uniform processors + align model

* add uniform processors for altclip + chinese_clip

* fix mutable default 👀

* add configuration test

* handle structured kwargs with defaults + add test

* protect torch-specific test

* fix style

* fix

* rebase

* update processor to generic kwargs + test

* fix style

* add sensible kwargs merge

* update test

* fix assertEqual

* move kwargs merging to processing common

* rework kwargs for type hinting

* just get Unpack from extensions

* run-slow[align]

* handle kwargs passed as nested dict

* add from_pretrained test for nested kwargs handling

* [run-slow]align

* update documentation + imports

* update audio inputs

* protect audio types, silly

* try removing imports

* make things simpler

* simplerer

* move out kwargs test to common mixin

* [run-slow]align

* skip tests for old processors

* [run-slow]align, clip

* !$#@!! protect imports, darn it

* [run-slow]align, clip

* [run-slow]align, clip

* update common processor testing

* add altclip

* add chinese_clip

* add pad_size

* [run-slow]align, clip, chinese_clip, altclip

* remove duplicated tests

* fix

* update doc

* improve documentation for default values

* add model_max_length testing

This parameter depends on the tokenizer the processor receives.

* Raise if kwargs are specified in two places

* fix

* match defaults

* force padding

* fix tokenizer test

* clean defaults

* move tests to common

* remove try/catch block

* deprecate kwarg

* format

* add copyright + remove unused method

* [run-slow]altclip, chinese_clip

* clean imports

* fix version

* clean up deprecation

* fix style

* add corner case test on kwarg overlap

* resume processing - add Unpack as importable

* add tmpdirname

* fix altclip

* fix up

* add back crop_size to specific tests

* generalize tests to possible video_processor

* add back crop_size arg

* fixup overlapping kwargs test for qformer_tokenizer

* remove copied from

* fixup chinese_clip tests values

* fixup tests - qformer tokenizers

* [run-slow] altclip, chinese_clip

* remove prepare_image_inputs
This commit is contained in:
Pablo Montalvo
2024-09-19 17:21:54 +02:00
committed by GitHub
parent 4f0246e535
commit 413008c580
10 changed files with 463 additions and 52 deletions

View File

@@ -146,7 +146,6 @@ class ProcessorTesterMixin:
self.skip_processor_without_typed_kwargs(processor)
input_str = "lower newer"
image_input = self.prepare_image_inputs()
inputs = processor(text=input_str, images=image_input, return_tensors="pt")
self.assertEqual(len(inputs["input_ids"][0]), 117)
@@ -175,7 +174,6 @@ class ProcessorTesterMixin:
self.skip_processor_without_typed_kwargs(processor)
input_str = "lower newer"
image_input = self.prepare_image_inputs()
inputs = processor(
text=input_str, images=image_input, return_tensors="pt", max_length=112, padding="max_length"
)
@@ -238,7 +236,6 @@ class ProcessorTesterMixin:
padding="longest",
max_length=76,
)
self.assertEqual(inputs["pixel_values"].shape[2], 214)
self.assertEqual(len(inputs["input_ids"][0]), 6)
@@ -311,3 +308,30 @@ class ProcessorTesterMixin:
self.assertEqual(inputs["pixel_values"].shape[2], 214)
self.assertEqual(len(inputs["input_ids"][0]), 76)
# TODO: the same test, but for audio + text processors that have strong overlap in kwargs
# TODO (molbap) use the same structure of attribute kwargs for other tests to avoid duplication
def test_overlapping_text_kwargs_handling(self):
if "image_processor" not in self.processor_class.attributes:
self.skipTest(f"image_processor attribute not present in {self.processor_class}")
processor_kwargs = {}
processor_kwargs["image_processor"] = self.get_component("image_processor")
processor_kwargs["tokenizer"] = tokenizer = self.get_component("tokenizer")
if not tokenizer.pad_token:
tokenizer.pad_token = "[TEST_PAD]"
if "video_processor" in self.processor_class.attributes:
processor_kwargs["video_processor"] = self.get_component("video_processor")
processor = self.processor_class(**processor_kwargs)
self.skip_processor_without_typed_kwargs(processor)
input_str = "lower newer"
image_input = self.prepare_image_inputs()
with self.assertRaises(ValueError):
_ = processor(
text=input_str,
images=image_input,
return_tensors="pt",
padding="max_length",
text_kwargs={"padding": "do_not_pad"},
)