Fix multimodal processor getting duplicate arguments when receiving kwargs for initialization (#39125)

* fix processor tokenizer override

Signed-off-by: Isotr0py <2037008807@qq.com>

* code format

Signed-off-by: Isotr0py <2037008807@qq.com>

* add regression test

Signed-off-by: Isotr0py <2037008807@qq.com>

* fix

Signed-off-by: Isotr0py <2037008807@qq.com>

* check image processor same

Signed-off-by: Isotr0py <2037008807@qq.com>

---------

Signed-off-by: Isotr0py <2037008807@qq.com>
This commit is contained in:
Isotr0py
2025-07-02 19:57:15 +08:00
committed by GitHub
parent b61023a1b7
commit 28df7f854a
2 changed files with 19 additions and 3 deletions

View File

@@ -1097,9 +1097,13 @@ class ProcessorMixin(PushToHubMixin):
processor_config=processor_dict, valid_kwargs=accepted_args_and_kwargs processor_config=processor_dict, valid_kwargs=accepted_args_and_kwargs
) )
# remove args that are in processor_dict to avoid duplicate arguments # update args that are already in processor_dict to avoid duplicate arguments
args_to_remove = [i for i, arg in enumerate(accepted_args_and_kwargs) if arg in processor_dict] args_to_update = {
args = [arg for i, arg in enumerate(args) if i not in args_to_remove] i: valid_kwargs.pop(arg)
for i, arg in enumerate(accepted_args_and_kwargs)
if (arg in valid_kwargs and i < len(args))
}
args = [arg if i not in args_to_update else args_to_update[i] for i, arg in enumerate(args)]
# instantiate processor with used (and valid) kwargs only # instantiate processor with used (and valid) kwargs only
processor = cls(*args, **valid_kwargs) processor = cls(*args, **valid_kwargs)

View File

@@ -351,6 +351,18 @@ class ProcessorTesterMixin:
return_tensors="pt", return_tensors="pt",
) )
def test_args_overlap_kwargs(self):
if "image_processor" not in self.processor_class.attributes:
self.skipTest(f"image_processor attribute not present in {self.processor_class}")
processor_first = self.get_processor()
image_processor = processor_first.image_processor
image_processor.is_override = True
with tempfile.TemporaryDirectory() as tmpdirname:
processor_first.save_pretrained(tmpdirname)
processor_second = self.processor_class.from_pretrained(tmpdirname, image_processor=image_processor)
self.assertTrue(processor_second.image_processor.is_override)
def test_structured_kwargs_nested(self): def test_structured_kwargs_nested(self):
if "image_processor" not in self.processor_class.attributes: if "image_processor" not in self.processor_class.attributes:
self.skipTest(f"image_processor attribute not present in {self.processor_class}") self.skipTest(f"image_processor attribute not present in {self.processor_class}")