From 28df7f854ac4ec650c4a5057cc95a072d5efa5a8 Mon Sep 17 00:00:00 2001 From: Isotr0py <2037008807@qq.com> Date: Wed, 2 Jul 2025 19:57:15 +0800 Subject: [PATCH] Fix multimodal processor get duplicate arguments when receive kwargs for initialization (#39125) * fix processor tokenizer override Signed-off-by: Isotr0py <2037008807@qq.com> * code format Signed-off-by: Isotr0py <2037008807@qq.com> * add regression test Signed-off-by: Isotr0py <2037008807@qq.com> * fix Signed-off-by: Isotr0py <2037008807@qq.com> * check image processor same Signed-off-by: Isotr0py <2037008807@qq.com> --------- Signed-off-by: Isotr0py <2037008807@qq.com> --- src/transformers/processing_utils.py | 10 +++++++--- tests/test_processing_common.py | 12 ++++++++++++ 2 files changed, 19 insertions(+), 3 deletions(-) diff --git a/src/transformers/processing_utils.py b/src/transformers/processing_utils.py index 2a97cde3cc..9dd9d9ce00 100644 --- a/src/transformers/processing_utils.py +++ b/src/transformers/processing_utils.py @@ -1097,9 +1097,13 @@ class ProcessorMixin(PushToHubMixin): processor_config=processor_dict, valid_kwargs=accepted_args_and_kwargs ) - # remove args that are in processor_dict to avoid duplicate arguments - args_to_remove = [i for i, arg in enumerate(accepted_args_and_kwargs) if arg in processor_dict] - args = [arg for i, arg in enumerate(args) if i not in args_to_remove] + # update args that are already in processor_dict to avoid duplicate arguments + args_to_update = { + i: valid_kwargs.pop(arg) + for i, arg in enumerate(accepted_args_and_kwargs) + if (arg in valid_kwargs and i < len(args)) + } + args = [arg if i not in args_to_update else args_to_update[i] for i, arg in enumerate(args)] # instantiate processor with used (and valid) kwargs only processor = cls(*args, **valid_kwargs) diff --git a/tests/test_processing_common.py b/tests/test_processing_common.py index ebede32f3e..855bcaaf27 100644 --- a/tests/test_processing_common.py +++ b/tests/test_processing_common.py @@ -351,6 +351,18 @@ class ProcessorTesterMixin: return_tensors="pt", ) + def test_args_overlap_kwargs(self): + if "image_processor" not in self.processor_class.attributes: + self.skipTest(f"image_processor attribute not present in {self.processor_class}") + processor_first = self.get_processor() + image_processor = processor_first.image_processor + image_processor.is_override = True + + with tempfile.TemporaryDirectory() as tmpdirname: + processor_first.save_pretrained(tmpdirname) + processor_second = self.processor_class.from_pretrained(tmpdirname, image_processor=image_processor) + self.assertTrue(processor_second.image_processor.is_override) + def test_structured_kwargs_nested(self): if "image_processor" not in self.processor_class.attributes: self.skipTest(f"image_processor attribute not present in {self.processor_class}")