Fix multimodal processor getting duplicate arguments when it receives kwargs for initialization (#39125)
* fix processor tokenizer override
  Signed-off-by: Isotr0py <2037008807@qq.com>
* code format
  Signed-off-by: Isotr0py <2037008807@qq.com>
* add regression test
  Signed-off-by: Isotr0py <2037008807@qq.com>
* fix
  Signed-off-by: Isotr0py <2037008807@qq.com>
* check image processor same
  Signed-off-by: Isotr0py <2037008807@qq.com>

---------

Signed-off-by: Isotr0py <2037008807@qq.com>
This commit is contained in:
@@ -1097,9 +1097,13 @@ class ProcessorMixin(PushToHubMixin):
|
||||
processor_config=processor_dict, valid_kwargs=accepted_args_and_kwargs
|
||||
)
|
||||
|
||||
# remove args that are in processor_dict to avoid duplicate arguments
|
||||
args_to_remove = [i for i, arg in enumerate(accepted_args_and_kwargs) if arg in processor_dict]
|
||||
args = [arg for i, arg in enumerate(args) if i not in args_to_remove]
|
||||
# update args that are already in processor_dict to avoid duplicate arguments
|
||||
args_to_update = {
|
||||
i: valid_kwargs.pop(arg)
|
||||
for i, arg in enumerate(accepted_args_and_kwargs)
|
||||
if (arg in valid_kwargs and i < len(args))
|
||||
}
|
||||
args = [arg if i not in args_to_update else args_to_update[i] for i, arg in enumerate(args)]
|
||||
|
||||
# instantiate processor with used (and valid) kwargs only
|
||||
processor = cls(*args, **valid_kwargs)
|
||||
|
||||
@@ -351,6 +351,18 @@ class ProcessorTesterMixin:
|
||||
return_tensors="pt",
|
||||
)
|
||||
|
||||
def test_args_overlap_kwargs(self):
    """Check that an ``image_processor`` kwarg overrides the one loaded from disk.

    A processor is saved to a temporary directory and reloaded with
    ``from_pretrained(..., image_processor=<marked instance>)``. The reloaded
    processor must expose the marked instance rather than one rebuilt from the
    saved files. Skipped when the processor class has no ``image_processor``
    attribute.
    """
    if "image_processor" not in self.processor_class.attributes:
        self.skipTest(f"image_processor attribute not present in {self.processor_class}")

    processor_first = self.get_processor()
    image_processor = processor_first.image_processor

    with tempfile.TemporaryDirectory() as tmpdirname:
        processor_first.save_pretrained(tmpdirname)

        # Mark the instance only AFTER saving, so the marker cannot end up in the
        # files written to tmpdirname.
        # NOTE(review): presumably save_pretrained serializes instance attributes of
        # the image processor; if so, marking before the save would let this test
        # pass even when the kwarg override is silently ignored - confirm.
        image_processor.is_override = True

        processor_second = self.processor_class.from_pretrained(tmpdirname, image_processor=image_processor)

        # getattr with a default turns a missing marker into an assertion failure
        # instead of an AttributeError.
        self.assertTrue(getattr(processor_second.image_processor, "is_override", False))
|
||||
|
||||
def test_structured_kwargs_nested(self):
|
||||
if "image_processor" not in self.processor_class.attributes:
|
||||
self.skipTest(f"image_processor attribute not present in {self.processor_class}")
|
||||
|
||||
Reference in New Issue
Block a user