Fix multimodal processor receiving duplicate arguments when kwargs are passed for initialization (#39125)
* fix processor tokenizer override
  Signed-off-by: Isotr0py <2037008807@qq.com>
* code format
  Signed-off-by: Isotr0py <2037008807@qq.com>
* add regression test
  Signed-off-by: Isotr0py <2037008807@qq.com>
* fix
  Signed-off-by: Isotr0py <2037008807@qq.com>
* check image processor same
  Signed-off-by: Isotr0py <2037008807@qq.com>
---------
Signed-off-by: Isotr0py <2037008807@qq.com>
This commit is contained in:
@@ -1097,9 +1097,13 @@ class ProcessorMixin(PushToHubMixin):
|
|||||||
processor_config=processor_dict, valid_kwargs=accepted_args_and_kwargs
|
processor_config=processor_dict, valid_kwargs=accepted_args_and_kwargs
|
||||||
)
|
)
|
||||||
|
|
||||||
# remove args that are in processor_dict to avoid duplicate arguments
|
# update args that are already in processor_dict to avoid duplicate arguments
|
||||||
args_to_remove = [i for i, arg in enumerate(accepted_args_and_kwargs) if arg in processor_dict]
|
args_to_update = {
|
||||||
args = [arg for i, arg in enumerate(args) if i not in args_to_remove]
|
i: valid_kwargs.pop(arg)
|
||||||
|
for i, arg in enumerate(accepted_args_and_kwargs)
|
||||||
|
if (arg in valid_kwargs and i < len(args))
|
||||||
|
}
|
||||||
|
args = [arg if i not in args_to_update else args_to_update[i] for i, arg in enumerate(args)]
|
||||||
|
|
||||||
# instantiate processor with used (and valid) kwargs only
|
# instantiate processor with used (and valid) kwargs only
|
||||||
processor = cls(*args, **valid_kwargs)
|
processor = cls(*args, **valid_kwargs)
|
||||||
|
|||||||
@@ -351,6 +351,18 @@ class ProcessorTesterMixin:
|
|||||||
return_tensors="pt",
|
return_tensors="pt",
|
||||||
)
|
)
|
||||||
|
|
||||||
|
def test_args_overlap_kwargs(self):
|
||||||
|
if "image_processor" not in self.processor_class.attributes:
|
||||||
|
self.skipTest(f"image_processor attribute not present in {self.processor_class}")
|
||||||
|
processor_first = self.get_processor()
|
||||||
|
image_processor = processor_first.image_processor
|
||||||
|
image_processor.is_override = True
|
||||||
|
|
||||||
|
with tempfile.TemporaryDirectory() as tmpdirname:
|
||||||
|
processor_first.save_pretrained(tmpdirname)
|
||||||
|
processor_second = self.processor_class.from_pretrained(tmpdirname, image_processor=image_processor)
|
||||||
|
self.assertTrue(processor_second.image_processor.is_override)
|
||||||
|
|
||||||
def test_structured_kwargs_nested(self):
|
def test_structured_kwargs_nested(self):
|
||||||
if "image_processor" not in self.processor_class.attributes:
|
if "image_processor" not in self.processor_class.attributes:
|
||||||
self.skipTest(f"image_processor attribute not present in {self.processor_class}")
|
self.skipTest(f"image_processor attribute not present in {self.processor_class}")
|
||||||
|
|||||||
Reference in New Issue
Block a user