Chat template: update for processor (#35953)

* update

* we need batched nested input to always process correctly

* update a bit

* fix copies
This commit is contained in:
Raushan Turganbay
2025-02-10 09:52:19 +01:00
committed by GitHub
parent 5bd7694781
commit eebd2c972c
21 changed files with 966 additions and 111 deletions

View File

@@ -110,6 +110,8 @@ class AriaImageProcessor(BaseImageProcessor):
The resampling filter to use if resizing the image.
"""
model_input_names = ["pixel_values", "pixel_mask", "num_crops"]
def __init__(
self,
image_mean: List[float] = None,

View File

@@ -499,6 +499,8 @@ class AriaImageProcessor(BaseImageProcessor):
The resampling filter to use if resizing the image.
"""
model_input_names = ["pixel_values", "pixel_mask", "num_crops"]
def __init__(
self,
image_mean: List[float] = None,
@@ -997,6 +999,10 @@ class AriaProcessor(ProcessorMixin):
def model_input_names(self):
tokenizer_input_names = self.tokenizer.model_input_names
image_processor_input_names = self.image_processor.model_input_names
# Remove `num_crops`, it is popped and used only when processing. Make a copy of list when remocing
# otherwise `self.image_processor.model_input_names` is also modified
image_processor_input_names = [name for name in image_processor_input_names if name != "num_crops"]
return list(dict.fromkeys(tokenizer_input_names + image_processor_input_names))

View File

@@ -158,6 +158,10 @@ class AriaProcessor(ProcessorMixin):
def model_input_names(self):
tokenizer_input_names = self.tokenizer.model_input_names
image_processor_input_names = self.image_processor.model_input_names
# Remove `num_crops`, it is popped and used only when processing. Make a copy of list when remocing
# otherwise `self.image_processor.model_input_names` is also modified
image_processor_input_names = [name for name in image_processor_input_names if name != "num_crops"]
return list(dict.fromkeys(tokenizer_input_names + image_processor_input_names))