committed by
GitHub
parent
3a24ba82ad
commit
fd70464fa7
@@ -2214,7 +2214,7 @@ class MllamaForConditionalGeneration(MllamaPreTrainedModel, GenerationMixin):
|
|||||||
|
|
||||||
# If we're in pre-fill or cacheless decoding step, then we need pixel_values and aspect ratios
|
# If we're in pre-fill or cacheless decoding step, then we need pixel_values and aspect ratios
|
||||||
# to compute image hidden states, otherwise they are cached within each cross attn layer
|
# to compute image hidden states, otherwise they are cached within each cross attn layer
|
||||||
if (input_ids == self.config.image_token_index).any():
|
if cache_position[0] == 0:
|
||||||
model_inputs["pixel_values"] = pixel_values
|
model_inputs["pixel_values"] = pixel_values
|
||||||
model_inputs["aspect_ratio_ids"] = aspect_ratio_ids
|
model_inputs["aspect_ratio_ids"] = aspect_ratio_ids
|
||||||
model_inputs["aspect_ratio_mask"] = aspect_ratio_mask
|
model_inputs["aspect_ratio_mask"] = aspect_ratio_mask
|
||||||
|
|||||||
@@ -243,6 +243,7 @@ def check_attribute_being_used(config_class, attributes, default_value, source_s
|
|||||||
"pad_index",
|
"pad_index",
|
||||||
"unk_index",
|
"unk_index",
|
||||||
"mask_index",
|
"mask_index",
|
||||||
|
"image_token_index", # for VLMs
|
||||||
"image_size",
|
"image_size",
|
||||||
"use_cache",
|
"use_cache",
|
||||||
"out_features",
|
"out_features",
|
||||||
|
|||||||
Reference in New Issue
Block a user