fix: AttributeError: 'LlavaProcessor' object has no attribute 'image_token_id' (#37026)

* Add image_token_id and video_token_id handling in Llava processors

* fix: image to video

* fix: correct image and video token ID handling in Llava processors

* fix: improve image and video token ID handling in Llava processors
This commit is contained in:
jp
2025-03-28 18:46:24 +09:00
committed by GitHub
parent 064cd7cdac
commit 3af425d4c6
4 changed files with 30 additions and 0 deletions

View File

@@ -89,6 +89,11 @@ class LlavaProcessor(ProcessorMixin):
self.num_additional_image_tokens = num_additional_image_tokens
self.vision_feature_select_strategy = vision_feature_select_strategy
self.image_token = tokenizer.image_token if hasattr(tokenizer, "image_token") else image_token
self.image_token_id = (
tokenizer.image_token_id
if getattr(tokenizer, "image_token_id", None)
else tokenizer.convert_tokens_to_ids(self.image_token)
)
super().__init__(image_processor, tokenizer, chat_template=chat_template)
def __call__(

View File

@@ -92,6 +92,11 @@ class LlavaNextProcessor(ProcessorMixin):
self.num_additional_image_tokens = num_additional_image_tokens
self.vision_feature_select_strategy = vision_feature_select_strategy
self.image_token = tokenizer.image_token if hasattr(tokenizer, "image_token") else image_token
self.image_token_id = (
tokenizer.image_token_id
if getattr(tokenizer, "image_token_id", None)
else tokenizer.convert_tokens_to_ids(self.image_token)
)
super().__init__(image_processor, tokenizer, chat_template=chat_template)
def __call__(

View File

@@ -107,6 +107,16 @@ class LlavaNextVideoProcessor(ProcessorMixin):
self.vision_feature_select_strategy = vision_feature_select_strategy
self.image_token = tokenizer.image_token if hasattr(tokenizer, "image_token") else image_token
self.video_token = tokenizer.video_token if hasattr(tokenizer, "video_token") else video_token
self.image_token_id = (
tokenizer.image_token_id
if getattr(tokenizer, "image_token_id", None)
else tokenizer.convert_tokens_to_ids(self.image_token)
)
self.video_token_id = (
tokenizer.video_token_id
if getattr(tokenizer, "video_token_id", None)
else tokenizer.convert_tokens_to_ids(self.video_token)
)
super().__init__(video_processor, image_processor, tokenizer, chat_template=chat_template)
def __call__(

View File

@@ -100,6 +100,16 @@ class LlavaOnevisionProcessor(ProcessorMixin):
self.vision_feature_select_strategy = vision_feature_select_strategy
self.image_token = tokenizer.image_token if hasattr(tokenizer, "image_token") else image_token
self.video_token = tokenizer.video_token if hasattr(tokenizer, "video_token") else video_token
self.image_token_id = (
tokenizer.image_token_id
if getattr(tokenizer, "image_token_id", None)
else tokenizer.convert_tokens_to_ids(self.image_token)
)
self.video_token_id = (
tokenizer.video_token_id
if getattr(tokenizer, "video_token_id", None)
else tokenizer.convert_tokens_to_ids(self.video_token)
)
super().__init__(image_processor, tokenizer, video_processor, chat_template=chat_template)
def __call__(