Fix missing fast tokenizer/image_processor in whisper/qwen2.5-omni processor (#39244)
* fix missing fast tokenizer in whisper processor
  Signed-off-by: Isotr0py <2037008807@qq.com>
* fix processor test
  Signed-off-by: Isotr0py <2037008807@qq.com>
* fix qwen2.5 omni processor
  Signed-off-by: Isotr0py <2037008807@qq.com>
---------
Signed-off-by: Isotr0py <2037008807@qq.com>
This commit is contained in:
@@ -93,8 +93,8 @@ class Qwen2_5OmniProcessor(ProcessorMixin):
|
||||
"""
|
||||
|
||||
attributes = ["image_processor", "video_processor", "feature_extractor", "tokenizer"]
|
||||
image_processor_class = "Qwen2VLImageProcessor"
|
||||
video_processor_class = "Qwen2VLVideoProcessor"
|
||||
image_processor_class = "AutoImageProcessor"
|
||||
video_processor_class = "AutoVideoProcessor"
|
||||
feature_extractor_class = "WhisperFeatureExtractor"
|
||||
tokenizer_class = ("Qwen2Tokenizer", "Qwen2TokenizerFast")
|
||||
|
||||
|
||||
@@ -35,7 +35,7 @@ class WhisperProcessor(ProcessorMixin):
|
||||
"""
|
||||
|
||||
feature_extractor_class = "WhisperFeatureExtractor"
|
||||
tokenizer_class = "WhisperTokenizer"
|
||||
tokenizer_class = ("WhisperTokenizer", "WhisperTokenizerFast")
|
||||
|
||||
def __init__(self, feature_extractor, tokenizer):
|
||||
super().__init__(feature_extractor, tokenizer)
|
||||
|
||||
@@ -19,7 +19,7 @@ import unittest
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from transformers import WhisperTokenizer, is_speech_available
|
||||
from transformers import WhisperTokenizer, WhisperTokenizerFast, is_speech_available
|
||||
from transformers.testing_utils import require_sentencepiece, require_torch, require_torchaudio
|
||||
|
||||
from .test_feature_extraction_whisper import floats_list
|
||||
@@ -60,7 +60,7 @@ class WhisperProcessorTest(unittest.TestCase):
|
||||
processor = WhisperProcessor.from_pretrained(self.tmpdirname)
|
||||
|
||||
self.assertEqual(processor.tokenizer.get_vocab(), tokenizer.get_vocab())
|
||||
self.assertIsInstance(processor.tokenizer, WhisperTokenizer)
|
||||
self.assertIsInstance(processor.tokenizer, WhisperTokenizerFast)
|
||||
|
||||
self.assertEqual(processor.feature_extractor.to_json_string(), feature_extractor.to_json_string())
|
||||
self.assertIsInstance(processor.feature_extractor, WhisperFeatureExtractor)
|
||||
@@ -77,7 +77,7 @@ class WhisperProcessorTest(unittest.TestCase):
|
||||
)
|
||||
|
||||
self.assertEqual(processor.tokenizer.get_vocab(), tokenizer_add_kwargs.get_vocab())
|
||||
self.assertIsInstance(processor.tokenizer, WhisperTokenizer)
|
||||
self.assertIsInstance(processor.tokenizer, WhisperTokenizerFast)
|
||||
|
||||
self.assertEqual(processor.feature_extractor.to_json_string(), feature_extractor_add_kwargs.to_json_string())
|
||||
self.assertIsInstance(processor.feature_extractor, WhisperFeatureExtractor)
|
||||
|
||||
Reference in New Issue
Block a user