[Whisper] Fix pipeline after timestamp merges (#21198)
* pass return_timestamps to pre-process
* add a test to test it
* test does not need device 0
* remove failing bit
* update test
This commit is contained in:
@@ -291,6 +291,29 @@ class AutomaticSpeechRecognitionPipelineTests(unittest.TestCase, metaclass=Pipel
|
||||
output = speech_recognizer(filename)
|
||||
self.assertEqual(output, {"text": "A MAN SAID TO THE UNIVERSE SIR I EXIST"})
|
||||
|
||||
@require_torch
def test_return_timestamps_in_preprocess(self):
    """Check chunked Whisper transcription with and without timestamps.

    Builds an ASR pipeline for ``openai/whisper-tiny`` with chunking
    enabled (``chunk_length_s=8``, ``stride_length_s=1``), transcribes the
    first sample of the streamed LibriSpeech test split, and compares the
    result of a plain call against a call with ``return_timestamps=True``.
    """
    asr = pipeline(
        task="automatic-speech-recognition",
        model="openai/whisper-tiny",
        chunk_length_s=8,
        stride_length_s=1,
    )
    # Stream the dataset and take just the first example.
    dataset = load_dataset("librispeech_asr", "clean", split="test", streaming=True)
    first_sample = next(iter(dataset))
    waveform = first_sample["audio"]["array"]

    # Set the decoder prompt ids on the model config for English transcription.
    asr.model.config.forced_decoder_ids = asr.tokenizer.get_decoder_prompt_ids(
        language="en", task="transcribe"
    )

    expected_text = " Conquered returned to its place amidst the tents."

    # Plain call: only the transcription text is returned.
    plain_output = asr(waveform)
    self.assertEqual(plain_output, {"text": expected_text})

    # Timestamped call: the same text plus a single timestamped chunk.
    timestamped_output = asr(waveform, return_timestamps=True)
    expected_with_chunks = {
        "text": expected_text,
        "chunks": [{"text": expected_text, "timestamp": (0.0, 3.36)}],
    }
    self.assertEqual(timestamped_output, expected_with_chunks)
|
||||
|
||||
@require_torch
|
||||
@slow
|
||||
def test_torch_whisper(self):
|
||||
|
||||
Reference in New Issue
Block a user