Adding chunking for whisper (all seq2seq actually). Very crude matching algorithm. (#20104)

* Very crude matching algorithm.

* Fixing tests.

* Removing comments

* Adding warning + fix short matches.

* Cleanup tests.

* Quality.

* Less noisy.

* Fixup.
This commit is contained in:
Nicolas Patry
2022-11-14 22:32:50 +01:00
committed by GitHub
parent 938cb04789
commit 25c451e5a0
2 changed files with 75 additions and 18 deletions

View File

@@ -144,12 +144,8 @@ class AutomaticSpeechRecognitionPipelineTests(unittest.TestCase, metaclass=Pipel
waveform = np.tile(np.arange(1000, dtype=np.float32), 34)
output = speech_recognizer(waveform)
self.assertEqual(output, {"text": "(Applaudissements)"})
with self.assertRaises(ValueError) as v:
_ = speech_recognizer(waveform, chunk_length_s=10)
self.assertEqual(
str(v.exception),
"`chunk_length_s` is only valid for CTC models, use other chunking options for other models",
)
output = speech_recognizer(waveform, chunk_length_s=10)
self.assertEqual(output, {"text": "(Applaudissements)"})
# Non CTC models cannot use return_timestamps
with self.assertRaises(ValueError) as v:
@@ -261,6 +257,22 @@ class AutomaticSpeechRecognitionPipelineTests(unittest.TestCase, metaclass=Pipel
output = speech_recognizer(filename)
self.assertEqual(output, {"text": "A MAN SAID TO THE UNIVERSE SIR I EXIST"})
@require_torch
@slow
def test_torch_whisper(self):
speech_recognizer = pipeline(
task="automatic-speech-recognition",
model="openai/whisper-tiny",
framework="pt",
)
ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation").sort("id")
filename = ds[40]["file"]
output = speech_recognizer(filename)
self.assertEqual(output, {"text": " A man said to the universe, Sir, I exist."})
output = speech_recognizer([filename], chunk_length_s=5, batch_size=4)
self.assertEqual(output, [{"text": " A man said to the universe, Sir, I exist."}])
@require_torch
@slow
def test_torch_speech_encoder_decoder(self):