[Whisper] Block language/task args for English-only (#27322)

* [Whisper] Block language/task args for English-only

* Update src/transformers/models/whisper/modeling_whisper.py

Co-authored-by: amyeroberts <22614925+amyeroberts@users.noreply.github.com>

---------

Co-authored-by: amyeroberts <22614925+amyeroberts@users.noreply.github.com>
This commit is contained in:
Sanchit Gandhi
2023-11-07 10:04:23 +00:00
committed by GitHub
parent 9beb2737d7
commit da7ea9a4e3
2 changed files with 54 additions and 0 deletions

View File

@@ -852,6 +852,44 @@ class AutomaticSpeechRecognitionPipelineTests(unittest.TestCase):
output_3 = speech_translator(filename)
self.assertEqual(output_3, {"text": " Un uomo ha detto all'universo, Sir, esiste."})
@slow
@require_torch
def test_whisper_language(self):
    """Exercise the `language` generate kwarg on English-only and multilingual Whisper.

    Three scenarios are covered, in order:
      1. `openai/whisper-tiny.en` transcribes when no language is passed.
      2. `openai/whisper-tiny.en` raises `ValueError` when a language is passed.
      3. `openai/whisper-tiny` transcribes when a language is passed.
    """
    english_only_asr = pipeline(
        task="automatic-speech-recognition",
        model="openai/whisper-tiny.en",
        framework="pt",
    )
    dataset = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
    audio_path = dataset[0]["file"]

    # Scenario 1: the English-only checkpoint works without a language argument.
    transcription = english_only_asr(audio_path)
    self.assertEqual(
        transcription,
        {"text": " Mr. Quilter is the apostle of the middle classes, and we are glad to welcome his gospel."},
    )

    # Scenario 2: the English-only checkpoint rejects the language argument.
    # NOTE(review): "langauge" is kept exactly as written in the original test;
    # confirm it matches the message raised by the model before correcting it.
    # The text is used as a regular expression, so "." matches any character.
    expected_error = (
        "Cannot specify `task` or `langauge` for an English-only model. If the model is intended to be multilingual, "
        "pass `is_multilingual=True` to generate, or update the generation config."
    )
    with self.assertRaisesRegex(ValueError, expected_error):
        english_only_asr(audio_path, generate_kwargs={"language": "en"})

    # Scenario 3: the multilingual checkpoint accepts the language argument.
    multilingual_asr = pipeline(
        task="automatic-speech-recognition",
        model="openai/whisper-tiny",
        framework="pt",
    )
    transcription = multilingual_asr(audio_path, generate_kwargs={"language": "en"})
    self.assertEqual(
        transcription,
        {"text": " Mr. Quilter is the apostle of the middle classes and we are glad to welcome his gospel."},
    )
@slow
@require_torch
@require_torchaudio