enable low-precision pipeline (#31625)
* enable low-precision pipeline
* fix parameter for ASR
* reformat
* fix asr bug
* fix bug for zero-shot
* add dtype check
* rm useless comments
* add np.float16 check
* Update src/transformers/pipelines/image_classification.py
  Co-authored-by: Marc Sun <57196510+SunMarc@users.noreply.github.com>
* Update src/transformers/pipelines/token_classification.py
  Co-authored-by: Marc Sun <57196510+SunMarc@users.noreply.github.com>
* fix comments
* fix asr check
* make fixup
* No more need for is_torch_available()

---------

Co-authored-by: Marc Sun <57196510+SunMarc@users.noreply.github.com>
Co-authored-by: Matt <Rocketknight1@users.noreply.github.com>
Co-authored-by: Matt <rocketknight1@gmail.com>
This commit is contained in:
@@ -167,6 +167,48 @@ class AutomaticSpeechRecognitionPipelineTests(unittest.TestCase):
|
||||
):
|
||||
_ = speech_recognizer(waveform, return_timestamps="char")
|
||||
|
||||
@require_torch
def test_small_model_pt_fp16(self):
    """Exercise the ASR pipeline with a small speech-to-text checkpoint in float16.

    The pipeline is created with ``torch_dtype=torch.float16`` and fed a
    synthetic float32 waveform. The plain call and the chunked call
    (``chunk_length_s=10``) must both return the same transcription, and
    requesting character-level timestamps must raise ``ValueError``.
    """
    checkpoint = "facebook/s2t-small-mustc-en-fr-st"
    asr = pipeline(
        task="automatic-speech-recognition",
        model=checkpoint,
        tokenizer=checkpoint,
        framework="pt",
        torch_dtype=torch.float16,
    )

    # Synthetic input: a 0..999 float32 ramp repeated 34 times.
    ramp = np.arange(1000, dtype=np.float32)
    audio = np.tile(ramp, 34)
    expected = {"text": "(Applaudissements)"}

    # First the whole-input call, then the chunked call.
    for call_kwargs in ({}, {"chunk_length_s": 10}):
        self.assertEqual(asr(audio, **call_kwargs), expected)

    # Non CTC models cannot use return_timestamps
    timestamp_error = "^We cannot return_timestamps yet on non-CTC models apart from Whisper!$"
    with self.assertRaisesRegex(ValueError, timestamp_error):
        asr(audio, return_timestamps="char")
|
||||
|
||||
@require_torch
def test_small_model_pt_bf16(self):
    """Exercise the ASR pipeline with a small speech-to-text checkpoint in bfloat16.

    The pipeline is created with ``torch_dtype=torch.bfloat16`` and fed a
    synthetic float32 waveform. The transcription is checked for a plain
    call and for a chunked call (``chunk_length_s=10``); a request for
    character-level timestamps is then expected to raise ``ValueError``.
    """
    model_id = "facebook/s2t-small-mustc-en-fr-st"
    pipe_kwargs = {
        "task": "automatic-speech-recognition",
        "model": model_id,
        "tokenizer": model_id,
        "framework": "pt",
        "torch_dtype": torch.bfloat16,
    }
    recognizer = pipeline(**pipe_kwargs)

    # Synthetic input: a 0..999 float32 ramp repeated 34 times.
    samples = np.tile(np.arange(1000, dtype=np.float32), 34)
    expected_output = {"text": "(Applaudissements)"}

    plain_result = recognizer(samples)
    self.assertEqual(plain_result, expected_output)

    chunked_result = recognizer(samples, chunk_length_s=10)
    self.assertEqual(chunked_result, expected_output)

    # Non CTC models cannot use return_timestamps
    expected_message = "^We cannot return_timestamps yet on non-CTC models apart from Whisper!$"
    with self.assertRaisesRegex(ValueError, expected_message):
        recognizer(samples, return_timestamps="char")
|
||||
|
||||
@slow
|
||||
@require_torch_accelerator
|
||||
def test_whisper_fp16(self):
|
||||
|
||||
Reference in New Issue
Block a user