From 021887682224daf29264f98c759a45e88c82e244 Mon Sep 17 00:00:00 2001 From: Sanchit Gandhi <93869735+sanchit-gandhi@users.noreply.github.com> Date: Thu, 24 Aug 2023 17:58:37 +0100 Subject: [PATCH] [ASR Pipe Test] Fix CTC timestamps error message (#25727) --- src/transformers/pipelines/automatic_speech_recognition.py | 2 +- tests/pipelines/test_pipelines_automatic_speech_recognition.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/transformers/pipelines/automatic_speech_recognition.py b/src/transformers/pipelines/automatic_speech_recognition.py index fc2a9b3057..98e43eef85 100644 --- a/src/transformers/pipelines/automatic_speech_recognition.py +++ b/src/transformers/pipelines/automatic_speech_recognition.py @@ -402,7 +402,7 @@ class AutomaticSpeechRecognitionPipeline(ChunkPipeline): raise ValueError("CTC with LM can only predict word level timestamps, set `return_timestamps='word'`") if self.type == "ctc" and return_timestamps not in ["char", "word"]: raise ValueError( - "CTC can either predict character (char) level timestamps, or word level timestamps." + "CTC can either predict character level timestamps, or word level timestamps." "Set `return_timestamps='char'` or `return_timestamps='word'` as required." ) if self.type == "seq2seq_whisper" and return_timestamps == "char": diff --git a/tests/pipelines/test_pipelines_automatic_speech_recognition.py b/tests/pipelines/test_pipelines_automatic_speech_recognition.py index 7c6a950c3b..51747482ce 100644 --- a/tests/pipelines/test_pipelines_automatic_speech_recognition.py +++ b/tests/pipelines/test_pipelines_automatic_speech_recognition.py @@ -1150,7 +1150,7 @@ class AutomaticSpeechRecognitionPipelineTests(unittest.TestCase): # CTC models must specify return_timestamps type - cannot set `return_timestamps=True` blindly with self.assertRaisesRegex( ValueError, - "^CTC can either predict character (char) level timestamps, or word level timestamps." + "^CTC can either predict character level timestamps, or word level timestamps." "Set `return_timestamps='char'` or `return_timestamps='word'` as required.$", ): _ = speech_recognizer(audio, return_timestamps=True)