Adding timestamps for CTC with LM in ASR pipeline. (#15863)
* Adding timestamps for CTC with LM in ASR pipeline. * Remove print. * Nit change.
This commit is contained in:
@@ -188,6 +188,32 @@ class AutomaticSpeechRecognitionPipelineTests(unittest.TestCase, metaclass=Pipel
|
||||
},
|
||||
)
|
||||
|
||||
speech_recognizer.type = "ctc_with_lm"
|
||||
# Simple test with CTC with LM, chunking + timestamps
|
||||
output = speech_recognizer(filename, chunk_length_s=2.0, return_timestamps="word")
|
||||
self.assertEqual(
|
||||
output,
|
||||
{
|
||||
"text": "y en las ramas medio sumergidas revoloteaban algunos pájaros de quimérico y legendario plumajcri",
|
||||
"chunks": [
|
||||
{"text": "y", "timestamp": (0.52, 0.54)},
|
||||
{"text": "en", "timestamp": (0.6, 0.68)},
|
||||
{"text": "las", "timestamp": (0.74, 0.84)},
|
||||
{"text": "ramas", "timestamp": (0.94, 1.24)},
|
||||
{"text": "medio", "timestamp": (1.32, 1.52)},
|
||||
{"text": "sumergidas", "timestamp": (1.56, 2.22)},
|
||||
{"text": "revoloteaban", "timestamp": (2.36, 3.0)},
|
||||
{"text": "algunos", "timestamp": (3.06, 3.38)},
|
||||
{"text": "pájaros", "timestamp": (3.46, 3.86)},
|
||||
{"text": "de", "timestamp": (3.92, 4.0)},
|
||||
{"text": "quimérico", "timestamp": (4.08, 4.6)},
|
||||
{"text": "y", "timestamp": (4.66, 4.68)},
|
||||
{"text": "legendario", "timestamp": (4.74, 5.26)},
|
||||
{"text": "plumajcri", "timestamp": (5.34, 5.74)},
|
||||
],
|
||||
},
|
||||
)
|
||||
|
||||
@require_tf
|
||||
def test_small_model_tf(self):
|
||||
self.skipTest("Tensorflow not supported yet.")
|
||||
|
||||
Reference in New Issue
Block a user