[whisper] move processor test into processor test file 🧹 (#38266)
move processor tests
This commit is contained in:
@@ -33,7 +33,7 @@ from transformers import (
|
||||
)
|
||||
from transformers.pipelines import AutomaticSpeechRecognitionPipeline, pipeline
|
||||
from transformers.pipelines.audio_utils import chunk_bytes_iter, ffmpeg_microphone_live
|
||||
from transformers.pipelines.automatic_speech_recognition import _find_timestamp_sequence, chunk_iter
|
||||
from transformers.pipelines.automatic_speech_recognition import chunk_iter
|
||||
from transformers.testing_utils import (
|
||||
compare_pipeline_output_to_hub_spec,
|
||||
is_pipeline_test,
|
||||
@@ -636,169 +636,6 @@ class AutomaticSpeechRecognitionPipelineTests(unittest.TestCase):
|
||||
output = speech_recognizer(ds["audio"], batch_size=2)
|
||||
self.assertEqual(output, EXPECTED_OUTPUT)
|
||||
|
||||
def test_find_longest_common_subsequence(self):
|
||||
max_source_positions = 1500
|
||||
processor = AutoProcessor.from_pretrained("openai/whisper-tiny")
|
||||
|
||||
previous_sequence = [[51492, 406, 3163, 1953, 466, 13, 51612, 51612]]
|
||||
self.assertEqual(
|
||||
processor.decode(previous_sequence[0], output_offsets=True),
|
||||
{
|
||||
"text": " not worth thinking about.",
|
||||
"offsets": [{"text": " not worth thinking about.", "timestamp": (22.56, 24.96)}],
|
||||
},
|
||||
)
|
||||
|
||||
# Merge when the previous sequence is a suffix of the next sequence
|
||||
# fmt: off
|
||||
next_sequences_1 = [
|
||||
[50364, 295, 6177, 3391, 11, 19817, 3337, 507, 307, 406, 3163, 1953, 466, 13, 50614, 50614, 2812, 9836, 14783, 390, 6263, 538, 257, 1359, 11, 8199, 6327, 1090, 322, 702, 7443, 13, 50834, 50257]
|
||||
]
|
||||
# fmt: on
|
||||
self.assertEqual(
|
||||
processor.decode(next_sequences_1[0], output_offsets=True),
|
||||
{
|
||||
"text": (
|
||||
" of spectators, retrievality is not worth thinking about. His instant panic was followed by a"
|
||||
" small, sharp blow high on his chest.<|endoftext|>"
|
||||
),
|
||||
"offsets": [
|
||||
{"text": " of spectators, retrievality is not worth thinking about.", "timestamp": (0.0, 5.0)},
|
||||
{
|
||||
"text": " His instant panic was followed by a small, sharp blow high on his chest.",
|
||||
"timestamp": (5.0, 9.4),
|
||||
},
|
||||
],
|
||||
},
|
||||
)
|
||||
merge = _find_timestamp_sequence(
|
||||
[[previous_sequence, (480_000, 0, 0)], [next_sequences_1, (480_000, 120_000, 0)]],
|
||||
processor.tokenizer,
|
||||
processor.feature_extractor,
|
||||
max_source_positions,
|
||||
)
|
||||
|
||||
# fmt: off
|
||||
self.assertEqual(
|
||||
merge,
|
||||
[51492, 406, 3163, 1953, 466, 13, 51739, 51739, 2812, 9836, 14783, 390, 6263, 538, 257, 1359, 11, 8199, 6327, 1090, 322, 702, 7443, 13, 51959],
|
||||
)
|
||||
# fmt: on
|
||||
self.assertEqual(
|
||||
processor.decode(merge, output_offsets=True),
|
||||
{
|
||||
"text": (
|
||||
" not worth thinking about. His instant panic was followed by a small, sharp blow high on his"
|
||||
" chest."
|
||||
),
|
||||
"offsets": [
|
||||
{"text": " not worth thinking about.", "timestamp": (22.56, 27.5)},
|
||||
{
|
||||
"text": " His instant panic was followed by a small, sharp blow high on his chest.",
|
||||
"timestamp": (27.5, 31.900000000000002),
|
||||
},
|
||||
],
|
||||
},
|
||||
)
|
||||
|
||||
# Merge when the sequence is in the middle of the 1st next sequence
|
||||
# fmt: off
|
||||
next_sequences_2 = [
|
||||
[50364, 295, 6177, 3391, 11, 19817, 3337, 507, 307, 406, 3163, 1953, 466, 13, 2812, 9836, 14783, 390, 6263, 538, 257, 1359, 11, 8199, 6327, 1090, 322, 702, 7443, 13, 50834, 50257]
|
||||
]
|
||||
# fmt: on
|
||||
# {'text': ' of spectators, retrievality is not worth thinking about. His instant panic was followed by a small, sharp blow high on his chest.','timestamp': (0.0, 9.4)}
|
||||
merge = _find_timestamp_sequence(
|
||||
[[previous_sequence, (480_000, 0, 0)], [next_sequences_2, (480_000, 120_000, 0)]],
|
||||
processor.tokenizer,
|
||||
processor.feature_extractor,
|
||||
max_source_positions,
|
||||
)
|
||||
# fmt: off
|
||||
self.assertEqual(
|
||||
merge,
|
||||
[51492, 406, 3163, 1953, 466, 13, 2812, 9836, 14783, 390, 6263, 538, 257, 1359, 11, 8199, 6327, 1090, 322, 702, 7443, 13, 51959],
|
||||
)
|
||||
# fmt: on
|
||||
self.assertEqual(
|
||||
processor.decode(merge, output_offsets=True),
|
||||
{
|
||||
"text": (
|
||||
" not worth thinking about. His instant panic was followed by a small, sharp blow high on his"
|
||||
" chest."
|
||||
),
|
||||
"offsets": [
|
||||
{
|
||||
"text": (
|
||||
" not worth thinking about. His instant panic was followed by a small, sharp blow high on"
|
||||
" his chest."
|
||||
),
|
||||
"timestamp": (22.56, 31.900000000000002),
|
||||
},
|
||||
],
|
||||
},
|
||||
)
|
||||
|
||||
# Merge when the previous sequence is not included in the current sequence
|
||||
next_sequences_3 = [[50364, 2812, 9836, 14783, 390, 6263, 538, 257, 1359, 11, 8199, 6327, 1090, 322, 702, 7443, 13, 50584, 50257]] # fmt: skip
|
||||
# {'text': ' His instant panic was followed by a small, sharp blow high on his chest.','timestamp': (0.0, 9.4)}
|
||||
merge = _find_timestamp_sequence(
|
||||
[[previous_sequence, (480_000, 0, 0)], [next_sequences_3, (480_000, 120_000, 0)]],
|
||||
processor.tokenizer,
|
||||
processor.feature_extractor,
|
||||
max_source_positions,
|
||||
)
|
||||
self.assertEqual(
|
||||
merge,
|
||||
[51492, 406, 3163, 1953, 466, 13, 51612, 51612, 2812, 9836, 14783, 390, 6263, 538, 257, 1359, 11, 8199, 6327, 1090, 322, 702, 7443, 13, 51832],
|
||||
) # fmt: skip
|
||||
self.assertEqual(
|
||||
processor.decode(merge, output_offsets=True),
|
||||
{
|
||||
"text": (
|
||||
" not worth thinking about. His instant panic was followed by a small, sharp blow high on his"
|
||||
" chest."
|
||||
),
|
||||
"offsets": [
|
||||
{"text": " not worth thinking about.", "timestamp": (22.56, 24.96)},
|
||||
{
|
||||
"text": " His instant panic was followed by a small, sharp blow high on his chest.",
|
||||
"timestamp": (24.96, 29.36),
|
||||
},
|
||||
],
|
||||
},
|
||||
)
|
||||
# last case is when the sequence is not in the first next predicted start and end of timestamp
|
||||
next_sequences_3 = [
|
||||
[50364, 2812, 9836, 14783, 390, 406, 3163, 1953, 466, 13, 50634, 50634, 2812, 9836, 14783, 390, 6263, 538, 257, 1359, 11, 8199, 6327, 1090, 322, 702, 7443, 13, 50934]
|
||||
] # fmt: skip
|
||||
merge = _find_timestamp_sequence(
|
||||
[[previous_sequence, (480_000, 0, 0)], [next_sequences_3, (480_000, 167_000, 0)]],
|
||||
processor.tokenizer,
|
||||
processor.feature_extractor,
|
||||
max_source_positions,
|
||||
)
|
||||
self.assertEqual(
|
||||
merge,
|
||||
[51492, 406, 3163, 1953, 466, 13, 51612, 51612, 2812, 9836, 14783, 390, 6263, 538, 257, 1359, 11, 8199, 6327, 1090, 322, 702, 7443, 13, 51912]
|
||||
) # fmt: skip
|
||||
self.assertEqual(
|
||||
processor.decode(merge, output_offsets=True),
|
||||
{
|
||||
"text": (
|
||||
" not worth thinking about. His instant panic was followed by a small, sharp blow high on his"
|
||||
" chest."
|
||||
),
|
||||
"offsets": [
|
||||
{"text": " not worth thinking about.", "timestamp": (22.56, 24.96)},
|
||||
{
|
||||
"text": " His instant panic was followed by a small, sharp blow high on his chest.",
|
||||
"timestamp": (24.96, 30.96),
|
||||
},
|
||||
],
|
||||
},
|
||||
)
|
||||
|
||||
@slow
|
||||
@require_torch
|
||||
@unittest.skip("TODO (joao, eustache): this test is failing, find the breaking PR and fix the cause or the test")
|
||||
|
||||
Reference in New Issue
Block a user