[whisper] move processor test into processor test file 🧹 (#38266)

move processor tests
2025-05-22 10:07:11 +01:00
parent b26157d64c
commit aa02a5d902
3 changed files with 295 additions and 290 deletions
--- a/tests/pipelines/test_pipelines_automatic_speech_recognition.py
+++ b/tests/pipelines/test_pipelines_automatic_speech_recognition.py
@@ -33,7 +33,7 @@ from transformers import (
 )
 from transformers.pipelines import AutomaticSpeechRecognitionPipeline, pipeline
 from transformers.pipelines.audio_utils import chunk_bytes_iter, ffmpeg_microphone_live
-from transformers.pipelines.automatic_speech_recognition import _find_timestamp_sequence, chunk_iter
+from transformers.pipelines.automatic_speech_recognition import chunk_iter
 from transformers.testing_utils import (
    compare_pipeline_output_to_hub_spec,
    is_pipeline_test,
@@ -636,169 +636,6 @@ class AutomaticSpeechRecognitionPipelineTests(unittest.TestCase):
        output = speech_recognizer(ds["audio"], batch_size=2)
        self.assertEqual(output, EXPECTED_OUTPUT)

-    def test_find_longest_common_subsequence(self):
-        max_source_positions = 1500
-        processor = AutoProcessor.from_pretrained("openai/whisper-tiny")
-
-        previous_sequence = [[51492, 406, 3163, 1953, 466, 13, 51612, 51612]]
-        self.assertEqual(
-            processor.decode(previous_sequence[0], output_offsets=True),
-            {
-                "text": " not worth thinking about.",
-                "offsets": [{"text": " not worth thinking about.", "timestamp": (22.56, 24.96)}],
-            },
-        )
-
-        # Merge when the previous sequence is a suffix of the next sequence
-        # fmt: off
-        next_sequences_1 = [
-            [50364, 295, 6177, 3391, 11, 19817, 3337, 507, 307, 406, 3163, 1953, 466, 13, 50614, 50614, 2812, 9836, 14783, 390, 6263, 538, 257, 1359, 11, 8199, 6327, 1090, 322, 702, 7443, 13, 50834, 50257]
-        ]
-        # fmt: on
-        self.assertEqual(
-            processor.decode(next_sequences_1[0], output_offsets=True),
-            {
-                "text": (
-                    " of spectators, retrievality is not worth thinking about. His instant panic was followed by a"
-                    " small, sharp blow high on his chest.<|endoftext|>"
-                ),
-                "offsets": [
-                    {"text": " of spectators, retrievality is not worth thinking about.", "timestamp": (0.0, 5.0)},
-                    {
-                        "text": " His instant panic was followed by a small, sharp blow high on his chest.",
-                        "timestamp": (5.0, 9.4),
-                    },
-                ],
-            },
-        )
-        merge = _find_timestamp_sequence(
-            [[previous_sequence, (480_000, 0, 0)], [next_sequences_1, (480_000, 120_000, 0)]],
-            processor.tokenizer,
-            processor.feature_extractor,
-            max_source_positions,
-        )
-
-        # fmt: off
-        self.assertEqual(
-            merge,
-            [51492, 406, 3163, 1953, 466, 13, 51739, 51739, 2812, 9836, 14783, 390, 6263, 538, 257, 1359, 11, 8199, 6327, 1090, 322, 702, 7443, 13, 51959],
-        )
-        # fmt: on
-        self.assertEqual(
-            processor.decode(merge, output_offsets=True),
-            {
-                "text": (
-                    " not worth thinking about. His instant panic was followed by a small, sharp blow high on his"
-                    " chest."
-                ),
-                "offsets": [
-                    {"text": " not worth thinking about.", "timestamp": (22.56, 27.5)},
-                    {
-                        "text": " His instant panic was followed by a small, sharp blow high on his chest.",
-                        "timestamp": (27.5, 31.900000000000002),
-                    },
-                ],
-            },
-        )
-
-        # Merge when the sequence is in the middle of the 1st next sequence
-        # fmt: off
-        next_sequences_2 = [
-            [50364, 295, 6177, 3391, 11, 19817, 3337, 507, 307, 406, 3163, 1953, 466, 13, 2812, 9836, 14783, 390, 6263, 538, 257, 1359, 11, 8199, 6327, 1090, 322, 702, 7443, 13, 50834, 50257]
-        ]
-        # fmt: on
-        # {'text': ' of spectators, retrievality is not worth thinking about. His instant panic was followed by a small, sharp blow high on his chest.','timestamp': (0.0, 9.4)}
-        merge = _find_timestamp_sequence(
-            [[previous_sequence, (480_000, 0, 0)], [next_sequences_2, (480_000, 120_000, 0)]],
-            processor.tokenizer,
-            processor.feature_extractor,
-            max_source_positions,
-        )
-        # fmt: off
-        self.assertEqual(
-            merge,
-            [51492, 406, 3163, 1953, 466, 13, 2812, 9836, 14783, 390, 6263, 538, 257, 1359, 11, 8199, 6327, 1090, 322, 702, 7443, 13, 51959],
-        )
-        # fmt: on
-        self.assertEqual(
-            processor.decode(merge, output_offsets=True),
-            {
-                "text": (
-                    " not worth thinking about. His instant panic was followed by a small, sharp blow high on his"
-                    " chest."
-                ),
-                "offsets": [
-                    {
-                        "text": (
-                            " not worth thinking about. His instant panic was followed by a small, sharp blow high on"
-                            " his chest."
-                        ),
-                        "timestamp": (22.56, 31.900000000000002),
-                    },
-                ],
-            },
-        )
-
-        # Merge when the previous sequence is not included in the current sequence
-        next_sequences_3 = [[50364, 2812, 9836, 14783, 390, 6263, 538, 257, 1359, 11, 8199, 6327, 1090, 322, 702, 7443, 13, 50584, 50257]]  # fmt: skip
-        # {'text': ' His instant panic was followed by a small, sharp blow high on his chest.','timestamp': (0.0, 9.4)}
-        merge = _find_timestamp_sequence(
-            [[previous_sequence, (480_000, 0, 0)], [next_sequences_3, (480_000, 120_000, 0)]],
-            processor.tokenizer,
-            processor.feature_extractor,
-            max_source_positions,
-        )
-        self.assertEqual(
-            merge,
-            [51492, 406, 3163, 1953, 466, 13, 51612, 51612, 2812, 9836, 14783, 390, 6263, 538, 257, 1359, 11, 8199, 6327, 1090, 322, 702, 7443, 13, 51832],
-        )  # fmt: skip
-        self.assertEqual(
-            processor.decode(merge, output_offsets=True),
-            {
-                "text": (
-                    " not worth thinking about. His instant panic was followed by a small, sharp blow high on his"
-                    " chest."
-                ),
-                "offsets": [
-                    {"text": " not worth thinking about.", "timestamp": (22.56, 24.96)},
-                    {
-                        "text": " His instant panic was followed by a small, sharp blow high on his chest.",
-                        "timestamp": (24.96, 29.36),
-                    },
-                ],
-            },
-        )
-        # last case is when the sequence is not in the first next predicted start and end of timestamp
-        next_sequences_3 = [
-            [50364, 2812, 9836, 14783, 390, 406, 3163, 1953, 466, 13, 50634, 50634, 2812, 9836, 14783, 390, 6263, 538, 257, 1359, 11, 8199, 6327, 1090, 322, 702, 7443, 13, 50934]
-        ]  # fmt: skip
-        merge = _find_timestamp_sequence(
-            [[previous_sequence, (480_000, 0, 0)], [next_sequences_3, (480_000, 167_000, 0)]],
-            processor.tokenizer,
-            processor.feature_extractor,
-            max_source_positions,
-        )
-        self.assertEqual(
-            merge,
-            [51492, 406, 3163, 1953, 466, 13, 51612, 51612, 2812, 9836, 14783, 390, 6263, 538, 257, 1359, 11, 8199, 6327, 1090, 322, 702, 7443, 13, 51912]
-        )  # fmt: skip
-        self.assertEqual(
-            processor.decode(merge, output_offsets=True),
-            {
-                "text": (
-                    " not worth thinking about. His instant panic was followed by a small, sharp blow high on his"
-                    " chest."
-                ),
-                "offsets": [
-                    {"text": " not worth thinking about.", "timestamp": (22.56, 24.96)},
-                    {
-                        "text": " His instant panic was followed by a small, sharp blow high on his chest.",
-                        "timestamp": (24.96, 30.96),
-                    },
-                ],
-            },
-        )
-
    @slow
    @require_torch
    @unittest.skip("TODO (joao, eustache): this test is failing, find the breaking PR and fix the cause or the test")