Fix missing sequences_scores in the Whisper beam search output (#32970)

* added sequences_scores to the output * added beam_indices to output * added test to check for beam_indices, sequences_scores and their shape * removed redundant whitespaces * make fixup
2024-09-17 19:36:11 +01:00
parent 46c27577b3
commit c29a8694b0
2 changed files with 25 additions and 4 deletions
--- a/tests/models/whisper/test_modeling_whisper.py
+++ b/tests/models/whisper/test_modeling_whisper.py
@@ -529,6 +529,25 @@ class WhisperModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMi
            with torch.no_grad():
                model(**inputs)[0]

+    def test_beam_search_output(self):
+        config, input_dict = self.model_tester.prepare_config_and_inputs()
+        model = WhisperForConditionalGeneration(config).to(torch_device).eval()
+
+        input_features = input_dict["input_features"]
+
+        # Perform beam search
+        output = model.generate(
+            input_features, num_beams=3, num_return_sequences=3, return_dict_in_generate=True, output_scores=True
+        )
+
+        # Check if beam_indices and sequences_scores are in the output
+        self.assertIn("beam_indices", output, "beam_indices not found in the output")
+        self.assertIn("sequences_scores", output, "sequences_scores not found in the output")
+
+        # Validate the shapes of the beam_indices and sequences_scores
+        self.assertEqual(output.beam_indices.shape[0], input_features.shape[0] * 3)
+        self.assertEqual(output.sequences_scores.shape[0], input_features.shape[0] * 3)
+
    # training is not supported yet
    @unittest.skip(reason="Training is not supported yet")
    def test_training(self):