Fix Mamba2 Grouped SSD Support in the torch_forward Path (#37533)

* Fix mamba2 grouped support in bamba torch path
* patch zamba2 and mamba2
* Add a unit test for grouped SSD
* add comment for the new unit test
* add output_size arg value to repeat_interleave calls
* Add comment
2025-04-16 16:16:01 -04:00
parent a7d2bbaaa8
commit 4005730044
6 changed files with 18 additions and 10 deletions
--- a/tests/models/mamba2/test_modeling_mamba2.py
+++ b/tests/models/mamba2/test_modeling_mamba2.py
@@ -238,6 +238,14 @@ class Mamba2ModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMix
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_mamba2_slow_vs_fast_forward(*config_and_inputs)

+    # This test adjusts n_groups to half the original setting and effectively
+    # creates a grouped SSD configuration in the mamba2 layers
+    # See https://github.com/huggingface/transformers/pull/37533/
+    def test_mamba2_slow_vs_fast_forward_grouped(self):
+        config_and_inputs = self.model_tester.prepare_config_and_inputs()
+        config_and_inputs[0].n_groups //= 2
+        self.model_tester.create_and_check_mamba2_slow_vs_fast_forward(*config_and_inputs)
+
    def test_initialization(self):
        config, _ = self.model_tester.prepare_config_and_inputs_for_common()