Fix Mamba2 Grouped SSD Support in the torch_forward Path (#37533)
* Fix mamba2 grouped support in bamba torch path * patch zamba2 and mamba2 * Add a unit test for grouped SSD * add comment for the new unit test * add output_size arg value to repeat_interleave calls * Add comment
This commit is contained in:
@@ -238,6 +238,14 @@ class Mamba2ModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMix
|
||||
config_and_inputs = self.model_tester.prepare_config_and_inputs()
|
||||
self.model_tester.create_and_check_mamba2_slow_vs_fast_forward(*config_and_inputs)
|
||||
|
||||
# This test adjusts n_groups to half the original setting and effectively
|
||||
# creates a grouped SSD configuration in the mamba2 layers
|
||||
# See https://github.com/huggingface/transformers/pull/37533/
|
||||
def test_mamba2_slow_vs_fast_forward_grouped(self):
|
||||
config_and_inputs = self.model_tester.prepare_config_and_inputs()
|
||||
config_and_inputs[0].n_groups //= 2
|
||||
self.model_tester.create_and_check_mamba2_slow_vs_fast_forward(*config_and_inputs)
|
||||
|
||||
def test_initialization(self):
|
||||
config, _ = self.model_tester.prepare_config_and_inputs_for_common()
|
||||
|
||||
|
||||
Reference in New Issue
Block a user