Fix Cache.max_cache_len max value for Hybrid models (#39737)

* fix gemma
* fix min
* fix quant init issue
* fix gemma 3n
* skip quant cache test
* fix modular
* new test for Gemma
* include cyril change

---------

Co-authored-by: Cyril Vallez <cyril.vallez@gmail.com>
2025-07-29 17:12:50 +02:00
parent 075dbbceaa
commit c4e2069898
5 changed files with 82 additions and 40 deletions
--- a/tests/models/gemma3n/test_modeling_gemma3n.py
+++ b/tests/models/gemma3n/test_modeling_gemma3n.py
@@ -431,6 +431,11 @@ class Gemma3nTextModelTest(ModelTesterMixin, GenerationTesterMixin, unittest.Tes
    def test_dola_decoding_sample(self):
        pass

+    @pytest.mark.generate
+    @unittest.skip("Gemma3n does not support QuantizedCache as it performs cache manipulation in the forward pass")
+    def test_generate_with_quant_cache(self):
+        pass
+

 class Gemma3nVision2TextModelTester:
    text_config = {"activation_sparsity_pattern": None}