Fix Cache.max_cache_len max value for Hybrid models (#39737)

* fix gemma

* fix min

* fix quant init issue

* fix gemma 3n

* skip quant cache test

* fix modular

* new test for Gemma

* include Cyril's change

---------

Co-authored-by: Cyril Vallez <cyril.vallez@gmail.com>
This commit is contained in:
Manuel de Prada Corral
2025-07-29 17:12:50 +02:00
committed by GitHub
parent 075dbbceaa
commit c4e2069898
5 changed files with 82 additions and 40 deletions

View File

@@ -431,6 +431,11 @@ class Gemma3nTextModelTest(ModelTesterMixin, GenerationTesterMixin, unittest.Tes
def test_dola_decoding_sample(self):
pass
# Skipped test stub for generation with a quantized cache (presumably shadows the
# same-named test inherited from one of the tester mixins -- confirm against the mixin).
# The skip message below gives the reason: Gemma3n manipulates the cache inside its
# forward pass and therefore does not support QuantizedCache.
@pytest.mark.generate
@unittest.skip("Gemma3n does not support QuantizedCache as it performs cache manipulation in the forward pass")
def test_generate_with_quant_cache(self):
# Intentionally empty: the skip decorator prevents this body from ever running.
pass
class Gemma3nVision2TextModelTester:
text_config = {"activation_sparsity_pattern": None}