Fix Cache.max_cache_len max value for Hybrid models (#39737)
* fix gemma
* fix min
* fix quant init issue
* fix gemma 3n
* skip quant cache test
* fix modular
* new test for Gemma
* include cyril change

---------

Co-authored-by: Cyril Vallez <cyril.vallez@gmail.com>
This commit is contained in:
committed by
GitHub
parent
075dbbceaa
commit
c4e2069898
@@ -431,6 +431,11 @@ class Gemma3nTextModelTest(ModelTesterMixin, GenerationTesterMixin, unittest.Tes
|
||||
def test_dola_decoding_sample(self):
|
||||
pass
|
||||
|
||||
@pytest.mark.generate
|
||||
@unittest.skip("Gemma3n does not support QuantizedCache as it performs cache manipulation in the forward pass")
|
||||
def test_generate_with_quant_cache(self):
|
||||
pass
|
||||
|
||||
|
||||
class Gemma3nVision2TextModelTester:
|
||||
text_config = {"activation_sparsity_pattern": None}
|
||||
|
||||
Reference in New Issue
Block a user