[Gemma3] compile (#37447)

This commit is contained in:
Joao Gante
2025-04-18 14:55:43 +01:00
committed by GitHub
parent a1b82563f1
commit e5ac23081e
10 changed files with 90 additions and 148 deletions

View File

@@ -2075,9 +2075,6 @@ class GenerationTesterMixin:
Tests that `.generate` is compatible with torch.compile without graph breaks, keeping the same results.
⚠️ Runs two sequential generations to ensure the cache doesn't get stuck after the first compiled run! ⚠️
"""
# Monkey-patching the HybridCache at test-time to continue testing compilation support
HybridCache.is_compileable = True
for model_class in self.all_generative_model_classes:
if not model_class._supports_static_cache:
self.skipTest("This model doesn't support static cache (= no expectations of compilation support)")
@@ -2174,9 +2171,6 @@ class GenerationTesterMixin:
Tests that all optional outputs are behaving as expected when compilation is triggered.
In essence, it's the same as `test_greedy_generate_dict_outputs`, but with automatic compilation triggered.
"""
# Monkey-patching the HybridCache at test-time to continue testing compilation support
HybridCache.is_compileable = True
for model_class in self.all_generative_model_classes:
if not model_class._supports_static_cache:
self.skipTest("This model doesn't support static cache (= no expectations of compilation support)")

View File

@@ -154,6 +154,10 @@ class Gemma2ModelTest(GemmaModelTest, unittest.TestCase):
def test_multi_gpu_data_parallel_forward(self):
pass
@unittest.skip("Gemma2 has HybridCache which auto-compiles. Compile and FA2 don't work together.")
def test_eager_matches_fa2_generate(self):
pass
@slow
@require_torch_accelerator

View File

@@ -329,6 +329,10 @@ class Gemma3Vision2TextModelTest(ModelTesterMixin, GenerationTesterMixin, unitte
def test_generate_from_inputs_embeds_with_static_cache(self):
pass
@unittest.skip("Gemma3 has HybridCache which auto-compiles. Compile and FA2 don't work together.")
def test_eager_matches_fa2_generate(self):
pass
@unittest.skip(
reason="Siglip (vision backbone) uses the same initialization scheme as the Flax original implementation"
)