[compile] re-enable torch.compile for Qwen-VL models (#38127)

* compile qwen models

* delete TODO comment

* fix embeds test

* fix assisted decoding

* add comments
This commit is contained in:
Raushan Turganbay
2025-05-21 11:50:39 +02:00
committed by GitHub
parent 4542086db7
commit a21f11fca2
5 changed files with 182 additions and 158 deletions

View File

@@ -346,10 +346,6 @@ class Qwen2_5_VLModelTest(ModelTesterMixin, GenerationTesterMixin, unittest.Test
def test_model_parallelism(self):
pass
@unittest.skip(reason="Compile not yet supported because in Qwen2_5_VL models")
def test_sdpa_can_compile_dynamic(self):
pass
@unittest.skip(reason="Compile not yet supported because in Qwen2_5_VL models")
def test_sdpa_can_dispatch_on_flash(self):
pass
@@ -368,10 +364,6 @@ class Qwen2_5_VLModelTest(ModelTesterMixin, GenerationTesterMixin, unittest.Test
def test_generate_from_inputs_embeds_with_static_cache(self):
pass
@unittest.skip(reason="Can't compile fullgraph due to dynamic control flow in `prepare_inputs_for_generate`")
def test_generate_compile_fullgraph(self):
pass
@is_flaky() # TODO (joao/raushan): Investigate why this test is flaky on this model
def test_prompt_lookup_decoding_matches_greedy_search(self):
super().test_prompt_lookup_decoding_matches_greedy_search()

View File

@@ -300,10 +300,6 @@ class Qwen2VLModelTest(ModelTesterMixin, GenerationTesterMixin, unittest.TestCas
def test_model_parallelism(self):
pass
@unittest.skip(reason="Compile not yet supported because in Qwen2VL models")
def test_sdpa_can_compile_dynamic(self):
pass
@unittest.skip(reason="Compile not yet supported because in Qwen2VL models")
def test_sdpa_can_dispatch_on_flash(self):
pass