[compile] re-enable for Qwen-VL models (#38127)

* compile qwen models
* delete TODO comment
* fix embeds test
* fix assisted decoding
* add comments
2025-05-21 11:50:39 +02:00
parent 4542086db7
commit a21f11fca2
5 changed files with 182 additions and 158 deletions
--- a/tests/models/qwen2_5_vl/test_modeling_qwen2_5_vl.py
+++ b/tests/models/qwen2_5_vl/test_modeling_qwen2_5_vl.py
@@ -346,10 +346,6 @@ class Qwen2_5_VLModelTest(ModelTesterMixin, GenerationTesterMixin, unittest.Test
    def test_model_parallelism(self):
        pass

-    @unittest.skip(reason="Compile not yet supported because in Qwen2_5_VL models")
-    def test_sdpa_can_compile_dynamic(self):
-        pass
-
    @unittest.skip(reason="Compile not yet supported because in Qwen2_5_VL models")
    def test_sdpa_can_dispatch_on_flash(self):
        pass
@@ -368,10 +364,6 @@ class Qwen2_5_VLModelTest(ModelTesterMixin, GenerationTesterMixin, unittest.Test
    def test_generate_from_inputs_embeds_with_static_cache(self):
        pass

-    @unittest.skip(reason="Can't compile fullgraph due to dynamic control flow in `prepare_inputs_for_generate`")
-    def test_generate_compile_fullgraph(self):
-        pass
-
    @is_flaky()  # TODO (joao/raushan): Investigate why this test is flaky on this model
    def test_prompt_lookup_decoding_matches_greedy_search(self):
        super().test_prompt_lookup_decoding_matches_greedy_search()
--- a/tests/models/qwen2_vl/test_modeling_qwen2_vl.py
+++ b/tests/models/qwen2_vl/test_modeling_qwen2_vl.py
@@ -300,10 +300,6 @@ class Qwen2VLModelTest(ModelTesterMixin, GenerationTesterMixin, unittest.TestCas
    def test_model_parallelism(self):
        pass

-    @unittest.skip(reason="Compile not yet supported because in Qwen2VL models")
-    def test_sdpa_can_compile_dynamic(self):
-        pass
-
    @unittest.skip(reason="Compile not yet supported because in Qwen2VL models")
    def test_sdpa_can_dispatch_on_flash(self):
        pass