[VLMs] fix flash-attention tests (#37603)

* fix one test
* fa2 ln test
* remove keys from config recursively
* fix
* fixup
2025-04-24 11:48:11 +02:00
parent 02baa61fab
commit 1cfcbfcab8
17 changed files with 52 additions and 83 deletions
--- a/tests/models/got_ocr2/test_modeling_got_ocr2.py
+++ b/tests/models/got_ocr2/test_modeling_got_ocr2.py
@@ -222,8 +222,10 @@ class GotOcr2ModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMi
    def test_generate_from_inputs_embeds_with_static_cache(self):
        pass

-    @unittest.skip("FlashAttention only support fp16 and bf16 data type")
-    def test_flash_attn_2_fp32_ln(self):
+    @unittest.skip(
+        reason="GotOcr2's language backbone is Qwen2 which uses GQA so the KV cache is a non standard format"
+    )
+    def test_past_key_values_format(self):
        pass