[VLMs] fix flash-attention tests (#37603)

* fix one test

* fa2 ln test

* remove keys from config recursively

* fix

* fixup
This commit is contained in:
Raushan Turganbay
2025-04-24 11:48:11 +02:00
committed by GitHub
parent 02baa61fab
commit 1cfcbfcab8
17 changed files with 52 additions and 83 deletions

View File

@@ -222,8 +222,10 @@ class GotOcr2ModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMi
def test_generate_from_inputs_embeds_with_static_cache(self):
pass
@unittest.skip("FlashAttention only support fp16 and bf16 data type")
def test_flash_attn_2_fp32_ln(self):
@unittest.skip(
reason="GotOcr2's language backbone is Qwen2 which uses GQA so the KV cache is a non standard format"
)
def test_past_key_values_format(self):
pass