[VLMs] fix flash-attention tests (#37603)
* fix one test
* fa2 ln test
* remove keys from config recursively
* fix
* fixup
This commit is contained in:
committed by
GitHub
parent
02baa61fab
commit
1cfcbfcab8
@@ -222,8 +222,10 @@ class GotOcr2ModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMi
|
||||
def test_generate_from_inputs_embeds_with_static_cache(self):
|
||||
pass
|
||||
|
||||
@unittest.skip("FlashAttention only support fp16 and bf16 data type")
|
||||
def test_flash_attn_2_fp32_ln(self):
|
||||
@unittest.skip(
|
||||
reason="GotOcr2's language backbone is Qwen2 which uses GQA so the KV cache is a non standard format"
|
||||
)
|
||||
def test_past_key_values_format(self):
|
||||
pass
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user