Explicitly specify use_cache=True in Flash Attention tests (#27635)

Pass use_cache=True explicitly to the model.generate() call in the test.
This commit is contained in:
fxmarty
2023-11-21 17:53:10 +01:00
committed by GitHub
parent c770600fde
commit 7f04373865
2 changed files with 10 additions and 2 deletions

View File

@@ -436,7 +436,11 @@ class MistralModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMi
# Just test that a large cache works as expected
_ = model.generate(
dummy_input, attention_mask=dummy_attention_mask, max_new_tokens=max_new_tokens, do_sample=False
dummy_input,
attention_mask=dummy_attention_mask,
max_new_tokens=max_new_tokens,
do_sample=False,
use_cache=True,
)
@require_flash_attn