Cache: init empty cache when use_cache (#34274)

* fix

* fix tests

* fix copies

* add docs

* Revert "add docs"

This reverts commit 32d35634f12ba02781d2ebdee0c8dcfbe992a7b9.

* qwen move deltas

* mllama can potentially fullgraph compile

* enable mllama compile and fix tests

* remove mllama fixes
This commit is contained in:
Raushan Turganbay
2024-11-25 10:11:33 +01:00
committed by GitHub
parent 1339a14dca
commit c1a8520419
7 changed files with 57 additions and 64 deletions

View File

@@ -1531,6 +1531,14 @@ class GenerationTesterMixin:
embed_dim = getattr(text_config, "d_model", text_config.hidden_size)
per_head_embed_dim = embed_dim // num_attention_heads
# some models have a different number of heads for query vs key/value, so we need to assign the correct value
# BUT only after `per_head_embed_dim` is set
num_attention_heads = (
text_config.num_key_value_heads
if getattr(text_config, "num_key_value_heads", None) is not None
else num_attention_heads
)
past_kv = outputs["past_key_values"]
self.assertEqual(len(past_kv), num_hidden_layers)