Fix head_dim in config extracted from Gemma2 GGUF model (#35818)

fix gemma2 head dim

Signed-off-by: Isotr0py <2037008807@qq.com>
Co-authored-by: Mohamed Mekkouri <93391238+MekkCyber@users.noreply.github.com>
2025-01-22 22:22:04 +08:00
parent b3d6722469
commit 487e2f63bd
1 changed file with 3 additions and 0 deletions
--- a/src/transformers/integrations/ggml.py
+++ b/src/transformers/integrations/ggml.py
@@ -198,6 +198,9 @@ GGUF_CONFIG_MAPPING = {
        "embedding_length": "hidden_size",
        "rope.dimension_count": None,
        "rope.freq_base": "rope_theta",
+        # NOTE: Gemma2 has key_length==value_length==head_dim
+        # See: https://github.com/ggerganov/llama.cpp/blob/2e2f8f093cd4fb6bbb87ba84f6b9684fa082f3fa/convert_hf_to_gguf.py#L3293-L3294
+        "attention.key_length": "head_dim",
        "attention.head_count": "num_attention_heads",
        "attention.head_count_kv": "num_key_value_heads",
        "attention.layer_norm_rms_epsilon": "rms_norm_eps",