From feb83521eca849731573dd40da89a02e4f370e5a Mon Sep 17 00:00:00 2001 From: Weiming Zhao Date: Thu, 22 Jun 2023 17:38:53 -0700 Subject: [PATCH] [llama] Fix comments in weights converter (#24436) Explain the reason to clone tensor --- .../models/llama/convert_llama_weights_to_hf.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/transformers/models/llama/convert_llama_weights_to_hf.py b/src/transformers/models/llama/convert_llama_weights_to_hf.py index 45ae4f8072..e8fb7f8252 100644 --- a/src/transformers/models/llama/convert_llama_weights_to_hf.py +++ b/src/transformers/models/llama/convert_llama_weights_to_hf.py @@ -136,8 +136,10 @@ def write_model(model_path, input_base_path, model_size): } else: # Sharded - # Note that in the 13B checkpoint, not cloning the two following weights will result in the checkpoint - # becoming 37GB instead of 26GB for some reason. + # Note that attention.w{q,k,v,o}, feed_forward.w{1,2,3}, attention_norm.weight and ffn_norm.weight share + # the same storage object; saving attention_norm and ffn_norm will save other weights too, which is + # redundant as other weights will be stitched from multiple shards. To avoid that, they are cloned. + state_dict = { f"model.layers.{layer_i}.input_layernorm.weight": loaded[0][ f"layers.{layer_i}.attention_norm.weight"