From 06c4a4d499aeb213c558d6fb59adf864a6062dad Mon Sep 17 00:00:00 2001
From: jiqing-feng
Date: Tue, 1 Jul 2025 17:32:20 +0800
Subject: [PATCH] fix caching_allocator_warmup with tie weights (#39070)

* fix caching_allocator_warmup with tie weights

Signed-off-by: jiqing-feng

* fix comment

Signed-off-by: jiqing-feng

---------

Signed-off-by: jiqing-feng
---
 src/transformers/modeling_utils.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/src/transformers/modeling_utils.py b/src/transformers/modeling_utils.py
index e99fb31ca3..e6b7031ab3 100644
--- a/src/transformers/modeling_utils.py
+++ b/src/transformers/modeling_utils.py
@@ -5843,7 +5843,12 @@ def caching_allocator_warmup(model: PreTrainedModel, expanded_device_map: dict,
         else None
     )
     total_byte_count = defaultdict(lambda: 0)
+    tied_param_names = _get_tied_weight_keys(model)
     for param_name, device in accelerator_device_map.items():
+        # Skip if the parameter has already been accounted for (tied weights)
+        if param_name in tied_param_names:
+            continue
+
         param = model.get_parameter_or_buffer(param_name)
         # The dtype of different parameters may be different with composite models or `keep_in_fp32_modules`
         param_byte_count = param.numel() * param.element_size()