From 02776d2c6aa997c5b81f28f2edf38df9967253be Mon Sep 17 00:00:00 2001 From: Pavel Iakubovskii Date: Fri, 28 Feb 2025 10:48:59 +0000 Subject: [PATCH] Fix loading models with mismatched sizes (#36463) * Fix loading model with mismatched sizes * trigger tests --- src/transformers/modeling_utils.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/transformers/modeling_utils.py b/src/transformers/modeling_utils.py index 1553287c92..e602d7ca8c 100755 --- a/src/transformers/modeling_utils.py +++ b/src/transformers/modeling_utils.py @@ -4907,7 +4907,9 @@ class PreTrainedModel(nn.Module, ModuleUtilsMixin, GenerationMixin, PushToHubMix model_to_load, state_dict, start_prefix ) # at this point the state dict should be on cpu, we don't need to actually read it - fixed_state_dict = model_to_load._fix_state_dict_keys_on_load(state_dict) + mismatched_names = [name for name, _, _ in mismatched_keys] + fixed_state_dict = {k: v for k, v in state_dict.items() if k not in mismatched_names} + fixed_state_dict = model_to_load._fix_state_dict_keys_on_load(fixed_state_dict) model_to_load.load_state_dict(fixed_state_dict, strict=False, assign=assign_to_params_buffers) else: # This should always be a list but, just to be sure.