diff --git a/src/transformers/modeling_utils.py b/src/transformers/modeling_utils.py index 8fdcf662fc..30adf05ac6 100644 --- a/src/transformers/modeling_utils.py +++ b/src/transformers/modeling_utils.py @@ -1577,7 +1577,8 @@ def _find_mismatched_keys( # This skips size mismatches for 4-bit weights. Two 4-bit values share an 8-bit container, causing size differences. # Without matching with module type or parameter type it seems like a practical way to detect valid 4bit weights. if not ( - new_state_dict[key].shape[-1] == 1 + is_quantized + and new_state_dict[key].shape[-1] == 1 and new_state_dict[key].numel() * 2 == model_state_dict[key].numel() ): mismatched_keys.append(key)