Restrict caching allocator warmup to non-quantized models (#36428)

This commit is contained in:
Marc Sun
2025-02-26 22:16:15 +01:00
committed by GitHub
parent a7fbab33ae
commit 8ede897c30

View File

@@ -4839,7 +4839,8 @@ class PreTrainedModel(nn.Module, ModuleUtilsMixin, GenerationMixin, PushToHubMix
model.expected_keys = expected_keys
if device_map is not None:
expanded_device_map = expand_device_map(device_map, original_loaded_keys, start_prefix)
caching_allocator_warmup(model, expanded_device_map, dtype)
if hf_quantizer is None:
caching_allocator_warmup(model_to_load, expanded_device_map, dtype)
if device_map is not None and is_safetensors:
param_device_map = expanded_device_map