Restrict caching allocator warmup to non-quantized models (#36428)
This commit is contained in:
@@ -4839,7 +4839,8 @@ class PreTrainedModel(nn.Module, ModuleUtilsMixin, GenerationMixin, PushToHubMix
|
||||
model.expected_keys = expected_keys
|
||||
if device_map is not None:
|
||||
expanded_device_map = expand_device_map(device_map, original_loaded_keys, start_prefix)
|
||||
caching_allocator_warmup(model, expanded_device_map, dtype)
|
||||
if hf_quantizer is None:
|
||||
caching_allocator_warmup(model_to_load, expanded_device_map, dtype)
|
||||
|
||||
if device_map is not None and is_safetensors:
|
||||
param_device_map = expanded_device_map
|
||||
|
||||
Reference in New Issue
Block a user