restrict cache allocator to non quantized model (#36428)
This commit is contained in:
@@ -4839,7 +4839,8 @@ class PreTrainedModel(nn.Module, ModuleUtilsMixin, GenerationMixin, PushToHubMix
|
|||||||
model.expected_keys = expected_keys
|
model.expected_keys = expected_keys
|
||||||
if device_map is not None:
|
if device_map is not None:
|
||||||
expanded_device_map = expand_device_map(device_map, original_loaded_keys, start_prefix)
|
expanded_device_map = expand_device_map(device_map, original_loaded_keys, start_prefix)
|
||||||
caching_allocator_warmup(model, expanded_device_map, dtype)
|
if hf_quantizer is None:
|
||||||
|
caching_allocator_warmup(model_to_load, expanded_device_map, dtype)
|
||||||
|
|
||||||
if device_map is not None and is_safetensors:
|
if device_map is not None and is_safetensors:
|
||||||
param_device_map = expanded_device_map
|
param_device_map = expanded_device_map
|
||||||
|
|||||||
Reference in New Issue
Block a user