From 2b8a15cc3f1a0c94cf817a8fd8c87bca28737e09 Mon Sep 17 00:00:00 2001 From: Mohamed Mekkouri <93391238+MekkCyber@users.noreply.github.com> Date: Mon, 24 Mar 2025 19:30:01 +0100 Subject: [PATCH] Disallow Offload to disk for gguf files (#36933) update Co-authored-by: Marc Sun <57196510+SunMarc@users.noreply.github.com> --- src/transformers/modeling_utils.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/transformers/modeling_utils.py b/src/transformers/modeling_utils.py index 7747003aeb..a710c5071a 100644 --- a/src/transformers/modeling_utils.py +++ b/src/transformers/modeling_utils.py @@ -4327,6 +4327,12 @@ class PreTrainedModel(nn.Module, ModuleUtilsMixin, GenerationMixin, PushToHubMix "You cannot combine Quantization and loading a model from a GGUF file, try again by making sure you did not passed a `quantization_config` or that you did not load a quantized model from the Hub." ) + if gguf_file and device_map is not None and "disk" in device_map.values(): + raise RuntimeError( + "One or more modules is configured to be mapped to disk. Disk offload is not supported for models " + "loaded from GGUF files." + ) + checkpoint_files, sharded_metadata = _get_resolved_checkpoint_files( pretrained_model_name_or_path=pretrained_model_name_or_path, subfolder=subfolder,