From b45cf0e90afe6273584fa5265cdda1050a6a1c4a Mon Sep 17 00:00:00 2001 From: David LaPalomento Date: Fri, 14 Feb 2025 07:44:31 -0600 Subject: [PATCH] Guard against unset resolved_archive_file (#35628) * archive_file may not be specified When loading a pre-trained model from a gguf file, resolved_archive_file may not be set. Guard against that case in the safetensors availability check. * Remap partial disk offload to cpu for GGUF files GGUF files don't support disk offload so attempt to remap them to the CPU when device_map is auto. If device_map is anything else but None, raise a NotImplementedError. * Don't remap auto device_map and raise RuntimeError If device_map=auto and modules are selected for disk offload, don't attempt to map them to any other device. Raise a runtime error when a GGUF model is configured to map any modules to disk. --------- Co-authored-by: Marc Sun <57196510+SunMarc@users.noreply.github.com> --- src/transformers/modeling_utils.py | 8 +++++- tests/quantization/ggml/test_ggml.py | 43 ++++++++++++++++++++++++++++ 2 files changed, 50 insertions(+), 1 deletion(-) diff --git a/src/transformers/modeling_utils.py b/src/transformers/modeling_utils.py index 13c8719b36..b75151992c 100755 --- a/src/transformers/modeling_utils.py +++ b/src/transformers/modeling_utils.py @@ -4267,6 +4267,12 @@ class PreTrainedModel(nn.Module, ModuleUtilsMixin, GenerationMixin, PushToHubMix # check if we don't have tied param in different devices check_tied_parameters_on_same_device(tied_params, device_map) + if gguf_path and device_map is not None and "disk" in device_map.values(): + raise RuntimeError( + "One or more modules is configured to be mapped to disk. Disk offload is not supported for models " + "loaded from GGUF files." + ) + if from_tf: if resolved_archive_file.endswith(".index"): # Load from a TensorFlow 1.X checkpoint - provided by original authors @@ -4525,7 +4531,7 @@ class PreTrainedModel(nn.Module, ModuleUtilsMixin, GenerationMixin, PushToHubMix archive_file = ( resolved_archive_file[0] if isinstance(resolved_archive_file, (list, tuple)) else resolved_archive_file ) - is_safetensors = archive_file.endswith(".safetensors") + is_safetensors = archive_file is not None and archive_file.endswith(".safetensors") if offload_folder is None and not is_safetensors: raise ValueError( "The current `device_map` had weights offloaded to the disk. Please provide an `offload_folder`" diff --git a/tests/quantization/ggml/test_ggml.py b/tests/quantization/ggml/test_ggml.py index 956da875a0..efba55d42b 100644 --- a/tests/quantization/ggml/test_ggml.py +++ b/tests/quantization/ggml/test_ggml.py @@ -219,6 +219,49 @@ class GgufIntegrationTests(unittest.TestCase): EXPECTED_TEXT = "Hello, World!\n\nStep 3: Add" self.assertEqual(tokenizer.decode(out[0], skip_special_tokens=True), EXPECTED_TEXT) + def test_gguf_errors_disk_offload(self): + from collections import OrderedDict + + q2_k_gguf_model_id = self.gguf_filename.format(quant_type=QuantType.Q2_K.name) + with self.assertRaises(RuntimeError): + AutoModelForCausalLM.from_pretrained( + self.gguf_model_id, + device_map=OrderedDict( + [ + ("model.embed_tokens", "cpu"), + ("lm_head", "cpu"), + ("model.layers.0", "cpu"), + ("model.layers.1", "cpu"), + ("model.layers.2", "cpu"), + ("model.layers.3", "cpu"), + ("model.layers.4", "cpu"), + ("model.layers.5", "cpu"), + ("model.layers.6", "cpu"), + ("model.layers.7", "cpu"), + ("model.layers.8", "cpu"), + ("model.layers.9", "cpu"), + ("model.layers.10", "disk"), + ("model.layers.11", "disk"), + ("model.layers.12", "disk"), + ("model.layers.13", "disk"), + ("model.layers.14", "disk"), + ("model.layers.15", "disk"), + ("model.layers.16", "disk"), + ("model.layers.17", "disk"), + ("model.layers.18", "disk"), + ("model.layers.19", "disk"), + ("model.layers.20", "disk"), + ("model.layers.21", "disk"), + ("model.layers.22", "disk"), + ("model.norm", "disk"), + ("model.rotary_emb", "disk"), + ] + ), + gguf_file=q2_k_gguf_model_id, + offload_folder="offload", + offload_state_dict=True, + ) + @require_gguf @require_torch_gpu