Guard against unset resolved_archive_file (#35628)
* archive_file may not be specified: When loading a pre-trained model from a GGUF file, resolved_archive_file may not be set. Guard against that case in the safetensors availability check. * Remap partial disk offload to CPU for GGUF files: GGUF files don't support disk offload, so attempt to remap them to the CPU when device_map is auto. If device_map is anything else but None, raise a NotImplementedError. * Don't remap auto device_map and raise RuntimeError: If device_map=auto and modules are selected for disk offload, don't attempt to map them to any other device. Raise a RuntimeError when a GGUF model is configured to map any modules to disk. --------- Co-authored-by: Marc Sun <57196510+SunMarc@users.noreply.github.com>
This commit is contained in:
committed by
GitHub
parent
96f01a36ac
commit
b45cf0e90a
@@ -219,6 +219,49 @@ class GgufIntegrationTests(unittest.TestCase):
|
||||
EXPECTED_TEXT = "Hello, World!\n\nStep 3: Add"
|
||||
self.assertEqual(tokenizer.decode(out[0], skip_special_tokens=True), EXPECTED_TEXT)
|
||||
|
||||
def test_gguf_errors_disk_offload(self):
    """Check that loading a GGUF file with modules mapped to "disk" raises ``RuntimeError``."""
    from collections import OrderedDict

    # Embeddings, the LM head and layers 0-9 are placed on "cpu"; layers 10-22,
    # the final norm and the rotary embedding are placed on "disk". Insertion
    # order matches the explicit list this map was originally written as.
    cpu_modules = ["model.embed_tokens", "lm_head"]
    cpu_modules += [f"model.layers.{idx}" for idx in range(10)]
    disk_modules = [f"model.layers.{idx}" for idx in range(10, 23)]
    disk_modules += ["model.norm", "model.rotary_emb"]

    placement = OrderedDict()
    for module_name in cpu_modules:
        placement[module_name] = "cpu"
    for module_name in disk_modules:
        placement[module_name] = "disk"

    gguf_file = self.gguf_filename.format(quant_type=QuantType.Q2_K.name)

    with self.assertRaises(RuntimeError):
        AutoModelForCausalLM.from_pretrained(
            self.gguf_model_id,
            device_map=placement,
            gguf_file=gguf_file,
            offload_folder="offload",
            offload_state_dict=True,
        )
|
||||
|
||||
|
||||
@require_gguf
|
||||
@require_torch_gpu
|
||||
|
||||
Reference in New Issue
Block a user