Fix cuda index issue in cache allocator (#36937)

fix
2025-03-25 11:51:41 +01:00
parent 0f733110a6
commit 80b4c5dcc9
1 changed files with 2 additions and 1 deletions
--- a/src/transformers/modeling_utils.py
+++ b/src/transformers/modeling_utils.py
@@ -5870,7 +5870,8 @@ def caching_allocator_warmup(model: PreTrainedModel, expanded_device_map: Dict):
    # This will kick off the caching allocator to avoid having to Malloc afterwards
    for device, byte_count in total_byte_count.items():
        if device.type == "cuda":
-            device_memory = torch.cuda.mem_get_info(device)[0]
+            index = device.index if device.index is not None else torch.cuda.current_device()
+            device_memory = torch.cuda.mem_get_info(index)[0]
            # Allow up to 95% of max device memory
            byte_count = min(byte_count, int(0.95 * device_memory))
        # Allocate memory