Remove all traces of low_cpu_mem_usage (#38792)

* remove it from all py files
* remove it from the doc
* remove it from examples
* style
* remove traces of _fast_init
* Update test_peft_integration.py
* CIs
2025-06-12 16:39:33 +02:00
parent 3542e0b844
commit 4b8ec667e9
76 changed files with 100 additions and 598 deletions
--- a/tests/generation/test_utils.py
+++ b/tests/generation/test_utils.py
@@ -2337,7 +2337,6 @@ class GenerationTesterMixin:
                model_eager = model_class.from_pretrained(
                    tmpdirname,
                    torch_dtype=torch.float16,
-                    low_cpu_mem_usage=True,
                    attn_implementation="eager",
                ).to(torch_device)
                res_eager = model_eager.generate(**inputs_dict, **generate_kwargs)
@@ -2347,7 +2346,6 @@ class GenerationTesterMixin:
                model_attn = model_class.from_pretrained(
                    tmpdirname,
                    torch_dtype=torch.float16,
-                    low_cpu_mem_usage=True,
                    attn_implementation=attn_implementation,
                ).to(torch_device)
                res_attn = model_attn.generate(**inputs_dict, **generate_kwargs)
@@ -3724,7 +3722,6 @@ class GenerationIntegrationTests(unittest.TestCase):
        processor = AutoProcessor.from_pretrained(model_id)
        model = AutoModelForSpeechSeq2Seq.from_pretrained(
            model_id,
-            low_cpu_mem_usage=True,
            use_safetensors=True,
        )
        model.to(torch_device)
@@ -3743,7 +3740,6 @@ class GenerationIntegrationTests(unittest.TestCase):
        # Load its decoder only version:
        assistant_causal_lm = AutoModelForCausalLM.from_pretrained(
            assistant_distil_model_id,
-            low_cpu_mem_usage=True,
            use_safetensors=True,
        ).to(torch_device)
        self.assertTrue(model.generate(**features, assistant_model=assistant_causal_lm).sum())
@@ -3759,7 +3755,6 @@ class GenerationIntegrationTests(unittest.TestCase):
        # Load its decoder only version:
        assistant_causal_lm = AutoModelForCausalLM.from_pretrained(
            assistant_distil_model_id,
-            low_cpu_mem_usage=True,
            use_safetensors=True,
        ).to(torch_device)
        # It will raise an error as the encoder of the main and assistant model are not compatible: