Remove all traces of low_cpu_mem_usage (#38792)

* remove it from all py files * remove it from the doc * remove it from examples * style * remove traces of _fast_init * Update test_peft_integration.py * CIs
2025-06-12 16:39:33 +02:00
parent 3542e0b844
commit 4b8ec667e9
76 changed files with 100 additions and 598 deletions
--- a/tests/models/gemma3/test_modeling_gemma3.py
+++ b/tests/models/gemma3/test_modeling_gemma3.py
@@ -391,9 +391,7 @@ class Gemma3IntegrationTest(unittest.TestCase):
    def test_model_4b_bf16(self):
        model_id = "google/gemma-3-4b-it"

-        model = Gemma3ForConditionalGeneration.from_pretrained(
-            model_id, low_cpu_mem_usage=True, torch_dtype=torch.bfloat16
-        ).to(torch_device)
+        model = Gemma3ForConditionalGeneration.from_pretrained(model_id, torch_dtype=torch.bfloat16).to(torch_device)

        inputs = self.processor.apply_chat_template(
            self.messages,
@@ -421,9 +419,7 @@ class Gemma3IntegrationTest(unittest.TestCase):
    def test_model_4b_batch(self):
        model_id = "google/gemma-3-4b-it"

-        model = Gemma3ForConditionalGeneration.from_pretrained(
-            model_id, low_cpu_mem_usage=True, torch_dtype=torch.bfloat16
-        ).to(torch_device)
+        model = Gemma3ForConditionalGeneration.from_pretrained(model_id, torch_dtype=torch.bfloat16).to(torch_device)

        messages_2 = [
            {"role": "system", "content": [{"type": "text", "text": "You are a helpful assistant."}]},
@@ -474,9 +470,7 @@ class Gemma3IntegrationTest(unittest.TestCase):
    def test_model_4b_crops(self):
        model_id = "google/gemma-3-4b-it"

-        model = Gemma3ForConditionalGeneration.from_pretrained(
-            model_id, low_cpu_mem_usage=True, torch_dtype=torch.bfloat16
-        ).to(torch_device)
+        model = Gemma3ForConditionalGeneration.from_pretrained(model_id, torch_dtype=torch.bfloat16).to(torch_device)

        crop_config = {
            "images_kwargs": {
@@ -516,9 +510,7 @@ class Gemma3IntegrationTest(unittest.TestCase):
    def test_model_4b_batch_crops(self):
        model_id = "google/gemma-3-4b-it"

-        model = Gemma3ForConditionalGeneration.from_pretrained(
-            model_id, low_cpu_mem_usage=True, torch_dtype=torch.bfloat16
-        ).to(torch_device)
+        model = Gemma3ForConditionalGeneration.from_pretrained(model_id, torch_dtype=torch.bfloat16).to(torch_device)
        crop_config = {
            "images_kwargs": {
                "do_pan_and_scan": True,
@@ -576,9 +568,7 @@ class Gemma3IntegrationTest(unittest.TestCase):
    def test_model_4b_multiimage(self):
        model_id = "google/gemma-3-4b-it"

-        model = Gemma3ForConditionalGeneration.from_pretrained(
-            model_id, low_cpu_mem_usage=True, torch_dtype=torch.bfloat16
-        ).to(torch_device)
+        model = Gemma3ForConditionalGeneration.from_pretrained(model_id, torch_dtype=torch.bfloat16).to(torch_device)

        messages = [
            {"role": "system", "content": [{"type": "text", "text": "You are a helpful assistant."}]},
@@ -616,9 +606,7 @@ class Gemma3IntegrationTest(unittest.TestCase):
    def test_model_1b_text_only(self):
        model_id = "google/gemma-3-1b-it"

-        model = Gemma3ForCausalLM.from_pretrained(model_id, low_cpu_mem_usage=True, torch_dtype=torch.bfloat16).to(
-            torch_device
-        )
+        model = Gemma3ForCausalLM.from_pretrained(model_id, torch_dtype=torch.bfloat16).to(torch_device)
        tokenizer = AutoTokenizer.from_pretrained(model_id, padding_side="left")
        inputs = tokenizer("Write a poem about Machine Learning.", return_tensors="pt").to(torch_device)