Remove all traces of low_cpu_mem_usage (#38792)

* remove `low_cpu_mem_usage` from all py files

* remove `low_cpu_mem_usage` from the doc

* remove `low_cpu_mem_usage` from examples

* style

* remove traces of _fast_init

* Update test_peft_integration.py

* CIs
This commit is contained in:
Cyril Vallez
2025-06-12 16:39:33 +02:00
committed by GitHub
parent 3542e0b844
commit 4b8ec667e9
76 changed files with 100 additions and 598 deletions

View File

@@ -391,9 +391,7 @@ class Gemma3IntegrationTest(unittest.TestCase):
def test_model_4b_bf16(self):
model_id = "google/gemma-3-4b-it"
model = Gemma3ForConditionalGeneration.from_pretrained(
model_id, low_cpu_mem_usage=True, torch_dtype=torch.bfloat16
).to(torch_device)
model = Gemma3ForConditionalGeneration.from_pretrained(model_id, torch_dtype=torch.bfloat16).to(torch_device)
inputs = self.processor.apply_chat_template(
self.messages,
@@ -421,9 +419,7 @@ class Gemma3IntegrationTest(unittest.TestCase):
def test_model_4b_batch(self):
model_id = "google/gemma-3-4b-it"
model = Gemma3ForConditionalGeneration.from_pretrained(
model_id, low_cpu_mem_usage=True, torch_dtype=torch.bfloat16
).to(torch_device)
model = Gemma3ForConditionalGeneration.from_pretrained(model_id, torch_dtype=torch.bfloat16).to(torch_device)
messages_2 = [
{"role": "system", "content": [{"type": "text", "text": "You are a helpful assistant."}]},
@@ -474,9 +470,7 @@ class Gemma3IntegrationTest(unittest.TestCase):
def test_model_4b_crops(self):
model_id = "google/gemma-3-4b-it"
model = Gemma3ForConditionalGeneration.from_pretrained(
model_id, low_cpu_mem_usage=True, torch_dtype=torch.bfloat16
).to(torch_device)
model = Gemma3ForConditionalGeneration.from_pretrained(model_id, torch_dtype=torch.bfloat16).to(torch_device)
crop_config = {
"images_kwargs": {
@@ -516,9 +510,7 @@ class Gemma3IntegrationTest(unittest.TestCase):
def test_model_4b_batch_crops(self):
model_id = "google/gemma-3-4b-it"
model = Gemma3ForConditionalGeneration.from_pretrained(
model_id, low_cpu_mem_usage=True, torch_dtype=torch.bfloat16
).to(torch_device)
model = Gemma3ForConditionalGeneration.from_pretrained(model_id, torch_dtype=torch.bfloat16).to(torch_device)
crop_config = {
"images_kwargs": {
"do_pan_and_scan": True,
@@ -576,9 +568,7 @@ class Gemma3IntegrationTest(unittest.TestCase):
def test_model_4b_multiimage(self):
model_id = "google/gemma-3-4b-it"
model = Gemma3ForConditionalGeneration.from_pretrained(
model_id, low_cpu_mem_usage=True, torch_dtype=torch.bfloat16
).to(torch_device)
model = Gemma3ForConditionalGeneration.from_pretrained(model_id, torch_dtype=torch.bfloat16).to(torch_device)
messages = [
{"role": "system", "content": [{"type": "text", "text": "You are a helpful assistant."}]},
@@ -616,9 +606,7 @@ class Gemma3IntegrationTest(unittest.TestCase):
def test_model_1b_text_only(self):
model_id = "google/gemma-3-1b-it"
model = Gemma3ForCausalLM.from_pretrained(model_id, low_cpu_mem_usage=True, torch_dtype=torch.bfloat16).to(
torch_device
)
model = Gemma3ForCausalLM.from_pretrained(model_id, torch_dtype=torch.bfloat16).to(torch_device)
tokenizer = AutoTokenizer.from_pretrained(model_id, padding_side="left")
inputs = tokenizer("Write a poem about Machine Learning.", return_tensors="pt").to(torch_device)