From 5d0ba3e479839f3a385799cecc3cf42b4e970797 Mon Sep 17 00:00:00 2001 From: Joao Gante Date: Fri, 25 Jul 2025 16:36:12 +0100 Subject: [PATCH] [CI] revert device in `test_export_static_cache` (#39662) * revert device * add todo --- tests/models/cohere2/test_modeling_cohere2.py | 2 +- tests/models/gemma/test_modeling_gemma.py | 2 +- tests/models/gemma2/test_modeling_gemma2.py | 2 +- tests/models/llama/test_modeling_llama.py | 2 +- tests/models/olmo/test_modeling_olmo.py | 2 +- tests/models/olmo2/test_modeling_olmo2.py | 2 +- tests/models/phi3/test_modeling_phi3.py | 2 +- tests/models/qwen2/test_modeling_qwen2.py | 2 +- tests/models/qwen3/test_modeling_qwen3.py | 2 +- tests/models/smollm3/test_modeling_smollm3.py | 2 +- 10 files changed, 10 insertions(+), 10 deletions(-) diff --git a/tests/models/cohere2/test_modeling_cohere2.py b/tests/models/cohere2/test_modeling_cohere2.py index 2fe532f673..71335c3707 100644 --- a/tests/models/cohere2/test_modeling_cohere2.py +++ b/tests/models/cohere2/test_modeling_cohere2.py @@ -248,7 +248,7 @@ class Cohere2IntegrationTest(unittest.TestCase): tokenizer = AutoTokenizer.from_pretrained(model_id, pad_token="", padding_side="right") # Load model - device = torch_device + device = "cpu" # TODO (joao / export experts): should be on `torch_device`, but causes GPU OOM dtype = torch.bfloat16 cache_implementation = "static" attn_implementation = "sdpa" diff --git a/tests/models/gemma/test_modeling_gemma.py b/tests/models/gemma/test_modeling_gemma.py index d7f7a0ce0e..f58fbf569a 100644 --- a/tests/models/gemma/test_modeling_gemma.py +++ b/tests/models/gemma/test_modeling_gemma.py @@ -423,7 +423,7 @@ class GemmaIntegrationTest(unittest.TestCase): ].shape[-1] # Load model - device = torch_device + device = "cpu" # TODO (joao / export experts): should be on `torch_device`, but causes GPU OOM dtype = torch.bfloat16 cache_implementation = "static" attn_implementation = "sdpa" diff --git a/tests/models/gemma2/test_modeling_gemma2.py b/tests/models/gemma2/test_modeling_gemma2.py index 76418997da..589e08dd1d 100644 --- a/tests/models/gemma2/test_modeling_gemma2.py +++ b/tests/models/gemma2/test_modeling_gemma2.py @@ -335,7 +335,7 @@ class Gemma2IntegrationTest(unittest.TestCase): ].shape[-1] # Load model - device = torch_device + device = "cpu" # TODO (joao / export experts): should be on `torch_device`, but causes GPU OOM dtype = torch.bfloat16 cache_implementation = "static" attn_implementation = "sdpa" diff --git a/tests/models/llama/test_modeling_llama.py b/tests/models/llama/test_modeling_llama.py index 2ffc423be4..136f76f48c 100644 --- a/tests/models/llama/test_modeling_llama.py +++ b/tests/models/llama/test_modeling_llama.py @@ -322,7 +322,7 @@ class LlamaIntegrationTest(unittest.TestCase): ].shape[-1] # Load model - device = torch_device + device = "cpu" # TODO (joao / export experts): should be on `torch_device`, but causes GPU OOM dtype = torch.bfloat16 cache_implementation = "static" attn_implementation = "sdpa" diff --git a/tests/models/olmo/test_modeling_olmo.py b/tests/models/olmo/test_modeling_olmo.py index eea85c7536..86913f254f 100644 --- a/tests/models/olmo/test_modeling_olmo.py +++ b/tests/models/olmo/test_modeling_olmo.py @@ -347,7 +347,7 @@ class OlmoIntegrationTest(unittest.TestCase): ].shape[-1] # Load model - device = torch_device + device = "cpu" # TODO (joao / export experts): should be on `torch_device`, but causes GPU OOM dtype = torch.bfloat16 cache_implementation = "static" attn_implementation = "sdpa" diff --git a/tests/models/olmo2/test_modeling_olmo2.py b/tests/models/olmo2/test_modeling_olmo2.py index 29fb3517d6..20b0c49d3f 100644 --- a/tests/models/olmo2/test_modeling_olmo2.py +++ b/tests/models/olmo2/test_modeling_olmo2.py @@ -348,7 +348,7 @@ class Olmo2IntegrationTest(unittest.TestCase): ].shape[-1] # Load model - device = torch_device + device = "cpu" # TODO (joao / export experts): should be on `torch_device`, but causes GPU OOM dtype = torch.bfloat16 cache_implementation = "static" attn_implementation = "sdpa" diff --git a/tests/models/phi3/test_modeling_phi3.py b/tests/models/phi3/test_modeling_phi3.py index aec3c30802..387eb6c4df 100644 --- a/tests/models/phi3/test_modeling_phi3.py +++ b/tests/models/phi3/test_modeling_phi3.py @@ -384,7 +384,7 @@ class Phi3IntegrationTest(unittest.TestCase): config.rope_scaling["type"] = "default" # Load model - device = torch_device + device = "cpu" # TODO (joao / export experts): should be on `torch_device`, but causes GPU OOM dtype = torch.bfloat16 cache_implementation = "static" attn_implementation = "sdpa" diff --git a/tests/models/qwen2/test_modeling_qwen2.py b/tests/models/qwen2/test_modeling_qwen2.py index d66341901e..d48226394c 100644 --- a/tests/models/qwen2/test_modeling_qwen2.py +++ b/tests/models/qwen2/test_modeling_qwen2.py @@ -270,7 +270,7 @@ class Qwen2IntegrationTest(unittest.TestCase): ].shape[-1] # Load model - device = torch_device + device = "cpu" # TODO (joao / export experts): should be on `torch_device`, but causes GPU OOM dtype = torch.bfloat16 cache_implementation = "static" attn_implementation = "sdpa" diff --git a/tests/models/qwen3/test_modeling_qwen3.py b/tests/models/qwen3/test_modeling_qwen3.py index 424be1c866..a37df40ed4 100644 --- a/tests/models/qwen3/test_modeling_qwen3.py +++ b/tests/models/qwen3/test_modeling_qwen3.py @@ -261,7 +261,7 @@ class Qwen3IntegrationTest(unittest.TestCase): max_generation_length = tokenizer(EXPECTED_TEXT_COMPLETION, return_tensors="pt", padding=True)[ "input_ids" ].shape[-1] - device = torch_device + device = "cpu" # TODO (joao / export experts): should be on `torch_device`, but causes GPU OOM dtype = torch.bfloat16 cache_implementation = "static" attn_implementation = "sdpa" diff --git a/tests/models/smollm3/test_modeling_smollm3.py b/tests/models/smollm3/test_modeling_smollm3.py index 7027716889..f855e0b36a 100644 --- a/tests/models/smollm3/test_modeling_smollm3.py +++ b/tests/models/smollm3/test_modeling_smollm3.py @@ -191,7 +191,7 @@ class SmolLM3IntegrationTest(unittest.TestCase): ].shape[-1] # Load model - device = "cpu" + device = "cpu" # TODO (joao / export experts): should be on `torch_device`, but causes GPU OOM dtype = torch.bfloat16 cache_implementation = "static" attn_implementation = "sdpa"