[CI] revert device in test_export_static_cache (#39662)
* revert device
* add todo
This commit is contained in:
@@ -248,7 +248,7 @@ class Cohere2IntegrationTest(unittest.TestCase):
|
||||
|
||||
tokenizer = AutoTokenizer.from_pretrained(model_id, pad_token="<PAD>", padding_side="right")
|
||||
# Load model
|
||||
- device = torch_device
+ device = "cpu" # TODO (joao / export experts): should be on `torch_device`, but causes GPU OOM
|
||||
dtype = torch.bfloat16
|
||||
cache_implementation = "static"
|
||||
attn_implementation = "sdpa"
|
||||
|
||||
@@ -423,7 +423,7 @@ class GemmaIntegrationTest(unittest.TestCase):
|
||||
].shape[-1]
|
||||
|
||||
# Load model
|
||||
- device = torch_device
+ device = "cpu" # TODO (joao / export experts): should be on `torch_device`, but causes GPU OOM
|
||||
dtype = torch.bfloat16
|
||||
cache_implementation = "static"
|
||||
attn_implementation = "sdpa"
|
||||
|
||||
@@ -335,7 +335,7 @@ class Gemma2IntegrationTest(unittest.TestCase):
|
||||
].shape[-1]
|
||||
|
||||
# Load model
|
||||
- device = torch_device
+ device = "cpu" # TODO (joao / export experts): should be on `torch_device`, but causes GPU OOM
|
||||
dtype = torch.bfloat16
|
||||
cache_implementation = "static"
|
||||
attn_implementation = "sdpa"
|
||||
|
||||
@@ -322,7 +322,7 @@ class LlamaIntegrationTest(unittest.TestCase):
|
||||
].shape[-1]
|
||||
|
||||
# Load model
|
||||
- device = torch_device
+ device = "cpu" # TODO (joao / export experts): should be on `torch_device`, but causes GPU OOM
|
||||
dtype = torch.bfloat16
|
||||
cache_implementation = "static"
|
||||
attn_implementation = "sdpa"
|
||||
|
||||
@@ -347,7 +347,7 @@ class OlmoIntegrationTest(unittest.TestCase):
|
||||
].shape[-1]
|
||||
|
||||
# Load model
|
||||
- device = torch_device
+ device = "cpu" # TODO (joao / export experts): should be on `torch_device`, but causes GPU OOM
|
||||
dtype = torch.bfloat16
|
||||
cache_implementation = "static"
|
||||
attn_implementation = "sdpa"
|
||||
|
||||
@@ -348,7 +348,7 @@ class Olmo2IntegrationTest(unittest.TestCase):
|
||||
].shape[-1]
|
||||
|
||||
# Load model
|
||||
- device = torch_device
+ device = "cpu" # TODO (joao / export experts): should be on `torch_device`, but causes GPU OOM
|
||||
dtype = torch.bfloat16
|
||||
cache_implementation = "static"
|
||||
attn_implementation = "sdpa"
|
||||
|
||||
@@ -384,7 +384,7 @@ class Phi3IntegrationTest(unittest.TestCase):
|
||||
config.rope_scaling["type"] = "default"
|
||||
|
||||
# Load model
|
||||
- device = torch_device
+ device = "cpu" # TODO (joao / export experts): should be on `torch_device`, but causes GPU OOM
|
||||
dtype = torch.bfloat16
|
||||
cache_implementation = "static"
|
||||
attn_implementation = "sdpa"
|
||||
|
||||
@@ -270,7 +270,7 @@ class Qwen2IntegrationTest(unittest.TestCase):
|
||||
].shape[-1]
|
||||
|
||||
# Load model
|
||||
- device = torch_device
+ device = "cpu" # TODO (joao / export experts): should be on `torch_device`, but causes GPU OOM
|
||||
dtype = torch.bfloat16
|
||||
cache_implementation = "static"
|
||||
attn_implementation = "sdpa"
|
||||
|
||||
@@ -261,7 +261,7 @@ class Qwen3IntegrationTest(unittest.TestCase):
|
||||
max_generation_length = tokenizer(EXPECTED_TEXT_COMPLETION, return_tensors="pt", padding=True)[
|
||||
"input_ids"
|
||||
].shape[-1]
|
||||
- device = torch_device
+ device = "cpu" # TODO (joao / export experts): should be on `torch_device`, but causes GPU OOM
|
||||
dtype = torch.bfloat16
|
||||
cache_implementation = "static"
|
||||
attn_implementation = "sdpa"
|
||||
|
||||
@@ -191,7 +191,7 @@ class SmolLM3IntegrationTest(unittest.TestCase):
|
||||
].shape[-1]
|
||||
|
||||
# Load model
|
||||
- device = "cpu"
+ device = "cpu" # TODO (joao / export experts): should be on `torch_device`, but causes GPU OOM
|
||||
dtype = torch.bfloat16
|
||||
cache_implementation = "static"
|
||||
attn_implementation = "sdpa"
|
||||
|
||||
Reference in New Issue
Block a user