Switch to device-agnostic device calls in test cases (#38247)

* use device-agnostic APIs in test cases

Signed-off-by: Matrix Yao <matrix.yao@intel.com>

* fix style

Signed-off-by: Matrix Yao <matrix.yao@intel.com>

* add one more

Signed-off-by: YAO Matrix <matrix.yao@intel.com>

* XPU now supports integer device IDs, aligning with CUDA behavior

Signed-off-by: Matrix Yao <matrix.yao@intel.com>

* update to use device_properties

Signed-off-by: Matrix Yao <matrix.yao@intel.com>

* fix style

Signed-off-by: Matrix Yao <matrix.yao@intel.com>

* update comment

Signed-off-by: Matrix Yao <matrix.yao@intel.com>

* fix comments

Signed-off-by: Matrix Yao <matrix.yao@intel.com>

* fix style

Signed-off-by: Matrix Yao <matrix.yao@intel.com>

---------

Signed-off-by: Matrix Yao <matrix.yao@intel.com>
Signed-off-by: YAO Matrix <matrix.yao@intel.com>
Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
This commit is contained in:
Yao Matrix
2025-05-26 16:18:53 +08:00
committed by GitHub
parent cba279f46c
commit a5a0c7b888
39 changed files with 259 additions and 389 deletions

View File

@@ -28,6 +28,7 @@ from transformers import (
)
from transformers.testing_utils import (
Expectations,
get_device_properties,
require_deterministic_for_xpu,
require_flash_attn,
require_torch,
@@ -572,10 +573,10 @@ class BambaModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMixi
return_tensors="pt", return_seq_idx=True, return_flash_attn_kwargs=True
)
batch = data_collator(features)
batch_cuda = {k: t.cuda() if torch.is_tensor(t) else t for k, t in batch.items()}
batch_accelerator = {k: t.to(torch_device) if torch.is_tensor(t) else t for k, t in batch.items()}
res_padded = model(**inputs_dict)
res_padfree = model(**batch_cuda)
res_padfree = model(**batch_accelerator)
logits_padded = res_padded.logits[inputs_dict["attention_mask"].bool()]
logits_padfree = res_padfree.logits[0]
@@ -594,7 +595,7 @@ class BambaModelIntegrationTest(unittest.TestCase):
tokenizer = None
# This variable is used to determine which CUDA device we are using for our runners (A10 or T4)
# Depending on the hardware we get different logits / generations
cuda_compute_capability_major_version = None
device_properties = None
@classmethod
def setUpClass(cls):
@@ -606,9 +607,7 @@ class BambaModelIntegrationTest(unittest.TestCase):
cls.tokenizer.pad_token_id = cls.model.config.pad_token_id
cls.tokenizer.padding_side = "left"
if is_torch_available() and torch.cuda.is_available():
# 8 is for A100 / A10 and 7 for T4
cls.cuda_compute_capability_major_version = torch.cuda.get_device_capability()[0]
cls.device_properties = get_device_properties()
def test_simple_generate(self):
expectations = Expectations(
@@ -639,7 +638,7 @@ class BambaModelIntegrationTest(unittest.TestCase):
self.assertEqual(output_sentence, expected)
# TODO: there are significant differences in the logits across major cuda versions, which shouldn't exist
if self.cuda_compute_capability_major_version == 8:
if self.device_properties == ("cuda", 8):
with torch.no_grad():
logits = self.model(input_ids=input_ids, logits_to_keep=40).logits
@@ -692,7 +691,7 @@ class BambaModelIntegrationTest(unittest.TestCase):
self.assertEqual(output_sentences[1], EXPECTED_TEXT[1])
# TODO: there are significant differences in the logits across major cuda versions, which shouldn't exist
if self.cuda_compute_capability_major_version == 8:
if self.device_properties == ("cuda", 8):
with torch.no_grad():
logits = self.model(input_ids=inputs["input_ids"]).logits