Switch to device-agnostic device calls in test cases (#38247)

* use device agnostic APIs in test cases
  Signed-off-by: Matrix Yao <matrix.yao@intel.com>
* fix style
  Signed-off-by: Matrix Yao <matrix.yao@intel.com>
* add one more
  Signed-off-by: YAO Matrix <matrix.yao@intel.com>
* xpu now supports integer device id, aligning to CUDA behaviors
  Signed-off-by: Matrix Yao <matrix.yao@intel.com>
* update to use device_properties
  Signed-off-by: Matrix Yao <matrix.yao@intel.com>
* fix style
  Signed-off-by: Matrix Yao <matrix.yao@intel.com>
* update comment
  Signed-off-by: Matrix Yao <matrix.yao@intel.com>
* fix comments
  Signed-off-by: Matrix Yao <matrix.yao@intel.com>
* fix style
  Signed-off-by: Matrix Yao <matrix.yao@intel.com>

---------

Signed-off-by: Matrix Yao <matrix.yao@intel.com>
Signed-off-by: YAO Matrix <matrix.yao@intel.com>
Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
2025-05-26 16:18:53 +08:00
parent cba279f46c
commit a5a0c7b888
39 changed files with 259 additions and 389 deletions
--- a/tests/models/bamba/test_modeling_bamba.py
+++ b/tests/models/bamba/test_modeling_bamba.py
@@ -28,6 +28,7 @@ from transformers import (
 )
 from transformers.testing_utils import (
    Expectations,
+    get_device_properties,
    require_deterministic_for_xpu,
    require_flash_attn,
    require_torch,
@@ -572,10 +573,10 @@ class BambaModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMixi
                    return_tensors="pt", return_seq_idx=True, return_flash_attn_kwargs=True
                )
                batch = data_collator(features)
-                batch_cuda = {k: t.cuda() if torch.is_tensor(t) else t for k, t in batch.items()}
+                batch_accelerator = {k: t.to(torch_device) if torch.is_tensor(t) else t for k, t in batch.items()}

                res_padded = model(**inputs_dict)
-                res_padfree = model(**batch_cuda)
+                res_padfree = model(**batch_accelerator)

                logits_padded = res_padded.logits[inputs_dict["attention_mask"].bool()]
                logits_padfree = res_padfree.logits[0]
@@ -594,7 +595,7 @@ class BambaModelIntegrationTest(unittest.TestCase):
    tokenizer = None
    # This variable is used to determine which CUDA device are we using for our runners (A10 or T4)
    # Depending on the hardware we get different logits / generations
-    cuda_compute_capability_major_version = None
+    device_properties = None

    @classmethod
    def setUpClass(cls):
@@ -606,9 +607,7 @@ class BambaModelIntegrationTest(unittest.TestCase):
        cls.tokenizer.pad_token_id = cls.model.config.pad_token_id
        cls.tokenizer.padding_side = "left"

-        if is_torch_available() and torch.cuda.is_available():
-            # 8 is for A100 / A10 and 7 for T4
-            cls.cuda_compute_capability_major_version = torch.cuda.get_device_capability()[0]
+        cls.device_properties = get_device_properties()

    def test_simple_generate(self):
        expectations = Expectations(
@@ -639,7 +638,7 @@ class BambaModelIntegrationTest(unittest.TestCase):
        self.assertEqual(output_sentence, expected)

        # TODO: there are significant differences in the logits across major cuda versions, which shouldn't exist
-        if self.cuda_compute_capability_major_version == 8:
+        if self.device_properties == ("cuda", 8):
            with torch.no_grad():
                logits = self.model(input_ids=input_ids, logits_to_keep=40).logits

@@ -692,7 +691,7 @@ class BambaModelIntegrationTest(unittest.TestCase):
        self.assertEqual(output_sentences[1], EXPECTED_TEXT[1])

        # TODO: there are significant differences in the logits across major cuda versions, which shouldn't exist
-        if self.cuda_compute_capability_major_version == 8:
+        if self.device_properties == ("cuda", 8):
            with torch.no_grad():
                logits = self.model(input_ids=inputs["input_ids"]).logits