Switch to device-agnostic device calls in test cases (#38247)

* use device agnostic APIs in test cases

Signed-off-by: Matrix Yao <matrix.yao@intel.com>

* fix style

Signed-off-by: Matrix Yao <matrix.yao@intel.com>

* add one more

Signed-off-by: YAO Matrix <matrix.yao@intel.com>

* xpu now supports integer device id, aligning to CUDA behaviors

Signed-off-by: Matrix Yao <matrix.yao@intel.com>

* update to use device_properties

Signed-off-by: Matrix Yao <matrix.yao@intel.com>

* fix style

Signed-off-by: Matrix Yao <matrix.yao@intel.com>

* update comment

Signed-off-by: Matrix Yao <matrix.yao@intel.com>

* fix comments

Signed-off-by: Matrix Yao <matrix.yao@intel.com>

* fix style

Signed-off-by: Matrix Yao <matrix.yao@intel.com>

---------

Signed-off-by: Matrix Yao <matrix.yao@intel.com>
Signed-off-by: YAO Matrix <matrix.yao@intel.com>
Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
2025-05-26 16:18:53 +08:00
parent cba279f46c
commit a5a0c7b888
39 changed files with 259 additions and 389 deletions
--- a/tests/quantization/compressed_tensors_integration/test_compressed_models.py
+++ b/tests/quantization/compressed_tensors_integration/test_compressed_models.py
@@ -3,7 +3,7 @@ import unittest
 import warnings

 from transformers import AutoModelForCausalLM, AutoTokenizer
-from transformers.testing_utils import require_compressed_tensors, require_torch
+from transformers.testing_utils import backend_empty_cache, require_compressed_tensors, require_torch, torch_device
 from transformers.utils import is_torch_available
 from transformers.utils.quantization_config import CompressedTensorsConfig

@@ -41,7 +41,7 @@ class StackCompressedModelTest(unittest.TestCase):

    def tearDown(self):
        gc.collect()
-        torch.cuda.empty_cache()
+        backend_empty_cache(torch_device)
        gc.collect()

    def test_compressed_uncompressed_model_shapes(self):
@@ -160,7 +160,7 @@ class RunCompressedTest(unittest.TestCase):

    def tearDown(self):
        gc.collect()
-        torch.cuda.empty_cache()
+        backend_empty_cache(torch_device)
        gc.collect()

    def test_default_run_compressed__True(self):
--- a/tests/quantization/compressed_tensors_integration/test_compressed_tensors.py
+++ b/tests/quantization/compressed_tensors_integration/test_compressed_tensors.py
@@ -2,7 +2,7 @@ import gc
 import unittest

 from transformers import AutoModelForCausalLM, AutoTokenizer, CompressedTensorsConfig
-from transformers.testing_utils import require_compressed_tensors, require_torch
+from transformers.testing_utils import backend_empty_cache, require_compressed_tensors, require_torch, torch_device
 from transformers.utils import is_torch_available


@@ -22,7 +22,7 @@ class CompressedTensorsTest(unittest.TestCase):

    def tearDown(self):
        gc.collect()
-        torch.cuda.empty_cache()
+        backend_empty_cache(torch_device)
        gc.collect()

    def test_config_args(self):