enable several cases on XPU (#37516)

* enable several cases on XPU

Signed-off-by: YAO Matrix <matrix.yao@intel.com>

* Update tests/test_modeling_common.py

Co-authored-by: Yih-Dar <2521628+ydshieh@users.noreply.github.com>

* fix style

Signed-off-by: YAO Matrix <matrix.yao@intel.com>

---------

Signed-off-by: YAO Matrix <matrix.yao@intel.com>
Co-authored-by: Yih-Dar <2521628+ydshieh@users.noreply.github.com>
2025-04-16 17:01:04 +08:00
parent 5ab7a7c640
commit 33f6c5a5c8
4 changed files with 10 additions and 8 deletions
--- a/tests/quantization/autoawq/test_awq.py
+++ b/tests/quantization/autoawq/test_awq.py
@@ -24,6 +24,7 @@ from transformers.testing_utils import (
    require_intel_extension_for_pytorch,
    require_torch_accelerator,
    require_torch_gpu,
+    require_torch_multi_accelerator,
    require_torch_multi_gpu,
    slow,
    torch_device,
@@ -202,6 +203,7 @@ class AwqTest(unittest.TestCase):
        output = quantized_model.generate(**input_ids, max_new_tokens=40)
        self.assertEqual(self.tokenizer.decode(output[0], skip_special_tokens=True), self.EXPECTED_OUTPUT_BF16)

+    @require_torch_gpu
    def test_quantized_model_exllama(self):
        """
        Simple test that checks if the quantized model is working properly with exllama backend
@@ -240,7 +242,7 @@ class AwqTest(unittest.TestCase):
            output = model.generate(**input_ids, max_new_tokens=40)
            self.assertEqual(self.tokenizer.decode(output[0], skip_special_tokens=True), self.EXPECTED_OUTPUT)

-    @require_torch_multi_gpu
+    @require_torch_multi_accelerator
    def test_quantized_model_multi_gpu(self):
        """
        Simple test that checks if the quantized model is working properly with multiple GPUs
@@ -275,7 +277,7 @@ class AwqTest(unittest.TestCase):


@slow
-@require_torch_gpu
+@require_torch_accelerator
@require_auto_awq
@require_accelerate
 class AwqFusedTest(unittest.TestCase):