From 27361bd218e5d1ecffc68b9317c49d3a764dfbba Mon Sep 17 00:00:00 2001
From: jiqing-feng <jiqing.feng@intel.com>
Date: Mon, 17 Mar 2025 22:57:49 +0800
Subject: [PATCH] fix xpu tests (#36656)

* fix awq xpu tests

Signed-off-by: jiqing-feng <jiqing.feng@intel.com>

* update

Signed-off-by: jiqing-feng <jiqing.feng@intel.com>

* fix llava next video bnb tests

Signed-off-by: jiqing-feng <jiqing.feng@intel.com>

---------

Signed-off-by: jiqing-feng <jiqing.feng@intel.com>
Co-authored-by: Marc Sun <57196510+SunMarc@users.noreply.github.com>
---
 src/transformers/utils/quantization_config.py   |  3 +++
 .../test_modeling_llava_next_video.py           | 10 +++++-----
 tests/quantization/autoawq/test_awq.py          | 17 +++++++++++------
 3 files changed, 19 insertions(+), 11 deletions(-)

diff --git a/src/transformers/utils/quantization_config.py b/src/transformers/utils/quantization_config.py
index 0988d8ac14..851249b270 100755
--- a/src/transformers/utils/quantization_config.py
+++ b/src/transformers/utils/quantization_config.py
@@ -902,6 +902,9 @@ class AwqConfig(QuantizationConfigMixin):
             )
 
         if self.backend == AwqBackendPackingMethod.LLMAWQ:
+            # Only cuda device can run this function
+            if not torch.cuda.is_available():
+                raise ValueError("LLM-AWQ backend is only supported on CUDA")
             compute_capability = torch.cuda.get_device_capability()
             major, minor = compute_capability
             if major < 8:
diff --git a/tests/models/llava_next_video/test_modeling_llava_next_video.py b/tests/models/llava_next_video/test_modeling_llava_next_video.py
index 44c7deeffc..ba7323a075 100644
--- a/tests/models/llava_next_video/test_modeling_llava_next_video.py
+++ b/tests/models/llava_next_video/test_modeling_llava_next_video.py
@@ -430,13 +430,13 @@ class LlavaNextVideoForConditionalGenerationIntegrationTest(unittest.TestCase):
 
         # verify generation
         output = model.generate(**inputs, do_sample=False, max_new_tokens=40)
-        EXPECTED_DECODED_TEXT = 'USER: \nWhy is this video funny? ASSISTANT: The humor in this video comes from the unexpected and somewhat comical situation of a young child reading a book while another child is attempting to read the same book. The child who is reading the book seems'  # fmt: skip
-
-        self.assertEqual(
-            self.processor.decode(output[0], skip_special_tokens=True),
-            EXPECTED_DECODED_TEXT,
+        EXPECTED_DECODED_TEXT = (
+            "USER: \nWhy is this video funny? ASSISTANT: The humor in this video comes from the unexpected and somewhat comical situation of a young child reading a book while another child is attempting to read the same book. The child who is reading the book seems",  # cuda output
+            "USER: \nWhy is this video funny? ASSISTANT: The humor in this video comes from the unexpected and somewhat comical situation of a young child reading a book while wearing a pair of glasses that are too large for them. The glasses are",  # xpu output
         )
 
+        self.assertTrue(self.processor.decode(output[0], skip_special_tokens=True) in EXPECTED_DECODED_TEXT)
+
     @slow
     @require_bitsandbytes
     def test_small_model_integration_test_batch(self):
diff --git a/tests/quantization/autoawq/test_awq.py b/tests/quantization/autoawq/test_awq.py
index 5238c29a9c..d597f8de71 100644
--- a/tests/quantization/autoawq/test_awq.py
+++ b/tests/quantization/autoawq/test_awq.py
@@ -59,16 +59,21 @@ class AwqConfigTest(unittest.TestCase):
         with self.assertRaises(ValueError):
             AwqConfig(bits=4, backend="unexisting-backend")
 
-        compute_capability = torch.cuda.get_device_capability()
-        major, minor = compute_capability
+        # Only cuda device can run this function
+        support_llm_awq = False
+        if torch.cuda.is_available():
+            compute_capability = torch.cuda.get_device_capability()
+            major, minor = compute_capability
+            if major >= 8:
+                support_llm_awq = True
 
-        if major < 8:
+        if support_llm_awq:
+            # LLMAWQ should work on an A100
+            AwqConfig(bits=4, backend="llm-awq")
+        else:
             # LLMAWQ does not work on a T4
             with self.assertRaises(ValueError):
                 AwqConfig(bits=4, backend="llm-awq")
-        else:
-            # LLMAWQ should work on an A100
-            AwqConfig(bits=4, backend="llm-awq")
 
     def test_to_dict(self):
         """