fix xpu tests (#36656)

* fix awq xpu tests

Signed-off-by: jiqing-feng <jiqing.feng@intel.com>

* update

Signed-off-by: jiqing-feng <jiqing.feng@intel.com>

* fix llava next video bnb tests

Signed-off-by: jiqing-feng <jiqing.feng@intel.com>

---------

Signed-off-by: jiqing-feng <jiqing.feng@intel.com>
Co-authored-by: Marc Sun <57196510+SunMarc@users.noreply.github.com>
2025-03-17 22:57:49 +08:00
parent da7d64f4ff
commit 27361bd218
3 changed files with 19 additions and 11 deletions
--- a/src/transformers/utils/quantization_config.py
+++ b/src/transformers/utils/quantization_config.py
@@ -902,6 +902,9 @@ class AwqConfig(QuantizationConfigMixin):
            )

        if self.backend == AwqBackendPackingMethod.LLMAWQ:
+            # Only cuda device can run this function
+            if not torch.cuda.is_available():
+                raise ValueError("LLM-AWQ backend is only supported on CUDA")
            compute_capability = torch.cuda.get_device_capability()
            major, minor = compute_capability
            if major < 8:
--- a/tests/models/llava_next_video/test_modeling_llava_next_video.py
+++ b/tests/models/llava_next_video/test_modeling_llava_next_video.py
@@ -430,13 +430,13 @@ class LlavaNextVideoForConditionalGenerationIntegrationTest(unittest.TestCase):

        # verify generation
        output = model.generate(**inputs, do_sample=False, max_new_tokens=40)
-        EXPECTED_DECODED_TEXT = 'USER: \nWhy is this video funny? ASSISTANT: The humor in this video comes from the unexpected and somewhat comical situation of a young child reading a book while another child is attempting to read the same book. The child who is reading the book seems'  # fmt: skip
-
-        self.assertEqual(
-            self.processor.decode(output[0], skip_special_tokens=True),
-            EXPECTED_DECODED_TEXT,
+        EXPECTED_DECODED_TEXT = (
+            "USER: \nWhy is this video funny? ASSISTANT: The humor in this video comes from the unexpected and somewhat comical situation of a young child reading a book while another child is attempting to read the same book. The child who is reading the book seems",  # cuda output
+            "USER: \nWhy is this video funny? ASSISTANT: The humor in this video comes from the unexpected and somewhat comical situation of a young child reading a book while wearing a pair of glasses that are too large for them. The glasses are",  # xpu output
        )

+        self.assertTrue(self.processor.decode(output[0], skip_special_tokens=True) in EXPECTED_DECODED_TEXT)
+
    @slow
    @require_bitsandbytes
    def test_small_model_integration_test_batch(self):
--- a/tests/quantization/autoawq/test_awq.py
+++ b/tests/quantization/autoawq/test_awq.py
@@ -59,16 +59,21 @@ class AwqConfigTest(unittest.TestCase):
        with self.assertRaises(ValueError):
            AwqConfig(bits=4, backend="unexisting-backend")

-        compute_capability = torch.cuda.get_device_capability()
-        major, minor = compute_capability
+        # Only cuda device can run this function
+        support_llm_awq = False
+        if torch.cuda.is_available():
+            compute_capability = torch.cuda.get_device_capability()
+            major, minor = compute_capability
+            if major >= 8:
+                support_llm_awq = True

-        if major < 8:
+        if support_llm_awq:
+            # LLMAWQ should work on an A100
+            AwqConfig(bits=4, backend="llm-awq")
+        else:
            # LLMAWQ does not work on a T4
            with self.assertRaises(ValueError):
                AwqConfig(bits=4, backend="llm-awq")
-        else:
-            # LLMAWQ should work on an A100
-            AwqConfig(bits=4, backend="llm-awq")

    def test_to_dict(self):
        """