From b2816da8021b4e7568cb1e840a5d9aa1357c26a7 Mon Sep 17 00:00:00 2001
From: Yao Matrix <matrix.yao@intel.com>
Date: Tue, 8 Jul 2025 16:18:26 +0800
Subject: [PATCH] fix xpu failures on PT 2.7 and 2.8 w/o IPEX and enable hqq
 cases on XPU (#39187)

* update the Chameleon XPU bnb ground truth for the bnb Triton backend, since
the IPEX backend is being deprecated

Signed-off-by: YAO Matrix <matrix.yao@intel.com>

* enable HQQ unit tests on XPU; all pass

Signed-off-by: YAO Matrix <matrix.yao@intel.com>

* fix style

Signed-off-by: YAO Matrix <matrix.yao@intel.com>

* fix comment

Signed-off-by: YAO Matrix <matrix.yao@intel.com>

---------

Signed-off-by: YAO Matrix <matrix.yao@intel.com>
---
 src/transformers/quantizers/quantizer_hqq.py  |  6 +++---
 .../chameleon/test_modeling_chameleon.py      |  2 +-
 tests/quantization/hqq/test_hqq.py            | 21 ++++++++++---------
 3 files changed, 15 insertions(+), 14 deletions(-)

diff --git a/src/transformers/quantizers/quantizer_hqq.py b/src/transformers/quantizers/quantizer_hqq.py
index 6061c72c24..160a689d80 100755
--- a/src/transformers/quantizers/quantizer_hqq.py
+++ b/src/transformers/quantizers/quantizer_hqq.py
@@ -15,7 +15,7 @@
 from typing import TYPE_CHECKING, Any
 
 from ..integrations import prepare_for_hqq_linear
-from ..utils import is_accelerate_available, is_hqq_available, is_torch_available, logging
+from ..utils import is_accelerate_available, is_hqq_available, is_torch_available, is_torch_xpu_available, logging
 from .base import HfQuantizer
 from .quantizers_utils import get_module_from_name
 
@@ -71,8 +71,8 @@ class HqqHfQuantizer(HfQuantizer):
                 " sure the weights are in PyTorch format."
             )
 
-        if not torch.cuda.is_available():
-            raise RuntimeError("No GPU found. A GPU is needed for quantization.")
+        if not (torch.cuda.is_available() or is_torch_xpu_available()):
+            raise RuntimeError("No GPU or XPU found. A GPU or XPU is needed for quantization.")
 
         if self.torch_dtype is None:
             if "torch_dtype" in kwargs:
diff --git a/tests/models/chameleon/test_modeling_chameleon.py b/tests/models/chameleon/test_modeling_chameleon.py
index fb5847fd60..67baab37c0 100644
--- a/tests/models/chameleon/test_modeling_chameleon.py
+++ b/tests/models/chameleon/test_modeling_chameleon.py
@@ -416,7 +416,7 @@ class ChameleonIntegrationTest(unittest.TestCase):
         EXPECTED_TEXT_COMPLETIONS = Expectations(
             {
                 ("xpu", 3): [
-                    'Describe what do you see here and tell me about the history behind it?The image depicts a star map, with a bright blue dot in the center representing the star Altair. The star map is set against a black background, with the constellations visible in the night',
+                    'Describe what do you see here and tell me about the history behind it?The image depicts a star map, with a bright blue dot in the center representing the star Alpha Centauri. The star map is a representation of the night sky, showing the positions of stars in',
                     'What constellation is this image showing?The image shows the constellation of Orion.The image shows the constellation of Orion.The image shows the constellation of Orion.The image shows the constellation of Orion.',
                 ],
                 ("cuda", 7): [
diff --git a/tests/quantization/hqq/test_hqq.py b/tests/quantization/hqq/test_hqq.py
index 37d91e9a25..a3aae71552 100755
--- a/tests/quantization/hqq/test_hqq.py
+++ b/tests/quantization/hqq/test_hqq.py
@@ -21,9 +21,10 @@ from transformers import AutoModelForCausalLM, AutoTokenizer, HqqConfig
 from transformers.testing_utils import (
     backend_empty_cache,
     require_accelerate,
+    require_deterministic_for_xpu,
     require_hqq,
-    require_torch_gpu,
-    require_torch_multi_gpu,
+    require_torch_accelerator,
+    require_torch_multi_accelerator,
     slow,
     torch_device,
 )
@@ -87,7 +88,7 @@ def check_forward(test_module, model, batch_size=1, context_size=1024):
 MODEL_ID = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
 
 
-@require_torch_gpu
+@require_torch_accelerator
 @require_hqq
 class HqqConfigTest(unittest.TestCase):
     def test_to_dict(self):
@@ -101,7 +102,7 @@ class HqqConfigTest(unittest.TestCase):
 
 
 @slow
-@require_torch_gpu
+@require_torch_accelerator
 @require_accelerate
 @require_hqq
 class HQQTest(unittest.TestCase):
@@ -131,7 +132,6 @@ class HQQTest(unittest.TestCase):
             model_id=MODEL_ID, quant_config=quant_config, compute_dtype=torch.float16, device=torch_device
         )
 
-        original_device = hqq_runner.model.model.layers[0].self_attn.v_proj.device
         check_hqqlayer(self, hqq_runner.model.model.layers[0].self_attn.v_proj)
         check_forward(self, hqq_runner.model)
 
@@ -142,7 +142,7 @@ class HQQTest(unittest.TestCase):
         check_hqqlayer(self, hqq_runner.model.model.layers[0].self_attn.v_proj)
         check_forward(self, hqq_runner.model)
 
-        hqq_runner.model.cuda(original_device)
+        hqq_runner.model.to(torch_device)
         check_hqqlayer(self, hqq_runner.model.model.layers[0].self_attn.v_proj)
         check_forward(self, hqq_runner.model)
 
@@ -158,8 +158,8 @@ class HQQTest(unittest.TestCase):
 
 
 @slow
-@require_torch_gpu
-@require_torch_multi_gpu
+@require_torch_accelerator
+@require_torch_multi_accelerator
 @require_accelerate
 @require_hqq
 class HQQTestMultiGPU(unittest.TestCase):
@@ -182,7 +182,7 @@ class HQQTestMultiGPU(unittest.TestCase):
 
 
 @slow
-@require_torch_gpu
+@require_torch_accelerator
 @require_accelerate
 @require_hqq
 class HQQTestBias(unittest.TestCase):
@@ -202,6 +202,7 @@ class HQQTestBias(unittest.TestCase):
         check_hqqlayer(self, hqq_runner.model.model.decoder.layers[0].self_attn.v_proj)
         check_forward(self, hqq_runner.model)
 
+    @require_deterministic_for_xpu
     def test_save_and_load_quantized_model(self):
         """
         Test saving and loading a quantized model with bias
@@ -237,7 +238,7 @@ class HQQTestBias(unittest.TestCase):
 
 
 @slow
-@require_torch_gpu
+@require_torch_accelerator
 @require_accelerate
 @require_hqq
 class HQQSerializationTest(unittest.TestCase):