Fix XPU failures on PT 2.7 and 2.8 without IPEX and enable HQQ test cases on XPU (#39187)

* Update the Chameleon XPU bnb ground truth for the bnb Triton backend, since the IPEX backend is being deprecated

  Signed-off-by: YAO Matrix <matrix.yao@intel.com>

* Enable HQQ unit tests on XPU; all passed

  Signed-off-by: YAO Matrix <matrix.yao@intel.com>

* Fix style

  Signed-off-by: YAO Matrix <matrix.yao@intel.com>

* Fix comment

  Signed-off-by: YAO Matrix <matrix.yao@intel.com>

---------

Signed-off-by: YAO Matrix <matrix.yao@intel.com>
2025-07-08 16:18:26 +08:00
parent 17b3c96c00
commit b2816da802
3 changed files with 15 additions and 14 deletions
--- a/src/transformers/quantizers/quantizer_hqq.py
+++ b/src/transformers/quantizers/quantizer_hqq.py
@@ -15,7 +15,7 @@
 from typing import TYPE_CHECKING, Any
 from ..integrations import prepare_for_hqq_linear
-from ..utils import is_accelerate_available, is_hqq_available, is_torch_available, logging
+from ..utils import is_accelerate_available, is_hqq_available, is_torch_available, is_torch_xpu_available, logging
 from .base import HfQuantizer
 from .quantizers_utils import get_module_from_name
@@ -71,8 +71,8 @@ class HqqHfQuantizer(HfQuantizer):
                " sure the weights are in PyTorch format."
            )
-        if not torch.cuda.is_available():
+        if not (torch.cuda.is_available() or is_torch_xpu_available()):
-            raise RuntimeError("No GPU found. A GPU is needed for quantization.")
+            raise RuntimeError("No GPU or XPU found. A GPU or XPU is needed for quantization.")
        if self.torch_dtype is None:
            if "torch_dtype" in kwargs:
--- a/tests/models/chameleon/test_modeling_chameleon.py
+++ b/tests/models/chameleon/test_modeling_chameleon.py
@@ -416,7 +416,7 @@ class ChameleonIntegrationTest(unittest.TestCase):
        EXPECTED_TEXT_COMPLETIONS = Expectations(
            {
                ("xpu", 3): [
-                    'Describe what do you see here and tell me about the history behind it?The image depicts a star map, with a bright blue dot in the center representing the star Altair. The star map is set against a black background, with the constellations visible in the night',
+                    'Describe what do you see here and tell me about the history behind it?The image depicts a star map, with a bright blue dot in the center representing the star Alpha Centauri. The star map is a representation of the night sky, showing the positions of stars in',
                    'What constellation is this image showing?The image shows the constellation of Orion.The image shows the constellation of Orion.The image shows the constellation of Orion.The image shows the constellation of Orion.',
                ],
                ("cuda", 7): [
--- a/tests/quantization/hqq/test_hqq.py
+++ b/tests/quantization/hqq/test_hqq.py
@@ -21,9 +21,10 @@ from transformers import AutoModelForCausalLM, AutoTokenizer, HqqConfig
 from transformers.testing_utils import (
    backend_empty_cache,
    require_accelerate,
    require_deterministic_for_xpu,
    require_hqq,
-    require_torch_gpu,
+    require_torch_accelerator,
-    require_torch_multi_gpu,
+    require_torch_multi_accelerator,
    slow,
    torch_device,
 )
@@ -87,7 +88,7 @@ def check_forward(test_module, model, batch_size=1, context_size=1024):
 MODEL_ID = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
-@require_torch_gpu
+@require_torch_accelerator
@require_hqq
 class HqqConfigTest(unittest.TestCase):
    def test_to_dict(self):
@@ -101,7 +102,7 @@ class HqqConfigTest(unittest.TestCase):
@slow
-@require_torch_gpu
+@require_torch_accelerator
@require_accelerate
@require_hqq
 class HQQTest(unittest.TestCase):
@@ -131,7 +132,6 @@ class HQQTest(unittest.TestCase):
            model_id=MODEL_ID, quant_config=quant_config, compute_dtype=torch.float16, device=torch_device
        )
        original_device = hqq_runner.model.model.layers[0].self_attn.v_proj.device
        check_hqqlayer(self, hqq_runner.model.model.layers[0].self_attn.v_proj)
        check_forward(self, hqq_runner.model)
@@ -142,7 +142,7 @@ class HQQTest(unittest.TestCase):
        check_hqqlayer(self, hqq_runner.model.model.layers[0].self_attn.v_proj)
        check_forward(self, hqq_runner.model)
-        hqq_runner.model.cuda(original_device)
+        hqq_runner.model.to(torch_device)
        check_hqqlayer(self, hqq_runner.model.model.layers[0].self_attn.v_proj)
        check_forward(self, hqq_runner.model)
@@ -158,8 +158,8 @@ class HQQTest(unittest.TestCase):
@slow
-@require_torch_gpu
+@require_torch_accelerator
-@require_torch_multi_gpu
+@require_torch_multi_accelerator
@require_accelerate
@require_hqq
 class HQQTestMultiGPU(unittest.TestCase):
@@ -182,7 +182,7 @@ class HQQTestMultiGPU(unittest.TestCase):
@slow
-@require_torch_gpu
+@require_torch_accelerator
@require_accelerate
@require_hqq
 class HQQTestBias(unittest.TestCase):
@@ -202,6 +202,7 @@ class HQQTestBias(unittest.TestCase):
        check_hqqlayer(self, hqq_runner.model.model.decoder.layers[0].self_attn.v_proj)
        check_forward(self, hqq_runner.model)
    @require_deterministic_for_xpu
    def test_save_and_load_quantized_model(self):
        """
        Test saving and loading a quantized model with bias
@@ -237,7 +238,7 @@ class HQQTestBias(unittest.TestCase):
@slow
-@require_torch_gpu
+@require_torch_accelerator
@require_accelerate
@require_hqq
 class HQQSerializationTest(unittest.TestCase):