From b2816da8021b4e7568cb1e840a5d9aa1357c26a7 Mon Sep 17 00:00:00 2001 From: Yao Matrix Date: Tue, 8 Jul 2025 16:18:26 +0800 Subject: [PATCH] fix xpu failures on PT 2.7 and 2.8 w/o IPEX and enable hqq cases on XPU (#39187) * chameleon xpu bnb groundtruth update on bnb triton backend since we are deprecating ipex backend Signed-off-by: YAO Matrix * enable hqq uts on XPU, all passed Signed-off-by: YAO Matrix * fix style Signed-off-by: YAO Matrix * fix comment Signed-off-by: YAO Matrix --------- Signed-off-by: YAO Matrix --- src/transformers/quantizers/quantizer_hqq.py | 6 +++--- .../chameleon/test_modeling_chameleon.py | 2 +- tests/quantization/hqq/test_hqq.py | 21 ++++++++++--------- 3 files changed, 15 insertions(+), 14 deletions(-) diff --git a/src/transformers/quantizers/quantizer_hqq.py b/src/transformers/quantizers/quantizer_hqq.py index 6061c72c24..160a689d80 100755 --- a/src/transformers/quantizers/quantizer_hqq.py +++ b/src/transformers/quantizers/quantizer_hqq.py @@ -15,7 +15,7 @@ from typing import TYPE_CHECKING, Any from ..integrations import prepare_for_hqq_linear -from ..utils import is_accelerate_available, is_hqq_available, is_torch_available, logging +from ..utils import is_accelerate_available, is_hqq_available, is_torch_available, is_torch_xpu_available, logging from .base import HfQuantizer from .quantizers_utils import get_module_from_name @@ -71,8 +71,8 @@ class HqqHfQuantizer(HfQuantizer): " sure the weights are in PyTorch format." ) - if not torch.cuda.is_available(): - raise RuntimeError("No GPU found. A GPU is needed for quantization.") + if not (torch.cuda.is_available() or is_torch_xpu_available()): + raise RuntimeError("No GPU or XPU found. A GPU or XPU is needed for quantization.") if self.torch_dtype is None: if "torch_dtype" in kwargs: diff --git a/tests/models/chameleon/test_modeling_chameleon.py b/tests/models/chameleon/test_modeling_chameleon.py index fb5847fd60..67baab37c0 100644 --- a/tests/models/chameleon/test_modeling_chameleon.py +++ b/tests/models/chameleon/test_modeling_chameleon.py @@ -416,7 +416,7 @@ class ChameleonIntegrationTest(unittest.TestCase): EXPECTED_TEXT_COMPLETIONS = Expectations( { ("xpu", 3): [ - 'Describe what do you see here and tell me about the history behind it?The image depicts a star map, with a bright blue dot in the center representing the star Altair. The star map is set against a black background, with the constellations visible in the night', + 'Describe what do you see here and tell me about the history behind it?The image depicts a star map, with a bright blue dot in the center representing the star Alpha Centauri. The star map is a representation of the night sky, showing the positions of stars in', 'What constellation is this image showing?The image shows the constellation of Orion.The image shows the constellation of Orion.The image shows the constellation of Orion.The image shows the constellation of Orion.', ], ("cuda", 7): [ diff --git a/tests/quantization/hqq/test_hqq.py b/tests/quantization/hqq/test_hqq.py index 37d91e9a25..a3aae71552 100755 --- a/tests/quantization/hqq/test_hqq.py +++ b/tests/quantization/hqq/test_hqq.py @@ -21,9 +21,10 @@ from transformers import AutoModelForCausalLM, AutoTokenizer, HqqConfig from transformers.testing_utils import ( backend_empty_cache, require_accelerate, + require_deterministic_for_xpu, require_hqq, - require_torch_gpu, - require_torch_multi_gpu, + require_torch_accelerator, + require_torch_multi_accelerator, slow, torch_device, ) @@ -87,7 +88,7 @@ def check_forward(test_module, model, batch_size=1, context_size=1024): MODEL_ID = "TinyLlama/TinyLlama-1.1B-Chat-v1.0" -@require_torch_gpu +@require_torch_accelerator @require_hqq class HqqConfigTest(unittest.TestCase): def test_to_dict(self): @@ -101,7 +102,7 @@ class HqqConfigTest(unittest.TestCase): @slow -@require_torch_gpu +@require_torch_accelerator @require_accelerate @require_hqq class HQQTest(unittest.TestCase): @@ -131,7 +132,6 @@ class HQQTest(unittest.TestCase): model_id=MODEL_ID, quant_config=quant_config, compute_dtype=torch.float16, device=torch_device ) - original_device = hqq_runner.model.model.layers[0].self_attn.v_proj.device check_hqqlayer(self, hqq_runner.model.model.layers[0].self_attn.v_proj) check_forward(self, hqq_runner.model) @@ -142,7 +142,7 @@ class HQQTest(unittest.TestCase): check_hqqlayer(self, hqq_runner.model.model.layers[0].self_attn.v_proj) check_forward(self, hqq_runner.model) - hqq_runner.model.cuda(original_device) + hqq_runner.model.to(torch_device) check_hqqlayer(self, hqq_runner.model.model.layers[0].self_attn.v_proj) check_forward(self, hqq_runner.model) @@ -158,8 +158,8 @@ class HQQTest(unittest.TestCase): @slow -@require_torch_gpu -@require_torch_multi_gpu +@require_torch_accelerator +@require_torch_multi_accelerator @require_accelerate @require_hqq class HQQTestMultiGPU(unittest.TestCase): @@ -182,7 +182,7 @@ class HQQTestMultiGPU(unittest.TestCase): @slow -@require_torch_gpu +@require_torch_accelerator @require_accelerate @require_hqq class HQQTestBias(unittest.TestCase): @@ -202,6 +202,7 @@ class HQQTestBias(unittest.TestCase): check_hqqlayer(self, hqq_runner.model.model.decoder.layers[0].self_attn.v_proj) check_forward(self, hqq_runner.model) + @require_deterministic_for_xpu def test_save_and_load_quantized_model(self): """ Test saving and loading a quantized model with bias @@ -237,7 +238,7 @@ class HQQTestBias(unittest.TestCase): @slow -@require_torch_gpu +@require_torch_accelerator @require_accelerate @require_hqq class HQQSerializationTest(unittest.TestCase):