Fix XPU failures on PyTorch 2.7 and 2.8 without IPEX, and enable HQQ test cases on XPU (#39187)

* update the Chameleon XPU bitsandbytes (bnb) ground truth for the bnb Triton backend, since we are
deprecating the IPEX backend

Signed-off-by: YAO Matrix <matrix.yao@intel.com>

* enable HQQ unit tests (UTs) on XPU; all pass

Signed-off-by: YAO Matrix <matrix.yao@intel.com>

* fix style

Signed-off-by: YAO Matrix <matrix.yao@intel.com>

* fix comment

Signed-off-by: YAO Matrix <matrix.yao@intel.com>

---------

Signed-off-by: YAO Matrix <matrix.yao@intel.com>
This commit is contained in:
Yao Matrix
2025-07-08 16:18:26 +08:00
committed by GitHub
parent 17b3c96c00
commit b2816da802
3 changed files with 15 additions and 14 deletions

View File

@@ -15,7 +15,7 @@
from typing import TYPE_CHECKING, Any from typing import TYPE_CHECKING, Any
from ..integrations import prepare_for_hqq_linear from ..integrations import prepare_for_hqq_linear
from ..utils import is_accelerate_available, is_hqq_available, is_torch_available, logging from ..utils import is_accelerate_available, is_hqq_available, is_torch_available, is_torch_xpu_available, logging
from .base import HfQuantizer from .base import HfQuantizer
from .quantizers_utils import get_module_from_name from .quantizers_utils import get_module_from_name
@@ -71,8 +71,8 @@ class HqqHfQuantizer(HfQuantizer):
" sure the weights are in PyTorch format." " sure the weights are in PyTorch format."
) )
if not torch.cuda.is_available(): if not (torch.cuda.is_available() or is_torch_xpu_available()):
raise RuntimeError("No GPU found. A GPU is needed for quantization.") raise RuntimeError("No GPU or XPU found. A GPU or XPU is needed for quantization.")
if self.torch_dtype is None: if self.torch_dtype is None:
if "torch_dtype" in kwargs: if "torch_dtype" in kwargs:

View File

@@ -416,7 +416,7 @@ class ChameleonIntegrationTest(unittest.TestCase):
EXPECTED_TEXT_COMPLETIONS = Expectations( EXPECTED_TEXT_COMPLETIONS = Expectations(
{ {
("xpu", 3): [ ("xpu", 3): [
'Describe what do you see here and tell me about the history behind it?The image depicts a star map, with a bright blue dot in the center representing the star Altair. The star map is set against a black background, with the constellations visible in the night', 'Describe what do you see here and tell me about the history behind it?The image depicts a star map, with a bright blue dot in the center representing the star Alpha Centauri. The star map is a representation of the night sky, showing the positions of stars in',
'What constellation is this image showing?The image shows the constellation of Orion.The image shows the constellation of Orion.The image shows the constellation of Orion.The image shows the constellation of Orion.', 'What constellation is this image showing?The image shows the constellation of Orion.The image shows the constellation of Orion.The image shows the constellation of Orion.The image shows the constellation of Orion.',
], ],
("cuda", 7): [ ("cuda", 7): [

View File

@@ -21,9 +21,10 @@ from transformers import AutoModelForCausalLM, AutoTokenizer, HqqConfig
from transformers.testing_utils import ( from transformers.testing_utils import (
backend_empty_cache, backend_empty_cache,
require_accelerate, require_accelerate,
require_deterministic_for_xpu,
require_hqq, require_hqq,
require_torch_gpu, require_torch_accelerator,
require_torch_multi_gpu, require_torch_multi_accelerator,
slow, slow,
torch_device, torch_device,
) )
@@ -87,7 +88,7 @@ def check_forward(test_module, model, batch_size=1, context_size=1024):
MODEL_ID = "TinyLlama/TinyLlama-1.1B-Chat-v1.0" MODEL_ID = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
@require_torch_gpu @require_torch_accelerator
@require_hqq @require_hqq
class HqqConfigTest(unittest.TestCase): class HqqConfigTest(unittest.TestCase):
def test_to_dict(self): def test_to_dict(self):
@@ -101,7 +102,7 @@ class HqqConfigTest(unittest.TestCase):
@slow @slow
@require_torch_gpu @require_torch_accelerator
@require_accelerate @require_accelerate
@require_hqq @require_hqq
class HQQTest(unittest.TestCase): class HQQTest(unittest.TestCase):
@@ -131,7 +132,6 @@ class HQQTest(unittest.TestCase):
model_id=MODEL_ID, quant_config=quant_config, compute_dtype=torch.float16, device=torch_device model_id=MODEL_ID, quant_config=quant_config, compute_dtype=torch.float16, device=torch_device
) )
original_device = hqq_runner.model.model.layers[0].self_attn.v_proj.device
check_hqqlayer(self, hqq_runner.model.model.layers[0].self_attn.v_proj) check_hqqlayer(self, hqq_runner.model.model.layers[0].self_attn.v_proj)
check_forward(self, hqq_runner.model) check_forward(self, hqq_runner.model)
@@ -142,7 +142,7 @@ class HQQTest(unittest.TestCase):
check_hqqlayer(self, hqq_runner.model.model.layers[0].self_attn.v_proj) check_hqqlayer(self, hqq_runner.model.model.layers[0].self_attn.v_proj)
check_forward(self, hqq_runner.model) check_forward(self, hqq_runner.model)
hqq_runner.model.cuda(original_device) hqq_runner.model.to(torch_device)
check_hqqlayer(self, hqq_runner.model.model.layers[0].self_attn.v_proj) check_hqqlayer(self, hqq_runner.model.model.layers[0].self_attn.v_proj)
check_forward(self, hqq_runner.model) check_forward(self, hqq_runner.model)
@@ -158,8 +158,8 @@ class HQQTest(unittest.TestCase):
@slow @slow
@require_torch_gpu @require_torch_accelerator
@require_torch_multi_gpu @require_torch_multi_accelerator
@require_accelerate @require_accelerate
@require_hqq @require_hqq
class HQQTestMultiGPU(unittest.TestCase): class HQQTestMultiGPU(unittest.TestCase):
@@ -182,7 +182,7 @@ class HQQTestMultiGPU(unittest.TestCase):
@slow @slow
@require_torch_gpu @require_torch_accelerator
@require_accelerate @require_accelerate
@require_hqq @require_hqq
class HQQTestBias(unittest.TestCase): class HQQTestBias(unittest.TestCase):
@@ -202,6 +202,7 @@ class HQQTestBias(unittest.TestCase):
check_hqqlayer(self, hqq_runner.model.model.decoder.layers[0].self_attn.v_proj) check_hqqlayer(self, hqq_runner.model.model.decoder.layers[0].self_attn.v_proj)
check_forward(self, hqq_runner.model) check_forward(self, hqq_runner.model)
@require_deterministic_for_xpu
def test_save_and_load_quantized_model(self): def test_save_and_load_quantized_model(self):
""" """
Test saving and loading a quantized model with bias Test saving and loading a quantized model with bias
@@ -237,7 +238,7 @@ class HQQTestBias(unittest.TestCase):
@slow @slow
@require_torch_gpu @require_torch_accelerator
@require_accelerate @require_accelerate
@require_hqq @require_hqq
class HQQSerializationTest(unittest.TestCase): class HQQSerializationTest(unittest.TestCase):