fix xpu failures on PT 2.7 and 2.8 w/o IPEX and enable hqq cases on XPU (#39187)

* chameleon xpu bnb groundtruth update on bnb triton backend since we are
deprecating ipex backend

Signed-off-by: YAO Matrix <matrix.yao@intel.com>

* enable hqq uts on XPU, all passed

Signed-off-by: YAO Matrix <matrix.yao@intel.com>

* fix style

Signed-off-by: YAO Matrix <matrix.yao@intel.com>

* fix comment

Signed-off-by: YAO Matrix <matrix.yao@intel.com>

---------

Signed-off-by: YAO Matrix <matrix.yao@intel.com>
This commit is contained in:
Yao Matrix
2025-07-08 16:18:26 +08:00
committed by GitHub
parent 17b3c96c00
commit b2816da802
3 changed files with 15 additions and 14 deletions

View File

@@ -416,7 +416,7 @@ class ChameleonIntegrationTest(unittest.TestCase):
EXPECTED_TEXT_COMPLETIONS = Expectations(
{
("xpu", 3): [
'Describe what do you see here and tell me about the history behind it?The image depicts a star map, with a bright blue dot in the center representing the star Altair. The star map is set against a black background, with the constellations visible in the night',
'Describe what do you see here and tell me about the history behind it?The image depicts a star map, with a bright blue dot in the center representing the star Alpha Centauri. The star map is a representation of the night sky, showing the positions of stars in',
'What constellation is this image showing?The image shows the constellation of Orion.The image shows the constellation of Orion.The image shows the constellation of Orion.The image shows the constellation of Orion.',
],
("cuda", 7): [

View File

@@ -21,9 +21,10 @@ from transformers import AutoModelForCausalLM, AutoTokenizer, HqqConfig
from transformers.testing_utils import (
backend_empty_cache,
require_accelerate,
require_deterministic_for_xpu,
require_hqq,
require_torch_gpu,
require_torch_multi_gpu,
require_torch_accelerator,
require_torch_multi_accelerator,
slow,
torch_device,
)
@@ -87,7 +88,7 @@ def check_forward(test_module, model, batch_size=1, context_size=1024):
MODEL_ID = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
@require_torch_gpu
@require_torch_accelerator
@require_hqq
class HqqConfigTest(unittest.TestCase):
def test_to_dict(self):
@@ -101,7 +102,7 @@ class HqqConfigTest(unittest.TestCase):
@slow
@require_torch_gpu
@require_torch_accelerator
@require_accelerate
@require_hqq
class HQQTest(unittest.TestCase):
@@ -131,7 +132,6 @@ class HQQTest(unittest.TestCase):
model_id=MODEL_ID, quant_config=quant_config, compute_dtype=torch.float16, device=torch_device
)
original_device = hqq_runner.model.model.layers[0].self_attn.v_proj.device
check_hqqlayer(self, hqq_runner.model.model.layers[0].self_attn.v_proj)
check_forward(self, hqq_runner.model)
@@ -142,7 +142,7 @@ class HQQTest(unittest.TestCase):
check_hqqlayer(self, hqq_runner.model.model.layers[0].self_attn.v_proj)
check_forward(self, hqq_runner.model)
hqq_runner.model.cuda(original_device)
hqq_runner.model.to(torch_device)
check_hqqlayer(self, hqq_runner.model.model.layers[0].self_attn.v_proj)
check_forward(self, hqq_runner.model)
@@ -158,8 +158,8 @@ class HQQTest(unittest.TestCase):
@slow
@require_torch_gpu
@require_torch_multi_gpu
@require_torch_accelerator
@require_torch_multi_accelerator
@require_accelerate
@require_hqq
class HQQTestMultiGPU(unittest.TestCase):
@@ -182,7 +182,7 @@ class HQQTestMultiGPU(unittest.TestCase):
@slow
@require_torch_gpu
@require_torch_accelerator
@require_accelerate
@require_hqq
class HQQTestBias(unittest.TestCase):
@@ -202,6 +202,7 @@ class HQQTestBias(unittest.TestCase):
check_hqqlayer(self, hqq_runner.model.model.decoder.layers[0].self_attn.v_proj)
check_forward(self, hqq_runner.model)
@require_deterministic_for_xpu
def test_save_and_load_quantized_model(self):
"""
Test saving and loading a quantized model with bias
@@ -237,7 +238,7 @@ class HQQTestBias(unittest.TestCase):
@slow
@require_torch_gpu
@require_torch_accelerator
@require_accelerate
@require_hqq
class HQQSerializationTest(unittest.TestCase):