fix xpu failures on PT 2.7 and 2.8 w/o IPEX and enable hqq cases on XPU (#39187)
* chameleon xpu bnb groundtruth update on bnb triton backend since we are deprecating ipex backend

  Signed-off-by: YAO Matrix <matrix.yao@intel.com>

* enable hqq uts on XPU, all passed

  Signed-off-by: YAO Matrix <matrix.yao@intel.com>

* fix style

  Signed-off-by: YAO Matrix <matrix.yao@intel.com>

* fix comment

  Signed-off-by: YAO Matrix <matrix.yao@intel.com>

---------

Signed-off-by: YAO Matrix <matrix.yao@intel.com>
This commit is contained in:
@@ -15,7 +15,7 @@
|
|||||||
from typing import TYPE_CHECKING, Any
|
from typing import TYPE_CHECKING, Any
|
||||||
|
|
||||||
from ..integrations import prepare_for_hqq_linear
|
from ..integrations import prepare_for_hqq_linear
|
||||||
from ..utils import is_accelerate_available, is_hqq_available, is_torch_available, logging
|
from ..utils import is_accelerate_available, is_hqq_available, is_torch_available, is_torch_xpu_available, logging
|
||||||
from .base import HfQuantizer
|
from .base import HfQuantizer
|
||||||
from .quantizers_utils import get_module_from_name
|
from .quantizers_utils import get_module_from_name
|
||||||
|
|
||||||
@@ -71,8 +71,8 @@ class HqqHfQuantizer(HfQuantizer):
|
|||||||
" sure the weights are in PyTorch format."
|
" sure the weights are in PyTorch format."
|
||||||
)
|
)
|
||||||
|
|
||||||
if not torch.cuda.is_available():
|
if not (torch.cuda.is_available() or is_torch_xpu_available()):
|
||||||
raise RuntimeError("No GPU found. A GPU is needed for quantization.")
|
raise RuntimeError("No GPU or XPU found. A GPU or XPU is needed for quantization.")
|
||||||
|
|
||||||
if self.torch_dtype is None:
|
if self.torch_dtype is None:
|
||||||
if "torch_dtype" in kwargs:
|
if "torch_dtype" in kwargs:
|
||||||
|
|||||||
@@ -416,7 +416,7 @@ class ChameleonIntegrationTest(unittest.TestCase):
|
|||||||
EXPECTED_TEXT_COMPLETIONS = Expectations(
|
EXPECTED_TEXT_COMPLETIONS = Expectations(
|
||||||
{
|
{
|
||||||
("xpu", 3): [
|
("xpu", 3): [
|
||||||
'Describe what do you see here and tell me about the history behind it?The image depicts a star map, with a bright blue dot in the center representing the star Altair. The star map is set against a black background, with the constellations visible in the night',
|
'Describe what do you see here and tell me about the history behind it?The image depicts a star map, with a bright blue dot in the center representing the star Alpha Centauri. The star map is a representation of the night sky, showing the positions of stars in',
|
||||||
'What constellation is this image showing?The image shows the constellation of Orion.The image shows the constellation of Orion.The image shows the constellation of Orion.The image shows the constellation of Orion.',
|
'What constellation is this image showing?The image shows the constellation of Orion.The image shows the constellation of Orion.The image shows the constellation of Orion.The image shows the constellation of Orion.',
|
||||||
],
|
],
|
||||||
("cuda", 7): [
|
("cuda", 7): [
|
||||||
|
|||||||
@@ -21,9 +21,10 @@ from transformers import AutoModelForCausalLM, AutoTokenizer, HqqConfig
|
|||||||
from transformers.testing_utils import (
|
from transformers.testing_utils import (
|
||||||
backend_empty_cache,
|
backend_empty_cache,
|
||||||
require_accelerate,
|
require_accelerate,
|
||||||
|
require_deterministic_for_xpu,
|
||||||
require_hqq,
|
require_hqq,
|
||||||
require_torch_gpu,
|
require_torch_accelerator,
|
||||||
require_torch_multi_gpu,
|
require_torch_multi_accelerator,
|
||||||
slow,
|
slow,
|
||||||
torch_device,
|
torch_device,
|
||||||
)
|
)
|
||||||
@@ -87,7 +88,7 @@ def check_forward(test_module, model, batch_size=1, context_size=1024):
|
|||||||
MODEL_ID = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
|
MODEL_ID = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
|
||||||
|
|
||||||
|
|
||||||
@require_torch_gpu
|
@require_torch_accelerator
|
||||||
@require_hqq
|
@require_hqq
|
||||||
class HqqConfigTest(unittest.TestCase):
|
class HqqConfigTest(unittest.TestCase):
|
||||||
def test_to_dict(self):
|
def test_to_dict(self):
|
||||||
@@ -101,7 +102,7 @@ class HqqConfigTest(unittest.TestCase):
|
|||||||
|
|
||||||
|
|
||||||
@slow
|
@slow
|
||||||
@require_torch_gpu
|
@require_torch_accelerator
|
||||||
@require_accelerate
|
@require_accelerate
|
||||||
@require_hqq
|
@require_hqq
|
||||||
class HQQTest(unittest.TestCase):
|
class HQQTest(unittest.TestCase):
|
||||||
@@ -131,7 +132,6 @@ class HQQTest(unittest.TestCase):
|
|||||||
model_id=MODEL_ID, quant_config=quant_config, compute_dtype=torch.float16, device=torch_device
|
model_id=MODEL_ID, quant_config=quant_config, compute_dtype=torch.float16, device=torch_device
|
||||||
)
|
)
|
||||||
|
|
||||||
original_device = hqq_runner.model.model.layers[0].self_attn.v_proj.device
|
|
||||||
check_hqqlayer(self, hqq_runner.model.model.layers[0].self_attn.v_proj)
|
check_hqqlayer(self, hqq_runner.model.model.layers[0].self_attn.v_proj)
|
||||||
check_forward(self, hqq_runner.model)
|
check_forward(self, hqq_runner.model)
|
||||||
|
|
||||||
@@ -142,7 +142,7 @@ class HQQTest(unittest.TestCase):
|
|||||||
check_hqqlayer(self, hqq_runner.model.model.layers[0].self_attn.v_proj)
|
check_hqqlayer(self, hqq_runner.model.model.layers[0].self_attn.v_proj)
|
||||||
check_forward(self, hqq_runner.model)
|
check_forward(self, hqq_runner.model)
|
||||||
|
|
||||||
hqq_runner.model.cuda(original_device)
|
hqq_runner.model.to(torch_device)
|
||||||
check_hqqlayer(self, hqq_runner.model.model.layers[0].self_attn.v_proj)
|
check_hqqlayer(self, hqq_runner.model.model.layers[0].self_attn.v_proj)
|
||||||
check_forward(self, hqq_runner.model)
|
check_forward(self, hqq_runner.model)
|
||||||
|
|
||||||
@@ -158,8 +158,8 @@ class HQQTest(unittest.TestCase):
|
|||||||
|
|
||||||
|
|
||||||
@slow
|
@slow
|
||||||
@require_torch_gpu
|
@require_torch_accelerator
|
||||||
@require_torch_multi_gpu
|
@require_torch_multi_accelerator
|
||||||
@require_accelerate
|
@require_accelerate
|
||||||
@require_hqq
|
@require_hqq
|
||||||
class HQQTestMultiGPU(unittest.TestCase):
|
class HQQTestMultiGPU(unittest.TestCase):
|
||||||
@@ -182,7 +182,7 @@ class HQQTestMultiGPU(unittest.TestCase):
|
|||||||
|
|
||||||
|
|
||||||
@slow
|
@slow
|
||||||
@require_torch_gpu
|
@require_torch_accelerator
|
||||||
@require_accelerate
|
@require_accelerate
|
||||||
@require_hqq
|
@require_hqq
|
||||||
class HQQTestBias(unittest.TestCase):
|
class HQQTestBias(unittest.TestCase):
|
||||||
@@ -202,6 +202,7 @@ class HQQTestBias(unittest.TestCase):
|
|||||||
check_hqqlayer(self, hqq_runner.model.model.decoder.layers[0].self_attn.v_proj)
|
check_hqqlayer(self, hqq_runner.model.model.decoder.layers[0].self_attn.v_proj)
|
||||||
check_forward(self, hqq_runner.model)
|
check_forward(self, hqq_runner.model)
|
||||||
|
|
||||||
|
@require_deterministic_for_xpu
|
||||||
def test_save_and_load_quantized_model(self):
|
def test_save_and_load_quantized_model(self):
|
||||||
"""
|
"""
|
||||||
Test saving and loading a quantized model with bias
|
Test saving and loading a quantized model with bias
|
||||||
@@ -237,7 +238,7 @@ class HQQTestBias(unittest.TestCase):
|
|||||||
|
|
||||||
|
|
||||||
@slow
|
@slow
|
||||||
@require_torch_gpu
|
@require_torch_accelerator
|
||||||
@require_accelerate
|
@require_accelerate
|
||||||
@require_hqq
|
@require_hqq
|
||||||
class HQQSerializationTest(unittest.TestCase):
|
class HQQSerializationTest(unittest.TestCase):
|
||||||
|
|||||||
Reference in New Issue
Block a user