[tests] enable autoawq tests on XPU (#36327)

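Add autoawq: enable the AutoAWQ tests on non-CUDA accelerators such as XPU by using the device-agnostic test helpers (`require_torch_accelerator`, `torch_device`, `backend_empty_cache`) in place of the CUDA-specific ones.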

Co-authored-by: Yih-Dar <2521628+ydshieh@users.noreply.github.com>
This commit is contained in:
Fanli Lin
2025-02-25 20:38:09 +08:00
committed by GitHub
parent b4b9da6d9b
commit c3700b0eee

View File

@@ -19,9 +19,11 @@ import unittest
from transformers import AutoConfig, AutoModelForCausalLM, AutoTokenizer, AwqConfig, OPTForCausalLM from transformers import AutoConfig, AutoModelForCausalLM, AutoTokenizer, AwqConfig, OPTForCausalLM
from transformers.testing_utils import ( from transformers.testing_utils import (
backend_empty_cache,
require_accelerate, require_accelerate,
require_auto_awq, require_auto_awq,
require_intel_extension_for_pytorch, require_intel_extension_for_pytorch,
require_torch_accelerator,
require_torch_gpu, require_torch_gpu,
require_torch_multi_gpu, require_torch_multi_gpu,
slow, slow,
@@ -37,8 +39,9 @@ if is_accelerate_available():
from accelerate import init_empty_weights from accelerate import init_empty_weights
@require_torch_gpu @require_torch_accelerator
class AwqConfigTest(unittest.TestCase): class AwqConfigTest(unittest.TestCase):
@require_torch_gpu
def test_wrong_backend(self): def test_wrong_backend(self):
""" """
Simple test that checks if a user passes a wrong backend an error is raised Simple test that checks if a user passes a wrong backend an error is raised
@@ -90,7 +93,7 @@ class AwqConfigTest(unittest.TestCase):
@slow @slow
@require_torch_gpu @require_torch_accelerator
@require_auto_awq @require_auto_awq
@require_accelerate @require_accelerate
class AwqTest(unittest.TestCase): class AwqTest(unittest.TestCase):
@@ -107,7 +110,7 @@ class AwqTest(unittest.TestCase):
"Hello my name is Katie and I am a 20 year old student from the UK. I am currently studying for a degree in English Literature and History at the University of York. I am a very out", "Hello my name is Katie and I am a 20 year old student from the UK. I am currently studying for a degree in English Literature and History at the University of York. I am a very out",
"Hello my name is Katie and I am a 20 year old student from the UK. I am currently studying for a degree in English Literature and History at the University of York. I am a very creative", "Hello my name is Katie and I am a 20 year old student from the UK. I am currently studying for a degree in English Literature and History at the University of York. I am a very creative",
] ]
device_map = "cuda" device_map = torch_device
# called only once for all test in this class # called only once for all test in this class
@classmethod @classmethod
@@ -120,7 +123,7 @@ class AwqTest(unittest.TestCase):
def tearDown(self): def tearDown(self):
gc.collect() gc.collect()
torch.cuda.empty_cache() backend_empty_cache(torch_device)
gc.collect() gc.collect()
def test_quantized_model_conversion(self): def test_quantized_model_conversion(self):
@@ -475,7 +478,7 @@ class AwqFusedTest(unittest.TestCase):
@slow @slow
@require_torch_gpu @require_torch_accelerator
@require_auto_awq @require_auto_awq
@require_accelerate @require_accelerate
class AwqScaleTest(unittest.TestCase): class AwqScaleTest(unittest.TestCase):
@@ -488,7 +491,7 @@ class AwqScaleTest(unittest.TestCase):
Simple test that checks if the scales have been replaced in the quantized model Simple test that checks if the scales have been replaced in the quantized model
""" """
quantized_model = AutoModelForCausalLM.from_pretrained( quantized_model = AutoModelForCausalLM.from_pretrained(
"TechxGenus/starcoder2-3b-AWQ", torch_dtype=torch.float16, device_map="cuda" "TechxGenus/starcoder2-3b-AWQ", torch_dtype=torch.float16, device_map=torch_device
) )
self.assertTrue(isinstance(quantized_model.model.layers[0].mlp.act, ScaledActivation)) self.assertTrue(isinstance(quantized_model.model.layers[0].mlp.act, ScaledActivation))