From c3700b0eee573276ac65bd28c85171a768cfc3b1 Mon Sep 17 00:00:00 2001 From: Fanli Lin Date: Tue, 25 Feb 2025 20:38:09 +0800 Subject: [PATCH] [tests] enable autoawq tests on XPU (#36327) add autoawq Co-authored-by: Yih-Dar <2521628+ydshieh@users.noreply.github.com> --- tests/quantization/autoawq/test_awq.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/tests/quantization/autoawq/test_awq.py b/tests/quantization/autoawq/test_awq.py index 780efe8aa4..5238c29a9c 100644 --- a/tests/quantization/autoawq/test_awq.py +++ b/tests/quantization/autoawq/test_awq.py @@ -19,9 +19,11 @@ import unittest from transformers import AutoConfig, AutoModelForCausalLM, AutoTokenizer, AwqConfig, OPTForCausalLM from transformers.testing_utils import ( + backend_empty_cache, require_accelerate, require_auto_awq, require_intel_extension_for_pytorch, + require_torch_accelerator, require_torch_gpu, require_torch_multi_gpu, slow, @@ -37,8 +39,9 @@ if is_accelerate_available(): from accelerate import init_empty_weights -@require_torch_gpu +@require_torch_accelerator class AwqConfigTest(unittest.TestCase): + @require_torch_gpu def test_wrong_backend(self): """ Simple test that checks if a user passes a wrong backend an error is raised @@ -90,7 +93,7 @@ class AwqConfigTest(unittest.TestCase): @slow -@require_torch_gpu +@require_torch_accelerator @require_auto_awq @require_accelerate class AwqTest(unittest.TestCase): @@ -107,7 +110,7 @@ class AwqTest(unittest.TestCase): "Hello my name is Katie and I am a 20 year old student from the UK. I am currently studying for a degree in English Literature and History at the University of York. I am a very out", "Hello my name is Katie and I am a 20 year old student from the UK. I am currently studying for a degree in English Literature and History at the University of York. I am a very creative", ] - device_map = "cuda" + device_map = torch_device # called only once for all test in this class @classmethod @@ -120,7 +123,7 @@ class AwqTest(unittest.TestCase): def tearDown(self): gc.collect() - torch.cuda.empty_cache() + backend_empty_cache(torch_device) gc.collect() def test_quantized_model_conversion(self): @@ -475,7 +478,7 @@ class AwqFusedTest(unittest.TestCase): @slow -@require_torch_gpu +@require_torch_accelerator @require_auto_awq @require_accelerate class AwqScaleTest(unittest.TestCase): @@ -488,7 +491,7 @@ class AwqScaleTest(unittest.TestCase): Simple test that checks if the scales have been replaced in the quantized model """ quantized_model = AutoModelForCausalLM.from_pretrained( - "TechxGenus/starcoder2-3b-AWQ", torch_dtype=torch.float16, device_map="cuda" + "TechxGenus/starcoder2-3b-AWQ", torch_dtype=torch.float16, device_map=torch_device ) self.assertTrue(isinstance(quantized_model.model.layers[0].mlp.act, ScaledActivation))