fix hqq due to recent modeling changes (#36771)

* fix-hqq * style * test
2025-03-18 12:20:27 +01:00
parent e959530b8f
commit 3017536ebf
2 changed files with 32 additions and 1 deletions
--- a/tests/quantization/hqq/test_hqq.py
+++ b/tests/quantization/hqq/test_hqq.py
@@ -145,6 +145,28 @@ class HQQTestMultiGPU(unittest.TestCase):
        check_forward(self, hqq_runner.model)


+@slow
+@require_torch_gpu
+@require_accelerate
+@require_hqq
+class HQQTestBias(unittest.TestCase):
+    def tearDown(self):
+        cleanup()  # helper defined outside this hunk; presumably frees GPU memory between tests (confirm)
+
+    def test_fp16_quantized_model(self):
+        """
+        Simple LLM test: fp16 compute with an 8-bit, group-size-64 HQQ config on facebook/opt-125m, a model with bias.
+        """
+        quant_config = HqqConfig(nbits=8, group_size=64)
+
+        hqq_runner = HQQLLMRunner(
+            model_id="facebook/opt-125m", quant_config=quant_config, compute_dtype=torch.float16, device=torch_device
+        )
+
+        check_hqqlayer(self, hqq_runner.model.model.decoder.layers[0].self_attn.v_proj)
+        check_forward(self, hqq_runner.model)
+
+
@slow
@require_torch_gpu
@require_accelerate