fix hqq due to recent modeling changes (#36771)

* fix-hqq

* style

* test
This commit is contained in:
Marc Sun
2025-03-18 12:20:27 +01:00
committed by GitHub
parent e959530b8f
commit 3017536ebf
2 changed files with 32 additions and 1 deletions

View File

@@ -145,6 +145,28 @@ class HQQTestMultiGPU(unittest.TestCase):
check_forward(self, hqq_runner.model)
@slow
@require_torch_gpu
@require_accelerate
@require_hqq
class HQQTestBias(unittest.TestCase):
    """HQQ quantization test on a model described by the test as having bias."""

    def tearDown(self):
        """Invoke the shared ``cleanup`` helper once each test has finished."""
        cleanup()

    def test_fp16_quantized_model(self):
        """
        Simple LLM model test: fp16 compute dtype, model with bias
        """
        # 8-bit quantization with a group size of 64.
        hqq_config = HqqConfig(nbits=8, group_size=64)

        runner = HQQLLMRunner(
            model_id="facebook/opt-125m",
            quant_config=hqq_config,
            compute_dtype=torch.float16,
            device=torch_device,
        )
        quantized_model = runner.model

        # Inspect the value projection of the first decoder layer, then run
        # the forward-pass check on the whole model.
        first_v_proj = quantized_model.model.decoder.layers[0].self_attn.v_proj
        check_hqqlayer(self, first_v_proj)
        check_forward(self, quantized_model)
@slow
@require_torch_gpu
@require_accelerate