[AutoGPTQ] Add correct installation of GPTQ library + fix slow tests (#25713)
* add correct installation of GPTQ library * update tests values
This commit is contained in:
@@ -87,7 +87,8 @@ class GPTQTest(unittest.TestCase):
|
||||
EXPECTED_OUTPUTS = set()
|
||||
EXPECTED_OUTPUTS.add("Hello my name is John and I am a professional photographer. I")
|
||||
EXPECTED_OUTPUTS.add("Hello my name is John and I am a very good looking man.")
|
||||
EXPECTED_OUTPUTS.add("Hello my name is Alyson and I am a professional photographer")
|
||||
EXPECTED_OUTPUTS.add("Hello my name is Alyson, I am a student in the")
|
||||
EXPECTED_OUTPUTS.add("Hello my name is Alyson and I am a very sweet,")
|
||||
|
||||
# this seems a little small considering that we are doing 4bit quant but we have a small model and we don't quantize the embeddings
|
||||
EXPECTED_RELATIVE_DIFFERENCE = 1.664253062
|
||||
@@ -215,7 +216,7 @@ class GPTQTest(unittest.TestCase):
|
||||
self.assertEqual(self.quantized_model.config.quantization_config.disable_exllama, True)
|
||||
# we need to load it directly onto the GPU. Otherwise, we won't be able to initialize the exllama kernel
|
||||
quantized_model_from_saved = AutoModelForCausalLM.from_pretrained(
|
||||
tmpdirname, quantization_config=GPTQConfig(disable_exllama=False, bits=6), device_map={"": 0}
|
||||
tmpdirname, quantization_config=GPTQConfig(disable_exllama=False, bits=4), device_map={"": 0}
|
||||
)
|
||||
self.assertEqual(quantized_model_from_saved.config.quantization_config.disable_exllama, False)
|
||||
self.assertEqual(quantized_model_from_saved.config.quantization_config.bits, self.bits)
|
||||
|
||||
Reference in New Issue
Block a user