FEAT / Bitsandbytes: Add dequantize API for bitsandbytes quantized models (#30806)

* add  method

* change method name

* more comments

* Apply suggestions from code review

Co-authored-by: Marc Sun <57196510+SunMarc@users.noreply.github.com>
Co-authored-by: amyeroberts <22614925+amyeroberts@users.noreply.github.com>

* fixup

* add docstrings and fix comment

* warn users on the de-quantized dtype

* Update src/transformers/quantizers/base.py

Co-authored-by: Marc Sun <57196510+SunMarc@users.noreply.github.com>

* Update src/transformers/integrations/bitsandbytes.py

Co-authored-by: amyeroberts <22614925+amyeroberts@users.noreply.github.com>

* final suggestion - use private method

---------

Co-authored-by: Marc Sun <57196510+SunMarc@users.noreply.github.com>
Co-authored-by: amyeroberts <22614925+amyeroberts@users.noreply.github.com>
This commit is contained in:
Younes Belkada
2024-05-15 17:17:09 +02:00
committed by GitHub
parent 58faa7b824
commit 3f435823e0
9 changed files with 244 additions and 1 deletions

View File

@@ -239,6 +239,23 @@ class Bnb4BitTest(Base4bitTest):
self.assertIn(self.tokenizer.decode(output_sequences[0], skip_special_tokens=True), self.EXPECTED_OUTPUTS)
def test_generate_quality_dequantize(self):
r"""
Test that loading the model and unquantize it produce correct results
"""
bnb_config = BitsAndBytesConfig(load_in_4bit=True)
model_4bit = AutoModelForCausalLM.from_pretrained(
self.model_name, quantization_config=bnb_config, device_map="auto"
)
model_4bit.dequantize()
encoded_input = self.tokenizer(self.input_text, return_tensors="pt")
output_sequences = model_4bit.generate(input_ids=encoded_input["input_ids"].to(0), max_new_tokens=10)
self.assertIn(self.tokenizer.decode(output_sequences[0], skip_special_tokens=True), self.EXPECTED_OUTPUTS)
def test_device_and_dtype_assignment(self):
r"""
Test whether trying to cast (or assigning a device to) a model after converting it in 8-bit will throw an error.