From 34f4080ff59b1668d919a1ba9f8bc4a3a2a3f478 Mon Sep 17 00:00:00 2001 From: Matthew Douglas <38992547+matthewdouglas@users.noreply.github.com> Date: Mon, 9 Dec 2024 13:55:16 -0500 Subject: [PATCH] [CI] Fix bnb quantization tests with accelerate>=1.2.0 (#35172) --- tests/quantization/bnb/test_4bit.py | 8 ++++---- tests/quantization/bnb/test_mixed_int8.py | 12 ++++++------ 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/tests/quantization/bnb/test_4bit.py b/tests/quantization/bnb/test_4bit.py index 3eae429abb..9512d0aa70 100644 --- a/tests/quantization/bnb/test_4bit.py +++ b/tests/quantization/bnb/test_4bit.py @@ -385,14 +385,14 @@ class Bnb4BitT5Test(unittest.TestCase): # test with `google-t5/t5-small` model = T5ForConditionalGeneration.from_pretrained(self.model_name, load_in_4bit=True, device_map="auto") - encoded_input = self.tokenizer(self.input_text, return_tensors="pt").to(torch_device) + encoded_input = self.tokenizer(self.input_text, return_tensors="pt").to(model.device) _ = model.generate(**encoded_input) # test with `flan-t5-small` model = T5ForConditionalGeneration.from_pretrained( self.dense_act_model_name, load_in_4bit=True, device_map="auto" ) - encoded_input = self.tokenizer(self.input_text, return_tensors="pt").to(torch_device) + encoded_input = self.tokenizer(self.input_text, return_tensors="pt").to(model.device) _ = model.generate(**encoded_input) T5ForConditionalGeneration._keep_in_fp32_modules = modules @@ -410,14 +410,14 @@ class Bnb4BitT5Test(unittest.TestCase): # there was a bug with decoders - this test checks that it is fixed self.assertTrue(isinstance(model.decoder.block[0].layer[0].SelfAttention.q, bnb.nn.Linear4bit)) - encoded_input = self.tokenizer(self.input_text, return_tensors="pt").to(torch_device) + encoded_input = self.tokenizer(self.input_text, return_tensors="pt").to(model.device) _ = model.generate(**encoded_input) # test with `flan-t5-small` model = T5ForConditionalGeneration.from_pretrained( self.dense_act_model_name, load_in_4bit=True, device_map="auto" ) - encoded_input = self.tokenizer(self.input_text, return_tensors="pt").to(torch_device) + encoded_input = self.tokenizer(self.input_text, return_tensors="pt").to(model.device) _ = model.generate(**encoded_input) diff --git a/tests/quantization/bnb/test_mixed_int8.py b/tests/quantization/bnb/test_mixed_int8.py index 567aa95627..158fdfaf71 100644 --- a/tests/quantization/bnb/test_mixed_int8.py +++ b/tests/quantization/bnb/test_mixed_int8.py @@ -514,14 +514,14 @@ class MixedInt8T5Test(unittest.TestCase): # test with `google-t5/t5-small` model = T5ForConditionalGeneration.from_pretrained(self.model_name, load_in_8bit=True, device_map="auto") - encoded_input = self.tokenizer(self.input_text, return_tensors="pt").to(torch_device) + encoded_input = self.tokenizer(self.input_text, return_tensors="pt").to(model.device) _ = model.generate(**encoded_input) # test with `flan-t5-small` model = T5ForConditionalGeneration.from_pretrained( self.dense_act_model_name, load_in_8bit=True, device_map="auto" ) - encoded_input = self.tokenizer(self.input_text, return_tensors="pt").to(torch_device) + encoded_input = self.tokenizer(self.input_text, return_tensors="pt").to(model.device) _ = model.generate(**encoded_input) T5ForConditionalGeneration._keep_in_fp32_modules = modules @@ -540,14 +540,14 @@ class MixedInt8T5Test(unittest.TestCase): # there was a bug with decoders - this test checks that it is fixed self.assertTrue(isinstance(model.decoder.block[0].layer[0].SelfAttention.q, bnb.nn.Linear8bitLt)) - encoded_input = self.tokenizer(self.input_text, return_tensors="pt").to(torch_device) + encoded_input = self.tokenizer(self.input_text, return_tensors="pt").to(model.device) _ = model.generate(**encoded_input) # test with `flan-t5-small` model = T5ForConditionalGeneration.from_pretrained( self.dense_act_model_name, load_in_8bit=True, device_map="auto" ) - encoded_input = self.tokenizer(self.input_text, return_tensors="pt").to(torch_device) + encoded_input = self.tokenizer(self.input_text, return_tensors="pt").to(model.device) _ = model.generate(**encoded_input) def test_inference_with_keep_in_fp32_serialized(self): @@ -571,14 +571,14 @@ class MixedInt8T5Test(unittest.TestCase): # there was a bug with decoders - this test checks that it is fixed self.assertTrue(isinstance(model.decoder.block[0].layer[0].SelfAttention.q, bnb.nn.Linear8bitLt)) - encoded_input = self.tokenizer(self.input_text, return_tensors="pt").to(torch_device) + encoded_input = self.tokenizer(self.input_text, return_tensors="pt").to(model.device) _ = model.generate(**encoded_input) # test with `flan-t5-small` model = T5ForConditionalGeneration.from_pretrained( self.dense_act_model_name, load_in_8bit=True, device_map="auto" ) - encoded_input = self.tokenizer(self.input_text, return_tensors="pt").to(torch_device) + encoded_input = self.tokenizer(self.input_text, return_tensors="pt").to(model.device) _ = model.generate(**encoded_input)