FIX [quantization / ESM] Fix ESM 8bit / 4bit with bitsandbytes (#29329)

* fix ESM 8bit

* Apply suggestions from code review

Co-authored-by: Arthur <48595927+ArthurZucker@users.noreply.github.com>

* fixup

---------

Co-authored-by: Arthur <48595927+ArthurZucker@users.noreply.github.com>
This commit is contained in:
Younes Belkada
2024-03-01 03:01:53 +01:00
committed by GitHub
parent 2858d6c634
commit 50db7ca4e8
4 changed files with 20 additions and 6 deletions

View File

@@ -18,7 +18,7 @@
import unittest
from transformers import EsmConfig, is_torch_available
from transformers.testing_utils import TestCasePlus, require_torch, slow, torch_device
from transformers.testing_utils import TestCasePlus, require_bitsandbytes, require_torch, slow, torch_device
from ...test_configuration_common import ConfigTester
from ...test_modeling_common import ModelTesterMixin, ids_tensor, random_attention_mask
@@ -303,9 +303,9 @@ class EsmModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase):
pass
@slow
@require_torch
class EsmModelIntegrationTest(TestCasePlus):
@slow
def test_inference_masked_lm(self):
with torch.no_grad():
model = EsmForMaskedLM.from_pretrained("facebook/esm2_t6_8M_UR50D")
@@ -323,7 +323,6 @@ class EsmModelIntegrationTest(TestCasePlus):
)
self.assertTrue(torch.allclose(output[:, :3, :3], expected_slice, atol=1e-4))
@slow
def test_inference_no_head(self):
with torch.no_grad():
model = EsmModel.from_pretrained("facebook/esm2_t6_8M_UR50D")
@@ -336,3 +335,18 @@ class EsmModelIntegrationTest(TestCasePlus):
[[[0.1444, 0.5413, 0.3248], [0.3034, 0.0053, 0.3108], [0.3228, -0.2499, 0.3415]]]
)
self.assertTrue(torch.allclose(output[:, :3, :3], expected_slice, atol=1e-4))
@require_bitsandbytes
def test_inference_bitsandbytes(self):
    """Smoke-test ESM masked-LM inference with bitsandbytes quantization.

    Loads the "facebook/esm2_t36_3B_UR50D" checkpoint once with
    ``load_in_8bit=True`` and once with ``load_in_4bit=True`` and runs a
    single forward pass each time. No output values are checked; the test
    passes as long as loading and inference do not raise.
    """
    input_ids = torch.tensor([[0, 6, 4, 13, 5, 4, 16, 12, 11, 7, 2]])

    for quantization_kwargs in ({"load_in_8bit": True}, {"load_in_4bit": True}):
        model = EsmForMaskedLM.from_pretrained("facebook/esm2_t36_3B_UR50D", **quantization_kwargs)

        # Just test if inference works. Both modes run under no_grad so that
        # neither forward pass records an autograd graph.
        with torch.no_grad():
            _ = model(input_ids)[0]