Update all references to canonical models (#29001)
* Script & Manual edition
* Update
This commit is contained in:
@@ -43,7 +43,7 @@ from transformers.testing_utils import (
|
||||
|
||||
|
||||
def get_some_linear_layer(model):
|
||||
if model.config.model_type == "gpt2":
|
||||
if model.config.model_type == "openai-community/gpt2":
|
||||
return model.transformer.h[0].mlp.c_fc
|
||||
elif model.config.model_type == "opt":
|
||||
try:
|
||||
@@ -283,7 +283,7 @@ class Bnb4BitTest(Base4bitTest):
|
||||
r"""
|
||||
Test whether it is possible to mix both `4bit` and `fp32` weights when using `keep_in_fp32_modules` correctly.
|
||||
"""
|
||||
model = AutoModelForSeq2SeqLM.from_pretrained("t5-small", load_in_4bit=True, device_map="auto")
|
||||
model = AutoModelForSeq2SeqLM.from_pretrained("google-t5/t5-small", load_in_4bit=True, device_map="auto")
|
||||
self.assertTrue(model.decoder.block[0].layer[2].DenseReluDense.wo.weight.dtype == torch.float32)
|
||||
|
||||
|
||||
@@ -295,7 +295,7 @@ class Bnb4BitTest(Base4bitTest):
|
||||
class Bnb4BitT5Test(unittest.TestCase):
|
||||
@classmethod
|
||||
def setUpClass(cls):
|
||||
cls.model_name = "t5-small"
|
||||
cls.model_name = "google-t5/t5-small"
|
||||
cls.dense_act_model_name = "google/flan-t5-small" # flan-t5 uses dense-act instead of dense-relu-dense
|
||||
cls.tokenizer = AutoTokenizer.from_pretrained(cls.model_name)
|
||||
cls.input_text = "Translate in German: Hello, my dog is cute"
|
||||
@@ -311,7 +311,7 @@ class Bnb4BitT5Test(unittest.TestCase):
|
||||
def test_inference_without_keep_in_fp32(self):
|
||||
r"""
|
||||
Test whether it is possible to mix both `4bit` and `fp32` weights when using `keep_in_fp32_modules` correctly.
|
||||
`flan-t5-small` uses `T5DenseGatedActDense` whereas `t5-small` uses `T5DenseReluDense`. We need to test
|
||||
`flan-t5-small` uses `T5DenseGatedActDense` whereas `google-t5/t5-small` uses `T5DenseReluDense`. We need to test
|
||||
both cases.
|
||||
"""
|
||||
from transformers import T5ForConditionalGeneration
|
||||
@@ -319,7 +319,7 @@ class Bnb4BitT5Test(unittest.TestCase):
|
||||
modules = T5ForConditionalGeneration._keep_in_fp32_modules
|
||||
T5ForConditionalGeneration._keep_in_fp32_modules = None
|
||||
|
||||
# test with `t5-small`
|
||||
# test with `google-t5/t5-small`
|
||||
model = T5ForConditionalGeneration.from_pretrained(self.model_name, load_in_4bit=True, device_map="auto")
|
||||
encoded_input = self.tokenizer(self.input_text, return_tensors="pt").to(0)
|
||||
_ = model.generate(**encoded_input)
|
||||
@@ -335,12 +335,12 @@ class Bnb4BitT5Test(unittest.TestCase):
|
||||
def test_inference_with_keep_in_fp32(self):
|
||||
r"""
|
||||
Test whether it is possible to mix both `4bit` and `fp32` weights when using `keep_in_fp32_modules` correctly.
|
||||
`flan-t5-small` uses `T5DenseGatedActDense` whereas `t5-small` uses `T5DenseReluDense`. We need to test
|
||||
`flan-t5-small` uses `T5DenseGatedActDense` whereas `google-t5/t5-small` uses `T5DenseReluDense`. We need to test
|
||||
both cases.
|
||||
"""
|
||||
from transformers import T5ForConditionalGeneration
|
||||
|
||||
# test with `t5-small`
|
||||
# test with `google-t5/t5-small`
|
||||
model = T5ForConditionalGeneration.from_pretrained(self.model_name, load_in_4bit=True, device_map="auto")
|
||||
|
||||
# there was a bug with decoders - this test checks that it is fixed
|
||||
@@ -362,7 +362,7 @@ class Classes4BitModelTest(Base4bitTest):
|
||||
super().setUp()
|
||||
# model_name
|
||||
self.model_name = "bigscience/bloom-560m"
|
||||
self.seq_to_seq_name = "t5-small"
|
||||
self.seq_to_seq_name = "google-t5/t5-small"
|
||||
|
||||
# Different types of model
|
||||
|
||||
@@ -509,7 +509,7 @@ class Bnb4BitTestTraining(Base4bitTest):
|
||||
|
||||
|
||||
class Bnb4BitGPT2Test(Bnb4BitTest):
|
||||
model_name = "gpt2-xl"
|
||||
model_name = "openai-community/gpt2-xl"
|
||||
EXPECTED_RELATIVE_DIFFERENCE = 3.3191854854152187
|
||||
|
||||
|
||||
@@ -647,7 +647,7 @@ class GPTSerializationTest(BaseSerializationTest):
|
||||
default BaseSerializationTest config tested with GPT family model
|
||||
"""
|
||||
|
||||
model_name = "gpt2-xl"
|
||||
model_name = "openai-community/gpt2-xl"
|
||||
|
||||
|
||||
@require_bitsandbytes
|
||||
|
||||
Reference in New Issue
Block a user