Update all references to canonical models (#29001)

* Script & Manual edition

* Update
This commit is contained in:
Lysandre Debut
2024-02-16 08:16:58 +01:00
committed by GitHub
parent 1e402b957d
commit f497f564bb
561 changed files with 2682 additions and 2687 deletions

View File

@@ -108,7 +108,7 @@ class T5ModelTester:
self.decoder_layers = decoder_layers
def get_large_model_config(self):
return T5Config.from_pretrained("t5-base")
return T5Config.from_pretrained("google-t5/t5-base")
def prepare_config_and_inputs(self):
input_ids = ids_tensor([self.batch_size, self.encoder_seq_length], self.vocab_size).clamp(2)
@@ -942,7 +942,7 @@ class T5EncoderOnlyModelTester:
self.is_training = is_training
def get_large_model_config(self):
return T5Config.from_pretrained("t5-base")
return T5Config.from_pretrained("google-t5/t5-base")
def prepare_config_and_inputs(self):
input_ids = ids_tensor([self.batch_size, self.encoder_seq_length], self.vocab_size)
@@ -1096,36 +1096,40 @@ class T5ModelFp16Tests(unittest.TestCase):
with unittest.mock.patch("builtins.__import__", side_effect=import_accelerate_mock):
accelerate_available = False
model = T5ForConditionalGeneration.from_pretrained("t5-small", torch_dtype=torch.float16)
model = T5ForConditionalGeneration.from_pretrained("google-t5/t5-small", torch_dtype=torch.float16)
self.assertTrue(model.decoder.block[0].layer[2].DenseReluDense.wo.weight.dtype == torch.float32)
self.assertTrue(model.decoder.block[0].layer[2].DenseReluDense.wi.weight.dtype == torch.float16)
# Load without in bf16
model = T5ForConditionalGeneration.from_pretrained("t5-small", torch_dtype=torch.bfloat16)
model = T5ForConditionalGeneration.from_pretrained("google-t5/t5-small", torch_dtype=torch.bfloat16)
self.assertTrue(model.decoder.block[0].layer[2].DenseReluDense.wo.weight.dtype == torch.bfloat16)
self.assertTrue(model.decoder.block[0].layer[2].DenseReluDense.wi.weight.dtype == torch.bfloat16)
# Load using `accelerate` in bf16
model = T5ForConditionalGeneration.from_pretrained("t5-small", torch_dtype=torch.bfloat16, device_map="auto")
model = T5ForConditionalGeneration.from_pretrained(
"google-t5/t5-small", torch_dtype=torch.bfloat16, device_map="auto"
)
self.assertTrue(model.decoder.block[0].layer[2].DenseReluDense.wo.weight.dtype == torch.bfloat16)
self.assertTrue(model.decoder.block[0].layer[2].DenseReluDense.wi.weight.dtype == torch.bfloat16)
# Load using `accelerate` in bf16
model = T5ForConditionalGeneration.from_pretrained(
"t5-small", torch_dtype=torch.bfloat16, low_cpu_mem_usage=True
"google-t5/t5-small", torch_dtype=torch.bfloat16, low_cpu_mem_usage=True
)
self.assertTrue(model.decoder.block[0].layer[2].DenseReluDense.wo.weight.dtype == torch.bfloat16)
self.assertTrue(model.decoder.block[0].layer[2].DenseReluDense.wi.weight.dtype == torch.bfloat16)
# Load without using `accelerate`
model = T5ForConditionalGeneration.from_pretrained(
"t5-small", torch_dtype=torch.float16, low_cpu_mem_usage=True
"google-t5/t5-small", torch_dtype=torch.float16, low_cpu_mem_usage=True
)
self.assertTrue(model.decoder.block[0].layer[2].DenseReluDense.wo.weight.dtype == torch.float32)
self.assertTrue(model.decoder.block[0].layer[2].DenseReluDense.wi.weight.dtype == torch.float16)
# Load using `accelerate`
model = T5ForConditionalGeneration.from_pretrained("t5-small", torch_dtype=torch.float16, device_map="auto")
model = T5ForConditionalGeneration.from_pretrained(
"google-t5/t5-small", torch_dtype=torch.float16, device_map="auto"
)
self.assertTrue(model.decoder.block[0].layer[2].DenseReluDense.wo.weight.dtype == torch.float32)
self.assertTrue(model.decoder.block[0].layer[2].DenseReluDense.wi.weight.dtype == torch.float16)
@@ -1136,11 +1140,11 @@ class T5ModelFp16Tests(unittest.TestCase):
class T5ModelIntegrationTests(unittest.TestCase):
@cached_property
def model(self):
return T5ForConditionalGeneration.from_pretrained("t5-base").to(torch_device)
return T5ForConditionalGeneration.from_pretrained("google-t5/t5-base").to(torch_device)
@cached_property
def tokenizer(self):
return T5Tokenizer.from_pretrained("t5-base")
return T5Tokenizer.from_pretrained("google-t5/t5-base")
@slow
def test_torch_quant(self):
@@ -1157,11 +1161,11 @@ class T5ModelIntegrationTests(unittest.TestCase):
@slow
def test_small_generation(self):
model = T5ForConditionalGeneration.from_pretrained("t5-small").to(torch_device)
model = T5ForConditionalGeneration.from_pretrained("google-t5/t5-small").to(torch_device)
model.config.max_length = 8
model.config.num_beams = 1
model.config.do_sample = False
tokenizer = T5Tokenizer.from_pretrained("t5-small")
tokenizer = T5Tokenizer.from_pretrained("google-t5/t5-small")
input_ids = tokenizer("summarize: Hello there", return_tensors="pt").input_ids.to(torch_device)
@@ -1184,8 +1188,8 @@ class T5ModelIntegrationTests(unittest.TestCase):
>>> score = t5_model.score(inputs=["Hello there"], targets=["Hi I am"], vocabulary=vocab)
"""
model = T5ForConditionalGeneration.from_pretrained("t5-small").to(torch_device)
tokenizer = T5Tokenizer.from_pretrained("t5-small")
model = T5ForConditionalGeneration.from_pretrained("google-t5/t5-small").to(torch_device)
tokenizer = T5Tokenizer.from_pretrained("google-t5/t5-small")
input_ids = tokenizer("Hello there", return_tensors="pt").input_ids
labels = tokenizer("Hi I am", return_tensors="pt").input_ids
@@ -1501,7 +1505,7 @@ class T5ModelIntegrationTests(unittest.TestCase):
@slow
def test_translation_en_to_fr(self):
model = self.model # t5-base
model = self.model # google-t5/t5-base
tok = self.tokenizer
use_task_specific_params(model, "translation_en_to_fr")