Update all references to canonical models (#29001)

* Script & Manual edition

* Update
This commit is contained in:
Lysandre Debut
2024-02-16 08:16:58 +01:00
committed by GitHub
parent 1e402b957d
commit f497f564bb
561 changed files with 2682 additions and 2687 deletions

View File

@@ -773,8 +773,8 @@ class FlaxT5ModelIntegrationTests(unittest.TestCase):
>>> score = t5_model.score(inputs=["Hello there"], targets=["Hi I am"], vocabulary=vocab)
"""
model = FlaxT5ForConditionalGeneration.from_pretrained("t5-small")
tokenizer = T5Tokenizer.from_pretrained("t5-small")
model = FlaxT5ForConditionalGeneration.from_pretrained("google-t5/t5-small")
tokenizer = T5Tokenizer.from_pretrained("google-t5/t5-small")
input_ids = tokenizer("Hello there", return_tensors="np").input_ids
labels = tokenizer("Hi I am", return_tensors="np").input_ids
@@ -849,11 +849,11 @@ class FlaxT5ModelIntegrationTests(unittest.TestCase):
@slow
def test_small_generation(self):
model = FlaxT5ForConditionalGeneration.from_pretrained("t5-small")
model = FlaxT5ForConditionalGeneration.from_pretrained("google-t5/t5-small")
model.config.max_length = 8
model.config.num_beams = 1
model.config.do_sample = False
tokenizer = T5Tokenizer.from_pretrained("t5-small")
tokenizer = T5Tokenizer.from_pretrained("google-t5/t5-small")
input_ids = tokenizer("summarize: Hello there", return_tensors="np").input_ids
@@ -864,11 +864,11 @@ class FlaxT5ModelIntegrationTests(unittest.TestCase):
@slow
def test_small_generation_bfloat16(self):
model = FlaxT5ForConditionalGeneration.from_pretrained("t5-small", dtype=jnp.bfloat16)
model = FlaxT5ForConditionalGeneration.from_pretrained("google-t5/t5-small", dtype=jnp.bfloat16)
model.config.max_length = 8
model.config.num_beams = 1
model.config.do_sample = False
tokenizer = T5Tokenizer.from_pretrained("t5-small")
tokenizer = T5Tokenizer.from_pretrained("google-t5/t5-small")
input_ids = tokenizer("summarize: Hello there", return_tensors="np").input_ids
@@ -879,8 +879,8 @@ class FlaxT5ModelIntegrationTests(unittest.TestCase):
@slow
def test_summarization(self):
model = FlaxT5ForConditionalGeneration.from_pretrained("t5-base")
tok = T5Tokenizer.from_pretrained("t5-base")
model = FlaxT5ForConditionalGeneration.from_pretrained("google-t5/t5-base")
tok = T5Tokenizer.from_pretrained("google-t5/t5-base")
FRANCE_ARTICLE = ( # @noqa
"Marseille, France (CNN)The French prosecutor leading an investigation into the crash of Germanwings"

View File

@@ -108,7 +108,7 @@ class T5ModelTester:
self.decoder_layers = decoder_layers
def get_large_model_config(self):
return T5Config.from_pretrained("t5-base")
return T5Config.from_pretrained("google-t5/t5-base")
def prepare_config_and_inputs(self):
input_ids = ids_tensor([self.batch_size, self.encoder_seq_length], self.vocab_size).clamp(2)
@@ -942,7 +942,7 @@ class T5EncoderOnlyModelTester:
self.is_training = is_training
def get_large_model_config(self):
return T5Config.from_pretrained("t5-base")
return T5Config.from_pretrained("google-t5/t5-base")
def prepare_config_and_inputs(self):
input_ids = ids_tensor([self.batch_size, self.encoder_seq_length], self.vocab_size)
@@ -1096,36 +1096,40 @@ class T5ModelFp16Tests(unittest.TestCase):
with unittest.mock.patch("builtins.__import__", side_effect=import_accelerate_mock):
accelerate_available = False
model = T5ForConditionalGeneration.from_pretrained("t5-small", torch_dtype=torch.float16)
model = T5ForConditionalGeneration.from_pretrained("google-t5/t5-small", torch_dtype=torch.float16)
self.assertTrue(model.decoder.block[0].layer[2].DenseReluDense.wo.weight.dtype == torch.float32)
self.assertTrue(model.decoder.block[0].layer[2].DenseReluDense.wi.weight.dtype == torch.float16)
# Load without using `accelerate` in bf16
model = T5ForConditionalGeneration.from_pretrained("t5-small", torch_dtype=torch.bfloat16)
model = T5ForConditionalGeneration.from_pretrained("google-t5/t5-small", torch_dtype=torch.bfloat16)
self.assertTrue(model.decoder.block[0].layer[2].DenseReluDense.wo.weight.dtype == torch.bfloat16)
self.assertTrue(model.decoder.block[0].layer[2].DenseReluDense.wi.weight.dtype == torch.bfloat16)
# Load using `accelerate` in bf16
model = T5ForConditionalGeneration.from_pretrained("t5-small", torch_dtype=torch.bfloat16, device_map="auto")
model = T5ForConditionalGeneration.from_pretrained(
"google-t5/t5-small", torch_dtype=torch.bfloat16, device_map="auto"
)
self.assertTrue(model.decoder.block[0].layer[2].DenseReluDense.wo.weight.dtype == torch.bfloat16)
self.assertTrue(model.decoder.block[0].layer[2].DenseReluDense.wi.weight.dtype == torch.bfloat16)
# Load using `accelerate` in bf16
model = T5ForConditionalGeneration.from_pretrained(
"t5-small", torch_dtype=torch.bfloat16, low_cpu_mem_usage=True
"google-t5/t5-small", torch_dtype=torch.bfloat16, low_cpu_mem_usage=True
)
self.assertTrue(model.decoder.block[0].layer[2].DenseReluDense.wo.weight.dtype == torch.bfloat16)
self.assertTrue(model.decoder.block[0].layer[2].DenseReluDense.wi.weight.dtype == torch.bfloat16)
# Load without using `accelerate`
model = T5ForConditionalGeneration.from_pretrained(
"t5-small", torch_dtype=torch.float16, low_cpu_mem_usage=True
"google-t5/t5-small", torch_dtype=torch.float16, low_cpu_mem_usage=True
)
self.assertTrue(model.decoder.block[0].layer[2].DenseReluDense.wo.weight.dtype == torch.float32)
self.assertTrue(model.decoder.block[0].layer[2].DenseReluDense.wi.weight.dtype == torch.float16)
# Load using `accelerate`
model = T5ForConditionalGeneration.from_pretrained("t5-small", torch_dtype=torch.float16, device_map="auto")
model = T5ForConditionalGeneration.from_pretrained(
"google-t5/t5-small", torch_dtype=torch.float16, device_map="auto"
)
self.assertTrue(model.decoder.block[0].layer[2].DenseReluDense.wo.weight.dtype == torch.float32)
self.assertTrue(model.decoder.block[0].layer[2].DenseReluDense.wi.weight.dtype == torch.float16)
@@ -1136,11 +1140,11 @@ class T5ModelFp16Tests(unittest.TestCase):
class T5ModelIntegrationTests(unittest.TestCase):
@cached_property
def model(self):
return T5ForConditionalGeneration.from_pretrained("t5-base").to(torch_device)
return T5ForConditionalGeneration.from_pretrained("google-t5/t5-base").to(torch_device)
@cached_property
def tokenizer(self):
return T5Tokenizer.from_pretrained("t5-base")
return T5Tokenizer.from_pretrained("google-t5/t5-base")
@slow
def test_torch_quant(self):
@@ -1157,11 +1161,11 @@ class T5ModelIntegrationTests(unittest.TestCase):
@slow
def test_small_generation(self):
model = T5ForConditionalGeneration.from_pretrained("t5-small").to(torch_device)
model = T5ForConditionalGeneration.from_pretrained("google-t5/t5-small").to(torch_device)
model.config.max_length = 8
model.config.num_beams = 1
model.config.do_sample = False
tokenizer = T5Tokenizer.from_pretrained("t5-small")
tokenizer = T5Tokenizer.from_pretrained("google-t5/t5-small")
input_ids = tokenizer("summarize: Hello there", return_tensors="pt").input_ids.to(torch_device)
@@ -1184,8 +1188,8 @@ class T5ModelIntegrationTests(unittest.TestCase):
>>> score = t5_model.score(inputs=["Hello there"], targets=["Hi I am"], vocabulary=vocab)
"""
model = T5ForConditionalGeneration.from_pretrained("t5-small").to(torch_device)
tokenizer = T5Tokenizer.from_pretrained("t5-small")
model = T5ForConditionalGeneration.from_pretrained("google-t5/t5-small").to(torch_device)
tokenizer = T5Tokenizer.from_pretrained("google-t5/t5-small")
input_ids = tokenizer("Hello there", return_tensors="pt").input_ids
labels = tokenizer("Hi I am", return_tensors="pt").input_ids
@@ -1501,7 +1505,7 @@ class T5ModelIntegrationTests(unittest.TestCase):
@slow
def test_translation_en_to_fr(self):
model = self.model # t5-base
model = self.model # google-t5/t5-base
tok = self.tokenizer
use_task_specific_params(model, "translation_en_to_fr")

View File

@@ -302,7 +302,7 @@ class TFT5ModelTest(TFModelTesterMixin, PipelineTesterMixin, unittest.TestCase):
@slow
def test_model_from_pretrained(self):
model = TFT5Model.from_pretrained("t5-small")
model = TFT5Model.from_pretrained("google-t5/t5-small")
self.assertIsNotNone(model)
def test_generate_with_headmasking(self):
@@ -448,8 +448,8 @@ class TFT5EncoderOnlyModelTest(TFModelTesterMixin, unittest.TestCase):
class TFT5GenerationIntegrationTests(unittest.TestCase):
@slow
def test_greedy_xla_generate_simple(self):
model = TFT5ForConditionalGeneration.from_pretrained("t5-small")
tokenizer = T5Tokenizer.from_pretrained("t5-small")
model = TFT5ForConditionalGeneration.from_pretrained("google-t5/t5-small")
tokenizer = T5Tokenizer.from_pretrained("google-t5/t5-small")
# two examples with different lengths to confirm that attention masks are operational in XLA
sentences = [
@@ -476,8 +476,8 @@ class TFT5GenerationIntegrationTests(unittest.TestCase):
@slow
def test_greedy_generate(self):
model = TFT5ForConditionalGeneration.from_pretrained("t5-small")
tokenizer = T5Tokenizer.from_pretrained("t5-small")
model = TFT5ForConditionalGeneration.from_pretrained("google-t5/t5-small")
tokenizer = T5Tokenizer.from_pretrained("google-t5/t5-small")
sentences = ["Yesterday, my name was", "Today is a beautiful day and"]
input_ids = tokenizer(sentences, return_tensors="tf", padding=True).input_ids
@@ -505,8 +505,8 @@ class TFT5GenerationIntegrationTests(unittest.TestCase):
# forces the generation to happen on CPU, to avoid GPU-related quirks
with tf.device(":/CPU:0"):
model = TFT5ForConditionalGeneration.from_pretrained("t5-small")
tokenizer = T5Tokenizer.from_pretrained("t5-small")
model = TFT5ForConditionalGeneration.from_pretrained("google-t5/t5-small")
tokenizer = T5Tokenizer.from_pretrained("google-t5/t5-small")
sentence = "Translate English to German: I have two bananas"
input_ids = tokenizer(sentence, return_tensors="tf", padding=True).input_ids
@@ -526,8 +526,8 @@ class TFT5GenerationIntegrationTests(unittest.TestCase):
@slow
def test_sample_generate(self):
model = TFT5ForConditionalGeneration.from_pretrained("t5-small")
tokenizer = T5Tokenizer.from_pretrained("t5-small")
model = TFT5ForConditionalGeneration.from_pretrained("google-t5/t5-small")
tokenizer = T5Tokenizer.from_pretrained("google-t5/t5-small")
sentences = ["I really love my", "Translate English to German: the transformers are truly amazing"]
input_ids = tokenizer(sentences, return_tensors="tf", padding=True).input_ids
@@ -557,8 +557,8 @@ class TFT5GenerationIntegrationTests(unittest.TestCase):
@unittest.skip("Skip for now as TF 2.13 breaks it on GPU")
@slow
def test_beam_search_xla_generate_simple(self):
model = TFT5ForConditionalGeneration.from_pretrained("t5-small")
tokenizer = T5Tokenizer.from_pretrained("t5-small")
model = TFT5ForConditionalGeneration.from_pretrained("google-t5/t5-small")
tokenizer = T5Tokenizer.from_pretrained("google-t5/t5-small")
# tests XLA with task specific arguments
task_specific_config = getattr(model.config, "task_specific_params", {})
@@ -590,8 +590,8 @@ class TFT5GenerationIntegrationTests(unittest.TestCase):
@slow
def test_beam_search_generate(self):
model = TFT5ForConditionalGeneration.from_pretrained("t5-small")
tokenizer = T5Tokenizer.from_pretrained("t5-small")
model = TFT5ForConditionalGeneration.from_pretrained("google-t5/t5-small")
tokenizer = T5Tokenizer.from_pretrained("google-t5/t5-small")
sentences = ["I really love my", "Translate English to German: the transformers are truly amazing"]
input_ids = tokenizer(sentences, return_tensors="tf", padding=True).input_ids
@@ -622,7 +622,7 @@ class TFT5GenerationIntegrationTests(unittest.TestCase):
class TFT5ModelIntegrationTests(unittest.TestCase):
@cached_property
def model(self):
return TFT5ForConditionalGeneration.from_pretrained("t5-base")
return TFT5ForConditionalGeneration.from_pretrained("google-t5/t5-base")
@slow
def test_small_integration_test(self):
@@ -638,8 +638,8 @@ class TFT5ModelIntegrationTests(unittest.TestCase):
>>> score = t5_model.score(inputs=["Hello there"], targets=["Hi I am"], vocabulary=vocab)
"""
model = TFT5ForConditionalGeneration.from_pretrained("t5-small")
tokenizer = T5Tokenizer.from_pretrained("t5-small")
model = TFT5ForConditionalGeneration.from_pretrained("google-t5/t5-small")
tokenizer = T5Tokenizer.from_pretrained("google-t5/t5-small")
input_ids = tokenizer("Hello there", return_tensors="tf").input_ids
labels = tokenizer("Hi I am", return_tensors="tf").input_ids
@@ -703,7 +703,7 @@ class TFT5ModelIntegrationTests(unittest.TestCase):
@slow
def test_summarization(self):
model = self.model
tok = T5Tokenizer.from_pretrained("t5-base")
tok = T5Tokenizer.from_pretrained("google-t5/t5-base")
FRANCE_ARTICLE = ( # @noqa
"Marseille, France (CNN)The French prosecutor leading an investigation into the crash of Germanwings"
@@ -948,7 +948,7 @@ class TFT5ModelIntegrationTests(unittest.TestCase):
@slow
def test_translation_en_to_de(self):
tok = T5Tokenizer.from_pretrained("t5-base")
tok = T5Tokenizer.from_pretrained("google-t5/t5-base")
model = self.model
task_specific_config = getattr(model.config, "task_specific_params", {})
@@ -978,7 +978,7 @@ class TFT5ModelIntegrationTests(unittest.TestCase):
@slow
def test_translation_en_to_fr(self):
model = self.model
tok = T5Tokenizer.from_pretrained("t5-base")
tok = T5Tokenizer.from_pretrained("google-t5/t5-base")
task_specific_config = getattr(model.config, "task_specific_params", {})
translation_config = task_specific_config.get("translation_en_to_fr", {})
@@ -1015,7 +1015,7 @@ class TFT5ModelIntegrationTests(unittest.TestCase):
@slow
def test_translation_en_to_ro(self):
model = self.model
tok = T5Tokenizer.from_pretrained("t5-base")
tok = T5Tokenizer.from_pretrained("google-t5/t5-base")
task_specific_config = getattr(model.config, "task_specific_params", {})
translation_config = task_specific_config.get("translation_en_to_ro", {})

View File

@@ -138,11 +138,11 @@ class T5TokenizationTest(TokenizerTesterMixin, unittest.TestCase):
@cached_property
def t5_base_tokenizer(self):
return T5Tokenizer.from_pretrained("t5-base")
return T5Tokenizer.from_pretrained("google-t5/t5-base")
@cached_property
def t5_base_tokenizer_fast(self):
return T5TokenizerFast.from_pretrained("t5-base")
return T5TokenizerFast.from_pretrained("google-t5/t5-base")
def get_tokenizer(self, **kwargs) -> T5Tokenizer:
return self.tokenizer_class.from_pretrained(self.tmpdirname, **kwargs)
@@ -373,7 +373,7 @@ class T5TokenizationTest(TokenizerTesterMixin, unittest.TestCase):
self.tokenizer_integration_test_util(
expected_encoding=expected_encoding,
model_name="t5-base",
model_name="google-t5/t5-base",
revision="5a7ff2d8f5117c194c7e32ec1ccbf04642cca99b",
)
@@ -400,7 +400,7 @@ class T5TokenizationTest(TokenizerTesterMixin, unittest.TestCase):
self.assertListEqual(sorted(tokenizer.get_sentinel_token_ids()), sorted(range(1000, 1010)))
def test_some_edge_cases(self):
tokenizer = T5Tokenizer.from_pretrained("t5-base", legacy=False)
tokenizer = T5Tokenizer.from_pretrained("google-t5/t5-base", legacy=False)
sp_tokens = tokenizer.sp_model.encode("</s>>", out_type=str)
self.assertEqual(sp_tokens, ["<", "/", "s", ">", ">"])
@@ -426,8 +426,8 @@ class T5TokenizationTest(TokenizerTesterMixin, unittest.TestCase):
def test_fast_slow_edge_cases(self):
# We are testing spaces before and spaces after special tokens + space transformations
slow_tokenizer = T5Tokenizer.from_pretrained("t5-base", legacy=False)
fast_tokenizer = T5TokenizerFast.from_pretrained("t5-base", legacy=False, from_slow=True)
slow_tokenizer = T5Tokenizer.from_pretrained("google-t5/t5-base", legacy=False)
fast_tokenizer = T5TokenizerFast.from_pretrained("google-t5/t5-base", legacy=False, from_slow=True)
slow_tokenizer.add_tokens(AddedToken("<new_token_test_>", rstrip=False, lstrip=False, normalized=False))
fast_tokenizer.add_tokens(AddedToken("<new_token_test_>", rstrip=False, lstrip=False, normalized=False))
@@ -445,7 +445,7 @@ class T5TokenizationTest(TokenizerTesterMixin, unittest.TestCase):
with self.subTest(f"fast {edge_case} normalized = False"):
self.assertEqual(fast_tokenizer.tokenize(hard_case), EXPECTED_SLOW)
fast_tokenizer = T5TokenizerFast.from_pretrained("t5-base", legacy=False, from_slow=True)
fast_tokenizer = T5TokenizerFast.from_pretrained("google-t5/t5-base", legacy=False, from_slow=True)
fast_tokenizer.add_tokens(AddedToken("<new_token_test_>", rstrip=False, lstrip=False, normalized=True))
# `normalized=True` is the default normalization scheme when adding a token. Normalize -> don't strip the space.
@@ -604,7 +604,7 @@ class CommonSpmIntegrationTests(unittest.TestCase):
)
# Test with T5
hf_tokenizer = T5Tokenizer.from_pretrained("t5-small")
hf_tokenizer = T5Tokenizer.from_pretrained("google-t5/t5-small")
vocab_path = "gs://t5-data/vocabs/cc_all.32000/sentencepiece.model"
t5x_tokenizer = SentencePieceVocabulary(vocab_path, extra_ids=300)
for text in input_texts: