Update all references to canonical models (#29001)
* Script & Manual edition * Update
This commit is contained in:
@@ -773,8 +773,8 @@ class FlaxT5ModelIntegrationTests(unittest.TestCase):
|
||||
>>> score = t5_model.score(inputs=["Hello there"], targets=["Hi I am"], vocabulary=vocab)
|
||||
"""
|
||||
|
||||
model = FlaxT5ForConditionalGeneration.from_pretrained("t5-small")
|
||||
tokenizer = T5Tokenizer.from_pretrained("t5-small")
|
||||
model = FlaxT5ForConditionalGeneration.from_pretrained("google-t5/t5-small")
|
||||
tokenizer = T5Tokenizer.from_pretrained("google-t5/t5-small")
|
||||
|
||||
input_ids = tokenizer("Hello there", return_tensors="np").input_ids
|
||||
labels = tokenizer("Hi I am", return_tensors="np").input_ids
|
||||
@@ -849,11 +849,11 @@ class FlaxT5ModelIntegrationTests(unittest.TestCase):
|
||||
|
||||
@slow
|
||||
def test_small_generation(self):
|
||||
model = FlaxT5ForConditionalGeneration.from_pretrained("t5-small")
|
||||
model = FlaxT5ForConditionalGeneration.from_pretrained("google-t5/t5-small")
|
||||
model.config.max_length = 8
|
||||
model.config.num_beams = 1
|
||||
model.config.do_sample = False
|
||||
tokenizer = T5Tokenizer.from_pretrained("t5-small")
|
||||
tokenizer = T5Tokenizer.from_pretrained("google-t5/t5-small")
|
||||
|
||||
input_ids = tokenizer("summarize: Hello there", return_tensors="np").input_ids
|
||||
|
||||
@@ -864,11 +864,11 @@ class FlaxT5ModelIntegrationTests(unittest.TestCase):
|
||||
|
||||
@slow
|
||||
def test_small_generation_bfloat16(self):
|
||||
model = FlaxT5ForConditionalGeneration.from_pretrained("t5-small", dtype=jnp.bfloat16)
|
||||
model = FlaxT5ForConditionalGeneration.from_pretrained("google-t5/t5-small", dtype=jnp.bfloat16)
|
||||
model.config.max_length = 8
|
||||
model.config.num_beams = 1
|
||||
model.config.do_sample = False
|
||||
tokenizer = T5Tokenizer.from_pretrained("t5-small")
|
||||
tokenizer = T5Tokenizer.from_pretrained("google-t5/t5-small")
|
||||
|
||||
input_ids = tokenizer("summarize: Hello there", return_tensors="np").input_ids
|
||||
|
||||
@@ -879,8 +879,8 @@ class FlaxT5ModelIntegrationTests(unittest.TestCase):
|
||||
|
||||
@slow
|
||||
def test_summarization(self):
|
||||
model = FlaxT5ForConditionalGeneration.from_pretrained("t5-base")
|
||||
tok = T5Tokenizer.from_pretrained("t5-base")
|
||||
model = FlaxT5ForConditionalGeneration.from_pretrained("google-t5/t5-base")
|
||||
tok = T5Tokenizer.from_pretrained("google-t5/t5-base")
|
||||
|
||||
FRANCE_ARTICLE = ( # @noqa
|
||||
"Marseille, France (CNN)The French prosecutor leading an investigation into the crash of Germanwings"
|
||||
|
||||
@@ -108,7 +108,7 @@ class T5ModelTester:
|
||||
self.decoder_layers = decoder_layers
|
||||
|
||||
def get_large_model_config(self):
|
||||
return T5Config.from_pretrained("t5-base")
|
||||
return T5Config.from_pretrained("google-t5/t5-base")
|
||||
|
||||
def prepare_config_and_inputs(self):
|
||||
input_ids = ids_tensor([self.batch_size, self.encoder_seq_length], self.vocab_size).clamp(2)
|
||||
@@ -942,7 +942,7 @@ class T5EncoderOnlyModelTester:
|
||||
self.is_training = is_training
|
||||
|
||||
def get_large_model_config(self):
|
||||
return T5Config.from_pretrained("t5-base")
|
||||
return T5Config.from_pretrained("google-t5/t5-base")
|
||||
|
||||
def prepare_config_and_inputs(self):
|
||||
input_ids = ids_tensor([self.batch_size, self.encoder_seq_length], self.vocab_size)
|
||||
@@ -1096,36 +1096,40 @@ class T5ModelFp16Tests(unittest.TestCase):
|
||||
with unittest.mock.patch("builtins.__import__", side_effect=import_accelerate_mock):
|
||||
accelerate_available = False
|
||||
|
||||
model = T5ForConditionalGeneration.from_pretrained("t5-small", torch_dtype=torch.float16)
|
||||
model = T5ForConditionalGeneration.from_pretrained("google-t5/t5-small", torch_dtype=torch.float16)
|
||||
self.assertTrue(model.decoder.block[0].layer[2].DenseReluDense.wo.weight.dtype == torch.float32)
|
||||
self.assertTrue(model.decoder.block[0].layer[2].DenseReluDense.wi.weight.dtype == torch.float16)
|
||||
|
||||
# Load without in bf16
|
||||
model = T5ForConditionalGeneration.from_pretrained("t5-small", torch_dtype=torch.bfloat16)
|
||||
model = T5ForConditionalGeneration.from_pretrained("google-t5/t5-small", torch_dtype=torch.bfloat16)
|
||||
self.assertTrue(model.decoder.block[0].layer[2].DenseReluDense.wo.weight.dtype == torch.bfloat16)
|
||||
self.assertTrue(model.decoder.block[0].layer[2].DenseReluDense.wi.weight.dtype == torch.bfloat16)
|
||||
|
||||
# Load using `accelerate` in bf16
|
||||
model = T5ForConditionalGeneration.from_pretrained("t5-small", torch_dtype=torch.bfloat16, device_map="auto")
|
||||
model = T5ForConditionalGeneration.from_pretrained(
|
||||
"google-t5/t5-small", torch_dtype=torch.bfloat16, device_map="auto"
|
||||
)
|
||||
self.assertTrue(model.decoder.block[0].layer[2].DenseReluDense.wo.weight.dtype == torch.bfloat16)
|
||||
self.assertTrue(model.decoder.block[0].layer[2].DenseReluDense.wi.weight.dtype == torch.bfloat16)
|
||||
|
||||
# Load using `accelerate` in bf16
|
||||
model = T5ForConditionalGeneration.from_pretrained(
|
||||
"t5-small", torch_dtype=torch.bfloat16, low_cpu_mem_usage=True
|
||||
"google-t5/t5-small", torch_dtype=torch.bfloat16, low_cpu_mem_usage=True
|
||||
)
|
||||
self.assertTrue(model.decoder.block[0].layer[2].DenseReluDense.wo.weight.dtype == torch.bfloat16)
|
||||
self.assertTrue(model.decoder.block[0].layer[2].DenseReluDense.wi.weight.dtype == torch.bfloat16)
|
||||
|
||||
# Load without using `accelerate`
|
||||
model = T5ForConditionalGeneration.from_pretrained(
|
||||
"t5-small", torch_dtype=torch.float16, low_cpu_mem_usage=True
|
||||
"google-t5/t5-small", torch_dtype=torch.float16, low_cpu_mem_usage=True
|
||||
)
|
||||
self.assertTrue(model.decoder.block[0].layer[2].DenseReluDense.wo.weight.dtype == torch.float32)
|
||||
self.assertTrue(model.decoder.block[0].layer[2].DenseReluDense.wi.weight.dtype == torch.float16)
|
||||
|
||||
# Load using `accelerate`
|
||||
model = T5ForConditionalGeneration.from_pretrained("t5-small", torch_dtype=torch.float16, device_map="auto")
|
||||
model = T5ForConditionalGeneration.from_pretrained(
|
||||
"google-t5/t5-small", torch_dtype=torch.float16, device_map="auto"
|
||||
)
|
||||
self.assertTrue(model.decoder.block[0].layer[2].DenseReluDense.wo.weight.dtype == torch.float32)
|
||||
self.assertTrue(model.decoder.block[0].layer[2].DenseReluDense.wi.weight.dtype == torch.float16)
|
||||
|
||||
@@ -1136,11 +1140,11 @@ class T5ModelFp16Tests(unittest.TestCase):
|
||||
class T5ModelIntegrationTests(unittest.TestCase):
|
||||
@cached_property
|
||||
def model(self):
|
||||
return T5ForConditionalGeneration.from_pretrained("t5-base").to(torch_device)
|
||||
return T5ForConditionalGeneration.from_pretrained("google-t5/t5-base").to(torch_device)
|
||||
|
||||
@cached_property
|
||||
def tokenizer(self):
|
||||
return T5Tokenizer.from_pretrained("t5-base")
|
||||
return T5Tokenizer.from_pretrained("google-t5/t5-base")
|
||||
|
||||
@slow
|
||||
def test_torch_quant(self):
|
||||
@@ -1157,11 +1161,11 @@ class T5ModelIntegrationTests(unittest.TestCase):
|
||||
|
||||
@slow
|
||||
def test_small_generation(self):
|
||||
model = T5ForConditionalGeneration.from_pretrained("t5-small").to(torch_device)
|
||||
model = T5ForConditionalGeneration.from_pretrained("google-t5/t5-small").to(torch_device)
|
||||
model.config.max_length = 8
|
||||
model.config.num_beams = 1
|
||||
model.config.do_sample = False
|
||||
tokenizer = T5Tokenizer.from_pretrained("t5-small")
|
||||
tokenizer = T5Tokenizer.from_pretrained("google-t5/t5-small")
|
||||
|
||||
input_ids = tokenizer("summarize: Hello there", return_tensors="pt").input_ids.to(torch_device)
|
||||
|
||||
@@ -1184,8 +1188,8 @@ class T5ModelIntegrationTests(unittest.TestCase):
|
||||
>>> score = t5_model.score(inputs=["Hello there"], targets=["Hi I am"], vocabulary=vocab)
|
||||
"""
|
||||
|
||||
model = T5ForConditionalGeneration.from_pretrained("t5-small").to(torch_device)
|
||||
tokenizer = T5Tokenizer.from_pretrained("t5-small")
|
||||
model = T5ForConditionalGeneration.from_pretrained("google-t5/t5-small").to(torch_device)
|
||||
tokenizer = T5Tokenizer.from_pretrained("google-t5/t5-small")
|
||||
|
||||
input_ids = tokenizer("Hello there", return_tensors="pt").input_ids
|
||||
labels = tokenizer("Hi I am", return_tensors="pt").input_ids
|
||||
@@ -1501,7 +1505,7 @@ class T5ModelIntegrationTests(unittest.TestCase):
|
||||
|
||||
@slow
|
||||
def test_translation_en_to_fr(self):
|
||||
model = self.model # t5-base
|
||||
model = self.model # google-t5/t5-base
|
||||
tok = self.tokenizer
|
||||
use_task_specific_params(model, "translation_en_to_fr")
|
||||
|
||||
|
||||
@@ -302,7 +302,7 @@ class TFT5ModelTest(TFModelTesterMixin, PipelineTesterMixin, unittest.TestCase):
|
||||
|
||||
@slow
|
||||
def test_model_from_pretrained(self):
|
||||
model = TFT5Model.from_pretrained("t5-small")
|
||||
model = TFT5Model.from_pretrained("google-t5/t5-small")
|
||||
self.assertIsNotNone(model)
|
||||
|
||||
def test_generate_with_headmasking(self):
|
||||
@@ -448,8 +448,8 @@ class TFT5EncoderOnlyModelTest(TFModelTesterMixin, unittest.TestCase):
|
||||
class TFT5GenerationIntegrationTests(unittest.TestCase):
|
||||
@slow
|
||||
def test_greedy_xla_generate_simple(self):
|
||||
model = TFT5ForConditionalGeneration.from_pretrained("t5-small")
|
||||
tokenizer = T5Tokenizer.from_pretrained("t5-small")
|
||||
model = TFT5ForConditionalGeneration.from_pretrained("google-t5/t5-small")
|
||||
tokenizer = T5Tokenizer.from_pretrained("google-t5/t5-small")
|
||||
|
||||
# two examples with different lengths to confirm that attention masks are operational in XLA
|
||||
sentences = [
|
||||
@@ -476,8 +476,8 @@ class TFT5GenerationIntegrationTests(unittest.TestCase):
|
||||
|
||||
@slow
|
||||
def test_greedy_generate(self):
|
||||
model = TFT5ForConditionalGeneration.from_pretrained("t5-small")
|
||||
tokenizer = T5Tokenizer.from_pretrained("t5-small")
|
||||
model = TFT5ForConditionalGeneration.from_pretrained("google-t5/t5-small")
|
||||
tokenizer = T5Tokenizer.from_pretrained("google-t5/t5-small")
|
||||
|
||||
sentences = ["Yesterday, my name was", "Today is a beautiful day and"]
|
||||
input_ids = tokenizer(sentences, return_tensors="tf", padding=True).input_ids
|
||||
@@ -505,8 +505,8 @@ class TFT5GenerationIntegrationTests(unittest.TestCase):
|
||||
|
||||
# forces the generation to happen on CPU, to avoid GPU-related quirks
|
||||
with tf.device(":/CPU:0"):
|
||||
model = TFT5ForConditionalGeneration.from_pretrained("t5-small")
|
||||
tokenizer = T5Tokenizer.from_pretrained("t5-small")
|
||||
model = TFT5ForConditionalGeneration.from_pretrained("google-t5/t5-small")
|
||||
tokenizer = T5Tokenizer.from_pretrained("google-t5/t5-small")
|
||||
|
||||
sentence = "Translate English to German: I have two bananas"
|
||||
input_ids = tokenizer(sentence, return_tensors="tf", padding=True).input_ids
|
||||
@@ -526,8 +526,8 @@ class TFT5GenerationIntegrationTests(unittest.TestCase):
|
||||
|
||||
@slow
|
||||
def test_sample_generate(self):
|
||||
model = TFT5ForConditionalGeneration.from_pretrained("t5-small")
|
||||
tokenizer = T5Tokenizer.from_pretrained("t5-small")
|
||||
model = TFT5ForConditionalGeneration.from_pretrained("google-t5/t5-small")
|
||||
tokenizer = T5Tokenizer.from_pretrained("google-t5/t5-small")
|
||||
|
||||
sentences = ["I really love my", "Translate English to German: the transformers are truly amazing"]
|
||||
input_ids = tokenizer(sentences, return_tensors="tf", padding=True).input_ids
|
||||
@@ -557,8 +557,8 @@ class TFT5GenerationIntegrationTests(unittest.TestCase):
|
||||
@unittest.skip("Skip for now as TF 2.13 breaks it on GPU")
|
||||
@slow
|
||||
def test_beam_search_xla_generate_simple(self):
|
||||
model = TFT5ForConditionalGeneration.from_pretrained("t5-small")
|
||||
tokenizer = T5Tokenizer.from_pretrained("t5-small")
|
||||
model = TFT5ForConditionalGeneration.from_pretrained("google-t5/t5-small")
|
||||
tokenizer = T5Tokenizer.from_pretrained("google-t5/t5-small")
|
||||
|
||||
# tests XLA with task specific arguments
|
||||
task_specific_config = getattr(model.config, "task_specific_params", {})
|
||||
@@ -590,8 +590,8 @@ class TFT5GenerationIntegrationTests(unittest.TestCase):
|
||||
|
||||
@slow
|
||||
def test_beam_search_generate(self):
|
||||
model = TFT5ForConditionalGeneration.from_pretrained("t5-small")
|
||||
tokenizer = T5Tokenizer.from_pretrained("t5-small")
|
||||
model = TFT5ForConditionalGeneration.from_pretrained("google-t5/t5-small")
|
||||
tokenizer = T5Tokenizer.from_pretrained("google-t5/t5-small")
|
||||
|
||||
sentences = ["I really love my", "Translate English to German: the transformers are truly amazing"]
|
||||
input_ids = tokenizer(sentences, return_tensors="tf", padding=True).input_ids
|
||||
@@ -622,7 +622,7 @@ class TFT5GenerationIntegrationTests(unittest.TestCase):
|
||||
class TFT5ModelIntegrationTests(unittest.TestCase):
|
||||
@cached_property
|
||||
def model(self):
|
||||
return TFT5ForConditionalGeneration.from_pretrained("t5-base")
|
||||
return TFT5ForConditionalGeneration.from_pretrained("google-t5/t5-base")
|
||||
|
||||
@slow
|
||||
def test_small_integration_test(self):
|
||||
@@ -638,8 +638,8 @@ class TFT5ModelIntegrationTests(unittest.TestCase):
|
||||
>>> score = t5_model.score(inputs=["Hello there"], targets=["Hi I am"], vocabulary=vocab)
|
||||
"""
|
||||
|
||||
model = TFT5ForConditionalGeneration.from_pretrained("t5-small")
|
||||
tokenizer = T5Tokenizer.from_pretrained("t5-small")
|
||||
model = TFT5ForConditionalGeneration.from_pretrained("google-t5/t5-small")
|
||||
tokenizer = T5Tokenizer.from_pretrained("google-t5/t5-small")
|
||||
|
||||
input_ids = tokenizer("Hello there", return_tensors="tf").input_ids
|
||||
labels = tokenizer("Hi I am", return_tensors="tf").input_ids
|
||||
@@ -703,7 +703,7 @@ class TFT5ModelIntegrationTests(unittest.TestCase):
|
||||
@slow
|
||||
def test_summarization(self):
|
||||
model = self.model
|
||||
tok = T5Tokenizer.from_pretrained("t5-base")
|
||||
tok = T5Tokenizer.from_pretrained("google-t5/t5-base")
|
||||
|
||||
FRANCE_ARTICLE = ( # @noqa
|
||||
"Marseille, France (CNN)The French prosecutor leading an investigation into the crash of Germanwings"
|
||||
@@ -948,7 +948,7 @@ class TFT5ModelIntegrationTests(unittest.TestCase):
|
||||
|
||||
@slow
|
||||
def test_translation_en_to_de(self):
|
||||
tok = T5Tokenizer.from_pretrained("t5-base")
|
||||
tok = T5Tokenizer.from_pretrained("google-t5/t5-base")
|
||||
model = self.model
|
||||
|
||||
task_specific_config = getattr(model.config, "task_specific_params", {})
|
||||
@@ -978,7 +978,7 @@ class TFT5ModelIntegrationTests(unittest.TestCase):
|
||||
@slow
|
||||
def test_translation_en_to_fr(self):
|
||||
model = self.model
|
||||
tok = T5Tokenizer.from_pretrained("t5-base")
|
||||
tok = T5Tokenizer.from_pretrained("google-t5/t5-base")
|
||||
|
||||
task_specific_config = getattr(model.config, "task_specific_params", {})
|
||||
translation_config = task_specific_config.get("translation_en_to_fr", {})
|
||||
@@ -1015,7 +1015,7 @@ class TFT5ModelIntegrationTests(unittest.TestCase):
|
||||
@slow
|
||||
def test_translation_en_to_ro(self):
|
||||
model = self.model
|
||||
tok = T5Tokenizer.from_pretrained("t5-base")
|
||||
tok = T5Tokenizer.from_pretrained("google-t5/t5-base")
|
||||
|
||||
task_specific_config = getattr(model.config, "task_specific_params", {})
|
||||
translation_config = task_specific_config.get("translation_en_to_ro", {})
|
||||
|
||||
@@ -138,11 +138,11 @@ class T5TokenizationTest(TokenizerTesterMixin, unittest.TestCase):
|
||||
|
||||
@cached_property
|
||||
def t5_base_tokenizer(self):
|
||||
return T5Tokenizer.from_pretrained("t5-base")
|
||||
return T5Tokenizer.from_pretrained("google-t5/t5-base")
|
||||
|
||||
@cached_property
|
||||
def t5_base_tokenizer_fast(self):
|
||||
return T5TokenizerFast.from_pretrained("t5-base")
|
||||
return T5TokenizerFast.from_pretrained("google-t5/t5-base")
|
||||
|
||||
def get_tokenizer(self, **kwargs) -> T5Tokenizer:
|
||||
return self.tokenizer_class.from_pretrained(self.tmpdirname, **kwargs)
|
||||
@@ -373,7 +373,7 @@ class T5TokenizationTest(TokenizerTesterMixin, unittest.TestCase):
|
||||
|
||||
self.tokenizer_integration_test_util(
|
||||
expected_encoding=expected_encoding,
|
||||
model_name="t5-base",
|
||||
model_name="google-t5/t5-base",
|
||||
revision="5a7ff2d8f5117c194c7e32ec1ccbf04642cca99b",
|
||||
)
|
||||
|
||||
@@ -400,7 +400,7 @@ class T5TokenizationTest(TokenizerTesterMixin, unittest.TestCase):
|
||||
self.assertListEqual(sorted(tokenizer.get_sentinel_token_ids()), sorted(range(1000, 1010)))
|
||||
|
||||
def test_some_edge_cases(self):
|
||||
tokenizer = T5Tokenizer.from_pretrained("t5-base", legacy=False)
|
||||
tokenizer = T5Tokenizer.from_pretrained("google-t5/t5-base", legacy=False)
|
||||
|
||||
sp_tokens = tokenizer.sp_model.encode("</s>>", out_type=str)
|
||||
self.assertEqual(sp_tokens, ["<", "/", "s", ">", ">"])
|
||||
@@ -426,8 +426,8 @@ class T5TokenizationTest(TokenizerTesterMixin, unittest.TestCase):
|
||||
|
||||
def test_fast_slow_edge_cases(self):
|
||||
# We are testing spaces before and spaces after special tokens + space transformations
|
||||
slow_tokenizer = T5Tokenizer.from_pretrained("t5-base", legacy=False)
|
||||
fast_tokenizer = T5TokenizerFast.from_pretrained("t5-base", legacy=False, from_slow=True)
|
||||
slow_tokenizer = T5Tokenizer.from_pretrained("google-t5/t5-base", legacy=False)
|
||||
fast_tokenizer = T5TokenizerFast.from_pretrained("google-t5/t5-base", legacy=False, from_slow=True)
|
||||
slow_tokenizer.add_tokens(AddedToken("<new_token_test_>", rstrip=False, lstrip=False, normalized=False))
|
||||
fast_tokenizer.add_tokens(AddedToken("<new_token_test_>", rstrip=False, lstrip=False, normalized=False))
|
||||
|
||||
@@ -445,7 +445,7 @@ class T5TokenizationTest(TokenizerTesterMixin, unittest.TestCase):
|
||||
with self.subTest(f"fast {edge_case} normalized = False"):
|
||||
self.assertEqual(fast_tokenizer.tokenize(hard_case), EXPECTED_SLOW)
|
||||
|
||||
fast_tokenizer = T5TokenizerFast.from_pretrained("t5-base", legacy=False, from_slow=True)
|
||||
fast_tokenizer = T5TokenizerFast.from_pretrained("google-t5/t5-base", legacy=False, from_slow=True)
|
||||
fast_tokenizer.add_tokens(AddedToken("<new_token_test_>", rstrip=False, lstrip=False, normalized=True))
|
||||
|
||||
# `normalized=True` is the default normalization scheme when adding a token. Normalize -> don't strip the space.
|
||||
@@ -604,7 +604,7 @@ class CommonSpmIntegrationTests(unittest.TestCase):
|
||||
)
|
||||
|
||||
# Test with T5
|
||||
hf_tokenizer = T5Tokenizer.from_pretrained("t5-small")
|
||||
hf_tokenizer = T5Tokenizer.from_pretrained("google-t5/t5-small")
|
||||
vocab_path = "gs://t5-data/vocabs/cc_all.32000/sentencepiece.model"
|
||||
t5x_tokenizer = SentencePieceVocabulary(vocab_path, extra_ids=300)
|
||||
for text in input_texts:
|
||||
|
||||
Reference in New Issue
Block a user