Update all references to canonical models (#29001)
* Script & Manual edition
* Update
This commit is contained in:
@@ -52,7 +52,7 @@ class GenerationConfigTest(unittest.TestCase):
|
||||
self.assertEqual(loaded_config.max_time, None)
|
||||
|
||||
def test_from_model_config(self):
|
||||
- model_config = AutoConfig.from_pretrained("gpt2")
+ model_config = AutoConfig.from_pretrained("openai-community/gpt2")
|
||||
generation_config_from_model = GenerationConfig.from_model_config(model_config)
|
||||
default_generation_config = GenerationConfig()
|
||||
|
||||
|
||||
@@ -157,10 +157,10 @@ class GenerationIntegrationTestsMixin:
|
||||
is_pt = not model_cls.__name__.startswith("TF")
|
||||
|
||||
articles = ["Justin Timberlake", "Michael Phelps"]
|
||||
- tokenizer = AutoTokenizer.from_pretrained("distilgpt2", padding_side="left")
+ tokenizer = AutoTokenizer.from_pretrained("distilbert/distilgpt2", padding_side="left")
|
||||
tokenizer.pad_token = tokenizer.eos_token
|
||||
|
||||
- model = model_cls.from_pretrained("distilgpt2")
+ model = model_cls.from_pretrained("distilbert/distilgpt2")
|
||||
input_ids = tokenizer(articles, return_tensors=return_tensors, padding=True).input_ids
|
||||
if is_pt:
|
||||
model = model.to(torch_device)
|
||||
@@ -193,10 +193,10 @@ class GenerationIntegrationTestsMixin:
|
||||
is_pt = not model_cls.__name__.startswith("TF")
|
||||
|
||||
articles = ["Justin Timberlake", "Michael Phelps"]
|
||||
- tokenizer = AutoTokenizer.from_pretrained("distilgpt2", padding_side="left")
+ tokenizer = AutoTokenizer.from_pretrained("distilbert/distilgpt2", padding_side="left")
|
||||
tokenizer.pad_token = tokenizer.eos_token
|
||||
|
||||
- model = model_cls.from_pretrained("distilgpt2")
+ model = model_cls.from_pretrained("distilbert/distilgpt2")
|
||||
input_ids = tokenizer(articles, return_tensors=return_tensors, padding=True).input_ids
|
||||
if is_pt:
|
||||
model = model.to(torch_device)
|
||||
@@ -375,7 +375,7 @@ class GenerationIntegrationTestsMixin:
|
||||
is_pt = not model_cls.__name__.startswith("TF")
|
||||
|
||||
input_ids = create_tensor_fn(2 * [[822, 10, 571, 33, 25, 58, 2625, 10, 27, 141, 3, 9, 307, 239, 6, 1]])
|
||||
- model = model_cls.from_pretrained("t5-small")
+ model = model_cls.from_pretrained("google-t5/t5-small")
|
||||
if is_pt:
|
||||
model = model.to(torch_device)
|
||||
input_ids = input_ids.to(torch_device)
|
||||
|
||||
@@ -89,8 +89,8 @@ class StreamerTester(unittest.TestCase):
|
||||
# Tests that we can pass `decode_kwargs` to the streamer to control how the tokens are decoded. Must be tested
|
||||
# with actual models -- the dummy models' tokenizers are not aligned with their models, and
|
||||
# `skip_special_tokens=True` has no effect on them
|
||||
- tokenizer = AutoTokenizer.from_pretrained("distilgpt2")
- model = AutoModelForCausalLM.from_pretrained("distilgpt2").to(torch_device)
+ tokenizer = AutoTokenizer.from_pretrained("distilbert/distilgpt2")
+ model = AutoModelForCausalLM.from_pretrained("distilbert/distilgpt2").to(torch_device)
|
||||
model.config.eos_token_id = -1
|
||||
|
||||
input_ids = torch.ones((1, 5), device=torch_device).long() * model.config.bos_token_id
|
||||
|
||||
@@ -2840,8 +2840,8 @@ class GenerationIntegrationTests(unittest.TestCase, GenerationIntegrationTestsMi
|
||||
self.assertTrue(torch.allclose(transition_scores_sum, outputs.sequences_scores, atol=1e-3))
|
||||
|
||||
def test_beam_search_low_memory(self):
|
||||
- tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
- model = AutoModelForCausalLM.from_pretrained("gpt2")
+ tokenizer = GPT2Tokenizer.from_pretrained("openai-community/gpt2")
+ model = AutoModelForCausalLM.from_pretrained("openai-community/gpt2")
|
||||
tokenizer.pad_token_id = tokenizer.eos_token_id
|
||||
model_inputs = tokenizer("I", return_tensors="pt")["input_ids"]
|
||||
|
||||
@@ -2857,8 +2857,8 @@ class GenerationIntegrationTests(unittest.TestCase, GenerationIntegrationTestsMi
|
||||
# PT-only test: TF doesn't have a BeamSearchScorer
|
||||
# exactly the example provided in the docstrings of beam search, which previously
|
||||
# failed after directly copying from it. Refer to PR #15555
|
||||
- tokenizer = AutoTokenizer.from_pretrained("t5-base")
- model = AutoModelForSeq2SeqLM.from_pretrained("t5-base")
+ tokenizer = AutoTokenizer.from_pretrained("google-t5/t5-base")
+ model = AutoModelForSeq2SeqLM.from_pretrained("google-t5/t5-base")
|
||||
|
||||
encoder_input_str = "translate English to German: How old are you?"
|
||||
encoder_input_ids = tokenizer(encoder_input_str, return_tensors="pt").input_ids
|
||||
@@ -2898,8 +2898,8 @@ class GenerationIntegrationTests(unittest.TestCase, GenerationIntegrationTestsMi
|
||||
@slow
|
||||
def test_constrained_beam_search(self):
|
||||
# PT-only test: TF doesn't have constrained beam search
|
||||
- model = GPT2LMHeadModel.from_pretrained("gpt2").to(torch_device)
- tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
+ model = GPT2LMHeadModel.from_pretrained("openai-community/gpt2").to(torch_device)
+ tokenizer = GPT2Tokenizer.from_pretrained("openai-community/gpt2")
|
||||
|
||||
force_tokens = tokenizer("scared", add_prefix_space=True, add_special_tokens=False).input_ids
|
||||
force_tokens_2 = tokenizer("big weapons", add_prefix_space=True, add_special_tokens=False).input_ids
|
||||
@@ -2936,8 +2936,8 @@ class GenerationIntegrationTests(unittest.TestCase, GenerationIntegrationTestsMi
|
||||
@slow
|
||||
def test_constrained_beam_search_mixed(self):
|
||||
# PT-only test: TF doesn't have constrained beam search
|
||||
- model = GPT2LMHeadModel.from_pretrained("gpt2").to(torch_device)
- tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
+ model = GPT2LMHeadModel.from_pretrained("openai-community/gpt2").to(torch_device)
+ tokenizer = GPT2Tokenizer.from_pretrained("openai-community/gpt2")
|
||||
|
||||
force_phrase = tokenizer("scared", add_prefix_space=True, add_special_tokens=False).input_ids
|
||||
flexible_phrases = tokenizer(
|
||||
@@ -2977,8 +2977,8 @@ class GenerationIntegrationTests(unittest.TestCase, GenerationIntegrationTestsMi
|
||||
@slow
|
||||
def test_constrained_beam_search_mixed_mixin(self):
|
||||
# PT-only test: TF doesn't have constrained beam search
|
||||
- model = GPT2LMHeadModel.from_pretrained("gpt2").to(torch_device)
- tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
+ model = GPT2LMHeadModel.from_pretrained("openai-community/gpt2").to(torch_device)
+ tokenizer = GPT2Tokenizer.from_pretrained("openai-community/gpt2")
|
||||
|
||||
force_word = "scared"
|
||||
force_flexible = ["scream", "screams", "screaming", "screamed"]
|
||||
@@ -3014,8 +3014,8 @@ class GenerationIntegrationTests(unittest.TestCase, GenerationIntegrationTestsMi
|
||||
|
||||
@slow
|
||||
def test_cfg_mixin(self):
|
||||
- model = GPT2LMHeadModel.from_pretrained("gpt2").to(torch_device)
- tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
+ model = GPT2LMHeadModel.from_pretrained("openai-community/gpt2").to(torch_device)
+ tokenizer = GPT2Tokenizer.from_pretrained("openai-community/gpt2")
|
||||
|
||||
input = tokenizer(["The dragon flew over Paris,"], return_tensors="pt", return_attention_mask=True)
|
||||
input["input_ids"] = input["input_ids"].to(torch_device)
|
||||
@@ -3055,8 +3055,8 @@ class GenerationIntegrationTests(unittest.TestCase, GenerationIntegrationTestsMi
|
||||
@slow
|
||||
def test_constrained_beam_search_example_translation_mixin(self):
|
||||
# PT-only test: TF doesn't have constrained beam search
|
||||
- tokenizer = AutoTokenizer.from_pretrained("t5-base")
- model = AutoModelForSeq2SeqLM.from_pretrained("t5-base")
+ tokenizer = AutoTokenizer.from_pretrained("google-t5/t5-base")
+ model = AutoModelForSeq2SeqLM.from_pretrained("google-t5/t5-base")
|
||||
|
||||
encoder_input_str = "translate English to German: How old are you?"
|
||||
force_words = ["sind"]
|
||||
@@ -3080,8 +3080,8 @@ class GenerationIntegrationTests(unittest.TestCase, GenerationIntegrationTestsMi
|
||||
@slow
|
||||
def test_constrained_beam_search_example_integration(self):
|
||||
# PT-only test: TF doesn't have constrained beam search
|
||||
- tokenizer = AutoTokenizer.from_pretrained("t5-base")
- model = AutoModelForSeq2SeqLM.from_pretrained("t5-base")
+ tokenizer = AutoTokenizer.from_pretrained("google-t5/t5-base")
+ model = AutoModelForSeq2SeqLM.from_pretrained("google-t5/t5-base")
|
||||
|
||||
encoder_input_str = "translate English to German: How old are you?"
|
||||
encoder_input_ids = tokenizer(encoder_input_str, return_tensors="pt").input_ids
|
||||
|
||||
Reference in New Issue
Block a user