Update all references to canonical models (#29001)
* Script & Manual edition
* Update
This commit is contained in:
@@ -1537,7 +1537,7 @@ class TrainerIntegrationTest(TestCasePlus, TrainerIntegrationCommon):
|
||||
with tempfile.TemporaryDirectory() as tmpdir:
|
||||
testargs = f"""
|
||||
run_glue.py
|
||||
- --model_name_or_path distilbert-base-uncased
|
||||
+ --model_name_or_path distilbert/distilbert-base-uncased
|
||||
--task_name mrpc
|
||||
--do_train
|
||||
--do_eval
|
||||
@@ -1886,7 +1886,7 @@ class TrainerIntegrationTest(TestCasePlus, TrainerIntegrationCommon):
|
||||
|
||||
@slow
|
||||
def test_trainer_eval_mrpc(self):
|
||||
- MODEL_ID = "bert-base-cased-finetuned-mrpc"
|
||||
+ MODEL_ID = "google-bert/bert-base-cased-finetuned-mrpc"
|
||||
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
|
||||
model = AutoModelForSequenceClassification.from_pretrained(MODEL_ID)
|
||||
data_args = GlueDataTrainingArguments(
|
||||
@@ -1901,7 +1901,7 @@ class TrainerIntegrationTest(TestCasePlus, TrainerIntegrationCommon):
|
||||
|
||||
@slow
|
||||
def test_trainer_eval_multiple(self):
|
||||
- MODEL_ID = "gpt2"
|
||||
+ MODEL_ID = "openai-community/gpt2"
|
||||
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
|
||||
model = AutoModelForCausalLM.from_pretrained(MODEL_ID)
|
||||
dataset = LineByLineTextDataset(
|
||||
@@ -1930,7 +1930,7 @@ class TrainerIntegrationTest(TestCasePlus, TrainerIntegrationCommon):
|
||||
|
||||
@slow
|
||||
def test_trainer_eval_lm(self):
|
||||
- MODEL_ID = "distilroberta-base"
|
||||
+ MODEL_ID = "distilbert/distilroberta-base"
|
||||
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
|
||||
dataset = LineByLineTextDataset(
|
||||
tokenizer=tokenizer,
|
||||
@@ -2384,7 +2384,7 @@ class TrainerIntegrationTest(TestCasePlus, TrainerIntegrationCommon):
|
||||
"launch",
|
||||
script_path,
|
||||
"--model_name_or_path",
|
||||
- "t5-small",
|
||||
+ "google-t5/t5-small",
|
||||
"--per_device_train_batch_size",
|
||||
"1",
|
||||
"--output_dir",
|
||||
|
||||
@@ -35,7 +35,7 @@ class Seq2seqTrainerTester(TestCasePlus):
|
||||
@require_torch
|
||||
def test_finetune_bert2bert(self):
|
||||
bert2bert = EncoderDecoderModel.from_encoder_decoder_pretrained("prajjwal1/bert-tiny", "prajjwal1/bert-tiny")
|
||||
- tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
|
||||
+ tokenizer = BertTokenizer.from_pretrained("google-bert/bert-base-uncased")
|
||||
|
||||
bert2bert.config.vocab_size = bert2bert.config.encoder.vocab_size
|
||||
bert2bert.config.eos_token_id = tokenizer.sep_token_id
|
||||
@@ -144,11 +144,11 @@ class Seq2seqTrainerTester(TestCasePlus):
|
||||
MAX_TARGET_LENGTH = 256
|
||||
|
||||
dataset = datasets.load_dataset("gsm8k", "main", split="train[:38]")
|
||||
- model = AutoModelForSeq2SeqLM.from_pretrained("t5-small")
|
||||
- tokenizer = T5Tokenizer.from_pretrained("t5-small")
|
||||
+ model = AutoModelForSeq2SeqLM.from_pretrained("google-t5/t5-small")
|
||||
+ tokenizer = T5Tokenizer.from_pretrained("google-t5/t5-small")
|
||||
data_collator = DataCollatorForSeq2Seq(tokenizer, model=model, return_tensors="pt", padding="longest")
|
||||
gen_config = GenerationConfig.from_pretrained(
|
||||
- "t5-small", max_length=None, min_length=None, max_new_tokens=256, min_new_tokens=1, num_beams=5
|
||||
+ "google-t5/t5-small", max_length=None, min_length=None, max_new_tokens=256, min_new_tokens=1, num_beams=5
|
||||
)
|
||||
|
||||
training_args = Seq2SeqTrainingArguments(".", predict_with_generate=True)
|
||||
|
||||
Reference in New Issue
Block a user