Update all references to canonical models (#29001)
* Script & Manual edition * Update
This commit is contained in:
@@ -109,7 +109,7 @@ classification MNLI task using the `run_glue` script, with 8 GPUs:
|
||||
```bash
|
||||
torchrun \
|
||||
--nproc_per_node 8 pytorch/text-classification/run_glue.py \
|
||||
--model_name_or_path bert-large-uncased-whole-word-masking \
|
||||
--model_name_or_path google-bert/bert-large-uncased-whole-word-masking \
|
||||
--task_name mnli \
|
||||
--do_train \
|
||||
--do_eval \
|
||||
@@ -153,7 +153,7 @@ classification MNLI task using the `run_glue` script, with 8 TPUs (from this fol
|
||||
```bash
|
||||
python xla_spawn.py --num_cores 8 \
|
||||
text-classification/run_glue.py \
|
||||
--model_name_or_path bert-large-uncased-whole-word-masking \
|
||||
--model_name_or_path google-bert/bert-large-uncased-whole-word-masking \
|
||||
--task_name mnli \
|
||||
--do_train \
|
||||
--do_eval \
|
||||
|
||||
@@ -64,10 +64,10 @@ from transformers import (
|
||||
)
|
||||
|
||||
model = VisionTextDualEncoderModel.from_vision_text_pretrained(
|
||||
"openai/clip-vit-base-patch32", "roberta-base"
|
||||
"openai/clip-vit-base-patch32", "FacebookAI/roberta-base"
|
||||
)
|
||||
|
||||
tokenizer = AutoTokenizer.from_pretrained("roberta-base")
|
||||
tokenizer = AutoTokenizer.from_pretrained("FacebookAI/roberta-base")
|
||||
image_processor = AutoImageProcessor.from_pretrained("openai/clip-vit-base-patch32")
|
||||
processor = VisionTextDualEncoderProcessor(image_processor, tokenizer)
|
||||
|
||||
|
||||
@@ -36,7 +36,7 @@ the tokenization). The loss here is that of causal language modeling.
|
||||
|
||||
```bash
|
||||
python run_clm.py \
|
||||
--model_name_or_path gpt2 \
|
||||
--model_name_or_path openai-community/gpt2 \
|
||||
--dataset_name wikitext \
|
||||
--dataset_config_name wikitext-2-raw-v1 \
|
||||
--per_device_train_batch_size 8 \
|
||||
@@ -53,7 +53,7 @@ To run on your own training and validation files, use the following command:
|
||||
|
||||
```bash
|
||||
python run_clm.py \
|
||||
--model_name_or_path gpt2 \
|
||||
--model_name_or_path openai-community/gpt2 \
|
||||
--train_file path_to_train_file \
|
||||
--validation_file path_to_validation_file \
|
||||
--per_device_train_batch_size 8 \
|
||||
@@ -69,7 +69,7 @@ This uses the built in HuggingFace `Trainer` for training. If you want to use a
|
||||
python run_clm_no_trainer.py \
|
||||
--dataset_name wikitext \
|
||||
--dataset_config_name wikitext-2-raw-v1 \
|
||||
--model_name_or_path gpt2 \
|
||||
--model_name_or_path openai-community/gpt2 \
|
||||
--output_dir /tmp/test-clm
|
||||
```
|
||||
|
||||
@@ -84,7 +84,7 @@ converge slightly slower (over-fitting takes more epochs).
|
||||
|
||||
```bash
|
||||
python run_mlm.py \
|
||||
--model_name_or_path roberta-base \
|
||||
--model_name_or_path FacebookAI/roberta-base \
|
||||
--dataset_name wikitext \
|
||||
--dataset_config_name wikitext-2-raw-v1 \
|
||||
--per_device_train_batch_size 8 \
|
||||
@@ -98,7 +98,7 @@ To run on your own training and validation files, use the following command:
|
||||
|
||||
```bash
|
||||
python run_mlm.py \
|
||||
--model_name_or_path roberta-base \
|
||||
--model_name_or_path FacebookAI/roberta-base \
|
||||
--train_file path_to_train_file \
|
||||
--validation_file path_to_validation_file \
|
||||
--per_device_train_batch_size 8 \
|
||||
@@ -117,7 +117,7 @@ This uses the built in HuggingFace `Trainer` for training. If you want to use a
|
||||
python run_mlm_no_trainer.py \
|
||||
--dataset_name wikitext \
|
||||
--dataset_config_name wikitext-2-raw-v1 \
|
||||
--model_name_or_path roberta-base \
|
||||
--model_name_or_path FacebookAI/roberta-base \
|
||||
--output_dir /tmp/test-mlm
|
||||
```
|
||||
|
||||
@@ -144,7 +144,7 @@ Here is how to fine-tune XLNet on wikitext-2:
|
||||
|
||||
```bash
|
||||
python run_plm.py \
|
||||
--model_name_or_path=xlnet-base-cased \
|
||||
--model_name_or_path=xlnet/xlnet-base-cased \
|
||||
--dataset_name wikitext \
|
||||
--dataset_config_name wikitext-2-raw-v1 \
|
||||
--per_device_train_batch_size 8 \
|
||||
@@ -158,7 +158,7 @@ To fine-tune it on your own training and validation file, run:
|
||||
|
||||
```bash
|
||||
python run_plm.py \
|
||||
--model_name_or_path=xlnet-base-cased \
|
||||
--model_name_or_path=xlnet/xlnet-base-cased \
|
||||
--train_file path_to_train_file \
|
||||
--validation_file path_to_validation_file \
|
||||
--per_device_train_batch_size 8 \
|
||||
@@ -188,7 +188,7 @@ When training a model from scratch, configuration values may be overridden with
|
||||
|
||||
|
||||
```bash
|
||||
python run_clm.py --model_type gpt2 --tokenizer_name gpt2 \ --config_overrides="n_embd=1024,n_head=16,n_layer=48,n_positions=102" \
|
||||
python run_clm.py --model_type gpt2 --tokenizer_name openai-community/gpt2 --config_overrides="n_embd=1024,n_head=16,n_layer=48,n_positions=102" \
|
||||
[...]
|
||||
```
|
||||
|
||||
|
||||
@@ -22,7 +22,7 @@ limitations under the License.
|
||||
|
||||
```bash
|
||||
python examples/multiple-choice/run_swag.py \
|
||||
--model_name_or_path roberta-base \
|
||||
--model_name_or_path FacebookAI/roberta-base \
|
||||
--do_train \
|
||||
--do_eval \
|
||||
--learning_rate 5e-5 \
|
||||
@@ -62,7 +62,7 @@ then
|
||||
export DATASET_NAME=swag
|
||||
|
||||
python run_swag_no_trainer.py \
|
||||
--model_name_or_path bert-base-cased \
|
||||
--model_name_or_path google-bert/bert-base-cased \
|
||||
--dataset_name $DATASET_NAME \
|
||||
--max_seq_length 128 \
|
||||
--per_device_train_batch_size 32 \
|
||||
@@ -89,7 +89,7 @@ that will check everything is ready for training. Finally, you can launch traini
|
||||
export DATASET_NAME=swag
|
||||
|
||||
accelerate launch run_swag_no_trainer.py \
|
||||
--model_name_or_path bert-base-cased \
|
||||
--model_name_or_path google-bert/bert-base-cased \
|
||||
--dataset_name $DATASET_NAME \
|
||||
--max_seq_length 128 \
|
||||
--per_device_train_batch_size 32 \
|
||||
|
||||
@@ -54,7 +54,7 @@ class TorchXLAExamplesTests(TestCasePlus):
|
||||
./examples/pytorch/text-classification/run_glue.py
|
||||
--num_cores=8
|
||||
./examples/pytorch/text-classification/run_glue.py
|
||||
--model_name_or_path distilbert-base-uncased
|
||||
--model_name_or_path distilbert/distilbert-base-uncased
|
||||
--output_dir {tmp_dir}
|
||||
--overwrite_output_dir
|
||||
--train_file ./tests/fixtures/tests_samples/MRPC/train.csv
|
||||
|
||||
@@ -40,7 +40,7 @@ on a single tesla V100 16GB.
|
||||
|
||||
```bash
|
||||
python run_qa.py \
|
||||
--model_name_or_path bert-base-uncased \
|
||||
--model_name_or_path google-bert/bert-base-uncased \
|
||||
--dataset_name squad \
|
||||
--do_train \
|
||||
--do_eval \
|
||||
@@ -67,7 +67,7 @@ The [`run_qa_beam_search.py`](https://github.com/huggingface/transformers/blob/m
|
||||
|
||||
```bash
|
||||
python run_qa_beam_search.py \
|
||||
--model_name_or_path xlnet-large-cased \
|
||||
--model_name_or_path xlnet/xlnet-large-cased \
|
||||
--dataset_name squad \
|
||||
--do_train \
|
||||
--do_eval \
|
||||
@@ -87,7 +87,7 @@ python run_qa_beam_search.py \
|
||||
export SQUAD_DIR=/path/to/SQUAD
|
||||
|
||||
python run_qa_beam_search.py \
|
||||
--model_name_or_path xlnet-large-cased \
|
||||
--model_name_or_path xlnet/xlnet-large-cased \
|
||||
--dataset_name squad_v2 \
|
||||
--do_train \
|
||||
--do_eval \
|
||||
@@ -111,7 +111,7 @@ This example code fine-tunes T5 on the SQuAD2.0 dataset.
|
||||
|
||||
```bash
|
||||
python run_seq2seq_qa.py \
|
||||
--model_name_or_path t5-small \
|
||||
--model_name_or_path google-t5/t5-small \
|
||||
--dataset_name squad_v2 \
|
||||
--context_column context \
|
||||
--question_column question \
|
||||
@@ -143,7 +143,7 @@ then
|
||||
|
||||
```bash
|
||||
python run_qa_no_trainer.py \
|
||||
--model_name_or_path bert-base-uncased \
|
||||
--model_name_or_path google-bert/bert-base-uncased \
|
||||
--dataset_name squad \
|
||||
--max_seq_length 384 \
|
||||
--doc_stride 128 \
|
||||
@@ -166,7 +166,7 @@ that will check everything is ready for training. Finally, you can launch traini
|
||||
|
||||
```bash
|
||||
accelerate launch run_qa_no_trainer.py \
|
||||
--model_name_or_path bert-base-uncased \
|
||||
--model_name_or_path google-bert/bert-base-uncased \
|
||||
--dataset_name squad \
|
||||
--max_seq_length 384 \
|
||||
--doc_stride 128 \
|
||||
|
||||
@@ -41,7 +41,7 @@ and you also will find examples of these below.
|
||||
Here is an example on a summarization task:
|
||||
```bash
|
||||
python examples/pytorch/summarization/run_summarization.py \
|
||||
--model_name_or_path t5-small \
|
||||
--model_name_or_path google-t5/t5-small \
|
||||
--do_train \
|
||||
--do_eval \
|
||||
--dataset_name cnn_dailymail \
|
||||
@@ -54,9 +54,9 @@ python examples/pytorch/summarization/run_summarization.py \
|
||||
--predict_with_generate
|
||||
```
|
||||
|
||||
Only T5 models `t5-small`, `t5-base`, `t5-large`, `t5-3b` and `t5-11b` must use an additional argument: `--source_prefix "summarize: "`.
|
||||
Only T5 models `google-t5/t5-small`, `google-t5/t5-base`, `google-t5/t5-large`, `google-t5/t5-3b` and `google-t5/t5-11b` must use an additional argument: `--source_prefix "summarize: "`.
|
||||
|
||||
We used CNN/DailyMail dataset in this example as `t5-small` was trained on it and one can get good scores even when pre-training with a very small sample.
|
||||
We used the CNN/DailyMail dataset in this example because `google-t5/t5-small` was trained on it, so one can get good scores even when pre-training with a very small sample.
|
||||
|
||||
Extreme Summarization (XSum) Dataset is another commonly used dataset for the task of summarization. To use it replace `--dataset_name cnn_dailymail --dataset_config "3.0.0"` with `--dataset_name xsum`.
|
||||
|
||||
@@ -65,7 +65,7 @@ And here is how you would use it on your own files, after adjusting the values f
|
||||
|
||||
```bash
|
||||
python examples/pytorch/summarization/run_summarization.py \
|
||||
--model_name_or_path t5-small \
|
||||
--model_name_or_path google-t5/t5-small \
|
||||
--do_train \
|
||||
--do_eval \
|
||||
--train_file path_to_csv_or_jsonlines_file \
|
||||
@@ -156,7 +156,7 @@ then
|
||||
|
||||
```bash
|
||||
python run_summarization_no_trainer.py \
|
||||
--model_name_or_path t5-small \
|
||||
--model_name_or_path google-t5/t5-small \
|
||||
--dataset_name cnn_dailymail \
|
||||
--dataset_config "3.0.0" \
|
||||
--source_prefix "summarize: " \
|
||||
@@ -179,7 +179,7 @@ that will check everything is ready for training. Finally, you can launch traini
|
||||
|
||||
```bash
|
||||
accelerate launch run_summarization_no_trainer.py \
|
||||
--model_name_or_path t5-small \
|
||||
--model_name_or_path google-t5/t5-small \
|
||||
--dataset_name cnn_dailymail \
|
||||
--dataset_config "3.0.0" \
|
||||
--source_prefix "summarize: " \
|
||||
|
||||
@@ -368,11 +368,11 @@ def main():
|
||||
logger.info(f"Training/evaluation parameters {training_args}")
|
||||
|
||||
if data_args.source_prefix is None and model_args.model_name_or_path in [
|
||||
"t5-small",
|
||||
"t5-base",
|
||||
"t5-large",
|
||||
"t5-3b",
|
||||
"t5-11b",
|
||||
"google-t5/t5-small",
|
||||
"google-t5/t5-base",
|
||||
"google-t5/t5-large",
|
||||
"google-t5/t5-3b",
|
||||
"google-t5/t5-11b",
|
||||
]:
|
||||
logger.warning(
|
||||
"You're running a t5 model but didn't provide a source prefix, which is the expected, e.g. with "
|
||||
|
||||
@@ -339,11 +339,11 @@ def main():
|
||||
|
||||
accelerator = Accelerator(gradient_accumulation_steps=args.gradient_accumulation_steps, **accelerator_log_kwargs)
|
||||
if args.source_prefix is None and args.model_name_or_path in [
|
||||
"t5-small",
|
||||
"t5-base",
|
||||
"t5-large",
|
||||
"t5-3b",
|
||||
"t5-11b",
|
||||
"google-t5/t5-small",
|
||||
"google-t5/t5-base",
|
||||
"google-t5/t5-large",
|
||||
"google-t5/t5-3b",
|
||||
"google-t5/t5-11b",
|
||||
]:
|
||||
logger.warning(
|
||||
"You're running a t5 model but didn't provide a source prefix, which is the expected, e.g. with "
|
||||
|
||||
@@ -80,7 +80,7 @@ class ExamplesTestsNoTrainer(TestCasePlus):
|
||||
tmp_dir = self.get_auto_remove_tmp_dir()
|
||||
testargs = f"""
|
||||
{self.examples_dir}/pytorch/text-classification/run_glue_no_trainer.py
|
||||
--model_name_or_path distilbert-base-uncased
|
||||
--model_name_or_path distilbert/distilbert-base-uncased
|
||||
--output_dir {tmp_dir}
|
||||
--train_file ./tests/fixtures/tests_samples/MRPC/train.csv
|
||||
--validation_file ./tests/fixtures/tests_samples/MRPC/dev.csv
|
||||
@@ -105,7 +105,7 @@ class ExamplesTestsNoTrainer(TestCasePlus):
|
||||
tmp_dir = self.get_auto_remove_tmp_dir()
|
||||
testargs = f"""
|
||||
{self.examples_dir}/pytorch/language-modeling/run_clm_no_trainer.py
|
||||
--model_name_or_path distilgpt2
|
||||
--model_name_or_path distilbert/distilgpt2
|
||||
--train_file ./tests/fixtures/sample_text.txt
|
||||
--validation_file ./tests/fixtures/sample_text.txt
|
||||
--block_size 128
|
||||
@@ -133,7 +133,7 @@ class ExamplesTestsNoTrainer(TestCasePlus):
|
||||
tmp_dir = self.get_auto_remove_tmp_dir()
|
||||
testargs = f"""
|
||||
{self.examples_dir}/pytorch/language-modeling/run_mlm_no_trainer.py
|
||||
--model_name_or_path distilroberta-base
|
||||
--model_name_or_path distilbert/distilroberta-base
|
||||
--train_file ./tests/fixtures/sample_text.txt
|
||||
--validation_file ./tests/fixtures/sample_text.txt
|
||||
--output_dir {tmp_dir}
|
||||
@@ -156,7 +156,7 @@ class ExamplesTestsNoTrainer(TestCasePlus):
|
||||
tmp_dir = self.get_auto_remove_tmp_dir()
|
||||
testargs = f"""
|
||||
{self.examples_dir}/pytorch/token-classification/run_ner_no_trainer.py
|
||||
--model_name_or_path bert-base-uncased
|
||||
--model_name_or_path google-bert/bert-base-uncased
|
||||
--train_file tests/fixtures/tests_samples/conll/sample.json
|
||||
--validation_file tests/fixtures/tests_samples/conll/sample.json
|
||||
--output_dir {tmp_dir}
|
||||
@@ -181,7 +181,7 @@ class ExamplesTestsNoTrainer(TestCasePlus):
|
||||
tmp_dir = self.get_auto_remove_tmp_dir()
|
||||
testargs = f"""
|
||||
{self.examples_dir}/pytorch/question-answering/run_qa_no_trainer.py
|
||||
--model_name_or_path bert-base-uncased
|
||||
--model_name_or_path google-bert/bert-base-uncased
|
||||
--version_2_with_negative
|
||||
--train_file tests/fixtures/tests_samples/SQUAD/sample.json
|
||||
--validation_file tests/fixtures/tests_samples/SQUAD/sample.json
|
||||
@@ -209,7 +209,7 @@ class ExamplesTestsNoTrainer(TestCasePlus):
|
||||
tmp_dir = self.get_auto_remove_tmp_dir()
|
||||
testargs = f"""
|
||||
{self.examples_dir}/pytorch/multiple-choice/run_swag_no_trainer.py
|
||||
--model_name_or_path bert-base-uncased
|
||||
--model_name_or_path google-bert/bert-base-uncased
|
||||
--train_file tests/fixtures/tests_samples/swag/sample.json
|
||||
--validation_file tests/fixtures/tests_samples/swag/sample.json
|
||||
--output_dir {tmp_dir}
|
||||
@@ -232,7 +232,7 @@ class ExamplesTestsNoTrainer(TestCasePlus):
|
||||
tmp_dir = self.get_auto_remove_tmp_dir()
|
||||
testargs = f"""
|
||||
{self.examples_dir}/pytorch/summarization/run_summarization_no_trainer.py
|
||||
--model_name_or_path t5-small
|
||||
--model_name_or_path google-t5/t5-small
|
||||
--train_file tests/fixtures/tests_samples/xsum/sample.json
|
||||
--validation_file tests/fixtures/tests_samples/xsum/sample.json
|
||||
--output_dir {tmp_dir}
|
||||
|
||||
@@ -99,7 +99,7 @@ class ExamplesTests(TestCasePlus):
|
||||
tmp_dir = self.get_auto_remove_tmp_dir()
|
||||
testargs = f"""
|
||||
run_glue.py
|
||||
--model_name_or_path distilbert-base-uncased
|
||||
--model_name_or_path distilbert/distilbert-base-uncased
|
||||
--output_dir {tmp_dir}
|
||||
--overwrite_output_dir
|
||||
--train_file ./tests/fixtures/tests_samples/MRPC/train.csv
|
||||
@@ -127,7 +127,7 @@ class ExamplesTests(TestCasePlus):
|
||||
tmp_dir = self.get_auto_remove_tmp_dir()
|
||||
testargs = f"""
|
||||
run_clm.py
|
||||
--model_name_or_path distilgpt2
|
||||
--model_name_or_path distilbert/distilgpt2
|
||||
--train_file ./tests/fixtures/sample_text.txt
|
||||
--validation_file ./tests/fixtures/sample_text.txt
|
||||
--do_train
|
||||
@@ -160,7 +160,7 @@ class ExamplesTests(TestCasePlus):
|
||||
testargs = f"""
|
||||
run_clm.py
|
||||
--model_type gpt2
|
||||
--tokenizer_name gpt2
|
||||
--tokenizer_name openai-community/gpt2
|
||||
--train_file ./tests/fixtures/sample_text.txt
|
||||
--output_dir {tmp_dir}
|
||||
--config_overrides n_embd=10,n_head=2
|
||||
@@ -181,7 +181,7 @@ class ExamplesTests(TestCasePlus):
|
||||
tmp_dir = self.get_auto_remove_tmp_dir()
|
||||
testargs = f"""
|
||||
run_mlm.py
|
||||
--model_name_or_path distilroberta-base
|
||||
--model_name_or_path distilbert/distilroberta-base
|
||||
--train_file ./tests/fixtures/sample_text.txt
|
||||
--validation_file ./tests/fixtures/sample_text.txt
|
||||
--output_dir {tmp_dir}
|
||||
@@ -207,7 +207,7 @@ class ExamplesTests(TestCasePlus):
|
||||
tmp_dir = self.get_auto_remove_tmp_dir()
|
||||
testargs = f"""
|
||||
run_ner.py
|
||||
--model_name_or_path bert-base-uncased
|
||||
--model_name_or_path google-bert/bert-base-uncased
|
||||
--train_file tests/fixtures/tests_samples/conll/sample.json
|
||||
--validation_file tests/fixtures/tests_samples/conll/sample.json
|
||||
--output_dir {tmp_dir}
|
||||
@@ -235,7 +235,7 @@ class ExamplesTests(TestCasePlus):
|
||||
tmp_dir = self.get_auto_remove_tmp_dir()
|
||||
testargs = f"""
|
||||
run_qa.py
|
||||
--model_name_or_path bert-base-uncased
|
||||
--model_name_or_path google-bert/bert-base-uncased
|
||||
--version_2_with_negative
|
||||
--train_file tests/fixtures/tests_samples/SQUAD/sample.json
|
||||
--validation_file tests/fixtures/tests_samples/SQUAD/sample.json
|
||||
@@ -260,7 +260,7 @@ class ExamplesTests(TestCasePlus):
|
||||
tmp_dir = self.get_auto_remove_tmp_dir()
|
||||
testargs = f"""
|
||||
run_seq2seq_qa.py
|
||||
--model_name_or_path t5-small
|
||||
--model_name_or_path google-t5/t5-small
|
||||
--context_column context
|
||||
--question_column question
|
||||
--answer_column answers
|
||||
@@ -289,7 +289,7 @@ class ExamplesTests(TestCasePlus):
|
||||
tmp_dir = self.get_auto_remove_tmp_dir()
|
||||
testargs = f"""
|
||||
run_swag.py
|
||||
--model_name_or_path bert-base-uncased
|
||||
--model_name_or_path google-bert/bert-base-uncased
|
||||
--train_file tests/fixtures/tests_samples/swag/sample.json
|
||||
--validation_file tests/fixtures/tests_samples/swag/sample.json
|
||||
--output_dir {tmp_dir}
|
||||
@@ -327,7 +327,7 @@ class ExamplesTests(TestCasePlus):
|
||||
tmp_dir = self.get_auto_remove_tmp_dir()
|
||||
testargs = f"""
|
||||
run_summarization.py
|
||||
--model_name_or_path t5-small
|
||||
--model_name_or_path google-t5/t5-small
|
||||
--train_file tests/fixtures/tests_samples/xsum/sample.json
|
||||
--validation_file tests/fixtures/tests_samples/xsum/sample.json
|
||||
--output_dir {tmp_dir}
|
||||
|
||||
@@ -31,7 +31,7 @@ GLUE is made up of a total of 9 different tasks. Here is how to run the script o
|
||||
export TASK_NAME=mrpc
|
||||
|
||||
python run_glue.py \
|
||||
--model_name_or_path bert-base-cased \
|
||||
--model_name_or_path google-bert/bert-base-cased \
|
||||
--task_name $TASK_NAME \
|
||||
--do_train \
|
||||
--do_eval \
|
||||
@@ -68,7 +68,7 @@ The following example fine-tunes BERT on the `imdb` dataset hosted on our [hub](
|
||||
|
||||
```bash
|
||||
python run_glue.py \
|
||||
--model_name_or_path bert-base-cased \
|
||||
--model_name_or_path google-bert/bert-base-cased \
|
||||
--dataset_name imdb \
|
||||
--do_train \
|
||||
--do_predict \
|
||||
@@ -90,7 +90,7 @@ We can specify the metric, the label column and also choose which text columns to
|
||||
dataset="amazon_reviews_multi"
|
||||
subset="en"
|
||||
python run_classification.py \
|
||||
--model_name_or_path bert-base-uncased \
|
||||
--model_name_or_path google-bert/bert-base-uncased \
|
||||
--dataset_name ${dataset} \
|
||||
--dataset_config_name ${subset} \
|
||||
--shuffle_train_dataset \
|
||||
@@ -113,7 +113,7 @@ The following is a multi-label classification example. It fine-tunes BERT on the
|
||||
dataset="reuters21578"
|
||||
subset="ModApte"
|
||||
python run_classification.py \
|
||||
--model_name_or_path bert-base-uncased \
|
||||
--model_name_or_path google-bert/bert-base-uncased \
|
||||
--dataset_name ${dataset} \
|
||||
--dataset_config_name ${subset} \
|
||||
--shuffle_train_dataset \
|
||||
@@ -175,7 +175,7 @@ then
|
||||
export TASK_NAME=mrpc
|
||||
|
||||
python run_glue_no_trainer.py \
|
||||
--model_name_or_path bert-base-cased \
|
||||
--model_name_or_path google-bert/bert-base-cased \
|
||||
--task_name $TASK_NAME \
|
||||
--max_length 128 \
|
||||
--per_device_train_batch_size 32 \
|
||||
@@ -202,7 +202,7 @@ that will check everything is ready for training. Finally, you can launch traini
|
||||
export TASK_NAME=mrpc
|
||||
|
||||
accelerate launch run_glue_no_trainer.py \
|
||||
--model_name_or_path bert-base-cased \
|
||||
--model_name_or_path google-bert/bert-base-cased \
|
||||
--task_name $TASK_NAME \
|
||||
--max_length 128 \
|
||||
--per_device_train_batch_size 32 \
|
||||
@@ -232,7 +232,7 @@ This example code fine-tunes mBERT (multi-lingual BERT) on the XNLI dataset. It
|
||||
|
||||
```bash
|
||||
python run_xnli.py \
|
||||
--model_name_or_path bert-base-multilingual-cased \
|
||||
--model_name_or_path google-bert/bert-base-multilingual-cased \
|
||||
--language de \
|
||||
--train_language en \
|
||||
--do_train \
|
||||
|
||||
@@ -26,6 +26,6 @@ Example usage:
|
||||
|
||||
```bash
|
||||
python run_generation.py \
|
||||
--model_type=gpt2 \
|
||||
--model_name_or_path=gpt2
|
||||
--model_type=gpt2 \
|
||||
--model_name_or_path=openai-community/gpt2
|
||||
```
|
||||
|
||||
@@ -16,7 +16,7 @@
|
||||
""" The examples of running contrastive search on the auto-APIs;
|
||||
|
||||
Running this example:
|
||||
python run_generation_contrastive_search.py --model_name_or_path=gpt2-large --penalty_alpha=0.6 --k=4 --length=256
|
||||
python run_generation_contrastive_search.py --model_name_or_path=openai-community/gpt2-large --penalty_alpha=0.6 --k=4 --length=256
|
||||
"""
|
||||
|
||||
|
||||
|
||||
@@ -29,7 +29,7 @@ The following example fine-tunes BERT on CoNLL-2003:
|
||||
|
||||
```bash
|
||||
python run_ner.py \
|
||||
--model_name_or_path bert-base-uncased \
|
||||
--model_name_or_path google-bert/bert-base-uncased \
|
||||
--dataset_name conll2003 \
|
||||
--output_dir /tmp/test-ner \
|
||||
--do_train \
|
||||
@@ -42,7 +42,7 @@ To run on your own training and validation files, use the following command:
|
||||
|
||||
```bash
|
||||
python run_ner.py \
|
||||
--model_name_or_path bert-base-uncased \
|
||||
--model_name_or_path google-bert/bert-base-uncased \
|
||||
--train_file path_to_train_file \
|
||||
--validation_file path_to_validation_file \
|
||||
--output_dir /tmp/test-ner \
|
||||
@@ -84,7 +84,7 @@ then
|
||||
export TASK_NAME=ner
|
||||
|
||||
python run_ner_no_trainer.py \
|
||||
--model_name_or_path bert-base-cased \
|
||||
--model_name_or_path google-bert/bert-base-cased \
|
||||
--dataset_name conll2003 \
|
||||
--task_name $TASK_NAME \
|
||||
--max_length 128 \
|
||||
@@ -112,7 +112,7 @@ that will check everything is ready for training. Finally, you can launch traini
|
||||
export TASK_NAME=ner
|
||||
|
||||
accelerate launch run_ner_no_trainer.py \
|
||||
--model_name_or_path bert-base-cased \
|
||||
--model_name_or_path google-bert/bert-base-cased \
|
||||
--dataset_name conll2003 \
|
||||
--task_name $TASK_NAME \
|
||||
--max_length 128 \
|
||||
|
||||
@@ -59,11 +59,11 @@ python examples/pytorch/translation/run_translation.py \
|
||||
|
||||
MBart and some T5 models require special handling.
|
||||
|
||||
T5 models `t5-small`, `t5-base`, `t5-large`, `t5-3b` and `t5-11b` must use an additional argument: `--source_prefix "translate {source_lang} to {target_lang}"`. For example:
|
||||
T5 models `google-t5/t5-small`, `google-t5/t5-base`, `google-t5/t5-large`, `google-t5/t5-3b` and `google-t5/t5-11b` must use an additional argument: `--source_prefix "translate {source_lang} to {target_lang}"`. For example:
|
||||
|
||||
```bash
|
||||
python examples/pytorch/translation/run_translation.py \
|
||||
--model_name_or_path t5-small \
|
||||
--model_name_or_path google-t5/t5-small \
|
||||
--do_train \
|
||||
--do_eval \
|
||||
--source_lang en \
|
||||
@@ -105,7 +105,7 @@ values for the arguments `--train_file`, `--validation_file` to match your setup
|
||||
|
||||
```bash
|
||||
python examples/pytorch/translation/run_translation.py \
|
||||
--model_name_or_path t5-small \
|
||||
--model_name_or_path google-t5/t5-small \
|
||||
--do_train \
|
||||
--do_eval \
|
||||
--source_lang en \
|
||||
@@ -134,7 +134,7 @@ If you want to use a pre-processed dataset that leads to high BLEU scores, but f
|
||||
|
||||
```bash
|
||||
python examples/pytorch/translation/run_translation.py \
|
||||
--model_name_or_path t5-small \
|
||||
--model_name_or_path google-t5/t5-small \
|
||||
--do_train \
|
||||
--do_eval \
|
||||
--source_lang en \
|
||||
|
||||
@@ -317,11 +317,11 @@ def main():
|
||||
logger.info(f"Training/evaluation parameters {training_args}")
|
||||
|
||||
if data_args.source_prefix is None and model_args.model_name_or_path in [
|
||||
"t5-small",
|
||||
"t5-base",
|
||||
"t5-large",
|
||||
"t5-3b",
|
||||
"t5-11b",
|
||||
"google-t5/t5-small",
|
||||
"google-t5/t5-base",
|
||||
"google-t5/t5-large",
|
||||
"google-t5/t5-3b",
|
||||
"google-t5/t5-11b",
|
||||
]:
|
||||
logger.warning(
|
||||
"You're running a t5 model but didn't provide a source prefix, which is expected, e.g. with "
|
||||
|
||||
Reference in New Issue
Block a user