Update all references to canonical models (#29001)

* Script & Manual edition * Update
2024-02-16 08:16:58 +01:00
parent 1e402b957d
commit f497f564bb
561 changed files with 2682 additions and 2687 deletions
--- a/examples/pytorch/README.md
+++ b/examples/pytorch/README.md
@@ -109,7 +109,7 @@ classification MNLI task using the `run_glue` script, with 8 GPUs:
 ```bash
 torchrun \
    --nproc_per_node 8 pytorch/text-classification/run_glue.py \
-    --model_name_or_path bert-large-uncased-whole-word-masking \
+    --model_name_or_path google-bert/bert-large-uncased-whole-word-masking \
    --task_name mnli \
    --do_train \
    --do_eval \
@@ -153,7 +153,7 @@ classification MNLI task using the `run_glue` script, with 8 TPUs (from this fol
 ```bash
 python xla_spawn.py --num_cores 8 \
    text-classification/run_glue.py \
-    --model_name_or_path bert-large-uncased-whole-word-masking \
+    --model_name_or_path google-bert/bert-large-uncased-whole-word-masking \
    --task_name mnli \
    --do_train \
    --do_eval \
--- a/examples/pytorch/contrastive-image-text/README.md
+++ b/examples/pytorch/contrastive-image-text/README.md
@@ -64,10 +64,10 @@ from transformers import (
 )

 model = VisionTextDualEncoderModel.from_vision_text_pretrained(
-    "openai/clip-vit-base-patch32", "roberta-base"
+    "openai/clip-vit-base-patch32", "FacebookAI/roberta-base"
 )

-tokenizer = AutoTokenizer.from_pretrained("roberta-base")
+tokenizer = AutoTokenizer.from_pretrained("FacebookAI/roberta-base")
 image_processor = AutoImageProcessor.from_pretrained("openai/clip-vit-base-patch32")
 processor = VisionTextDualEncoderProcessor(image_processor, tokenizer)

--- a/examples/pytorch/language-modeling/README.md
+++ b/examples/pytorch/language-modeling/README.md
@@ -36,7 +36,7 @@ the tokenization). The loss here is that of causal language modeling.

 ```bash
 python run_clm.py \
-    --model_name_or_path gpt2 \
+    --model_name_or_path openai-community/gpt2 \
    --dataset_name wikitext \
    --dataset_config_name wikitext-2-raw-v1 \
    --per_device_train_batch_size 8 \
@@ -53,7 +53,7 @@ To run on your own training and validation files, use the following command:

 ```bash
 python run_clm.py \
-    --model_name_or_path gpt2 \
+    --model_name_or_path openai-community/gpt2 \
    --train_file path_to_train_file \
    --validation_file path_to_validation_file \
    --per_device_train_batch_size 8 \
@@ -69,7 +69,7 @@ This uses the built in HuggingFace `Trainer` for training. If you want to use a
 python run_clm_no_trainer.py \
    --dataset_name wikitext \
    --dataset_config_name wikitext-2-raw-v1 \
-    --model_name_or_path gpt2 \
+    --model_name_or_path openai-community/gpt2 \
    --output_dir /tmp/test-clm
 ```

@@ -84,7 +84,7 @@ converge slightly slower (over-fitting takes more epochs).

 ```bash
 python run_mlm.py \
-    --model_name_or_path roberta-base \
+    --model_name_or_path FacebookAI/roberta-base \
    --dataset_name wikitext \
    --dataset_config_name wikitext-2-raw-v1 \
    --per_device_train_batch_size 8 \
@@ -98,7 +98,7 @@ To run on your own training and validation files, use the following command:

 ```bash
 python run_mlm.py \
-    --model_name_or_path roberta-base \
+    --model_name_or_path FacebookAI/roberta-base \
    --train_file path_to_train_file \
    --validation_file path_to_validation_file \
    --per_device_train_batch_size 8 \
@@ -117,7 +117,7 @@ This uses the built in HuggingFace `Trainer` for training. If you want to use a
 python run_mlm_no_trainer.py \
    --dataset_name wikitext \
    --dataset_config_name wikitext-2-raw-v1 \
-    --model_name_or_path roberta-base \
+    --model_name_or_path FacebookAI/roberta-base \
    --output_dir /tmp/test-mlm
 ```

@@ -144,7 +144,7 @@ Here is how to fine-tune XLNet on wikitext-2:

 ```bash
 python run_plm.py \
-    --model_name_or_path=xlnet-base-cased \
+    --model_name_or_path=xlnet/xlnet-base-cased \
    --dataset_name wikitext \
    --dataset_config_name wikitext-2-raw-v1 \
    --per_device_train_batch_size 8 \
@@ -158,7 +158,7 @@ To fine-tune it on your own training and validation file, run:

 ```bash
 python run_plm.py \
-    --model_name_or_path=xlnet-base-cased \
+    --model_name_or_path=xlnet/xlnet-base-cased \
    --train_file path_to_train_file \
    --validation_file path_to_validation_file \
    --per_device_train_batch_size 8 \
@@ -188,7 +188,7 @@ When training a model from scratch, configuration values may be overridden with


 ```bash
-python run_clm.py --model_type gpt2 --tokenizer_name gpt2 \ --config_overrides="n_embd=1024,n_head=16,n_layer=48,n_positions=102" \
+python run_clm.py --model_type gpt2 --tokenizer_name openai-community/gpt2 \ --config_overrides="n_embd=1024,n_head=16,n_layer=48,n_positions=102" \
 [...]
 ```

--- a/examples/pytorch/multiple-choice/README.md
+++ b/examples/pytorch/multiple-choice/README.md
@@ -22,7 +22,7 @@ limitations under the License.

 ```bash
 python examples/multiple-choice/run_swag.py \
--model_name_or_path roberta-base \
+--model_name_or_path FacebookAI/roberta-base \
 --do_train \
 --do_eval \
 --learning_rate 5e-5 \
@@ -62,7 +62,7 @@ then
 export DATASET_NAME=swag

 python run_swag_no_trainer.py \
-  --model_name_or_path bert-base-cased \
+  --model_name_or_path google-bert/bert-base-cased \
  --dataset_name $DATASET_NAME \
  --max_seq_length 128 \
  --per_device_train_batch_size 32 \
@@ -89,7 +89,7 @@ that will check everything is ready for training. Finally, you can launch traini
 export DATASET_NAME=swag

 accelerate launch run_swag_no_trainer.py \
-  --model_name_or_path bert-base-cased \
+  --model_name_or_path google-bert/bert-base-cased \
  --dataset_name $DATASET_NAME \
  --max_seq_length 128 \
  --per_device_train_batch_size 32 \
--- a/examples/pytorch/old_test_xla_examples.py
+++ b/examples/pytorch/old_test_xla_examples.py
@@ -54,7 +54,7 @@ class TorchXLAExamplesTests(TestCasePlus):
            ./examples/pytorch/text-classification/run_glue.py
            --num_cores=8
            ./examples/pytorch/text-classification/run_glue.py
-            --model_name_or_path distilbert-base-uncased
+            --model_name_or_path distilbert/distilbert-base-uncased
            --output_dir {tmp_dir}
            --overwrite_output_dir
            --train_file ./tests/fixtures/tests_samples/MRPC/train.csv
--- a/examples/pytorch/question-answering/README.md
+++ b/examples/pytorch/question-answering/README.md
@@ -40,7 +40,7 @@ on a single tesla V100 16GB.

 ```bash
 python run_qa.py \
-  --model_name_or_path bert-base-uncased \
+  --model_name_or_path google-bert/bert-base-uncased \
  --dataset_name squad \
  --do_train \
  --do_eval \
@@ -67,7 +67,7 @@ The [`run_qa_beam_search.py`](https://github.com/huggingface/transformers/blob/m

 ```bash
 python run_qa_beam_search.py \
-    --model_name_or_path xlnet-large-cased \
+    --model_name_or_path xlnet/xlnet-large-cased \
    --dataset_name squad \
    --do_train \
    --do_eval \
@@ -87,7 +87,7 @@ python run_qa_beam_search.py \
 export SQUAD_DIR=/path/to/SQUAD

 python run_qa_beam_search.py \
-    --model_name_or_path xlnet-large-cased \
+    --model_name_or_path xlnet/xlnet-large-cased \
    --dataset_name squad_v2 \
    --do_train \
    --do_eval \
@@ -111,7 +111,7 @@ This example code fine-tunes T5 on the SQuAD2.0 dataset.

 ```bash
 python run_seq2seq_qa.py \
-  --model_name_or_path t5-small \
+  --model_name_or_path google-t5/t5-small \
  --dataset_name squad_v2 \
  --context_column context \
  --question_column question \
@@ -143,7 +143,7 @@ then

 ```bash
 python run_qa_no_trainer.py \
-  --model_name_or_path bert-base-uncased \
+  --model_name_or_path google-bert/bert-base-uncased \
  --dataset_name squad \
  --max_seq_length 384 \
  --doc_stride 128 \
@@ -166,7 +166,7 @@ that will check everything is ready for training. Finally, you can launch traini

 ```bash
 accelerate launch run_qa_no_trainer.py \
-  --model_name_or_path bert-base-uncased \
+  --model_name_or_path google-bert/bert-base-uncased \
  --dataset_name squad \
  --max_seq_length 384 \
  --doc_stride 128 \
--- a/examples/pytorch/summarization/README.md
+++ b/examples/pytorch/summarization/README.md
@@ -41,7 +41,7 @@ and you also will find examples of these below.
 Here is an example on a summarization task:
 ```bash
 python examples/pytorch/summarization/run_summarization.py \
-    --model_name_or_path t5-small \
+    --model_name_or_path google-t5/t5-small \
    --do_train \
    --do_eval \
    --dataset_name cnn_dailymail \
@@ -54,9 +54,9 @@ python examples/pytorch/summarization/run_summarization.py \
    --predict_with_generate
 ```

-Only T5 models `t5-small`, `t5-base`, `t5-large`, `t5-3b` and `t5-11b` must use an additional argument: `--source_prefix "summarize: "`.
+Only T5 models `google-t5/t5-small`, `google-t5/t5-base`, `google-t5/t5-large`, `google-t5/t5-3b` and `google-t5/t5-11b` must use an additional argument: `--source_prefix "summarize: "`.

-We used CNN/DailyMail dataset in this example as `t5-small` was trained on it and one can get good scores even when pre-training with a very small sample.
+We used CNN/DailyMail dataset in this example as `google-t5/t5-small` was trained on it and one can get good scores even when pre-training with a very small sample.

 Extreme Summarization (XSum) Dataset is another commonly used dataset for the task of summarization. To use it replace `--dataset_name cnn_dailymail --dataset_config "3.0.0"` with  `--dataset_name xsum`.

@@ -65,7 +65,7 @@ And here is how you would use it on your own files, after adjusting the values f

 ```bash
 python examples/pytorch/summarization/run_summarization.py \
-    --model_name_or_path t5-small \
+    --model_name_or_path google-t5/t5-small \
    --do_train \
    --do_eval \
    --train_file path_to_csv_or_jsonlines_file \
@@ -156,7 +156,7 @@ then

 ```bash
 python run_summarization_no_trainer.py \
-    --model_name_or_path t5-small \
+    --model_name_or_path google-t5/t5-small \
    --dataset_name cnn_dailymail \
    --dataset_config "3.0.0" \
    --source_prefix "summarize: " \
@@ -179,7 +179,7 @@ that will check everything is ready for training. Finally, you can launch traini

 ```bash
 accelerate launch run_summarization_no_trainer.py \
-    --model_name_or_path t5-small \
+    --model_name_or_path google-t5/t5-small \
    --dataset_name cnn_dailymail \
    --dataset_config "3.0.0" \
    --source_prefix "summarize: " \
--- a/examples/pytorch/summarization/run_summarization.py
+++ b/examples/pytorch/summarization/run_summarization.py
@@ -368,11 +368,11 @@ def main():
    logger.info(f"Training/evaluation parameters {training_args}")

    if data_args.source_prefix is None and model_args.model_name_or_path in [
-        "t5-small",
-        "t5-base",
-        "t5-large",
-        "t5-3b",
-        "t5-11b",
+        "google-t5/t5-small",
+        "google-t5/t5-base",
+        "google-t5/t5-large",
+        "google-t5/t5-3b",
+        "google-t5/t5-11b",
    ]:
        logger.warning(
            "You're running a t5 model but didn't provide a source prefix, which is the expected, e.g. with "
--- a/examples/pytorch/summarization/run_summarization_no_trainer.py
+++ b/examples/pytorch/summarization/run_summarization_no_trainer.py
@@ -339,11 +339,11 @@ def main():

    accelerator = Accelerator(gradient_accumulation_steps=args.gradient_accumulation_steps, **accelerator_log_kwargs)
    if args.source_prefix is None and args.model_name_or_path in [
-        "t5-small",
-        "t5-base",
-        "t5-large",
-        "t5-3b",
-        "t5-11b",
+        "google-t5/t5-small",
+        "google-t5/t5-base",
+        "google-t5/t5-large",
+        "google-t5/t5-3b",
+        "google-t5/t5-11b",
    ]:
        logger.warning(
            "You're running a t5 model but didn't provide a source prefix, which is the expected, e.g. with "
--- a/examples/pytorch/test_accelerate_examples.py
+++ b/examples/pytorch/test_accelerate_examples.py
@@ -80,7 +80,7 @@ class ExamplesTestsNoTrainer(TestCasePlus):
        tmp_dir = self.get_auto_remove_tmp_dir()
        testargs = f"""
            {self.examples_dir}/pytorch/text-classification/run_glue_no_trainer.py
-            --model_name_or_path distilbert-base-uncased
+            --model_name_or_path distilbert/distilbert-base-uncased
            --output_dir {tmp_dir}
            --train_file ./tests/fixtures/tests_samples/MRPC/train.csv
            --validation_file ./tests/fixtures/tests_samples/MRPC/dev.csv
@@ -105,7 +105,7 @@ class ExamplesTestsNoTrainer(TestCasePlus):
        tmp_dir = self.get_auto_remove_tmp_dir()
        testargs = f"""
            {self.examples_dir}/pytorch/language-modeling/run_clm_no_trainer.py
-            --model_name_or_path distilgpt2
+            --model_name_or_path distilbert/distilgpt2
            --train_file ./tests/fixtures/sample_text.txt
            --validation_file ./tests/fixtures/sample_text.txt
            --block_size 128
@@ -133,7 +133,7 @@ class ExamplesTestsNoTrainer(TestCasePlus):
        tmp_dir = self.get_auto_remove_tmp_dir()
        testargs = f"""
            {self.examples_dir}/pytorch/language-modeling/run_mlm_no_trainer.py
-            --model_name_or_path distilroberta-base
+            --model_name_or_path distilbert/distilroberta-base
            --train_file ./tests/fixtures/sample_text.txt
            --validation_file ./tests/fixtures/sample_text.txt
            --output_dir {tmp_dir}
@@ -156,7 +156,7 @@ class ExamplesTestsNoTrainer(TestCasePlus):
        tmp_dir = self.get_auto_remove_tmp_dir()
        testargs = f"""
            {self.examples_dir}/pytorch/token-classification/run_ner_no_trainer.py
-            --model_name_or_path bert-base-uncased
+            --model_name_or_path google-bert/bert-base-uncased
            --train_file tests/fixtures/tests_samples/conll/sample.json
            --validation_file tests/fixtures/tests_samples/conll/sample.json
            --output_dir {tmp_dir}
@@ -181,7 +181,7 @@ class ExamplesTestsNoTrainer(TestCasePlus):
        tmp_dir = self.get_auto_remove_tmp_dir()
        testargs = f"""
            {self.examples_dir}/pytorch/question-answering/run_qa_no_trainer.py
-            --model_name_or_path bert-base-uncased
+            --model_name_or_path google-bert/bert-base-uncased
            --version_2_with_negative
            --train_file tests/fixtures/tests_samples/SQUAD/sample.json
            --validation_file tests/fixtures/tests_samples/SQUAD/sample.json
@@ -209,7 +209,7 @@ class ExamplesTestsNoTrainer(TestCasePlus):
        tmp_dir = self.get_auto_remove_tmp_dir()
        testargs = f"""
            {self.examples_dir}/pytorch/multiple-choice/run_swag_no_trainer.py
-            --model_name_or_path bert-base-uncased
+            --model_name_or_path google-bert/bert-base-uncased
            --train_file tests/fixtures/tests_samples/swag/sample.json
            --validation_file tests/fixtures/tests_samples/swag/sample.json
            --output_dir {tmp_dir}
@@ -232,7 +232,7 @@ class ExamplesTestsNoTrainer(TestCasePlus):
        tmp_dir = self.get_auto_remove_tmp_dir()
        testargs = f"""
            {self.examples_dir}/pytorch/summarization/run_summarization_no_trainer.py
-            --model_name_or_path t5-small
+            --model_name_or_path google-t5/t5-small
            --train_file tests/fixtures/tests_samples/xsum/sample.json
            --validation_file tests/fixtures/tests_samples/xsum/sample.json
            --output_dir {tmp_dir}
--- a/examples/pytorch/test_pytorch_examples.py
+++ b/examples/pytorch/test_pytorch_examples.py
@@ -99,7 +99,7 @@ class ExamplesTests(TestCasePlus):
        tmp_dir = self.get_auto_remove_tmp_dir()
        testargs = f"""
            run_glue.py
-            --model_name_or_path distilbert-base-uncased
+            --model_name_or_path distilbert/distilbert-base-uncased
            --output_dir {tmp_dir}
            --overwrite_output_dir
            --train_file ./tests/fixtures/tests_samples/MRPC/train.csv
@@ -127,7 +127,7 @@ class ExamplesTests(TestCasePlus):
        tmp_dir = self.get_auto_remove_tmp_dir()
        testargs = f"""
            run_clm.py
-            --model_name_or_path distilgpt2
+            --model_name_or_path distilbert/distilgpt2
            --train_file ./tests/fixtures/sample_text.txt
            --validation_file ./tests/fixtures/sample_text.txt
            --do_train
@@ -160,7 +160,7 @@ class ExamplesTests(TestCasePlus):
        testargs = f"""
            run_clm.py
            --model_type gpt2
-            --tokenizer_name gpt2
+            --tokenizer_name openai-community/gpt2
            --train_file ./tests/fixtures/sample_text.txt
            --output_dir {tmp_dir}
            --config_overrides n_embd=10,n_head=2
@@ -181,7 +181,7 @@ class ExamplesTests(TestCasePlus):
        tmp_dir = self.get_auto_remove_tmp_dir()
        testargs = f"""
            run_mlm.py
-            --model_name_or_path distilroberta-base
+            --model_name_or_path distilbert/distilroberta-base
            --train_file ./tests/fixtures/sample_text.txt
            --validation_file ./tests/fixtures/sample_text.txt
            --output_dir {tmp_dir}
@@ -207,7 +207,7 @@ class ExamplesTests(TestCasePlus):
        tmp_dir = self.get_auto_remove_tmp_dir()
        testargs = f"""
            run_ner.py
-            --model_name_or_path bert-base-uncased
+            --model_name_or_path google-bert/bert-base-uncased
            --train_file tests/fixtures/tests_samples/conll/sample.json
            --validation_file tests/fixtures/tests_samples/conll/sample.json
            --output_dir {tmp_dir}
@@ -235,7 +235,7 @@ class ExamplesTests(TestCasePlus):
        tmp_dir = self.get_auto_remove_tmp_dir()
        testargs = f"""
            run_qa.py
-            --model_name_or_path bert-base-uncased
+            --model_name_or_path google-bert/bert-base-uncased
            --version_2_with_negative
            --train_file tests/fixtures/tests_samples/SQUAD/sample.json
            --validation_file tests/fixtures/tests_samples/SQUAD/sample.json
@@ -260,7 +260,7 @@ class ExamplesTests(TestCasePlus):
        tmp_dir = self.get_auto_remove_tmp_dir()
        testargs = f"""
            run_seq2seq_qa.py
-            --model_name_or_path t5-small
+            --model_name_or_path google-t5/t5-small
            --context_column context
            --question_column question
            --answer_column answers
@@ -289,7 +289,7 @@ class ExamplesTests(TestCasePlus):
        tmp_dir = self.get_auto_remove_tmp_dir()
        testargs = f"""
            run_swag.py
-            --model_name_or_path bert-base-uncased
+            --model_name_or_path google-bert/bert-base-uncased
            --train_file tests/fixtures/tests_samples/swag/sample.json
            --validation_file tests/fixtures/tests_samples/swag/sample.json
            --output_dir {tmp_dir}
@@ -327,7 +327,7 @@ class ExamplesTests(TestCasePlus):
        tmp_dir = self.get_auto_remove_tmp_dir()
        testargs = f"""
            run_summarization.py
-            --model_name_or_path t5-small
+            --model_name_or_path google-t5/t5-small
            --train_file tests/fixtures/tests_samples/xsum/sample.json
            --validation_file tests/fixtures/tests_samples/xsum/sample.json
            --output_dir {tmp_dir}
--- a/examples/pytorch/text-classification/README.md
+++ b/examples/pytorch/text-classification/README.md
@@ -31,7 +31,7 @@ GLUE is made up of a total of 9 different tasks. Here is how to run the script o
 export TASK_NAME=mrpc

 python run_glue.py \
-  --model_name_or_path bert-base-cased \
+  --model_name_or_path google-bert/bert-base-cased \
  --task_name $TASK_NAME \
  --do_train \
  --do_eval \
@@ -68,7 +68,7 @@ The following example fine-tunes BERT on the `imdb` dataset hosted on our [hub](

 ```bash
 python run_glue.py \
-  --model_name_or_path bert-base-cased \
+  --model_name_or_path google-bert/bert-base-cased \
  --dataset_name imdb  \
  --do_train \
  --do_predict \
@@ -90,7 +90,7 @@ We can specify the metric, the label column and also choose which text columns to
 dataset="amazon_reviews_multi"
 subset="en"
 python run_classification.py \
-    --model_name_or_path  bert-base-uncased \
+    --model_name_or_path  google-bert/bert-base-uncased \
    --dataset_name ${dataset} \
    --dataset_config_name ${subset} \
    --shuffle_train_dataset \
@@ -113,7 +113,7 @@ The following is a multi-label classification example. It fine-tunes BERT on the
 dataset="reuters21578"
 subset="ModApte"
 python run_classification.py \
-    --model_name_or_path bert-base-uncased \
+    --model_name_or_path google-bert/bert-base-uncased \
    --dataset_name ${dataset} \
    --dataset_config_name ${subset} \
    --shuffle_train_dataset \
@@ -175,7 +175,7 @@ then
 export TASK_NAME=mrpc

 python run_glue_no_trainer.py \
-  --model_name_or_path bert-base-cased \
+  --model_name_or_path google-bert/bert-base-cased \
  --task_name $TASK_NAME \
  --max_length 128 \
  --per_device_train_batch_size 32 \
@@ -202,7 +202,7 @@ that will check everything is ready for training. Finally, you can launch traini
 export TASK_NAME=mrpc

 accelerate launch run_glue_no_trainer.py \
-  --model_name_or_path bert-base-cased \
+  --model_name_or_path google-bert/bert-base-cased \
  --task_name $TASK_NAME \
  --max_length 128 \
  --per_device_train_batch_size 32 \
@@ -232,7 +232,7 @@ This example code fine-tunes mBERT (multi-lingual BERT) on the XNLI dataset. It

 ```bash
 python run_xnli.py \
-  --model_name_or_path bert-base-multilingual-cased \
+  --model_name_or_path google-bert/bert-base-multilingual-cased \
  --language de \
  --train_language en \
  --do_train \
--- a/examples/pytorch/text-generation/README.md
+++ b/examples/pytorch/text-generation/README.md
@@ -26,6 +26,6 @@ Example usage:

 ```bash
 python run_generation.py \
-    --model_type=gpt2 \
-    --model_name_or_path=gpt2
+    --model_type=gpt2 \
+    --model_name_or_path=openai-community/gpt2
 ```
--- a/examples/pytorch/text-generation/run_generation_contrastive_search.py
+++ b/examples/pytorch/text-generation/run_generation_contrastive_search.py
@@ -16,7 +16,7 @@
 """ The examples of running contrastive search on the auto-APIs;

 Running this example:
-python run_generation_contrastive_search.py --model_name_or_path=gpt2-large --penalty_alpha=0.6 --k=4 --length=256
+python run_generation_contrastive_search.py --model_name_or_path=openai-community/gpt2-large --penalty_alpha=0.6 --k=4 --length=256
 """


--- a/examples/pytorch/token-classification/README.md
+++ b/examples/pytorch/token-classification/README.md
@@ -29,7 +29,7 @@ The following example fine-tunes BERT on CoNLL-2003:

 ```bash
 python run_ner.py \
-  --model_name_or_path bert-base-uncased \
+  --model_name_or_path google-bert/bert-base-uncased \
  --dataset_name conll2003 \
  --output_dir /tmp/test-ner \
  --do_train \
@@ -42,7 +42,7 @@ To run on your own training and validation files, use the following command:

 ```bash
 python run_ner.py \
-  --model_name_or_path bert-base-uncased \
+  --model_name_or_path google-bert/bert-base-uncased \
  --train_file path_to_train_file \
  --validation_file path_to_validation_file \
  --output_dir /tmp/test-ner \
@@ -84,7 +84,7 @@ then
 export TASK_NAME=ner

 python run_ner_no_trainer.py \
-  --model_name_or_path bert-base-cased \
+  --model_name_or_path google-bert/bert-base-cased \
  --dataset_name conll2003 \
  --task_name $TASK_NAME \
  --max_length 128 \
@@ -112,7 +112,7 @@ that will check everything is ready for training. Finally, you can launch traini
 export TASK_NAME=ner

 accelerate launch run_ner_no_trainer.py \
-  --model_name_or_path bert-base-cased \
+  --model_name_or_path google-bert/bert-base-cased \
  --dataset_name conll2003 \
  --task_name $TASK_NAME \
  --max_length 128 \
--- a/examples/pytorch/translation/README.md
+++ b/examples/pytorch/translation/README.md
@@ -59,11 +59,11 @@ python examples/pytorch/translation/run_translation.py \

 MBart and some T5 models require special handling.

-T5 models `t5-small`, `t5-base`, `t5-large`, `t5-3b` and `t5-11b` must use an additional argument: `--source_prefix "translate {source_lang} to {target_lang}"`. For example:
+T5 models `google-t5/t5-small`, `google-t5/t5-base`, `google-t5/t5-large`, `google-t5/t5-3b` and `google-t5/t5-11b` must use an additional argument: `--source_prefix "translate {source_lang} to {target_lang}"`. For example:

 ```bash
 python examples/pytorch/translation/run_translation.py \
-    --model_name_or_path t5-small \
+    --model_name_or_path google-t5/t5-small \
    --do_train \
    --do_eval \
    --source_lang en \
@@ -105,7 +105,7 @@ values for the arguments `--train_file`, `--validation_file` to match your setup

 ```bash
 python examples/pytorch/translation/run_translation.py \
-    --model_name_or_path t5-small \
+    --model_name_or_path google-t5/t5-small \
    --do_train \
    --do_eval \
    --source_lang en \
@@ -134,7 +134,7 @@ If you want to use a pre-processed dataset that leads to high BLEU scores, but f

 ```bash
 python examples/pytorch/translation/run_translation.py \
-    --model_name_or_path t5-small \
+    --model_name_or_path google-t5/t5-small \
    --do_train \
    --do_eval \
    --source_lang en \
--- a/examples/pytorch/translation/run_translation.py
+++ b/examples/pytorch/translation/run_translation.py
@@ -317,11 +317,11 @@ def main():
    logger.info(f"Training/evaluation parameters {training_args}")

    if data_args.source_prefix is None and model_args.model_name_or_path in [
-        "t5-small",
-        "t5-base",
-        "t5-large",
-        "t5-3b",
-        "t5-11b",
+        "google-t5/t5-small",
+        "google-t5/t5-base",
+        "google-t5/t5-large",
+        "google-t5/t5-3b",
+        "google-t5/t5-11b",
    ]:
        logger.warning(
            "You're running a t5 model but didn't provide a source prefix, which is expected, e.g. with "