Update all references to canonical models (#29001)

* Script & Manual edition * Update
2024-02-16 08:16:58 +01:00
parent 1e402b957d
commit f497f564bb
561 changed files with 2682 additions and 2687 deletions
--- a/tests/models/auto/test_configuration_auto.py
+++ b/tests/models/auto/test_configuration_auto.py
@@ -46,7 +46,7 @@ class AutoConfigTest(unittest.TestCase):
        self.assertIsNotNone(importlib.util.find_spec("transformers.models.auto"))

    def test_config_from_model_shortcut(self):
-        config = AutoConfig.from_pretrained("bert-base-uncased")
+        config = AutoConfig.from_pretrained("google-bert/bert-base-uncased")
        self.assertIsInstance(config, BertConfig)

    def test_config_model_type_from_local_file(self):
--- a/tests/models/auto/test_modeling_flax_auto.py
+++ b/tests/models/auto/test_modeling_flax_auto.py
@@ -30,7 +30,7 @@ if is_flax_available():
 class FlaxAutoModelTest(unittest.TestCase):
    @slow
    def test_bert_from_pretrained(self):
-        for model_name in ["bert-base-cased", "bert-large-uncased"]:
+        for model_name in ["google-bert/bert-base-cased", "google-bert/bert-large-uncased"]:
            with self.subTest(model_name):
                config = AutoConfig.from_pretrained(model_name)
                self.assertIsNotNone(config)
@@ -42,7 +42,7 @@ class FlaxAutoModelTest(unittest.TestCase):

    @slow
    def test_roberta_from_pretrained(self):
-        for model_name in ["roberta-base", "roberta-large"]:
+        for model_name in ["FacebookAI/roberta-base", "FacebookAI/roberta-large"]:
            with self.subTest(model_name):
                config = AutoConfig.from_pretrained(model_name)
                self.assertIsNotNone(config)
@@ -54,7 +54,7 @@ class FlaxAutoModelTest(unittest.TestCase):

    @slow
    def test_bert_jax_jit(self):
-        for model_name in ["bert-base-cased", "bert-large-uncased"]:
+        for model_name in ["google-bert/bert-base-cased", "google-bert/bert-large-uncased"]:
            tokenizer = AutoTokenizer.from_pretrained(model_name)
            model = FlaxBertModel.from_pretrained(model_name)
            tokens = tokenizer("Do you support jax jitted function?", return_tensors=TensorType.JAX)
@@ -67,7 +67,7 @@ class FlaxAutoModelTest(unittest.TestCase):

    @slow
    def test_roberta_jax_jit(self):
-        for model_name in ["roberta-base", "roberta-large"]:
+        for model_name in ["FacebookAI/roberta-base", "FacebookAI/roberta-large"]:
            tokenizer = AutoTokenizer.from_pretrained(model_name)
            model = FlaxRobertaModel.from_pretrained(model_name)
            tokens = tokenizer("Do you support jax jitted function?", return_tensors=TensorType.JAX)
--- a/tests/models/auto/test_modeling_tf_auto.py
+++ b/tests/models/auto/test_modeling_tf_auto.py
@@ -85,7 +85,7 @@ if is_tf_available():
 class TFAutoModelTest(unittest.TestCase):
    @slow
    def test_model_from_pretrained(self):
-        model_name = "bert-base-cased"
+        model_name = "google-bert/bert-base-cased"
        config = AutoConfig.from_pretrained(model_name)
        self.assertIsNotNone(config)
        self.assertIsInstance(config, BertConfig)
@@ -96,7 +96,7 @@ class TFAutoModelTest(unittest.TestCase):

    @slow
    def test_model_for_pretraining_from_pretrained(self):
-        model_name = "bert-base-cased"
+        model_name = "google-bert/bert-base-cased"
        config = AutoConfig.from_pretrained(model_name)
        self.assertIsNotNone(config)
        self.assertIsInstance(config, BertConfig)
@@ -155,7 +155,7 @@ class TFAutoModelTest(unittest.TestCase):
    @slow
    def test_sequence_classification_model_from_pretrained(self):
        # for model_name in TF_BERT_PRETRAINED_MODEL_ARCHIVE_LIST[:1]:
-        for model_name in ["bert-base-uncased"]:
+        for model_name in ["google-bert/bert-base-uncased"]:
            config = AutoConfig.from_pretrained(model_name)
            self.assertIsNotNone(config)
            self.assertIsInstance(config, BertConfig)
@@ -167,7 +167,7 @@ class TFAutoModelTest(unittest.TestCase):
    @slow
    def test_question_answering_model_from_pretrained(self):
        # for model_name in TF_BERT_PRETRAINED_MODEL_ARCHIVE_LIST[:1]:
-        for model_name in ["bert-base-uncased"]:
+        for model_name in ["google-bert/bert-base-uncased"]:
            config = AutoConfig.from_pretrained(model_name)
            self.assertIsNotNone(config)
            self.assertIsInstance(config, BertConfig)
--- a/tests/models/auto/test_modeling_tf_pytorch.py
+++ b/tests/models/auto/test_modeling_tf_pytorch.py
@@ -75,7 +75,7 @@ class TFPTAutoModelTest(unittest.TestCase):
    @slow
    def test_model_from_pretrained(self):
        # for model_name in TF_BERT_PRETRAINED_MODEL_ARCHIVE_LIST[:1]:
-        for model_name in ["bert-base-uncased"]:
+        for model_name in ["google-bert/bert-base-uncased"]:
            config = AutoConfig.from_pretrained(model_name)
            self.assertIsNotNone(config)
            self.assertIsInstance(config, BertConfig)
@@ -91,7 +91,7 @@ class TFPTAutoModelTest(unittest.TestCase):
    @slow
    def test_model_for_pretraining_from_pretrained(self):
        # for model_name in TF_BERT_PRETRAINED_MODEL_ARCHIVE_LIST[:1]:
-        for model_name in ["bert-base-uncased"]:
+        for model_name in ["google-bert/bert-base-uncased"]:
            config = AutoConfig.from_pretrained(model_name)
            self.assertIsNotNone(config)
            self.assertIsInstance(config, BertConfig)
@@ -185,7 +185,7 @@ class TFPTAutoModelTest(unittest.TestCase):
    @slow
    def test_sequence_classification_model_from_pretrained(self):
        # for model_name in TF_BERT_PRETRAINED_MODEL_ARCHIVE_LIST[:1]:
-        for model_name in ["bert-base-uncased"]:
+        for model_name in ["google-bert/bert-base-uncased"]:
            config = AutoConfig.from_pretrained(model_name)
            self.assertIsNotNone(config)
            self.assertIsInstance(config, BertConfig)
@@ -201,7 +201,7 @@ class TFPTAutoModelTest(unittest.TestCase):
    @slow
    def test_question_answering_model_from_pretrained(self):
        # for model_name in TF_BERT_PRETRAINED_MODEL_ARCHIVE_LIST[:1]:
-        for model_name in ["bert-base-uncased"]:
+        for model_name in ["google-bert/bert-base-uncased"]:
            config = AutoConfig.from_pretrained(model_name)
            self.assertIsNotNone(config)
            self.assertIsInstance(config, BertConfig)
--- a/tests/models/auto/test_tokenization_auto.py
+++ b/tests/models/auto/test_tokenization_auto.py
@@ -176,12 +176,14 @@ class AutoTokenizerTest(unittest.TestCase):

    @require_tokenizers
    def test_from_pretrained_use_fast_toggle(self):
-        self.assertIsInstance(AutoTokenizer.from_pretrained("bert-base-cased", use_fast=False), BertTokenizer)
-        self.assertIsInstance(AutoTokenizer.from_pretrained("bert-base-cased"), BertTokenizerFast)
+        self.assertIsInstance(
+            AutoTokenizer.from_pretrained("google-bert/bert-base-cased", use_fast=False), BertTokenizer
+        )
+        self.assertIsInstance(AutoTokenizer.from_pretrained("google-bert/bert-base-cased"), BertTokenizerFast)

    @require_tokenizers
    def test_do_lower_case(self):
-        tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased", do_lower_case=False)
+        tokenizer = AutoTokenizer.from_pretrained("distilbert/distilbert-base-uncased", do_lower_case=False)
        sample = "Hello, world. How are you?"
        tokens = tokenizer.tokenize(sample)
        self.assertEqual("[UNK]", tokens[0])
@@ -211,15 +213,15 @@ class AutoTokenizerTest(unittest.TestCase):
        self.assertEqual(tokenizer2.vocab_size, 12)

    def test_auto_tokenizer_fast_no_slow(self):
-        tokenizer = AutoTokenizer.from_pretrained("ctrl")
+        tokenizer = AutoTokenizer.from_pretrained("Salesforce/ctrl")
        # There is no fast CTRL so this always gives us a slow tokenizer.
        self.assertIsInstance(tokenizer, CTRLTokenizer)

    def test_get_tokenizer_config(self):
        # Check we can load the tokenizer config of an online model.
-        config = get_tokenizer_config("bert-base-cased")
+        config = get_tokenizer_config("google-bert/bert-base-cased")
        _ = config.pop("_commit_hash", None)
-        # If we ever update bert-base-cased tokenizer config, this dict here will need to be updated.
+        # If we ever update google-bert/bert-base-cased tokenizer config, this dict here will need to be updated.
        self.assertEqual(config, {"do_lower_case": False})

        # This model does not have a tokenizer_config so we get back an empty dict.