Update all references to canonical models (#29001)

* Script & Manual edition
* Update
2024-02-16 08:16:58 +01:00
parent 1e402b957d
commit f497f564bb
561 changed files with 2682 additions and 2687 deletions
--- a/tests/tokenization/test_tokenization_fast.py
+++ b/tests/tokenization/test_tokenization_fast.py
@@ -132,7 +132,7 @@ class PreTrainedTokenizationFastTest(TokenizerTesterMixin, unittest.TestCase):

        sentences = ["Hello, y'all!", "How are you 😁 ? There should not be any issue right?"]

-        tokenizer = Tokenizer.from_pretrained("t5-base")
+        tokenizer = Tokenizer.from_pretrained("google-t5/t5-base")
        # Enable padding
        tokenizer.enable_padding(pad_id=0, pad_token="<pad>", length=512, pad_to_multiple_of=8)
        self.assertEqual(
@@ -179,7 +179,7 @@ class PreTrainedTokenizationFastTest(TokenizerTesterMixin, unittest.TestCase):
@require_tokenizers
 class TokenizerVersioningTest(unittest.TestCase):
    def test_local_versioning(self):
-        tokenizer = AutoTokenizer.from_pretrained("bert-base-cased")
+        tokenizer = AutoTokenizer.from_pretrained("google-bert/bert-base-cased")
        json_tokenizer = json.loads(tokenizer._tokenizer.to_str())
        json_tokenizer["model"]["vocab"]["huggingface"] = len(tokenizer)