Update all references to canonical models (#29001)

* Script & Manual edition

* Update
This commit is contained in:
Lysandre Debut
2024-02-16 08:16:58 +01:00
committed by GitHub
parent 1e402b957d
commit f497f564bb
561 changed files with 2682 additions and 2687 deletions

View File

@@ -91,8 +91,8 @@ class TokenizerUtilsTest(unittest.TestCase):
def test_batch_encoding_pickle(self):
import numpy as np
tokenizer_p = BertTokenizer.from_pretrained("bert-base-cased")
tokenizer_r = BertTokenizerFast.from_pretrained("bert-base-cased")
tokenizer_p = BertTokenizer.from_pretrained("google-bert/bert-base-cased")
tokenizer_r = BertTokenizerFast.from_pretrained("google-bert/bert-base-cased")
# Python no tensor
with self.subTest("BatchEncoding (Python, return_tensors=None)"):
@@ -119,8 +119,8 @@ class TokenizerUtilsTest(unittest.TestCase):
def tf_array_equals(t1, t2):
return tf.reduce_all(tf.equal(t1, t2))
tokenizer_p = BertTokenizer.from_pretrained("bert-base-cased")
tokenizer_r = BertTokenizerFast.from_pretrained("bert-base-cased")
tokenizer_p = BertTokenizer.from_pretrained("google-bert/bert-base-cased")
tokenizer_r = BertTokenizerFast.from_pretrained("google-bert/bert-base-cased")
with self.subTest("BatchEncoding (Python, return_tensors=TENSORFLOW)"):
self.assert_dump_and_restore(
@@ -137,8 +137,8 @@ class TokenizerUtilsTest(unittest.TestCase):
def test_batch_encoding_pickle_pt(self):
import torch
tokenizer_p = BertTokenizer.from_pretrained("bert-base-cased")
tokenizer_r = BertTokenizerFast.from_pretrained("bert-base-cased")
tokenizer_p = BertTokenizer.from_pretrained("google-bert/bert-base-cased")
tokenizer_r = BertTokenizerFast.from_pretrained("google-bert/bert-base-cased")
with self.subTest("BatchEncoding (Python, return_tensors=PYTORCH)"):
self.assert_dump_and_restore(
@@ -152,8 +152,8 @@ class TokenizerUtilsTest(unittest.TestCase):
@require_tokenizers
def test_batch_encoding_is_fast(self):
tokenizer_p = BertTokenizer.from_pretrained("bert-base-cased")
tokenizer_r = BertTokenizerFast.from_pretrained("bert-base-cased")
tokenizer_p = BertTokenizer.from_pretrained("google-bert/bert-base-cased")
tokenizer_r = BertTokenizerFast.from_pretrained("google-bert/bert-base-cased")
with self.subTest("Python Tokenizer"):
self.assertFalse(tokenizer_p("Small example to_encode").is_fast)
@@ -163,7 +163,7 @@ class TokenizerUtilsTest(unittest.TestCase):
@require_tokenizers
def test_batch_encoding_word_to_tokens(self):
tokenizer_r = BertTokenizerFast.from_pretrained("bert-base-cased")
tokenizer_r = BertTokenizerFast.from_pretrained("google-bert/bert-base-cased")
encoded = tokenizer_r(["Test", "\xad", "test"], is_split_into_words=True)
self.assertEqual(encoded.word_to_tokens(0), TokenSpan(start=1, end=2))
@@ -235,7 +235,7 @@ class TokenizerUtilsTest(unittest.TestCase):
def test_padding_accepts_tensors(self):
features = [{"input_ids": np.array([0, 1, 2])}, {"input_ids": np.array([0, 1, 2, 3])}]
tokenizer = BertTokenizer.from_pretrained("bert-base-cased")
tokenizer = BertTokenizer.from_pretrained("google-bert/bert-base-cased")
batch = tokenizer.pad(features, padding=True)
self.assertTrue(isinstance(batch["input_ids"], np.ndarray))
@@ -249,7 +249,7 @@ class TokenizerUtilsTest(unittest.TestCase):
import torch
features = [{"input_ids": torch.tensor([0, 1, 2])}, {"input_ids": torch.tensor([0, 1, 2, 3])}]
tokenizer = BertTokenizer.from_pretrained("bert-base-cased")
tokenizer = BertTokenizer.from_pretrained("google-bert/bert-base-cased")
batch = tokenizer.pad(features, padding=True)
self.assertTrue(isinstance(batch["input_ids"], torch.Tensor))
@@ -263,7 +263,7 @@ class TokenizerUtilsTest(unittest.TestCase):
import tensorflow as tf
features = [{"input_ids": tf.constant([0, 1, 2])}, {"input_ids": tf.constant([0, 1, 2, 3])}]
tokenizer = BertTokenizer.from_pretrained("bert-base-cased")
tokenizer = BertTokenizer.from_pretrained("google-bert/bert-base-cased")
batch = tokenizer.pad(features, padding=True)
self.assertTrue(isinstance(batch["input_ids"], tf.Tensor))