Update all references to canonical models (#29001)
* Script & Manual edition
* Update
This commit is contained in:
@@ -132,7 +132,7 @@ class PreTrainedTokenizationFastTest(TokenizerTesterMixin, unittest.TestCase):
|
||||
|
||||
sentences = ["Hello, y'all!", "How are you 😁 ? There should not be any issue right?"]
|
||||
|
||||
tokenizer = Tokenizer.from_pretrained("t5-base")
|
||||
tokenizer = Tokenizer.from_pretrained("google-t5/t5-base")
|
||||
# Enable padding
|
||||
tokenizer.enable_padding(pad_id=0, pad_token="<pad>", length=512, pad_to_multiple_of=8)
|
||||
self.assertEqual(
|
||||
@@ -179,7 +179,7 @@ class PreTrainedTokenizationFastTest(TokenizerTesterMixin, unittest.TestCase):
|
||||
@require_tokenizers
|
||||
class TokenizerVersioningTest(unittest.TestCase):
|
||||
def test_local_versioning(self):
|
||||
tokenizer = AutoTokenizer.from_pretrained("bert-base-cased")
|
||||
tokenizer = AutoTokenizer.from_pretrained("google-bert/bert-base-cased")
|
||||
json_tokenizer = json.loads(tokenizer._tokenizer.to_str())
|
||||
json_tokenizer["model"]["vocab"]["huggingface"] = len(tokenizer)
|
||||
|
||||
|
||||
@@ -91,8 +91,8 @@ class TokenizerUtilsTest(unittest.TestCase):
|
||||
def test_batch_encoding_pickle(self):
|
||||
import numpy as np
|
||||
|
||||
tokenizer_p = BertTokenizer.from_pretrained("bert-base-cased")
|
||||
tokenizer_r = BertTokenizerFast.from_pretrained("bert-base-cased")
|
||||
tokenizer_p = BertTokenizer.from_pretrained("google-bert/bert-base-cased")
|
||||
tokenizer_r = BertTokenizerFast.from_pretrained("google-bert/bert-base-cased")
|
||||
|
||||
# Python no tensor
|
||||
with self.subTest("BatchEncoding (Python, return_tensors=None)"):
|
||||
@@ -119,8 +119,8 @@ class TokenizerUtilsTest(unittest.TestCase):
|
||||
def tf_array_equals(t1, t2):
|
||||
return tf.reduce_all(tf.equal(t1, t2))
|
||||
|
||||
tokenizer_p = BertTokenizer.from_pretrained("bert-base-cased")
|
||||
tokenizer_r = BertTokenizerFast.from_pretrained("bert-base-cased")
|
||||
tokenizer_p = BertTokenizer.from_pretrained("google-bert/bert-base-cased")
|
||||
tokenizer_r = BertTokenizerFast.from_pretrained("google-bert/bert-base-cased")
|
||||
|
||||
with self.subTest("BatchEncoding (Python, return_tensors=TENSORFLOW)"):
|
||||
self.assert_dump_and_restore(
|
||||
@@ -137,8 +137,8 @@ class TokenizerUtilsTest(unittest.TestCase):
|
||||
def test_batch_encoding_pickle_pt(self):
|
||||
import torch
|
||||
|
||||
tokenizer_p = BertTokenizer.from_pretrained("bert-base-cased")
|
||||
tokenizer_r = BertTokenizerFast.from_pretrained("bert-base-cased")
|
||||
tokenizer_p = BertTokenizer.from_pretrained("google-bert/bert-base-cased")
|
||||
tokenizer_r = BertTokenizerFast.from_pretrained("google-bert/bert-base-cased")
|
||||
|
||||
with self.subTest("BatchEncoding (Python, return_tensors=PYTORCH)"):
|
||||
self.assert_dump_and_restore(
|
||||
@@ -152,8 +152,8 @@ class TokenizerUtilsTest(unittest.TestCase):
|
||||
|
||||
@require_tokenizers
|
||||
def test_batch_encoding_is_fast(self):
|
||||
tokenizer_p = BertTokenizer.from_pretrained("bert-base-cased")
|
||||
tokenizer_r = BertTokenizerFast.from_pretrained("bert-base-cased")
|
||||
tokenizer_p = BertTokenizer.from_pretrained("google-bert/bert-base-cased")
|
||||
tokenizer_r = BertTokenizerFast.from_pretrained("google-bert/bert-base-cased")
|
||||
|
||||
with self.subTest("Python Tokenizer"):
|
||||
self.assertFalse(tokenizer_p("Small example to_encode").is_fast)
|
||||
@@ -163,7 +163,7 @@ class TokenizerUtilsTest(unittest.TestCase):
|
||||
|
||||
@require_tokenizers
|
||||
def test_batch_encoding_word_to_tokens(self):
|
||||
tokenizer_r = BertTokenizerFast.from_pretrained("bert-base-cased")
|
||||
tokenizer_r = BertTokenizerFast.from_pretrained("google-bert/bert-base-cased")
|
||||
encoded = tokenizer_r(["Test", "\xad", "test"], is_split_into_words=True)
|
||||
|
||||
self.assertEqual(encoded.word_to_tokens(0), TokenSpan(start=1, end=2))
|
||||
@@ -235,7 +235,7 @@ class TokenizerUtilsTest(unittest.TestCase):
|
||||
|
||||
def test_padding_accepts_tensors(self):
|
||||
features = [{"input_ids": np.array([0, 1, 2])}, {"input_ids": np.array([0, 1, 2, 3])}]
|
||||
tokenizer = BertTokenizer.from_pretrained("bert-base-cased")
|
||||
tokenizer = BertTokenizer.from_pretrained("google-bert/bert-base-cased")
|
||||
|
||||
batch = tokenizer.pad(features, padding=True)
|
||||
self.assertTrue(isinstance(batch["input_ids"], np.ndarray))
|
||||
@@ -249,7 +249,7 @@ class TokenizerUtilsTest(unittest.TestCase):
|
||||
import torch
|
||||
|
||||
features = [{"input_ids": torch.tensor([0, 1, 2])}, {"input_ids": torch.tensor([0, 1, 2, 3])}]
|
||||
tokenizer = BertTokenizer.from_pretrained("bert-base-cased")
|
||||
tokenizer = BertTokenizer.from_pretrained("google-bert/bert-base-cased")
|
||||
|
||||
batch = tokenizer.pad(features, padding=True)
|
||||
self.assertTrue(isinstance(batch["input_ids"], torch.Tensor))
|
||||
@@ -263,7 +263,7 @@ class TokenizerUtilsTest(unittest.TestCase):
|
||||
import tensorflow as tf
|
||||
|
||||
features = [{"input_ids": tf.constant([0, 1, 2])}, {"input_ids": tf.constant([0, 1, 2, 3])}]
|
||||
tokenizer = BertTokenizer.from_pretrained("bert-base-cased")
|
||||
tokenizer = BertTokenizer.from_pretrained("google-bert/bert-base-cased")
|
||||
|
||||
batch = tokenizer.pad(features, padding=True)
|
||||
self.assertTrue(isinstance(batch["input_ids"], tf.Tensor))
|
||||
|
||||
Reference in New Issue
Block a user