Update all references to canonical models (#29001)

* Script & Manual edition

* Update
This commit is contained in:
Lysandre Debut
2024-02-16 08:16:58 +01:00
committed by GitHub
parent 1e402b957d
commit f497f564bb
561 changed files with 2682 additions and 2687 deletions

View File

@@ -46,7 +46,7 @@ class AutoConfigTest(unittest.TestCase):
self.assertIsNotNone(importlib.util.find_spec("transformers.models.auto"))
def test_config_from_model_shortcut(self):
config = AutoConfig.from_pretrained("bert-base-uncased")
config = AutoConfig.from_pretrained("google-bert/bert-base-uncased")
self.assertIsInstance(config, BertConfig)
def test_config_model_type_from_local_file(self):

View File

@@ -30,7 +30,7 @@ if is_flax_available():
class FlaxAutoModelTest(unittest.TestCase):
@slow
def test_bert_from_pretrained(self):
for model_name in ["bert-base-cased", "bert-large-uncased"]:
for model_name in ["google-bert/bert-base-cased", "google-bert/bert-large-uncased"]:
with self.subTest(model_name):
config = AutoConfig.from_pretrained(model_name)
self.assertIsNotNone(config)
@@ -42,7 +42,7 @@ class FlaxAutoModelTest(unittest.TestCase):
@slow
def test_roberta_from_pretrained(self):
for model_name in ["roberta-base", "roberta-large"]:
for model_name in ["FacebookAI/roberta-base", "FacebookAI/roberta-large"]:
with self.subTest(model_name):
config = AutoConfig.from_pretrained(model_name)
self.assertIsNotNone(config)
@@ -54,7 +54,7 @@ class FlaxAutoModelTest(unittest.TestCase):
@slow
def test_bert_jax_jit(self):
for model_name in ["bert-base-cased", "bert-large-uncased"]:
for model_name in ["google-bert/bert-base-cased", "google-bert/bert-large-uncased"]:
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = FlaxBertModel.from_pretrained(model_name)
tokens = tokenizer("Do you support jax jitted function?", return_tensors=TensorType.JAX)
@@ -67,7 +67,7 @@ class FlaxAutoModelTest(unittest.TestCase):
@slow
def test_roberta_jax_jit(self):
for model_name in ["roberta-base", "roberta-large"]:
for model_name in ["FacebookAI/roberta-base", "FacebookAI/roberta-large"]:
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = FlaxRobertaModel.from_pretrained(model_name)
tokens = tokenizer("Do you support jax jitted function?", return_tensors=TensorType.JAX)

View File

@@ -85,7 +85,7 @@ if is_tf_available():
class TFAutoModelTest(unittest.TestCase):
@slow
def test_model_from_pretrained(self):
model_name = "bert-base-cased"
model_name = "google-bert/bert-base-cased"
config = AutoConfig.from_pretrained(model_name)
self.assertIsNotNone(config)
self.assertIsInstance(config, BertConfig)
@@ -96,7 +96,7 @@ class TFAutoModelTest(unittest.TestCase):
@slow
def test_model_for_pretraining_from_pretrained(self):
model_name = "bert-base-cased"
model_name = "google-bert/bert-base-cased"
config = AutoConfig.from_pretrained(model_name)
self.assertIsNotNone(config)
self.assertIsInstance(config, BertConfig)
@@ -155,7 +155,7 @@ class TFAutoModelTest(unittest.TestCase):
@slow
def test_sequence_classification_model_from_pretrained(self):
# for model_name in TF_BERT_PRETRAINED_MODEL_ARCHIVE_LIST[:1]:
for model_name in ["bert-base-uncased"]:
for model_name in ["google-bert/bert-base-uncased"]:
config = AutoConfig.from_pretrained(model_name)
self.assertIsNotNone(config)
self.assertIsInstance(config, BertConfig)
@@ -167,7 +167,7 @@ class TFAutoModelTest(unittest.TestCase):
@slow
def test_question_answering_model_from_pretrained(self):
# for model_name in TF_BERT_PRETRAINED_MODEL_ARCHIVE_LIST[:1]:
for model_name in ["bert-base-uncased"]:
for model_name in ["google-bert/bert-base-uncased"]:
config = AutoConfig.from_pretrained(model_name)
self.assertIsNotNone(config)
self.assertIsInstance(config, BertConfig)

View File

@@ -75,7 +75,7 @@ class TFPTAutoModelTest(unittest.TestCase):
@slow
def test_model_from_pretrained(self):
# for model_name in TF_BERT_PRETRAINED_MODEL_ARCHIVE_LIST[:1]:
for model_name in ["bert-base-uncased"]:
for model_name in ["google-bert/bert-base-uncased"]:
config = AutoConfig.from_pretrained(model_name)
self.assertIsNotNone(config)
self.assertIsInstance(config, BertConfig)
@@ -91,7 +91,7 @@ class TFPTAutoModelTest(unittest.TestCase):
@slow
def test_model_for_pretraining_from_pretrained(self):
# for model_name in TF_BERT_PRETRAINED_MODEL_ARCHIVE_LIST[:1]:
for model_name in ["bert-base-uncased"]:
for model_name in ["google-bert/bert-base-uncased"]:
config = AutoConfig.from_pretrained(model_name)
self.assertIsNotNone(config)
self.assertIsInstance(config, BertConfig)
@@ -185,7 +185,7 @@ class TFPTAutoModelTest(unittest.TestCase):
@slow
def test_sequence_classification_model_from_pretrained(self):
# for model_name in TF_BERT_PRETRAINED_MODEL_ARCHIVE_LIST[:1]:
for model_name in ["bert-base-uncased"]:
for model_name in ["google-bert/bert-base-uncased"]:
config = AutoConfig.from_pretrained(model_name)
self.assertIsNotNone(config)
self.assertIsInstance(config, BertConfig)
@@ -201,7 +201,7 @@ class TFPTAutoModelTest(unittest.TestCase):
@slow
def test_question_answering_model_from_pretrained(self):
# for model_name in TF_BERT_PRETRAINED_MODEL_ARCHIVE_LIST[:1]:
for model_name in ["bert-base-uncased"]:
for model_name in ["google-bert/bert-base-uncased"]:
config = AutoConfig.from_pretrained(model_name)
self.assertIsNotNone(config)
self.assertIsInstance(config, BertConfig)

View File

@@ -176,12 +176,14 @@ class AutoTokenizerTest(unittest.TestCase):
@require_tokenizers
def test_from_pretrained_use_fast_toggle(self):
self.assertIsInstance(AutoTokenizer.from_pretrained("bert-base-cased", use_fast=False), BertTokenizer)
self.assertIsInstance(AutoTokenizer.from_pretrained("bert-base-cased"), BertTokenizerFast)
self.assertIsInstance(
AutoTokenizer.from_pretrained("google-bert/bert-base-cased", use_fast=False), BertTokenizer
)
self.assertIsInstance(AutoTokenizer.from_pretrained("google-bert/bert-base-cased"), BertTokenizerFast)
@require_tokenizers
def test_do_lower_case(self):
tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased", do_lower_case=False)
tokenizer = AutoTokenizer.from_pretrained("distilbert/distilbert-base-uncased", do_lower_case=False)
sample = "Hello, world. How are you?"
tokens = tokenizer.tokenize(sample)
self.assertEqual("[UNK]", tokens[0])
@@ -211,15 +213,15 @@ class AutoTokenizerTest(unittest.TestCase):
self.assertEqual(tokenizer2.vocab_size, 12)
def test_auto_tokenizer_fast_no_slow(self):
tokenizer = AutoTokenizer.from_pretrained("ctrl")
tokenizer = AutoTokenizer.from_pretrained("Salesforce/ctrl")
# There is no fast CTRL so this always gives us a slow tokenizer.
self.assertIsInstance(tokenizer, CTRLTokenizer)
def test_get_tokenizer_config(self):
# Check we can load the tokenizer config of an online model.
config = get_tokenizer_config("bert-base-cased")
config = get_tokenizer_config("google-bert/bert-base-cased")
_ = config.pop("_commit_hash", None)
# If we ever update bert-base-cased tokenizer config, this dict here will need to be updated.
# If we ever update google-bert/bert-base-cased tokenizer config, this dict here will need to be updated.
self.assertEqual(config, {"do_lower_case": False})
# This model does not have a tokenizer_config so we get back an empty dict.