Update all references to canonical models (#29001)

* Script & Manual edition

* Update
This commit is contained in:
Lysandre Debut
2024-02-16 08:16:58 +01:00
committed by GitHub
parent 1e402b957d
commit f497f564bb
561 changed files with 2682 additions and 2687 deletions

View File

@@ -331,7 +331,7 @@ class AlbertModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase):
class AlbertModelIntegrationTest(unittest.TestCase):
@slow
def test_inference_no_head_absolute_embedding(self):
model = AlbertModel.from_pretrained("albert-base-v2")
model = AlbertModel.from_pretrained("albert/albert-base-v2")
input_ids = torch.tensor([[0, 345, 232, 328, 740, 140, 1695, 69, 6078, 1588, 2]])
attention_mask = torch.tensor([[0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]])
with torch.no_grad():

View File

@@ -139,7 +139,7 @@ class FlaxAlbertModelTest(FlaxModelTesterMixin, unittest.TestCase):
@slow
def test_model_from_pretrained(self):
for model_class_name in self.all_model_classes:
model = model_class_name.from_pretrained("albert-base-v2")
model = model_class_name.from_pretrained("albert/albert-base-v2")
outputs = model(np.ones((1, 1)))
self.assertIsNotNone(outputs)
@@ -148,7 +148,7 @@ class FlaxAlbertModelTest(FlaxModelTesterMixin, unittest.TestCase):
class FlaxAlbertModelIntegrationTest(unittest.TestCase):
@slow
def test_inference_no_head_absolute_embedding(self):
model = FlaxAlbertModel.from_pretrained("albert-base-v2")
model = FlaxAlbertModel.from_pretrained("albert/albert-base-v2")
input_ids = np.array([[0, 345, 232, 328, 740, 140, 1695, 69, 6078, 1588, 2]])
attention_mask = np.array([[0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]])
output = model(input_ids, attention_mask=attention_mask)[0]

View File

@@ -311,7 +311,7 @@ class TFAlbertModelTest(TFModelTesterMixin, PipelineTesterMixin, unittest.TestCa
class TFAlbertModelIntegrationTest(unittest.TestCase):
@slow
def test_inference_masked_lm(self):
model = TFAlbertForPreTraining.from_pretrained("albert-base-v2")
model = TFAlbertForPreTraining.from_pretrained("albert/albert-base-v2")
input_ids = tf.constant([[0, 1, 2, 3, 4, 5]])
output = model(input_ids)[0]

View File

@@ -127,6 +127,6 @@ class AlbertTokenizationTest(TokenizerTesterMixin, unittest.TestCase):
self.tokenizer_integration_test_util(
expected_encoding=expected_encoding,
model_name="albert-base-v2",
model_name="albert/albert-base-v2",
revision="6b6560eaf5ff2e250b00c50f380c5389a9c2d82e",
)

View File

@@ -46,7 +46,7 @@ class AutoConfigTest(unittest.TestCase):
self.assertIsNotNone(importlib.util.find_spec("transformers.models.auto"))
def test_config_from_model_shortcut(self):
config = AutoConfig.from_pretrained("bert-base-uncased")
config = AutoConfig.from_pretrained("google-bert/bert-base-uncased")
self.assertIsInstance(config, BertConfig)
def test_config_model_type_from_local_file(self):

View File

@@ -30,7 +30,7 @@ if is_flax_available():
class FlaxAutoModelTest(unittest.TestCase):
@slow
def test_bert_from_pretrained(self):
for model_name in ["bert-base-cased", "bert-large-uncased"]:
for model_name in ["google-bert/bert-base-cased", "google-bert/bert-large-uncased"]:
with self.subTest(model_name):
config = AutoConfig.from_pretrained(model_name)
self.assertIsNotNone(config)
@@ -42,7 +42,7 @@ class FlaxAutoModelTest(unittest.TestCase):
@slow
def test_roberta_from_pretrained(self):
for model_name in ["roberta-base", "roberta-large"]:
for model_name in ["FacebookAI/roberta-base", "FacebookAI/roberta-large"]:
with self.subTest(model_name):
config = AutoConfig.from_pretrained(model_name)
self.assertIsNotNone(config)
@@ -54,7 +54,7 @@ class FlaxAutoModelTest(unittest.TestCase):
@slow
def test_bert_jax_jit(self):
for model_name in ["bert-base-cased", "bert-large-uncased"]:
for model_name in ["google-bert/bert-base-cased", "google-bert/bert-large-uncased"]:
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = FlaxBertModel.from_pretrained(model_name)
tokens = tokenizer("Do you support jax jitted function?", return_tensors=TensorType.JAX)
@@ -67,7 +67,7 @@ class FlaxAutoModelTest(unittest.TestCase):
@slow
def test_roberta_jax_jit(self):
for model_name in ["roberta-base", "roberta-large"]:
for model_name in ["FacebookAI/roberta-base", "FacebookAI/roberta-large"]:
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = FlaxRobertaModel.from_pretrained(model_name)
tokens = tokenizer("Do you support jax jitted function?", return_tensors=TensorType.JAX)

View File

@@ -85,7 +85,7 @@ if is_tf_available():
class TFAutoModelTest(unittest.TestCase):
@slow
def test_model_from_pretrained(self):
model_name = "bert-base-cased"
model_name = "google-bert/bert-base-cased"
config = AutoConfig.from_pretrained(model_name)
self.assertIsNotNone(config)
self.assertIsInstance(config, BertConfig)
@@ -96,7 +96,7 @@ class TFAutoModelTest(unittest.TestCase):
@slow
def test_model_for_pretraining_from_pretrained(self):
model_name = "bert-base-cased"
model_name = "google-bert/bert-base-cased"
config = AutoConfig.from_pretrained(model_name)
self.assertIsNotNone(config)
self.assertIsInstance(config, BertConfig)
@@ -155,7 +155,7 @@ class TFAutoModelTest(unittest.TestCase):
@slow
def test_sequence_classification_model_from_pretrained(self):
# for model_name in TF_BERT_PRETRAINED_MODEL_ARCHIVE_LIST[:1]:
for model_name in ["bert-base-uncased"]:
for model_name in ["google-bert/bert-base-uncased"]:
config = AutoConfig.from_pretrained(model_name)
self.assertIsNotNone(config)
self.assertIsInstance(config, BertConfig)
@@ -167,7 +167,7 @@ class TFAutoModelTest(unittest.TestCase):
@slow
def test_question_answering_model_from_pretrained(self):
# for model_name in TF_BERT_PRETRAINED_MODEL_ARCHIVE_LIST[:1]:
for model_name in ["bert-base-uncased"]:
for model_name in ["google-bert/bert-base-uncased"]:
config = AutoConfig.from_pretrained(model_name)
self.assertIsNotNone(config)
self.assertIsInstance(config, BertConfig)

View File

@@ -75,7 +75,7 @@ class TFPTAutoModelTest(unittest.TestCase):
@slow
def test_model_from_pretrained(self):
# for model_name in TF_BERT_PRETRAINED_MODEL_ARCHIVE_LIST[:1]:
for model_name in ["bert-base-uncased"]:
for model_name in ["google-bert/bert-base-uncased"]:
config = AutoConfig.from_pretrained(model_name)
self.assertIsNotNone(config)
self.assertIsInstance(config, BertConfig)
@@ -91,7 +91,7 @@ class TFPTAutoModelTest(unittest.TestCase):
@slow
def test_model_for_pretraining_from_pretrained(self):
# for model_name in TF_BERT_PRETRAINED_MODEL_ARCHIVE_LIST[:1]:
for model_name in ["bert-base-uncased"]:
for model_name in ["google-bert/bert-base-uncased"]:
config = AutoConfig.from_pretrained(model_name)
self.assertIsNotNone(config)
self.assertIsInstance(config, BertConfig)
@@ -185,7 +185,7 @@ class TFPTAutoModelTest(unittest.TestCase):
@slow
def test_sequence_classification_model_from_pretrained(self):
# for model_name in TF_BERT_PRETRAINED_MODEL_ARCHIVE_LIST[:1]:
for model_name in ["bert-base-uncased"]:
for model_name in ["google-bert/bert-base-uncased"]:
config = AutoConfig.from_pretrained(model_name)
self.assertIsNotNone(config)
self.assertIsInstance(config, BertConfig)
@@ -201,7 +201,7 @@ class TFPTAutoModelTest(unittest.TestCase):
@slow
def test_question_answering_model_from_pretrained(self):
# for model_name in TF_BERT_PRETRAINED_MODEL_ARCHIVE_LIST[:1]:
for model_name in ["bert-base-uncased"]:
for model_name in ["google-bert/bert-base-uncased"]:
config = AutoConfig.from_pretrained(model_name)
self.assertIsNotNone(config)
self.assertIsInstance(config, BertConfig)

View File

@@ -176,12 +176,14 @@ class AutoTokenizerTest(unittest.TestCase):
@require_tokenizers
def test_from_pretrained_use_fast_toggle(self):
self.assertIsInstance(AutoTokenizer.from_pretrained("bert-base-cased", use_fast=False), BertTokenizer)
self.assertIsInstance(AutoTokenizer.from_pretrained("bert-base-cased"), BertTokenizerFast)
self.assertIsInstance(
AutoTokenizer.from_pretrained("google-bert/bert-base-cased", use_fast=False), BertTokenizer
)
self.assertIsInstance(AutoTokenizer.from_pretrained("google-bert/bert-base-cased"), BertTokenizerFast)
@require_tokenizers
def test_do_lower_case(self):
tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased", do_lower_case=False)
tokenizer = AutoTokenizer.from_pretrained("distilbert/distilbert-base-uncased", do_lower_case=False)
sample = "Hello, world. How are you?"
tokens = tokenizer.tokenize(sample)
self.assertEqual("[UNK]", tokens[0])
@@ -211,15 +213,15 @@ class AutoTokenizerTest(unittest.TestCase):
self.assertEqual(tokenizer2.vocab_size, 12)
def test_auto_tokenizer_fast_no_slow(self):
tokenizer = AutoTokenizer.from_pretrained("ctrl")
tokenizer = AutoTokenizer.from_pretrained("Salesforce/ctrl")
# There is no fast CTRL so this always gives us a slow tokenizer.
self.assertIsInstance(tokenizer, CTRLTokenizer)
def test_get_tokenizer_config(self):
# Check we can load the tokenizer config of an online model.
config = get_tokenizer_config("bert-base-cased")
config = get_tokenizer_config("google-bert/bert-base-cased")
_ = config.pop("_commit_hash", None)
# If we ever update bert-base-cased tokenizer config, this dict here will need to be updated.
# If we ever update google-bert/bert-base-cased tokenizer config, this dict here will need to be updated.
self.assertEqual(config, {"do_lower_case": False})
# This model does not have a tokenizer_config so we get back an empty dict.

View File

@@ -627,7 +627,7 @@ class BertModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMixin
class BertModelIntegrationTest(unittest.TestCase):
@slow
def test_inference_no_head_absolute_embedding(self):
model = BertModel.from_pretrained("bert-base-uncased")
model = BertModel.from_pretrained("google-bert/bert-base-uncased")
input_ids = torch.tensor([[0, 345, 232, 328, 740, 140, 1695, 69, 6078, 1588, 2]])
attention_mask = torch.tensor([[0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]])
with torch.no_grad():

View File

@@ -158,6 +158,6 @@ class FlaxBertModelTest(FlaxModelTesterMixin, unittest.TestCase):
def test_model_from_pretrained(self):
# Only check this for base model, not necessary for all model classes.
# This will also help speed-up tests.
model = FlaxBertModel.from_pretrained("bert-base-cased")
model = FlaxBertModel.from_pretrained("google-bert/bert-base-cased")
outputs = model(np.ones((1, 1)))
self.assertIsNotNone(outputs)

View File

@@ -242,7 +242,7 @@ class BertTokenizationTest(TokenizerTesterMixin, unittest.TestCase):
@slow
def test_sequence_builders(self):
tokenizer = self.tokenizer_class.from_pretrained("bert-base-uncased")
tokenizer = self.tokenizer_class.from_pretrained("google-bert/bert-base-uncased")
text = tokenizer.encode("sequence builders", add_special_tokens=False)
text_2 = tokenizer.encode("multi-sequence build", add_special_tokens=False)

View File

@@ -16,7 +16,7 @@ if is_tensorflow_text_available():
from transformers.models.bert import TFBertTokenizer
TOKENIZER_CHECKPOINTS = ["bert-base-uncased", "bert-base-cased"]
TOKENIZER_CHECKPOINTS = ["google-bert/bert-base-uncased", "google-bert/bert-base-cased"]
TINY_MODEL_CHECKPOINT = "hf-internal-testing/tiny-bert-tf-only"
if is_tf_available():

View File

@@ -488,7 +488,7 @@ class BertTokenizerMismatchTest(unittest.TestCase):
" is called from."
)
)
EXAMPLE_BERT_ID = "bert-base-cased"
EXAMPLE_BERT_ID = "google-bert/bert-base-cased"
with self.assertLogs("transformers", level="WARNING") as cm:
BertJapaneseTokenizer.from_pretrained(EXAMPLE_BERT_ID)
self.assertTrue(

View File

@@ -31,7 +31,7 @@ if is_torch_available():
class CamembertModelIntegrationTest(unittest.TestCase):
@slow
def test_output_embeds_base_model(self):
model = CamembertModel.from_pretrained("camembert-base")
model = CamembertModel.from_pretrained("almanach/camembert-base")
model.to(torch_device)
input_ids = torch.tensor(

View File

@@ -128,7 +128,7 @@ class CamembertTokenizationTest(TokenizerTesterMixin, unittest.TestCase):
self.tokenizer_integration_test_util(
expected_encoding=expected_encoding,
model_name="camembert-base",
model_name="almanach/camembert-base",
revision="3a0641d9a1aeb7e848a74299e7e4c4bca216b4cf",
sequences=sequences,
)

View File

@@ -50,7 +50,7 @@ class DPRReaderTokenizationTest(BertTokenizationTest):
@slow
def test_decode_best_spans(self):
tokenizer = self.tokenizer_class.from_pretrained("bert-base-uncased")
tokenizer = self.tokenizer_class.from_pretrained("google-bert/bert-base-uncased")
text_1 = tokenizer.encode("question sequence", add_special_tokens=False)
text_2 = tokenizer.encode("title sequence", add_special_tokens=False)
@@ -73,7 +73,7 @@ class DPRReaderTokenizationTest(BertTokenizationTest):
@slow
def test_call(self):
tokenizer = self.tokenizer_class.from_pretrained("bert-base-uncased")
tokenizer = self.tokenizer_class.from_pretrained("google-bert/bert-base-uncased")
text_1 = tokenizer.encode("question sequence", add_special_tokens=False)
text_2 = tokenizer.encode("title sequence", add_special_tokens=False)

View File

@@ -671,7 +671,9 @@ class EncoderDecoderMixin:
@require_torch
class BertEncoderDecoderModelTest(EncoderDecoderMixin, unittest.TestCase):
def get_pretrained_model(self):
return EncoderDecoderModel.from_encoder_decoder_pretrained("bert-base-cased", "bert-base-cased")
return EncoderDecoderModel.from_encoder_decoder_pretrained(
"google-bert/bert-base-cased", "google-bert/bert-base-cased"
)
def get_encoder_decoder_model(self, config, decoder_config):
encoder_model = BertModel(config)
@@ -937,7 +939,9 @@ class RoBertaEncoderDecoderModelTest(EncoderDecoderMixin, unittest.TestCase):
}
def get_pretrained_model(self):
return EncoderDecoderModel.from_encoder_decoder_pretrained("roberta-base", "roberta-base")
return EncoderDecoderModel.from_encoder_decoder_pretrained(
"FacebookAI/roberta-base", "FacebookAI/roberta-base"
)
@require_torch
@@ -994,7 +998,9 @@ class GPT2EncoderDecoderModelTest(EncoderDecoderMixin, unittest.TestCase):
}
def get_pretrained_model(self):
return EncoderDecoderModel.from_encoder_decoder_pretrained("bert-base-cased", "gpt2")
return EncoderDecoderModel.from_encoder_decoder_pretrained(
"google-bert/bert-base-cased", "openai-community/gpt2"
)
def test_encoder_decoder_model_shared_weights(self):
pass
@@ -1004,8 +1010,8 @@ class GPT2EncoderDecoderModelTest(EncoderDecoderMixin, unittest.TestCase):
model = EncoderDecoderModel.from_pretrained("patrickvonplaten/bert2gpt2-cnn_dailymail-fp16")
model.to(torch_device)
tokenizer_in = AutoTokenizer.from_pretrained("bert-base-cased")
tokenizer_out = AutoTokenizer.from_pretrained("gpt2")
tokenizer_in = AutoTokenizer.from_pretrained("google-bert/bert-base-cased")
tokenizer_out = AutoTokenizer.from_pretrained("openai-community/gpt2")
ARTICLE_STUDENTS = """(CNN)Sigma Alpha Epsilon is under fire for a video showing party-bound fraternity members singing a racist chant. SAE's national chapter suspended the students, but University of Oklahoma President David Boren took it a step further, saying the university's affiliation with the fraternity is permanently done. The news is shocking, but it's not the first time SAE has faced controversy. SAE was founded March 9, 1856, at the University of Alabama, five years before the American Civil War, according to the fraternity website. When the war began, the group had fewer than 400 members, of which "369 went to war for the Confederate States and seven for the Union Army," the website says. The fraternity now boasts more than 200,000 living alumni, along with about 15,000 undergraduates populating 219 chapters and 20 "colonies" seeking full membership at universities. SAE has had to work hard to change recently after a string of member deaths, many blamed on the hazing of new recruits, SAE national President Bradley Cohen wrote in a message on the fraternity's website. The fraternity's website lists more than 130 chapters cited or suspended for "health and safety incidents" since 2010. At least 30 of the incidents involved hazing, and dozens more involved alcohol. However, the list is missing numerous incidents from recent months. Among them, according to various media outlets: Yale University banned the SAEs from campus activities last month after members allegedly tried to interfere with a sexual misconduct investigation connected to an initiation rite. Stanford University in December suspended SAE housing privileges after finding sorority members attending a fraternity function were subjected to graphic sexual content. And Johns Hopkins University in November suspended the fraternity for underage drinking. "The media has labeled us as the 'nation's deadliest fraternity,' " Cohen said. 
In 2011, for example, a student died while being coerced into excessive alcohol consumption, according to a lawsuit. SAE's previous insurer dumped the fraternity. "As a result, we are paying Lloyd's of London the highest insurance rates in the Greek-letter world," Cohen said. Universities have turned down SAE's attempts to open new chapters, and the fraternity had to close 12 in 18 months over hazing incidents."""
@@ -1067,7 +1073,7 @@ class ProphetNetEncoderDecoderModelTest(EncoderDecoderMixin, unittest.TestCase):
def get_pretrained_model(self):
return EncoderDecoderModel.from_encoder_decoder_pretrained(
"bert-large-uncased", "microsoft/prophetnet-large-uncased"
"google-bert/bert-large-uncased", "microsoft/prophetnet-large-uncased"
)
def test_encoder_decoder_model_shared_weights(self):
@@ -1122,7 +1128,9 @@ class BartEncoderDecoderModelTest(EncoderDecoderMixin, unittest.TestCase):
}
def get_pretrained_model(self):
return EncoderDecoderModel.from_encoder_decoder_pretrained("bert-large-uncased", "facebook/bart-large")
return EncoderDecoderModel.from_encoder_decoder_pretrained(
"google-bert/bert-large-uncased", "facebook/bart-large"
)
def test_encoder_decoder_model_shared_weights(self):
pass
@@ -1131,10 +1139,12 @@ class BartEncoderDecoderModelTest(EncoderDecoderMixin, unittest.TestCase):
@require_torch
class EncoderDecoderModelTest(unittest.TestCase):
def get_from_encoderdecoder_pretrained_model(self):
return EncoderDecoderModel.from_encoder_decoder_pretrained("bert-base-uncased", "bert-base-uncased")
return EncoderDecoderModel.from_encoder_decoder_pretrained(
"google-bert/bert-base-uncased", "google-bert/bert-base-uncased"
)
def get_decoder_config(self):
config = AutoConfig.from_pretrained("bert-base-uncased")
config = AutoConfig.from_pretrained("google-bert/bert-base-uncased")
config.is_decoder = True
config.add_cross_attention = True
return config
@@ -1143,8 +1153,10 @@ class EncoderDecoderModelTest(unittest.TestCase):
return EncoderDecoderModel.from_pretrained("patrickvonplaten/bert2bert-cnn_dailymail-fp16")
def get_encoder_decoder_models(self):
encoder_model = BertModel.from_pretrained("bert-base-uncased")
decoder_model = BertLMHeadModel.from_pretrained("bert-base-uncased", config=self.get_decoder_config())
encoder_model = BertModel.from_pretrained("google-bert/bert-base-uncased")
decoder_model = BertLMHeadModel.from_pretrained(
"google-bert/bert-base-uncased", config=self.get_decoder_config()
)
return {"encoder": encoder_model, "decoder": decoder_model}
def _check_configuration_tie(self, model):

View File

@@ -483,12 +483,14 @@ class FlaxGPT2EncoderDecoderModelTest(FlaxEncoderDecoderMixin, unittest.TestCase
}
def get_pretrained_model(self):
return FlaxEncoderDecoderModel.from_encoder_decoder_pretrained("bert-base-cased", "gpt2")
return FlaxEncoderDecoderModel.from_encoder_decoder_pretrained(
"google-bert/bert-base-cased", "openai-community/gpt2"
)
@slow
def test_bert2gpt2_summarization(self):
tokenizer_in = AutoTokenizer.from_pretrained("bert-base-cased")
tokenizer_out = AutoTokenizer.from_pretrained("gpt2")
tokenizer_in = AutoTokenizer.from_pretrained("google-bert/bert-base-cased")
tokenizer_out = AutoTokenizer.from_pretrained("openai-community/gpt2")
model = FlaxEncoderDecoderModel.from_pretrained(
"patrickvonplaten/bert2gpt2-cnn_dailymail-fp16", pad_token_id=tokenizer_out.eos_token_id
@@ -539,7 +541,9 @@ class FlaxBartEncoderDecoderModelTest(FlaxEncoderDecoderMixin, unittest.TestCase
}
def get_pretrained_model(self):
return FlaxEncoderDecoderModel.from_encoder_decoder_pretrained("bert-base-cased", "facebook/bart-base")
return FlaxEncoderDecoderModel.from_encoder_decoder_pretrained(
"google-bert/bert-base-cased", "facebook/bart-base"
)
@require_flax
@@ -576,13 +580,17 @@ class FlaxBertEncoderDecoderModelTest(FlaxEncoderDecoderMixin, unittest.TestCase
}
def get_pretrained_model(self):
return FlaxEncoderDecoderModel.from_encoder_decoder_pretrained("bert-base-cased", "bert-base-cased")
return FlaxEncoderDecoderModel.from_encoder_decoder_pretrained(
"google-bert/bert-base-cased", "google-bert/bert-base-cased"
)
@require_flax
class FlaxEncoderDecoderModelTest(unittest.TestCase):
def get_from_encoderdecoder_pretrained_model(self):
return FlaxEncoderDecoderModel.from_encoder_decoder_pretrained("bert-base-cased", "gpt2")
return FlaxEncoderDecoderModel.from_encoder_decoder_pretrained(
"google-bert/bert-base-cased", "openai-community/gpt2"
)
def _check_configuration_tie(self, model):
module = model.module.bind(model.params)

View File

@@ -764,7 +764,7 @@ class TFBertEncoderDecoderModelTest(TFEncoderDecoderMixin, unittest.TestCase):
def test_bert2bert_summarization(self):
from transformers import EncoderDecoderModel
tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
tokenizer = AutoTokenizer.from_pretrained("google-bert/bert-base-uncased")
"""Not working, because pt checkpoint has `encoder.encoder.layer...` while tf model has `encoder.bert.encoder.layer...`.
(For Bert decoder, there is no issue, because `BertModel` is wrapped into `decoder` as `bert`)
@@ -864,8 +864,8 @@ class TFGPT2EncoderDecoderModelTest(TFEncoderDecoderMixin, unittest.TestCase):
def test_bert2gpt2_summarization(self):
from transformers import EncoderDecoderModel
tokenizer_in = AutoTokenizer.from_pretrained("bert-base-cased")
tokenizer_out = AutoTokenizer.from_pretrained("gpt2")
tokenizer_in = AutoTokenizer.from_pretrained("google-bert/bert-base-cased")
tokenizer_out = AutoTokenizer.from_pretrained("openai-community/gpt2")
"""Not working, because pt checkpoint has `encoder.encoder.layer...` while tf model has `encoder.bert.encoder.layer...`.
(For GPT2 decoder, there is no issue)
@@ -1016,10 +1016,12 @@ class TFRembertEncoderDecoderModelTest(TFEncoderDecoderMixin, unittest.TestCase)
@require_tf
class TFEncoderDecoderModelTest(unittest.TestCase):
def get_from_encoderdecoder_pretrained_model(self):
return TFEncoderDecoderModel.from_encoder_decoder_pretrained("bert-base-cased", "bert-base-cased")
return TFEncoderDecoderModel.from_encoder_decoder_pretrained(
"google-bert/bert-base-cased", "google-bert/bert-base-cased"
)
def get_decoder_config(self):
config = AutoConfig.from_pretrained("bert-base-cased")
config = AutoConfig.from_pretrained("google-bert/bert-base-cased")
config.is_decoder = True
config.add_cross_attention = True
return config
@@ -1028,9 +1030,9 @@ class TFEncoderDecoderModelTest(unittest.TestCase):
return TFEncoderDecoderModel.from_pretrained("patrickvonplaten/bert2bert-cnn_dailymail-fp16")
def get_encoder_decoder_models(self):
encoder_model = TFBertModel.from_pretrained("bert-base-cased", name="encoder")
encoder_model = TFBertModel.from_pretrained("google-bert/bert-base-cased", name="encoder")
decoder_model = TFBertLMHeadModel.from_pretrained(
"bert-base-cased", config=self.get_decoder_config(), name="decoder"
"google-bert/bert-base-cased", config=self.get_decoder_config(), name="decoder"
)
return {"encoder": encoder_model, "decoder": decoder_model}
@@ -1055,8 +1057,10 @@ class TFEncoderDecoderModelTest(unittest.TestCase):
@require_tf
class TFEncoderDecoderModelSaveLoadTests(unittest.TestCase):
def get_encoder_decoder_config(self):
encoder_config = AutoConfig.from_pretrained("bert-base-uncased")
decoder_config = AutoConfig.from_pretrained("bert-base-uncased", is_decoder=True, add_cross_attention=True)
encoder_config = AutoConfig.from_pretrained("google-bert/bert-base-uncased")
decoder_config = AutoConfig.from_pretrained(
"google-bert/bert-base-uncased", is_decoder=True, add_cross_attention=True
)
return EncoderDecoderConfig.from_encoder_decoder_configs(encoder_config, decoder_config)
def get_encoder_decoder_config_small(self):
@@ -1160,8 +1164,8 @@ class TFEncoderDecoderModelSaveLoadTests(unittest.TestCase):
load_weight_prefix = TFEncoderDecoderModel.load_weight_prefix
config = self.get_encoder_decoder_config()
encoder_tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
decoder_tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
encoder_tokenizer = AutoTokenizer.from_pretrained("google-bert/bert-base-uncased")
decoder_tokenizer = AutoTokenizer.from_pretrained("google-bert/bert-base-uncased")
input_ids = encoder_tokenizer("who sings does he love me with reba", return_tensors="tf").input_ids
decoder_input_ids = decoder_tokenizer("Linda Davis", return_tensors="tf").input_ids
@@ -1173,10 +1177,10 @@ class TFEncoderDecoderModelSaveLoadTests(unittest.TestCase):
# So we create pretrained models (without `load_weight_prefix`), save them, and later,
# we load them using `from_pretrained`.
# (we don't need to do this for encoder, but let's make the code more similar between encoder/decoder)
encoder = TFAutoModel.from_pretrained("bert-base-uncased", name="encoder")
encoder = TFAutoModel.from_pretrained("google-bert/bert-base-uncased", name="encoder")
# It's necessary to specify `add_cross_attention=True` here.
decoder = TFAutoModelForCausalLM.from_pretrained(
"bert-base-uncased", is_decoder=True, add_cross_attention=True, name="decoder"
"google-bert/bert-base-uncased", is_decoder=True, add_cross_attention=True, name="decoder"
)
pretrained_encoder_dir = os.path.join(tmp_dirname, "pretrained_encoder")
pretrained_decoder_dir = os.path.join(tmp_dirname, "pretrained_decoder")

View File

@@ -237,10 +237,10 @@ class FlaxGPT2ModelTest(FlaxModelTesterMixin, FlaxGenerationTesterMixin, unittes
@slow
def test_batch_generation(self):
tokenizer = GPT2Tokenizer.from_pretrained("gpt2", pad_token="</s>", padding_side="left")
tokenizer = GPT2Tokenizer.from_pretrained("openai-community/gpt2", pad_token="</s>", padding_side="left")
inputs = tokenizer(["Hello this is a long string", "Hey"], return_tensors="np", padding=True, truncation=True)
model = FlaxGPT2LMHeadModel.from_pretrained("gpt2")
model = FlaxGPT2LMHeadModel.from_pretrained("openai-community/gpt2")
model.do_sample = False
model.config.pad_token_id = model.config.eos_token_id
@@ -359,6 +359,6 @@ class FlaxGPT2ModelTest(FlaxModelTesterMixin, FlaxGenerationTesterMixin, unittes
@slow
def test_model_from_pretrained(self):
for model_class_name in self.all_model_classes:
model = model_class_name.from_pretrained("gpt2", from_pt=True)
model = model_class_name.from_pretrained("openai-community/gpt2", from_pt=True)
outputs = model(np.ones((1, 1)))
self.assertIsNotNone(outputs)

View File

@@ -98,7 +98,7 @@ class GPT2ModelTester:
self.pad_token_id = vocab_size - 1
def get_large_model_config(self):
return GPT2Config.from_pretrained("gpt2")
return GPT2Config.from_pretrained("openai-community/gpt2")
def prepare_config_and_inputs(
self, gradient_checkpointing=False, scale_attn_by_inverse_layer_idx=False, reorder_and_upcast_attn=False
@@ -582,9 +582,9 @@ class GPT2ModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMixin
@slow
def test_batch_generation(self):
model = GPT2LMHeadModel.from_pretrained("gpt2")
model = GPT2LMHeadModel.from_pretrained("openai-community/gpt2")
model.to(torch_device)
tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
tokenizer = GPT2Tokenizer.from_pretrained("openai-community/gpt2")
tokenizer.padding_side = "left"
@@ -641,9 +641,9 @@ class GPT2ModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMixin
@slow
def test_batch_generation_2heads(self):
model = GPT2DoubleHeadsModel.from_pretrained("gpt2")
model = GPT2DoubleHeadsModel.from_pretrained("openai-community/gpt2")
model.to(torch_device)
tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
tokenizer = GPT2Tokenizer.from_pretrained("openai-community/gpt2")
tokenizer.padding_side = "left"
@@ -722,7 +722,7 @@ class GPT2ModelLanguageGenerationTest(unittest.TestCase):
verify_outputs=True,
):
model = GPT2LMHeadModel.from_pretrained(
"gpt2",
"openai-community/gpt2",
reorder_and_upcast_attn=reorder_and_upcast_attn,
scale_attn_by_inverse_layer_idx=scale_attn_by_inverse_layer_idx,
)
@@ -759,8 +759,8 @@ class GPT2ModelLanguageGenerationTest(unittest.TestCase):
@slow
def test_gpt2_sample(self):
tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
model = GPT2LMHeadModel.from_pretrained("gpt2")
tokenizer = GPT2Tokenizer.from_pretrained("openai-community/gpt2")
model = GPT2LMHeadModel.from_pretrained("openai-community/gpt2")
model.to(torch_device)
torch.manual_seed(0)
@@ -787,8 +787,8 @@ class GPT2ModelLanguageGenerationTest(unittest.TestCase):
@slow
def test_gpt2_sample_max_time(self):
tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
model = GPT2LMHeadModel.from_pretrained("gpt2")
tokenizer = GPT2Tokenizer.from_pretrained("openai-community/gpt2")
model = GPT2LMHeadModel.from_pretrained("openai-community/gpt2")
model.to(torch_device)
torch.manual_seed(0)
@@ -833,8 +833,8 @@ class GPT2ModelLanguageGenerationTest(unittest.TestCase):
"laboratory founded in 2010. DeepMind was acquired by Google in 2014. The company is based"
)
gpt2_tokenizer = GPT2Tokenizer.from_pretrained("gpt2-large")
gpt2_model = GPT2LMHeadModel.from_pretrained("gpt2-large").to(torch_device)
gpt2_tokenizer = GPT2Tokenizer.from_pretrained("openai-community/gpt2-large")
gpt2_model = GPT2LMHeadModel.from_pretrained("openai-community/gpt2-large").to(torch_device)
input_ids = gpt2_tokenizer(article, return_tensors="pt").input_ids.to(torch_device)
outputs = gpt2_model.generate(input_ids, penalty_alpha=0.6, top_k=4, max_length=256)

View File

@@ -461,8 +461,8 @@ class TFGPT2ModelTest(TFModelTesterMixin, TFCoreModelTesterMixin, PipelineTester
class TFGPT2ModelLanguageGenerationTest(unittest.TestCase):
@slow
def test_lm_generate_greedy_distilgpt2_batch_special(self):
model = TFGPT2LMHeadModel.from_pretrained("distilgpt2")
tokenizer = GPT2Tokenizer.from_pretrained("distilgpt2")
model = TFGPT2LMHeadModel.from_pretrained("distilbert/distilgpt2")
tokenizer = GPT2Tokenizer.from_pretrained("distilbert/distilgpt2")
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "left"
@@ -488,8 +488,8 @@ class TFGPT2ModelLanguageGenerationTest(unittest.TestCase):
@slow
def test_lm_generate_sample_distilgpt2_batch_special(self):
model = TFGPT2LMHeadModel.from_pretrained("distilgpt2")
tokenizer = GPT2Tokenizer.from_pretrained("distilgpt2")
model = TFGPT2LMHeadModel.from_pretrained("distilbert/distilgpt2")
tokenizer = GPT2Tokenizer.from_pretrained("distilbert/distilgpt2")
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "left"
@@ -522,8 +522,8 @@ class TFGPT2ModelLanguageGenerationTest(unittest.TestCase):
@slow
def test_lm_generate_greedy_distilgpt2_beam_search_special(self):
model = TFGPT2LMHeadModel.from_pretrained("distilgpt2")
tokenizer = GPT2Tokenizer.from_pretrained("distilgpt2")
model = TFGPT2LMHeadModel.from_pretrained("distilbert/distilgpt2")
tokenizer = GPT2Tokenizer.from_pretrained("distilbert/distilgpt2")
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "left"
@@ -550,8 +550,8 @@ class TFGPT2ModelLanguageGenerationTest(unittest.TestCase):
@slow
def test_lm_generate_distilgpt2_left_padding(self):
"""Tests that the generated text is the same, regardless of left padding"""
model = TFGPT2LMHeadModel.from_pretrained("distilgpt2")
tokenizer = GPT2Tokenizer.from_pretrained("distilgpt2")
model = TFGPT2LMHeadModel.from_pretrained("distilbert/distilgpt2")
tokenizer = GPT2Tokenizer.from_pretrained("distilbert/distilgpt2")
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "left"
@@ -582,8 +582,8 @@ class TFGPT2ModelLanguageGenerationTest(unittest.TestCase):
@slow
def test_lm_generate_gpt2_greedy_xla(self):
model = TFGPT2LMHeadModel.from_pretrained("gpt2")
tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
model = TFGPT2LMHeadModel.from_pretrained("openai-community/gpt2")
tokenizer = GPT2Tokenizer.from_pretrained("openai-community/gpt2")
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "left"
@@ -612,8 +612,8 @@ class TFGPT2ModelLanguageGenerationTest(unittest.TestCase):
# forces the generation to happen on CPU, to avoid GPU-related quirks
with tf.device(":/CPU:0"):
model = TFGPT2LMHeadModel.from_pretrained("gpt2")
tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
model = TFGPT2LMHeadModel.from_pretrained("openai-community/gpt2")
tokenizer = GPT2Tokenizer.from_pretrained("openai-community/gpt2")
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "left"
@@ -642,8 +642,8 @@ class TFGPT2ModelLanguageGenerationTest(unittest.TestCase):
@slow
def test_lm_generate_gpt2_beam_search_xla(self):
model = TFGPT2LMHeadModel.from_pretrained("gpt2")
tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
model = TFGPT2LMHeadModel.from_pretrained("openai-community/gpt2")
tokenizer = GPT2Tokenizer.from_pretrained("openai-community/gpt2")
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "left"
@@ -671,8 +671,8 @@ class TFGPT2ModelLanguageGenerationTest(unittest.TestCase):
"laboratory founded in 2010. DeepMind was acquired by Google in 2014. The company is based"
)
gpt2_tokenizer = GPT2Tokenizer.from_pretrained("gpt2-large")
gpt2_model = TFGPT2LMHeadModel.from_pretrained("gpt2-large")
gpt2_tokenizer = GPT2Tokenizer.from_pretrained("openai-community/gpt2-large")
gpt2_model = TFGPT2LMHeadModel.from_pretrained("openai-community/gpt2-large")
input_ids = gpt2_tokenizer(article, return_tensors="tf")
outputs = gpt2_model.generate(**input_ids, penalty_alpha=0.6, top_k=4, max_length=256)
@@ -705,8 +705,8 @@ class TFGPT2ModelLanguageGenerationTest(unittest.TestCase):
"laboratory founded in 2010. DeepMind was acquired by Google in 2014. The company is based"
)
gpt2_tokenizer = GPT2Tokenizer.from_pretrained("gpt2-large")
gpt2_model = TFGPT2LMHeadModel.from_pretrained("gpt2-large")
gpt2_tokenizer = GPT2Tokenizer.from_pretrained("openai-community/gpt2-large")
gpt2_model = TFGPT2LMHeadModel.from_pretrained("openai-community/gpt2-large")
input_ids = gpt2_tokenizer(article, return_tensors="tf")
xla_generate = tf.function(gpt2_model.generate, jit_compile=True)

View File

@@ -15,8 +15,8 @@ if is_keras_nlp_available():
from transformers.models.gpt2 import TFGPT2Tokenizer
TOKENIZER_CHECKPOINTS = ["gpt2"]
TINY_MODEL_CHECKPOINT = "gpt2"
TOKENIZER_CHECKPOINTS = ["openai-community/gpt2"]
TINY_MODEL_CHECKPOINT = "openai-community/gpt2"
if is_tf_available():

View File

@@ -202,7 +202,9 @@ class FlaxGPTNeoModelTest(FlaxModelTesterMixin, FlaxGenerationTesterMixin, unitt
@slow
def test_batch_generation(self):
tokenizer = GPT2Tokenizer.from_pretrained("gpt2", pad_token="<|endoftext|>", padding_side="left")
tokenizer = GPT2Tokenizer.from_pretrained(
"openai-community/gpt2", pad_token="<|endoftext|>", padding_side="left"
)
inputs = tokenizer(["Hello this is a long string", "Hey"], return_tensors="np", padding=True, truncation=True)
model = FlaxGPTNeoForCausalLM.from_pretrained("EleutherAI/gpt-neo-125M")

View File

@@ -199,7 +199,9 @@ class FlaxGPTJModelTest(FlaxModelTesterMixin, FlaxGenerationTesterMixin, unittes
@tooslow
def test_batch_generation(self):
tokenizer = GPT2Tokenizer.from_pretrained("gpt2", pad_token="<|endoftext|>", padding_side="left")
tokenizer = GPT2Tokenizer.from_pretrained(
"openai-community/gpt2", pad_token="<|endoftext|>", padding_side="left"
)
inputs = tokenizer(["Hello this is a long string", "Hey"], return_tensors="np", padding=True, truncation=True)
model = FlaxGPTJForCausalLM.from_pretrained("EleutherAI/gpt-j-6B")

View File

@@ -28,7 +28,7 @@ from ...test_tokenization_common import TokenizerTesterMixin
@require_tokenizers
# Copied from tests.models.roberta.test_tokenization_roberta.RobertaTokenizationTest with roberta-base->allenai/longformer-base-4096,Roberta->Longformer,roberta->longformer,
# Copied from tests.models.roberta.test_tokenization_roberta.RobertaTokenizationTest with FacebookAI/roberta-base->allenai/longformer-base-4096,Roberta->Longformer,roberta->longformer,
class LongformerTokenizationTest(TokenizerTesterMixin, unittest.TestCase):
# Ignore copy
tokenizer_class = LongformerTokenizer

View File

@@ -1373,7 +1373,7 @@ class MarkupLMTokenizationTest(TokenizerTesterMixin, unittest.TestCase):
inputs = new_tokenizer(text, xpaths=xpaths)
self.assertEqual(len(inputs["input_ids"]), 2)
decoded_input = new_tokenizer.decode(inputs["input_ids"][0], skip_special_tokens=True)
expected_result = ( # original expected result "this is the" seems to contradict the roberta-based tokenizer
expected_result = ( # original expected result "this is the" seems to contradict the FacebookAI/roberta-based tokenizer
"thisisthe"
)

View File

@@ -258,7 +258,7 @@ class MobileBERTTokenizationTest(TokenizerTesterMixin, unittest.TestCase):
)
@slow
# Copied from tests.models.bert.test_tokenization_bert.BertTokenizationTest.test_sequence_builders with bert-base-uncased->google/mobilebert-uncased
# Copied from tests.models.bert.test_tokenization_bert.BertTokenizationTest.test_sequence_builders with google-bert/bert-base-uncased->google/mobilebert-uncased
def test_sequence_builders(self):
tokenizer = self.tokenizer_class.from_pretrained("google/mobilebert-uncased")

View File

@@ -104,7 +104,7 @@ class MT5ModelTester:
self.decoder_layers = decoder_layers
def get_large_model_config(self):
return MT5Config.from_pretrained("t5-base")
return MT5Config.from_pretrained("google-t5/t5-base")
def prepare_config_and_inputs(self):
input_ids = ids_tensor([self.batch_size, self.encoder_seq_length], self.vocab_size).clamp(2)
@@ -940,7 +940,7 @@ class MT5EncoderOnlyModelTester:
self.is_training = is_training
def get_large_model_config(self):
return MT5Config.from_pretrained("t5-base")
return MT5Config.from_pretrained("google-t5/t5-base")
def prepare_config_and_inputs(self):
input_ids = ids_tensor([self.batch_size, self.encoder_seq_length], self.vocab_size)

View File

@@ -279,7 +279,7 @@ class OpenAIGPTModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTester
class OPENAIGPTModelLanguageGenerationTest(unittest.TestCase):
@slow
def test_lm_generate_openai_gpt(self):
model = OpenAIGPTLMHeadModel.from_pretrained("openai-gpt")
model = OpenAIGPTLMHeadModel.from_pretrained("openai-community/openai-gpt")
model.to(torch_device)
input_ids = torch.tensor([[481, 4735, 544]], dtype=torch.long, device=torch_device) # the president is
expected_output_ids = [

View File

@@ -262,7 +262,7 @@ class TFOpenAIGPTModelTest(TFModelTesterMixin, PipelineTesterMixin, unittest.Tes
class TFOPENAIGPTModelLanguageGenerationTest(unittest.TestCase):
@slow
def test_lm_generate_openai_gpt(self):
model = TFOpenAIGPTLMHeadModel.from_pretrained("openai-gpt")
model = TFOpenAIGPTLMHeadModel.from_pretrained("openai-community/openai-gpt")
input_ids = tf.convert_to_tensor([[481, 4735, 544]], dtype=tf.int32) # the president is
expected_output_ids = [
481,

View File

@@ -41,7 +41,7 @@ class Pix2StructProcessorTest(unittest.TestCase):
self.tmpdirname = tempfile.mkdtemp()
image_processor = Pix2StructImageProcessor()
tokenizer = T5Tokenizer.from_pretrained("t5-small")
tokenizer = T5Tokenizer.from_pretrained("google-t5/t5-small")
processor = Pix2StructProcessor(image_processor, tokenizer)

View File

@@ -563,7 +563,7 @@ class QDQBertModelIntegrationTest(unittest.TestCase):
quant_nn.QuantLinear.set_default_quant_desc_input(input_desc)
quant_nn.QuantLinear.set_default_quant_desc_weight(weight_desc)
model = QDQBertModel.from_pretrained("bert-base-uncased")
model = QDQBertModel.from_pretrained("google-bert/bert-base-uncased")
input_ids = torch.tensor([[0, 345, 232, 328, 740, 140, 1695, 69, 6078, 1588, 2]])
attention_mask = torch.tensor([[0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]])
output = model(input_ids, attention_mask=attention_mask)[0]

View File

@@ -236,7 +236,7 @@ class RealmTokenizationTest(TokenizerTesterMixin, unittest.TestCase):
@slow
def test_sequence_builders(self):
tokenizer = self.tokenizer_class.from_pretrained("bert-base-uncased")
tokenizer = self.tokenizer_class.from_pretrained("google-bert/bert-base-uncased")
text = tokenizer.encode("sequence builders", add_special_tokens=False)
text_2 = tokenizer.encode("multi-sequence build", add_special_tokens=False)

View File

@@ -154,6 +154,6 @@ class FlaxRobertaModelTest(FlaxModelTesterMixin, unittest.TestCase):
@slow
def test_model_from_pretrained(self):
for model_class_name in self.all_model_classes:
model = model_class_name.from_pretrained("roberta-base", from_pt=True)
model = model_class_name.from_pretrained("FacebookAI/roberta-base", from_pt=True)
outputs = model(np.ones((1, 1)))
self.assertIsNotNone(outputs)

View File

@@ -527,7 +527,7 @@ class RobertaModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMi
class RobertaModelIntegrationTest(TestCasePlus):
@slow
def test_inference_masked_lm(self):
model = RobertaForMaskedLM.from_pretrained("roberta-base")
model = RobertaForMaskedLM.from_pretrained("FacebookAI/roberta-base")
input_ids = torch.tensor([[0, 31414, 232, 328, 740, 1140, 12695, 69, 46078, 1588, 2]])
with torch.no_grad():
@@ -547,7 +547,7 @@ class RobertaModelIntegrationTest(TestCasePlus):
@slow
def test_inference_no_head(self):
model = RobertaModel.from_pretrained("roberta-base")
model = RobertaModel.from_pretrained("FacebookAI/roberta-base")
input_ids = torch.tensor([[0, 31414, 232, 328, 740, 1140, 12695, 69, 46078, 1588, 2]])
with torch.no_grad():
@@ -565,7 +565,7 @@ class RobertaModelIntegrationTest(TestCasePlus):
@slow
def test_inference_classification_head(self):
model = RobertaForSequenceClassification.from_pretrained("roberta-large-mnli")
model = RobertaForSequenceClassification.from_pretrained("FacebookAI/roberta-large-mnli")
input_ids = torch.tensor([[0, 31414, 232, 328, 740, 1140, 12695, 69, 46078, 1588, 2]])
with torch.no_grad():

View File

@@ -666,7 +666,7 @@ class TFRobertaModelTest(TFModelTesterMixin, PipelineTesterMixin, unittest.TestC
class TFRobertaModelIntegrationTest(unittest.TestCase):
@slow
def test_inference_masked_lm(self):
model = TFRobertaForMaskedLM.from_pretrained("roberta-base")
model = TFRobertaForMaskedLM.from_pretrained("FacebookAI/roberta-base")
input_ids = tf.constant([[0, 31414, 232, 328, 740, 1140, 12695, 69, 46078, 1588, 2]])
output = model(input_ids)[0]
@@ -680,7 +680,7 @@ class TFRobertaModelIntegrationTest(unittest.TestCase):
@slow
def test_inference_no_head(self):
model = TFRobertaModel.from_pretrained("roberta-base")
model = TFRobertaModel.from_pretrained("FacebookAI/roberta-base")
input_ids = tf.constant([[0, 31414, 232, 328, 740, 1140, 12695, 69, 46078, 1588, 2]])
output = model(input_ids)[0]
@@ -692,7 +692,7 @@ class TFRobertaModelIntegrationTest(unittest.TestCase):
@slow
def test_inference_classification_head(self):
model = TFRobertaForSequenceClassification.from_pretrained("roberta-large-mnli")
model = TFRobertaForSequenceClassification.from_pretrained("FacebookAI/roberta-large-mnli")
input_ids = tf.constant([[0, 31414, 232, 328, 740, 1140, 12695, 69, 46078, 1588, 2]])
output = model(input_ids)[0]

View File

@@ -105,7 +105,7 @@ class RobertaTokenizationTest(TokenizerTesterMixin, unittest.TestCase):
@slow
def test_sequence_builders(self):
tokenizer = self.tokenizer_class.from_pretrained("roberta-base")
tokenizer = self.tokenizer_class.from_pretrained("FacebookAI/roberta-base")
text = tokenizer.encode("sequence builders", add_special_tokens=False)
text_2 = tokenizer.encode("multi-sequence build", add_special_tokens=False)

View File

@@ -134,7 +134,7 @@ class FlaxRobertaPreLayerNormModelTester(unittest.TestCase):
@require_flax
# Copied from tests.models.roberta.test_modeling_flax_roberta.FlaxRobertaModelTest with ROBERTA->ROBERTA_PRELAYERNORM,Roberta->RobertaPreLayerNorm,roberta-base->andreasmadsen/efficient_mlm_m0.40
# Copied from tests.models.roberta.test_modeling_flax_roberta.FlaxRobertaModelTest with ROBERTA->ROBERTA_PRELAYERNORM,Roberta->RobertaPreLayerNorm,FacebookAI/roberta-base->andreasmadsen/efficient_mlm_m0.40
class FlaxRobertaPreLayerNormModelTest(FlaxModelTesterMixin, unittest.TestCase):
test_head_masking = True

View File

@@ -578,7 +578,7 @@ class FlaxEncoderDecoderMixin:
class FlaxWav2Vec2GPT2ModelTest(FlaxEncoderDecoderMixin, unittest.TestCase):
def get_pretrained_model_and_inputs(self):
model = FlaxSpeechEncoderDecoderModel.from_encoder_decoder_pretrained(
"facebook/wav2vec2-large-lv60", "gpt2-medium"
"facebook/wav2vec2-large-lv60", "openai-community/gpt2-medium"
)
batch_size = 13
input_values = floats_tensor([batch_size, 512], scale=1.0)
@@ -812,7 +812,7 @@ class FlaxWav2Vec2BartModelTest(FlaxEncoderDecoderMixin, unittest.TestCase):
class FlaxWav2Vec2BertModelTest(FlaxEncoderDecoderMixin, unittest.TestCase):
def get_pretrained_model_and_inputs(self):
model = FlaxSpeechEncoderDecoderModel.from_encoder_decoder_pretrained(
"facebook/wav2vec2-large-lv60", "bert-large-uncased"
"facebook/wav2vec2-large-lv60", "google-bert/bert-large-uncased"
)
batch_size = 13
input_values = floats_tensor([batch_size, 512], model.config.encoder.vocab_size)

View File

@@ -445,7 +445,7 @@ class EncoderDecoderMixin:
class Wav2Vec2BertModelTest(EncoderDecoderMixin, unittest.TestCase):
def get_pretrained_model_and_inputs(self):
model = SpeechEncoderDecoderModel.from_encoder_decoder_pretrained(
"facebook/wav2vec2-base-960h", "bert-base-cased"
"facebook/wav2vec2-base-960h", "google-bert/bert-base-cased"
)
batch_size = 13
input_values = floats_tensor([batch_size, 512], scale=1.0)
@@ -509,7 +509,7 @@ class Wav2Vec2BertModelTest(EncoderDecoderMixin, unittest.TestCase):
class Speech2TextBertModelTest(EncoderDecoderMixin, unittest.TestCase):
def get_pretrained_model_and_inputs(self):
model = SpeechEncoderDecoderModel.from_encoder_decoder_pretrained(
"facebook/s2t-small-librispeech-asr", "bert-base-cased"
"facebook/s2t-small-librispeech-asr", "google-bert/bert-base-cased"
)
batch_size = 13
input_features = floats_tensor([batch_size, 7, 80], scale=1.0)

View File

@@ -1065,7 +1065,7 @@ class SwitchTransformerModelIntegrationTests(unittest.TestCase):
model = SwitchTransformersForConditionalGeneration.from_pretrained(
"google/switch-base-8", torch_dtype=torch.bfloat16
).eval()
tokenizer = AutoTokenizer.from_pretrained("t5-small", use_fast=False, legacy=False)
tokenizer = AutoTokenizer.from_pretrained("google-t5/t5-small", use_fast=False, legacy=False)
model = model.to(torch_device)
input_ids = tokenizer(
@@ -1093,7 +1093,7 @@ class SwitchTransformerModelIntegrationTests(unittest.TestCase):
model = SwitchTransformersForConditionalGeneration.from_pretrained(
"google/switch-base-8", torch_dtype=torch.bfloat16
).eval()
tokenizer = AutoTokenizer.from_pretrained("t5-small", use_fast=False, legacy=False)
tokenizer = AutoTokenizer.from_pretrained("google-t5/t5-small", use_fast=False, legacy=False)
inputs = [
"A <extra_id_0> walks into a bar and orders a <extra_id_1> with <extra_id_2> pinch of <extra_id_3>."

View File

@@ -773,8 +773,8 @@ class FlaxT5ModelIntegrationTests(unittest.TestCase):
>>> score = t5_model.score(inputs=["Hello there"], targets=["Hi I am"], vocabulary=vocab)
"""
model = FlaxT5ForConditionalGeneration.from_pretrained("t5-small")
tokenizer = T5Tokenizer.from_pretrained("t5-small")
model = FlaxT5ForConditionalGeneration.from_pretrained("google-t5/t5-small")
tokenizer = T5Tokenizer.from_pretrained("google-t5/t5-small")
input_ids = tokenizer("Hello there", return_tensors="np").input_ids
labels = tokenizer("Hi I am", return_tensors="np").input_ids
@@ -849,11 +849,11 @@ class FlaxT5ModelIntegrationTests(unittest.TestCase):
@slow
def test_small_generation(self):
model = FlaxT5ForConditionalGeneration.from_pretrained("t5-small")
model = FlaxT5ForConditionalGeneration.from_pretrained("google-t5/t5-small")
model.config.max_length = 8
model.config.num_beams = 1
model.config.do_sample = False
tokenizer = T5Tokenizer.from_pretrained("t5-small")
tokenizer = T5Tokenizer.from_pretrained("google-t5/t5-small")
input_ids = tokenizer("summarize: Hello there", return_tensors="np").input_ids
@@ -864,11 +864,11 @@ class FlaxT5ModelIntegrationTests(unittest.TestCase):
@slow
def test_small_generation_bfloat16(self):
model = FlaxT5ForConditionalGeneration.from_pretrained("t5-small", dtype=jnp.bfloat16)
model = FlaxT5ForConditionalGeneration.from_pretrained("google-t5/t5-small", dtype=jnp.bfloat16)
model.config.max_length = 8
model.config.num_beams = 1
model.config.do_sample = False
tokenizer = T5Tokenizer.from_pretrained("t5-small")
tokenizer = T5Tokenizer.from_pretrained("google-t5/t5-small")
input_ids = tokenizer("summarize: Hello there", return_tensors="np").input_ids
@@ -879,8 +879,8 @@ class FlaxT5ModelIntegrationTests(unittest.TestCase):
@slow
def test_summarization(self):
model = FlaxT5ForConditionalGeneration.from_pretrained("t5-base")
tok = T5Tokenizer.from_pretrained("t5-base")
model = FlaxT5ForConditionalGeneration.from_pretrained("google-t5/t5-base")
tok = T5Tokenizer.from_pretrained("google-t5/t5-base")
FRANCE_ARTICLE = ( # @noqa
"Marseille, France (CNN)The French prosecutor leading an investigation into the crash of Germanwings"

View File

@@ -108,7 +108,7 @@ class T5ModelTester:
self.decoder_layers = decoder_layers
def get_large_model_config(self):
return T5Config.from_pretrained("t5-base")
return T5Config.from_pretrained("google-t5/t5-base")
def prepare_config_and_inputs(self):
input_ids = ids_tensor([self.batch_size, self.encoder_seq_length], self.vocab_size).clamp(2)
@@ -942,7 +942,7 @@ class T5EncoderOnlyModelTester:
self.is_training = is_training
def get_large_model_config(self):
return T5Config.from_pretrained("t5-base")
return T5Config.from_pretrained("google-t5/t5-base")
def prepare_config_and_inputs(self):
input_ids = ids_tensor([self.batch_size, self.encoder_seq_length], self.vocab_size)
@@ -1096,36 +1096,40 @@ class T5ModelFp16Tests(unittest.TestCase):
with unittest.mock.patch("builtins.__import__", side_effect=import_accelerate_mock):
accelerate_available = False
model = T5ForConditionalGeneration.from_pretrained("t5-small", torch_dtype=torch.float16)
model = T5ForConditionalGeneration.from_pretrained("google-t5/t5-small", torch_dtype=torch.float16)
self.assertTrue(model.decoder.block[0].layer[2].DenseReluDense.wo.weight.dtype == torch.float32)
self.assertTrue(model.decoder.block[0].layer[2].DenseReluDense.wi.weight.dtype == torch.float16)
# Load without `accelerate` in bf16
model = T5ForConditionalGeneration.from_pretrained("t5-small", torch_dtype=torch.bfloat16)
model = T5ForConditionalGeneration.from_pretrained("google-t5/t5-small", torch_dtype=torch.bfloat16)
self.assertTrue(model.decoder.block[0].layer[2].DenseReluDense.wo.weight.dtype == torch.bfloat16)
self.assertTrue(model.decoder.block[0].layer[2].DenseReluDense.wi.weight.dtype == torch.bfloat16)
# Load using `accelerate` in bf16
model = T5ForConditionalGeneration.from_pretrained("t5-small", torch_dtype=torch.bfloat16, device_map="auto")
model = T5ForConditionalGeneration.from_pretrained(
"google-t5/t5-small", torch_dtype=torch.bfloat16, device_map="auto"
)
self.assertTrue(model.decoder.block[0].layer[2].DenseReluDense.wo.weight.dtype == torch.bfloat16)
self.assertTrue(model.decoder.block[0].layer[2].DenseReluDense.wi.weight.dtype == torch.bfloat16)
# Load using `accelerate` in bf16
model = T5ForConditionalGeneration.from_pretrained(
"t5-small", torch_dtype=torch.bfloat16, low_cpu_mem_usage=True
"google-t5/t5-small", torch_dtype=torch.bfloat16, low_cpu_mem_usage=True
)
self.assertTrue(model.decoder.block[0].layer[2].DenseReluDense.wo.weight.dtype == torch.bfloat16)
self.assertTrue(model.decoder.block[0].layer[2].DenseReluDense.wi.weight.dtype == torch.bfloat16)
# Load without using `accelerate`
model = T5ForConditionalGeneration.from_pretrained(
"t5-small", torch_dtype=torch.float16, low_cpu_mem_usage=True
"google-t5/t5-small", torch_dtype=torch.float16, low_cpu_mem_usage=True
)
self.assertTrue(model.decoder.block[0].layer[2].DenseReluDense.wo.weight.dtype == torch.float32)
self.assertTrue(model.decoder.block[0].layer[2].DenseReluDense.wi.weight.dtype == torch.float16)
# Load using `accelerate`
model = T5ForConditionalGeneration.from_pretrained("t5-small", torch_dtype=torch.float16, device_map="auto")
model = T5ForConditionalGeneration.from_pretrained(
"google-t5/t5-small", torch_dtype=torch.float16, device_map="auto"
)
self.assertTrue(model.decoder.block[0].layer[2].DenseReluDense.wo.weight.dtype == torch.float32)
self.assertTrue(model.decoder.block[0].layer[2].DenseReluDense.wi.weight.dtype == torch.float16)
@@ -1136,11 +1140,11 @@ class T5ModelFp16Tests(unittest.TestCase):
class T5ModelIntegrationTests(unittest.TestCase):
@cached_property
def model(self):
return T5ForConditionalGeneration.from_pretrained("t5-base").to(torch_device)
return T5ForConditionalGeneration.from_pretrained("google-t5/t5-base").to(torch_device)
@cached_property
def tokenizer(self):
return T5Tokenizer.from_pretrained("t5-base")
return T5Tokenizer.from_pretrained("google-t5/t5-base")
@slow
def test_torch_quant(self):
@@ -1157,11 +1161,11 @@ class T5ModelIntegrationTests(unittest.TestCase):
@slow
def test_small_generation(self):
model = T5ForConditionalGeneration.from_pretrained("t5-small").to(torch_device)
model = T5ForConditionalGeneration.from_pretrained("google-t5/t5-small").to(torch_device)
model.config.max_length = 8
model.config.num_beams = 1
model.config.do_sample = False
tokenizer = T5Tokenizer.from_pretrained("t5-small")
tokenizer = T5Tokenizer.from_pretrained("google-t5/t5-small")
input_ids = tokenizer("summarize: Hello there", return_tensors="pt").input_ids.to(torch_device)
@@ -1184,8 +1188,8 @@ class T5ModelIntegrationTests(unittest.TestCase):
>>> score = t5_model.score(inputs=["Hello there"], targets=["Hi I am"], vocabulary=vocab)
"""
model = T5ForConditionalGeneration.from_pretrained("t5-small").to(torch_device)
tokenizer = T5Tokenizer.from_pretrained("t5-small")
model = T5ForConditionalGeneration.from_pretrained("google-t5/t5-small").to(torch_device)
tokenizer = T5Tokenizer.from_pretrained("google-t5/t5-small")
input_ids = tokenizer("Hello there", return_tensors="pt").input_ids
labels = tokenizer("Hi I am", return_tensors="pt").input_ids
@@ -1501,7 +1505,7 @@ class T5ModelIntegrationTests(unittest.TestCase):
@slow
def test_translation_en_to_fr(self):
model = self.model # t5-base
model = self.model # google-t5/t5-base
tok = self.tokenizer
use_task_specific_params(model, "translation_en_to_fr")

View File

@@ -302,7 +302,7 @@ class TFT5ModelTest(TFModelTesterMixin, PipelineTesterMixin, unittest.TestCase):
@slow
def test_model_from_pretrained(self):
model = TFT5Model.from_pretrained("t5-small")
model = TFT5Model.from_pretrained("google-t5/t5-small")
self.assertIsNotNone(model)
def test_generate_with_headmasking(self):
@@ -448,8 +448,8 @@ class TFT5EncoderOnlyModelTest(TFModelTesterMixin, unittest.TestCase):
class TFT5GenerationIntegrationTests(unittest.TestCase):
@slow
def test_greedy_xla_generate_simple(self):
model = TFT5ForConditionalGeneration.from_pretrained("t5-small")
tokenizer = T5Tokenizer.from_pretrained("t5-small")
model = TFT5ForConditionalGeneration.from_pretrained("google-t5/t5-small")
tokenizer = T5Tokenizer.from_pretrained("google-t5/t5-small")
# two examples with different lengths to confirm that attention masks are operational in XLA
sentences = [
@@ -476,8 +476,8 @@ class TFT5GenerationIntegrationTests(unittest.TestCase):
@slow
def test_greedy_generate(self):
model = TFT5ForConditionalGeneration.from_pretrained("t5-small")
tokenizer = T5Tokenizer.from_pretrained("t5-small")
model = TFT5ForConditionalGeneration.from_pretrained("google-t5/t5-small")
tokenizer = T5Tokenizer.from_pretrained("google-t5/t5-small")
sentences = ["Yesterday, my name was", "Today is a beautiful day and"]
input_ids = tokenizer(sentences, return_tensors="tf", padding=True).input_ids
@@ -505,8 +505,8 @@ class TFT5GenerationIntegrationTests(unittest.TestCase):
# forces the generation to happen on CPU, to avoid GPU-related quirks
with tf.device(":/CPU:0"):
model = TFT5ForConditionalGeneration.from_pretrained("t5-small")
tokenizer = T5Tokenizer.from_pretrained("t5-small")
model = TFT5ForConditionalGeneration.from_pretrained("google-t5/t5-small")
tokenizer = T5Tokenizer.from_pretrained("google-t5/t5-small")
sentence = "Translate English to German: I have two bananas"
input_ids = tokenizer(sentence, return_tensors="tf", padding=True).input_ids
@@ -526,8 +526,8 @@ class TFT5GenerationIntegrationTests(unittest.TestCase):
@slow
def test_sample_generate(self):
model = TFT5ForConditionalGeneration.from_pretrained("t5-small")
tokenizer = T5Tokenizer.from_pretrained("t5-small")
model = TFT5ForConditionalGeneration.from_pretrained("google-t5/t5-small")
tokenizer = T5Tokenizer.from_pretrained("google-t5/t5-small")
sentences = ["I really love my", "Translate English to German: the transformers are truly amazing"]
input_ids = tokenizer(sentences, return_tensors="tf", padding=True).input_ids
@@ -557,8 +557,8 @@ class TFT5GenerationIntegrationTests(unittest.TestCase):
@unittest.skip("Skip for now as TF 2.13 breaks it on GPU")
@slow
def test_beam_search_xla_generate_simple(self):
model = TFT5ForConditionalGeneration.from_pretrained("t5-small")
tokenizer = T5Tokenizer.from_pretrained("t5-small")
model = TFT5ForConditionalGeneration.from_pretrained("google-t5/t5-small")
tokenizer = T5Tokenizer.from_pretrained("google-t5/t5-small")
# tests XLA with task specific arguments
task_specific_config = getattr(model.config, "task_specific_params", {})
@@ -590,8 +590,8 @@ class TFT5GenerationIntegrationTests(unittest.TestCase):
@slow
def test_beam_search_generate(self):
model = TFT5ForConditionalGeneration.from_pretrained("t5-small")
tokenizer = T5Tokenizer.from_pretrained("t5-small")
model = TFT5ForConditionalGeneration.from_pretrained("google-t5/t5-small")
tokenizer = T5Tokenizer.from_pretrained("google-t5/t5-small")
sentences = ["I really love my", "Translate English to German: the transformers are truly amazing"]
input_ids = tokenizer(sentences, return_tensors="tf", padding=True).input_ids
@@ -622,7 +622,7 @@ class TFT5GenerationIntegrationTests(unittest.TestCase):
class TFT5ModelIntegrationTests(unittest.TestCase):
@cached_property
def model(self):
return TFT5ForConditionalGeneration.from_pretrained("t5-base")
return TFT5ForConditionalGeneration.from_pretrained("google-t5/t5-base")
@slow
def test_small_integration_test(self):
@@ -638,8 +638,8 @@ class TFT5ModelIntegrationTests(unittest.TestCase):
>>> score = t5_model.score(inputs=["Hello there"], targets=["Hi I am"], vocabulary=vocab)
"""
model = TFT5ForConditionalGeneration.from_pretrained("t5-small")
tokenizer = T5Tokenizer.from_pretrained("t5-small")
model = TFT5ForConditionalGeneration.from_pretrained("google-t5/t5-small")
tokenizer = T5Tokenizer.from_pretrained("google-t5/t5-small")
input_ids = tokenizer("Hello there", return_tensors="tf").input_ids
labels = tokenizer("Hi I am", return_tensors="tf").input_ids
@@ -703,7 +703,7 @@ class TFT5ModelIntegrationTests(unittest.TestCase):
@slow
def test_summarization(self):
model = self.model
tok = T5Tokenizer.from_pretrained("t5-base")
tok = T5Tokenizer.from_pretrained("google-t5/t5-base")
FRANCE_ARTICLE = ( # @noqa
"Marseille, France (CNN)The French prosecutor leading an investigation into the crash of Germanwings"
@@ -948,7 +948,7 @@ class TFT5ModelIntegrationTests(unittest.TestCase):
@slow
def test_translation_en_to_de(self):
tok = T5Tokenizer.from_pretrained("t5-base")
tok = T5Tokenizer.from_pretrained("google-t5/t5-base")
model = self.model
task_specific_config = getattr(model.config, "task_specific_params", {})
@@ -978,7 +978,7 @@ class TFT5ModelIntegrationTests(unittest.TestCase):
@slow
def test_translation_en_to_fr(self):
model = self.model
tok = T5Tokenizer.from_pretrained("t5-base")
tok = T5Tokenizer.from_pretrained("google-t5/t5-base")
task_specific_config = getattr(model.config, "task_specific_params", {})
translation_config = task_specific_config.get("translation_en_to_fr", {})
@@ -1015,7 +1015,7 @@ class TFT5ModelIntegrationTests(unittest.TestCase):
@slow
def test_translation_en_to_ro(self):
model = self.model
tok = T5Tokenizer.from_pretrained("t5-base")
tok = T5Tokenizer.from_pretrained("google-t5/t5-base")
task_specific_config = getattr(model.config, "task_specific_params", {})
translation_config = task_specific_config.get("translation_en_to_ro", {})

View File

@@ -138,11 +138,11 @@ class T5TokenizationTest(TokenizerTesterMixin, unittest.TestCase):
@cached_property
def t5_base_tokenizer(self):
return T5Tokenizer.from_pretrained("t5-base")
return T5Tokenizer.from_pretrained("google-t5/t5-base")
@cached_property
def t5_base_tokenizer_fast(self):
return T5TokenizerFast.from_pretrained("t5-base")
return T5TokenizerFast.from_pretrained("google-t5/t5-base")
def get_tokenizer(self, **kwargs) -> T5Tokenizer:
return self.tokenizer_class.from_pretrained(self.tmpdirname, **kwargs)
@@ -373,7 +373,7 @@ class T5TokenizationTest(TokenizerTesterMixin, unittest.TestCase):
self.tokenizer_integration_test_util(
expected_encoding=expected_encoding,
model_name="t5-base",
model_name="google-t5/t5-base",
revision="5a7ff2d8f5117c194c7e32ec1ccbf04642cca99b",
)
@@ -400,7 +400,7 @@ class T5TokenizationTest(TokenizerTesterMixin, unittest.TestCase):
self.assertListEqual(sorted(tokenizer.get_sentinel_token_ids()), sorted(range(1000, 1010)))
def test_some_edge_cases(self):
tokenizer = T5Tokenizer.from_pretrained("t5-base", legacy=False)
tokenizer = T5Tokenizer.from_pretrained("google-t5/t5-base", legacy=False)
sp_tokens = tokenizer.sp_model.encode("</s>>", out_type=str)
self.assertEqual(sp_tokens, ["<", "/", "s", ">", ">"])
@@ -426,8 +426,8 @@ class T5TokenizationTest(TokenizerTesterMixin, unittest.TestCase):
def test_fast_slow_edge_cases(self):
# We are testing spaces before and spaces after special tokens + space transformations
slow_tokenizer = T5Tokenizer.from_pretrained("t5-base", legacy=False)
fast_tokenizer = T5TokenizerFast.from_pretrained("t5-base", legacy=False, from_slow=True)
slow_tokenizer = T5Tokenizer.from_pretrained("google-t5/t5-base", legacy=False)
fast_tokenizer = T5TokenizerFast.from_pretrained("google-t5/t5-base", legacy=False, from_slow=True)
slow_tokenizer.add_tokens(AddedToken("<new_token_test_>", rstrip=False, lstrip=False, normalized=False))
fast_tokenizer.add_tokens(AddedToken("<new_token_test_>", rstrip=False, lstrip=False, normalized=False))
@@ -445,7 +445,7 @@ class T5TokenizationTest(TokenizerTesterMixin, unittest.TestCase):
with self.subTest(f"fast {edge_case} normalized = False"):
self.assertEqual(fast_tokenizer.tokenize(hard_case), EXPECTED_SLOW)
fast_tokenizer = T5TokenizerFast.from_pretrained("t5-base", legacy=False, from_slow=True)
fast_tokenizer = T5TokenizerFast.from_pretrained("google-t5/t5-base", legacy=False, from_slow=True)
fast_tokenizer.add_tokens(AddedToken("<new_token_test_>", rstrip=False, lstrip=False, normalized=True))
# `normalized=True` is the default normalization scheme when adding a token. Normalize -> don't strip the space.
@@ -604,7 +604,7 @@ class CommonSpmIntegrationTests(unittest.TestCase):
)
# Test with T5
hf_tokenizer = T5Tokenizer.from_pretrained("t5-small")
hf_tokenizer = T5Tokenizer.from_pretrained("google-t5/t5-small")
vocab_path = "gs://t5-data/vocabs/cc_all.32000/sentencepiece.model"
t5x_tokenizer = SentencePieceVocabulary(vocab_path, extra_ids=300)
for text in input_texts:

View File

@@ -603,7 +603,7 @@ class UMT5EncoderOnlyModelTester:
self.is_training = is_training
def get_large_model_config(self):
return UMT5Config.from_pretrained("t5-base")
return UMT5Config.from_pretrained("google-t5/t5-base")
def prepare_config_and_inputs(self):
input_ids = ids_tensor([self.batch_size, self.encoder_seq_length], self.vocab_size)

View File

@@ -426,7 +426,7 @@ class FlaxViT2GPT2EncoderDecoderModelTest(FlaxEncoderDecoderMixin, unittest.Test
def get_pretrained_model(self):
return FlaxVisionEncoderDecoderModel.from_encoder_decoder_pretrained(
"google/vit-base-patch16-224-in21k", "gpt2"
"google/vit-base-patch16-224-in21k", "openai-community/gpt2"
)
@@ -434,7 +434,7 @@ class FlaxViT2GPT2EncoderDecoderModelTest(FlaxEncoderDecoderMixin, unittest.Test
class FlaxVisionEncoderDecoderModelTest(unittest.TestCase):
def get_from_encoderdecoder_pretrained_model(self):
return FlaxVisionEncoderDecoderModel.from_encoder_decoder_pretrained(
"google/vit-base-patch16-224-in21k", "gpt2"
"google/vit-base-patch16-224-in21k", "openai-community/gpt2"
)
def _check_configuration_tie(self, model):

View File

@@ -627,7 +627,9 @@ class TFVisionEncoderDecoderMixin:
@require_tf
class TFViT2GPT2EncoderDecoderModelTest(TFVisionEncoderDecoderMixin, unittest.TestCase):
def get_pretrained_model(self):
return TFVisionEncoderDecoderModel.from_encoder_decoder_pretrained("google/vit-base-patch16-224-in21k", "gpt2")
return TFVisionEncoderDecoderModel.from_encoder_decoder_pretrained(
"google/vit-base-patch16-224-in21k", "openai-community/gpt2"
)
def get_encoder_decoder_model(self, config, decoder_config):
encoder_model = TFViTModel(config, name="encoder")
@@ -672,10 +674,12 @@ class TFViT2GPT2EncoderDecoderModelTest(TFVisionEncoderDecoderMixin, unittest.Te
@require_tf
class TFVisionEncoderDecoderModelTest(unittest.TestCase):
def get_from_encoderdecoder_pretrained_model(self):
return TFVisionEncoderDecoderModel.from_encoder_decoder_pretrained("google/vit-base-patch16-224-in21k", "gpt2")
return TFVisionEncoderDecoderModel.from_encoder_decoder_pretrained(
"google/vit-base-patch16-224-in21k", "openai-community/gpt2"
)
def get_decoder_config(self):
config = AutoConfig.from_pretrained("gpt2")
config = AutoConfig.from_pretrained("openai-community/gpt2")
config.is_decoder = True
config.add_cross_attention = True
return config
@@ -685,7 +689,9 @@ class TFVisionEncoderDecoderModelTest(unittest.TestCase):
def get_encoder_decoder_models(self):
encoder_model = TFViTModel.from_pretrained("google/vit-base-patch16-224-in21k", name="encoder")
decoder_model = TFGPT2LMHeadModel.from_pretrained("gpt2", config=self.get_decoder_config(), name="decoder")
decoder_model = TFGPT2LMHeadModel.from_pretrained(
"openai-community/gpt2", config=self.get_decoder_config(), name="decoder"
)
return {"encoder": encoder_model, "decoder": decoder_model}
def _check_configuration_tie(self, model):
@@ -714,7 +720,7 @@ def prepare_img():
class TFVisionEncoderDecoderModelSaveLoadTests(unittest.TestCase):
def get_encoder_decoder_config(self):
encoder_config = AutoConfig.from_pretrained("google/vit-base-patch16-224-in21k")
decoder_config = AutoConfig.from_pretrained("gpt2", is_decoder=True, add_cross_attention=True)
decoder_config = AutoConfig.from_pretrained("openai-community/gpt2", is_decoder=True, add_cross_attention=True)
return VisionEncoderDecoderConfig.from_encoder_decoder_configs(encoder_config, decoder_config)
def get_encoder_decoder_config_small(self):
@@ -829,7 +835,7 @@ class TFVisionEncoderDecoderModelSaveLoadTests(unittest.TestCase):
config = self.get_encoder_decoder_config()
image_processor = AutoImageProcessor.from_pretrained("google/vit-base-patch16-224-in21k")
decoder_tokenizer = AutoTokenizer.from_pretrained("gpt2")
decoder_tokenizer = AutoTokenizer.from_pretrained("openai-community/gpt2")
img = prepare_img()
pixel_values = image_processor(images=img, return_tensors="tf").pixel_values
@@ -845,7 +851,7 @@ class TFVisionEncoderDecoderModelSaveLoadTests(unittest.TestCase):
encoder = TFAutoModel.from_pretrained("google/vit-base-patch16-224-in21k", name="encoder")
# It's necessary to specify `add_cross_attention=True` here.
decoder = TFAutoModelForCausalLM.from_pretrained(
"gpt2", is_decoder=True, add_cross_attention=True, name="decoder"
"openai-community/gpt2", is_decoder=True, add_cross_attention=True, name="decoder"
)
pretrained_encoder_dir = os.path.join(tmp_dirname, "pretrained_encoder")
pretrained_decoder_dir = os.path.join(tmp_dirname, "pretrained_decoder")

View File

@@ -369,7 +369,7 @@ class TFXLMModelTest(TFModelTesterMixin, PipelineTesterMixin, unittest.TestCase)
class TFXLMModelLanguageGenerationTest(unittest.TestCase):
@slow
def test_lm_generate_xlm_mlm_en_2048(self):
model = TFXLMWithLMHeadModel.from_pretrained("xlm-mlm-en-2048")
model = TFXLMWithLMHeadModel.from_pretrained("FacebookAI/xlm-mlm-en-2048")
input_ids = tf.convert_to_tensor([[14, 447]], dtype=tf.int32) # the president
expected_output_ids = [
14,

View File

@@ -514,7 +514,7 @@ class XLMModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMixin,
class XLMModelLanguageGenerationTest(unittest.TestCase):
@slow
def test_lm_generate_xlm_mlm_en_2048(self):
model = XLMWithLMHeadModel.from_pretrained("xlm-mlm-en-2048")
model = XLMWithLMHeadModel.from_pretrained("FacebookAI/xlm-mlm-en-2048")
model.to(torch_device)
input_ids = torch.tensor([[14, 447]], dtype=torch.long, device=torch_device) # the president
expected_output_ids = [

View File

@@ -85,7 +85,7 @@ class XLMTokenizationTest(TokenizerTesterMixin, unittest.TestCase):
@slow
def test_sequence_builders(self):
tokenizer = XLMTokenizer.from_pretrained("xlm-mlm-en-2048")
tokenizer = XLMTokenizer.from_pretrained("FacebookAI/xlm-mlm-en-2048")
text = tokenizer.encode("sequence builders", add_special_tokens=False)
text_2 = tokenizer.encode("multi-sequence build", add_special_tokens=False)

View File

@@ -32,8 +32,8 @@ if is_flax_available():
class FlaxXLMRobertaModelIntegrationTest(unittest.TestCase):
@slow
def test_flax_xlm_roberta_base(self):
model = FlaxXLMRobertaModel.from_pretrained("xlm-roberta-base")
tokenizer = AutoTokenizer.from_pretrained("xlm-roberta-base")
model = FlaxXLMRobertaModel.from_pretrained("FacebookAI/xlm-roberta-base")
tokenizer = AutoTokenizer.from_pretrained("FacebookAI/xlm-roberta-base")
text = "The dog is cute and lives in the garden house"
input_ids = jnp.array([tokenizer.encode(text)])

View File

@@ -32,7 +32,7 @@ if is_torch_available():
class XLMRobertaModelIntegrationTest(unittest.TestCase):
@slow
def test_xlm_roberta_base(self):
model = XLMRobertaModel.from_pretrained("xlm-roberta-base")
model = XLMRobertaModel.from_pretrained("FacebookAI/xlm-roberta-base")
input_ids = torch.tensor([[0, 581, 10269, 83, 99942, 136, 60742, 23, 70, 80583, 18276, 2]])
# The dog is cute and lives in the garden house
@@ -51,7 +51,7 @@ class XLMRobertaModelIntegrationTest(unittest.TestCase):
@slow
def test_xlm_roberta_large(self):
model = XLMRobertaModel.from_pretrained("xlm-roberta-large")
model = XLMRobertaModel.from_pretrained("FacebookAI/xlm-roberta-large")
input_ids = torch.tensor([[0, 581, 10269, 83, 99942, 136, 60742, 23, 70, 80583, 18276, 2]])
# The dog is cute and lives in the garden house

View File

@@ -212,7 +212,7 @@ class XLMRobertaTokenizationTest(TokenizerTesterMixin, unittest.TestCase):
@cached_property
def big_tokenizer(self):
return XLMRobertaTokenizer.from_pretrained("xlm-roberta-base")
return XLMRobertaTokenizer.from_pretrained("FacebookAI/xlm-roberta-base")
def test_picklable_without_disk(self):
with tempfile.NamedTemporaryFile() as f:
@@ -338,6 +338,6 @@ class XLMRobertaTokenizationTest(TokenizerTesterMixin, unittest.TestCase):
self.tokenizer_integration_test_util(
expected_encoding=expected_encoding,
model_name="xlm-roberta-base",
model_name="FacebookAI/xlm-roberta-base",
revision="d9d8a8ea5eb94b1c6654ae9249df7793cd2933d3",
)

View File

@@ -491,7 +491,7 @@ class TFXLNetModelTest(TFModelTesterMixin, PipelineTesterMixin, unittest.TestCas
class TFXLNetModelLanguageGenerationTest(unittest.TestCase):
@slow
def test_lm_generate_xlnet_base_cased(self):
model = TFXLNetLMHeadModel.from_pretrained("xlnet-base-cased")
model = TFXLNetLMHeadModel.from_pretrained("xlnet/xlnet-base-cased")
# fmt: off
input_ids = tf.convert_to_tensor(
[

View File

@@ -694,7 +694,7 @@ class XLNetModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMixi
class XLNetModelLanguageGenerationTest(unittest.TestCase):
@slow
def test_lm_generate_xlnet_base_cased(self):
model = XLNetLMHeadModel.from_pretrained("xlnet-base-cased")
model = XLNetLMHeadModel.from_pretrained("xlnet/xlnet-base-cased")
model.to(torch_device)
# fmt: off
input_ids = torch.tensor(

View File

@@ -186,7 +186,7 @@ class XLNetTokenizationTest(TokenizerTesterMixin, unittest.TestCase):
@slow
def test_sequence_builders(self):
tokenizer = XLNetTokenizer.from_pretrained("xlnet-base-cased")
tokenizer = XLNetTokenizer.from_pretrained("xlnet/xlnet-base-cased")
text = tokenizer.encode("sequence builders", add_special_tokens=False)
text_2 = tokenizer.encode("multi-sequence build", add_special_tokens=False)
@@ -203,6 +203,6 @@ class XLNetTokenizationTest(TokenizerTesterMixin, unittest.TestCase):
self.tokenizer_integration_test_util(
expected_encoding=expected_encoding,
model_name="xlnet-base-cased",
model_name="xlnet/xlnet-base-cased",
revision="c841166438c31ec7ca9a106dee7bb312b73ae511",
)

View File

@@ -630,7 +630,7 @@ class XmodModelIntegrationTest(unittest.TestCase):
@slow
def test_end_to_end_mask_fill(self):
tokenizer = XLMRobertaTokenizer.from_pretrained("xlm-roberta-base")
tokenizer = XLMRobertaTokenizer.from_pretrained("FacebookAI/xlm-roberta-base")
model = XmodForMaskedLM.from_pretrained("facebook/xmod-base", default_language="en_XX")
model.to(torch_device)