Update all references to canonical models (#29001)
* Script & Manual edition * Update
This commit is contained in:
@@ -331,7 +331,7 @@ class AlbertModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase):
|
||||
class AlbertModelIntegrationTest(unittest.TestCase):
|
||||
@slow
|
||||
def test_inference_no_head_absolute_embedding(self):
|
||||
model = AlbertModel.from_pretrained("albert-base-v2")
|
||||
model = AlbertModel.from_pretrained("albert/albert-base-v2")
|
||||
input_ids = torch.tensor([[0, 345, 232, 328, 740, 140, 1695, 69, 6078, 1588, 2]])
|
||||
attention_mask = torch.tensor([[0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]])
|
||||
with torch.no_grad():
|
||||
|
||||
@@ -139,7 +139,7 @@ class FlaxAlbertModelTest(FlaxModelTesterMixin, unittest.TestCase):
|
||||
@slow
|
||||
def test_model_from_pretrained(self):
|
||||
for model_class_name in self.all_model_classes:
|
||||
model = model_class_name.from_pretrained("albert-base-v2")
|
||||
model = model_class_name.from_pretrained("albert/albert-base-v2")
|
||||
outputs = model(np.ones((1, 1)))
|
||||
self.assertIsNotNone(outputs)
|
||||
|
||||
@@ -148,7 +148,7 @@ class FlaxAlbertModelTest(FlaxModelTesterMixin, unittest.TestCase):
|
||||
class FlaxAlbertModelIntegrationTest(unittest.TestCase):
|
||||
@slow
|
||||
def test_inference_no_head_absolute_embedding(self):
|
||||
model = FlaxAlbertModel.from_pretrained("albert-base-v2")
|
||||
model = FlaxAlbertModel.from_pretrained("albert/albert-base-v2")
|
||||
input_ids = np.array([[0, 345, 232, 328, 740, 140, 1695, 69, 6078, 1588, 2]])
|
||||
attention_mask = np.array([[0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]])
|
||||
output = model(input_ids, attention_mask=attention_mask)[0]
|
||||
|
||||
@@ -311,7 +311,7 @@ class TFAlbertModelTest(TFModelTesterMixin, PipelineTesterMixin, unittest.TestCa
|
||||
class TFAlbertModelIntegrationTest(unittest.TestCase):
|
||||
@slow
|
||||
def test_inference_masked_lm(self):
|
||||
model = TFAlbertForPreTraining.from_pretrained("albert-base-v2")
|
||||
model = TFAlbertForPreTraining.from_pretrained("albert/albert-base-v2")
|
||||
input_ids = tf.constant([[0, 1, 2, 3, 4, 5]])
|
||||
output = model(input_ids)[0]
|
||||
|
||||
|
||||
@@ -127,6 +127,6 @@ class AlbertTokenizationTest(TokenizerTesterMixin, unittest.TestCase):
|
||||
|
||||
self.tokenizer_integration_test_util(
|
||||
expected_encoding=expected_encoding,
|
||||
model_name="albert-base-v2",
|
||||
model_name="albert/albert-base-v2",
|
||||
revision="6b6560eaf5ff2e250b00c50f380c5389a9c2d82e",
|
||||
)
|
||||
|
||||
@@ -46,7 +46,7 @@ class AutoConfigTest(unittest.TestCase):
|
||||
self.assertIsNotNone(importlib.util.find_spec("transformers.models.auto"))
|
||||
|
||||
def test_config_from_model_shortcut(self):
|
||||
config = AutoConfig.from_pretrained("bert-base-uncased")
|
||||
config = AutoConfig.from_pretrained("google-bert/bert-base-uncased")
|
||||
self.assertIsInstance(config, BertConfig)
|
||||
|
||||
def test_config_model_type_from_local_file(self):
|
||||
|
||||
@@ -30,7 +30,7 @@ if is_flax_available():
|
||||
class FlaxAutoModelTest(unittest.TestCase):
|
||||
@slow
|
||||
def test_bert_from_pretrained(self):
|
||||
for model_name in ["bert-base-cased", "bert-large-uncased"]:
|
||||
for model_name in ["google-bert/bert-base-cased", "google-bert/bert-large-uncased"]:
|
||||
with self.subTest(model_name):
|
||||
config = AutoConfig.from_pretrained(model_name)
|
||||
self.assertIsNotNone(config)
|
||||
@@ -42,7 +42,7 @@ class FlaxAutoModelTest(unittest.TestCase):
|
||||
|
||||
@slow
|
||||
def test_roberta_from_pretrained(self):
|
||||
for model_name in ["roberta-base", "roberta-large"]:
|
||||
for model_name in ["FacebookAI/roberta-base", "FacebookAI/roberta-large"]:
|
||||
with self.subTest(model_name):
|
||||
config = AutoConfig.from_pretrained(model_name)
|
||||
self.assertIsNotNone(config)
|
||||
@@ -54,7 +54,7 @@ class FlaxAutoModelTest(unittest.TestCase):
|
||||
|
||||
@slow
|
||||
def test_bert_jax_jit(self):
|
||||
for model_name in ["bert-base-cased", "bert-large-uncased"]:
|
||||
for model_name in ["google-bert/bert-base-cased", "google-bert/bert-large-uncased"]:
|
||||
tokenizer = AutoTokenizer.from_pretrained(model_name)
|
||||
model = FlaxBertModel.from_pretrained(model_name)
|
||||
tokens = tokenizer("Do you support jax jitted function?", return_tensors=TensorType.JAX)
|
||||
@@ -67,7 +67,7 @@ class FlaxAutoModelTest(unittest.TestCase):
|
||||
|
||||
@slow
|
||||
def test_roberta_jax_jit(self):
|
||||
for model_name in ["roberta-base", "roberta-large"]:
|
||||
for model_name in ["FacebookAI/roberta-base", "FacebookAI/roberta-large"]:
|
||||
tokenizer = AutoTokenizer.from_pretrained(model_name)
|
||||
model = FlaxRobertaModel.from_pretrained(model_name)
|
||||
tokens = tokenizer("Do you support jax jitted function?", return_tensors=TensorType.JAX)
|
||||
|
||||
@@ -85,7 +85,7 @@ if is_tf_available():
|
||||
class TFAutoModelTest(unittest.TestCase):
|
||||
@slow
|
||||
def test_model_from_pretrained(self):
|
||||
model_name = "bert-base-cased"
|
||||
model_name = "google-bert/bert-base-cased"
|
||||
config = AutoConfig.from_pretrained(model_name)
|
||||
self.assertIsNotNone(config)
|
||||
self.assertIsInstance(config, BertConfig)
|
||||
@@ -96,7 +96,7 @@ class TFAutoModelTest(unittest.TestCase):
|
||||
|
||||
@slow
|
||||
def test_model_for_pretraining_from_pretrained(self):
|
||||
model_name = "bert-base-cased"
|
||||
model_name = "google-bert/bert-base-cased"
|
||||
config = AutoConfig.from_pretrained(model_name)
|
||||
self.assertIsNotNone(config)
|
||||
self.assertIsInstance(config, BertConfig)
|
||||
@@ -155,7 +155,7 @@ class TFAutoModelTest(unittest.TestCase):
|
||||
@slow
|
||||
def test_sequence_classification_model_from_pretrained(self):
|
||||
# for model_name in TF_BERT_PRETRAINED_MODEL_ARCHIVE_LIST[:1]:
|
||||
for model_name in ["bert-base-uncased"]:
|
||||
for model_name in ["google-bert/bert-base-uncased"]:
|
||||
config = AutoConfig.from_pretrained(model_name)
|
||||
self.assertIsNotNone(config)
|
||||
self.assertIsInstance(config, BertConfig)
|
||||
@@ -167,7 +167,7 @@ class TFAutoModelTest(unittest.TestCase):
|
||||
@slow
|
||||
def test_question_answering_model_from_pretrained(self):
|
||||
# for model_name in TF_BERT_PRETRAINED_MODEL_ARCHIVE_LIST[:1]:
|
||||
for model_name in ["bert-base-uncased"]:
|
||||
for model_name in ["google-bert/bert-base-uncased"]:
|
||||
config = AutoConfig.from_pretrained(model_name)
|
||||
self.assertIsNotNone(config)
|
||||
self.assertIsInstance(config, BertConfig)
|
||||
|
||||
@@ -75,7 +75,7 @@ class TFPTAutoModelTest(unittest.TestCase):
|
||||
@slow
|
||||
def test_model_from_pretrained(self):
|
||||
# for model_name in TF_BERT_PRETRAINED_MODEL_ARCHIVE_LIST[:1]:
|
||||
for model_name in ["bert-base-uncased"]:
|
||||
for model_name in ["google-bert/bert-base-uncased"]:
|
||||
config = AutoConfig.from_pretrained(model_name)
|
||||
self.assertIsNotNone(config)
|
||||
self.assertIsInstance(config, BertConfig)
|
||||
@@ -91,7 +91,7 @@ class TFPTAutoModelTest(unittest.TestCase):
|
||||
@slow
|
||||
def test_model_for_pretraining_from_pretrained(self):
|
||||
# for model_name in TF_BERT_PRETRAINED_MODEL_ARCHIVE_LIST[:1]:
|
||||
for model_name in ["bert-base-uncased"]:
|
||||
for model_name in ["google-bert/bert-base-uncased"]:
|
||||
config = AutoConfig.from_pretrained(model_name)
|
||||
self.assertIsNotNone(config)
|
||||
self.assertIsInstance(config, BertConfig)
|
||||
@@ -185,7 +185,7 @@ class TFPTAutoModelTest(unittest.TestCase):
|
||||
@slow
|
||||
def test_sequence_classification_model_from_pretrained(self):
|
||||
# for model_name in TF_BERT_PRETRAINED_MODEL_ARCHIVE_LIST[:1]:
|
||||
for model_name in ["bert-base-uncased"]:
|
||||
for model_name in ["google-bert/bert-base-uncased"]:
|
||||
config = AutoConfig.from_pretrained(model_name)
|
||||
self.assertIsNotNone(config)
|
||||
self.assertIsInstance(config, BertConfig)
|
||||
@@ -201,7 +201,7 @@ class TFPTAutoModelTest(unittest.TestCase):
|
||||
@slow
|
||||
def test_question_answering_model_from_pretrained(self):
|
||||
# for model_name in TF_BERT_PRETRAINED_MODEL_ARCHIVE_LIST[:1]:
|
||||
for model_name in ["bert-base-uncased"]:
|
||||
for model_name in ["google-bert/bert-base-uncased"]:
|
||||
config = AutoConfig.from_pretrained(model_name)
|
||||
self.assertIsNotNone(config)
|
||||
self.assertIsInstance(config, BertConfig)
|
||||
|
||||
@@ -176,12 +176,14 @@ class AutoTokenizerTest(unittest.TestCase):
|
||||
|
||||
@require_tokenizers
|
||||
def test_from_pretrained_use_fast_toggle(self):
|
||||
self.assertIsInstance(AutoTokenizer.from_pretrained("bert-base-cased", use_fast=False), BertTokenizer)
|
||||
self.assertIsInstance(AutoTokenizer.from_pretrained("bert-base-cased"), BertTokenizerFast)
|
||||
self.assertIsInstance(
|
||||
AutoTokenizer.from_pretrained("google-bert/bert-base-cased", use_fast=False), BertTokenizer
|
||||
)
|
||||
self.assertIsInstance(AutoTokenizer.from_pretrained("google-bert/bert-base-cased"), BertTokenizerFast)
|
||||
|
||||
@require_tokenizers
|
||||
def test_do_lower_case(self):
|
||||
tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased", do_lower_case=False)
|
||||
tokenizer = AutoTokenizer.from_pretrained("distilbert/distilbert-base-uncased", do_lower_case=False)
|
||||
sample = "Hello, world. How are you?"
|
||||
tokens = tokenizer.tokenize(sample)
|
||||
self.assertEqual("[UNK]", tokens[0])
|
||||
@@ -211,15 +213,15 @@ class AutoTokenizerTest(unittest.TestCase):
|
||||
self.assertEqual(tokenizer2.vocab_size, 12)
|
||||
|
||||
def test_auto_tokenizer_fast_no_slow(self):
|
||||
tokenizer = AutoTokenizer.from_pretrained("ctrl")
|
||||
tokenizer = AutoTokenizer.from_pretrained("Salesforce/ctrl")
|
||||
# There is no fast CTRL so this always gives us a slow tokenizer.
|
||||
self.assertIsInstance(tokenizer, CTRLTokenizer)
|
||||
|
||||
def test_get_tokenizer_config(self):
|
||||
# Check we can load the tokenizer config of an online model.
|
||||
config = get_tokenizer_config("bert-base-cased")
|
||||
config = get_tokenizer_config("google-bert/bert-base-cased")
|
||||
_ = config.pop("_commit_hash", None)
|
||||
# If we ever update bert-base-cased tokenizer config, this dict here will need to be updated.
|
||||
# If we ever update google-bert/bert-base-cased tokenizer config, this dict here will need to be updated.
|
||||
self.assertEqual(config, {"do_lower_case": False})
|
||||
|
||||
# This model does not have a tokenizer_config so we get back an empty dict.
|
||||
|
||||
@@ -627,7 +627,7 @@ class BertModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMixin
|
||||
class BertModelIntegrationTest(unittest.TestCase):
|
||||
@slow
|
||||
def test_inference_no_head_absolute_embedding(self):
|
||||
model = BertModel.from_pretrained("bert-base-uncased")
|
||||
model = BertModel.from_pretrained("google-bert/bert-base-uncased")
|
||||
input_ids = torch.tensor([[0, 345, 232, 328, 740, 140, 1695, 69, 6078, 1588, 2]])
|
||||
attention_mask = torch.tensor([[0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]])
|
||||
with torch.no_grad():
|
||||
|
||||
@@ -158,6 +158,6 @@ class FlaxBertModelTest(FlaxModelTesterMixin, unittest.TestCase):
|
||||
def test_model_from_pretrained(self):
|
||||
# Only check this for base model, not necessary for all model classes.
|
||||
# This will also help speed-up tests.
|
||||
model = FlaxBertModel.from_pretrained("bert-base-cased")
|
||||
model = FlaxBertModel.from_pretrained("google-bert/bert-base-cased")
|
||||
outputs = model(np.ones((1, 1)))
|
||||
self.assertIsNotNone(outputs)
|
||||
|
||||
@@ -242,7 +242,7 @@ class BertTokenizationTest(TokenizerTesterMixin, unittest.TestCase):
|
||||
|
||||
@slow
|
||||
def test_sequence_builders(self):
|
||||
tokenizer = self.tokenizer_class.from_pretrained("bert-base-uncased")
|
||||
tokenizer = self.tokenizer_class.from_pretrained("google-bert/bert-base-uncased")
|
||||
|
||||
text = tokenizer.encode("sequence builders", add_special_tokens=False)
|
||||
text_2 = tokenizer.encode("multi-sequence build", add_special_tokens=False)
|
||||
|
||||
@@ -16,7 +16,7 @@ if is_tensorflow_text_available():
|
||||
from transformers.models.bert import TFBertTokenizer
|
||||
|
||||
|
||||
TOKENIZER_CHECKPOINTS = ["bert-base-uncased", "bert-base-cased"]
|
||||
TOKENIZER_CHECKPOINTS = ["google-bert/bert-base-uncased", "google-bert/bert-base-cased"]
|
||||
TINY_MODEL_CHECKPOINT = "hf-internal-testing/tiny-bert-tf-only"
|
||||
|
||||
if is_tf_available():
|
||||
|
||||
@@ -488,7 +488,7 @@ class BertTokenizerMismatchTest(unittest.TestCase):
|
||||
" is called from."
|
||||
)
|
||||
)
|
||||
EXAMPLE_BERT_ID = "bert-base-cased"
|
||||
EXAMPLE_BERT_ID = "google-bert/bert-base-cased"
|
||||
with self.assertLogs("transformers", level="WARNING") as cm:
|
||||
BertJapaneseTokenizer.from_pretrained(EXAMPLE_BERT_ID)
|
||||
self.assertTrue(
|
||||
|
||||
@@ -31,7 +31,7 @@ if is_torch_available():
|
||||
class CamembertModelIntegrationTest(unittest.TestCase):
|
||||
@slow
|
||||
def test_output_embeds_base_model(self):
|
||||
model = CamembertModel.from_pretrained("camembert-base")
|
||||
model = CamembertModel.from_pretrained("almanach/camembert-base")
|
||||
model.to(torch_device)
|
||||
|
||||
input_ids = torch.tensor(
|
||||
|
||||
@@ -128,7 +128,7 @@ class CamembertTokenizationTest(TokenizerTesterMixin, unittest.TestCase):
|
||||
|
||||
self.tokenizer_integration_test_util(
|
||||
expected_encoding=expected_encoding,
|
||||
model_name="camembert-base",
|
||||
model_name="almanach/camembert-base",
|
||||
revision="3a0641d9a1aeb7e848a74299e7e4c4bca216b4cf",
|
||||
sequences=sequences,
|
||||
)
|
||||
|
||||
@@ -50,7 +50,7 @@ class DPRReaderTokenizationTest(BertTokenizationTest):
|
||||
|
||||
@slow
|
||||
def test_decode_best_spans(self):
|
||||
tokenizer = self.tokenizer_class.from_pretrained("bert-base-uncased")
|
||||
tokenizer = self.tokenizer_class.from_pretrained("google-bert/bert-base-uncased")
|
||||
|
||||
text_1 = tokenizer.encode("question sequence", add_special_tokens=False)
|
||||
text_2 = tokenizer.encode("title sequence", add_special_tokens=False)
|
||||
@@ -73,7 +73,7 @@ class DPRReaderTokenizationTest(BertTokenizationTest):
|
||||
|
||||
@slow
|
||||
def test_call(self):
|
||||
tokenizer = self.tokenizer_class.from_pretrained("bert-base-uncased")
|
||||
tokenizer = self.tokenizer_class.from_pretrained("google-bert/bert-base-uncased")
|
||||
|
||||
text_1 = tokenizer.encode("question sequence", add_special_tokens=False)
|
||||
text_2 = tokenizer.encode("title sequence", add_special_tokens=False)
|
||||
|
||||
@@ -671,7 +671,9 @@ class EncoderDecoderMixin:
|
||||
@require_torch
|
||||
class BertEncoderDecoderModelTest(EncoderDecoderMixin, unittest.TestCase):
|
||||
def get_pretrained_model(self):
|
||||
return EncoderDecoderModel.from_encoder_decoder_pretrained("bert-base-cased", "bert-base-cased")
|
||||
return EncoderDecoderModel.from_encoder_decoder_pretrained(
|
||||
"google-bert/bert-base-cased", "google-bert/bert-base-cased"
|
||||
)
|
||||
|
||||
def get_encoder_decoder_model(self, config, decoder_config):
|
||||
encoder_model = BertModel(config)
|
||||
@@ -937,7 +939,9 @@ class RoBertaEncoderDecoderModelTest(EncoderDecoderMixin, unittest.TestCase):
|
||||
}
|
||||
|
||||
def get_pretrained_model(self):
|
||||
return EncoderDecoderModel.from_encoder_decoder_pretrained("roberta-base", "roberta-base")
|
||||
return EncoderDecoderModel.from_encoder_decoder_pretrained(
|
||||
"FacebookAI/roberta-base", "FacebookAI/roberta-base"
|
||||
)
|
||||
|
||||
|
||||
@require_torch
|
||||
@@ -994,7 +998,9 @@ class GPT2EncoderDecoderModelTest(EncoderDecoderMixin, unittest.TestCase):
|
||||
}
|
||||
|
||||
def get_pretrained_model(self):
|
||||
return EncoderDecoderModel.from_encoder_decoder_pretrained("bert-base-cased", "gpt2")
|
||||
return EncoderDecoderModel.from_encoder_decoder_pretrained(
|
||||
"google-bert/bert-base-cased", "openai-community/gpt2"
|
||||
)
|
||||
|
||||
def test_encoder_decoder_model_shared_weights(self):
|
||||
pass
|
||||
@@ -1004,8 +1010,8 @@ class GPT2EncoderDecoderModelTest(EncoderDecoderMixin, unittest.TestCase):
|
||||
model = EncoderDecoderModel.from_pretrained("patrickvonplaten/bert2gpt2-cnn_dailymail-fp16")
|
||||
|
||||
model.to(torch_device)
|
||||
tokenizer_in = AutoTokenizer.from_pretrained("bert-base-cased")
|
||||
tokenizer_out = AutoTokenizer.from_pretrained("gpt2")
|
||||
tokenizer_in = AutoTokenizer.from_pretrained("google-bert/bert-base-cased")
|
||||
tokenizer_out = AutoTokenizer.from_pretrained("openai-community/gpt2")
|
||||
|
||||
ARTICLE_STUDENTS = """(CNN)Sigma Alpha Epsilon is under fire for a video showing party-bound fraternity members singing a racist chant. SAE's national chapter suspended the students, but University of Oklahoma President David Boren took it a step further, saying the university's affiliation with the fraternity is permanently done. The news is shocking, but it's not the first time SAE has faced controversy. SAE was founded March 9, 1856, at the University of Alabama, five years before the American Civil War, according to the fraternity website. When the war began, the group had fewer than 400 members, of which "369 went to war for the Confederate States and seven for the Union Army," the website says. The fraternity now boasts more than 200,000 living alumni, along with about 15,000 undergraduates populating 219 chapters and 20 "colonies" seeking full membership at universities. SAE has had to work hard to change recently after a string of member deaths, many blamed on the hazing of new recruits, SAE national President Bradley Cohen wrote in a message on the fraternity's website. The fraternity's website lists more than 130 chapters cited or suspended for "health and safety incidents" since 2010. At least 30 of the incidents involved hazing, and dozens more involved alcohol. However, the list is missing numerous incidents from recent months. Among them, according to various media outlets: Yale University banned the SAEs from campus activities last month after members allegedly tried to interfere with a sexual misconduct investigation connected to an initiation rite. Stanford University in December suspended SAE housing privileges after finding sorority members attending a fraternity function were subjected to graphic sexual content. And Johns Hopkins University in November suspended the fraternity for underage drinking. "The media has labeled us as the 'nation's deadliest fraternity,' " Cohen said. 
In 2011, for example, a student died while being coerced into excessive alcohol consumption, according to a lawsuit. SAE's previous insurer dumped the fraternity. "As a result, we are paying Lloyd's of London the highest insurance rates in the Greek-letter world," Cohen said. Universities have turned down SAE's attempts to open new chapters, and the fraternity had to close 12 in 18 months over hazing incidents."""
|
||||
|
||||
@@ -1067,7 +1073,7 @@ class ProphetNetEncoderDecoderModelTest(EncoderDecoderMixin, unittest.TestCase):
|
||||
|
||||
def get_pretrained_model(self):
|
||||
return EncoderDecoderModel.from_encoder_decoder_pretrained(
|
||||
"bert-large-uncased", "microsoft/prophetnet-large-uncased"
|
||||
"google-bert/bert-large-uncased", "microsoft/prophetnet-large-uncased"
|
||||
)
|
||||
|
||||
def test_encoder_decoder_model_shared_weights(self):
|
||||
@@ -1122,7 +1128,9 @@ class BartEncoderDecoderModelTest(EncoderDecoderMixin, unittest.TestCase):
|
||||
}
|
||||
|
||||
def get_pretrained_model(self):
|
||||
return EncoderDecoderModel.from_encoder_decoder_pretrained("bert-large-uncased", "facebook/bart-large")
|
||||
return EncoderDecoderModel.from_encoder_decoder_pretrained(
|
||||
"google-bert/bert-large-uncased", "facebook/bart-large"
|
||||
)
|
||||
|
||||
def test_encoder_decoder_model_shared_weights(self):
|
||||
pass
|
||||
@@ -1131,10 +1139,12 @@ class BartEncoderDecoderModelTest(EncoderDecoderMixin, unittest.TestCase):
|
||||
@require_torch
|
||||
class EncoderDecoderModelTest(unittest.TestCase):
|
||||
def get_from_encoderdecoder_pretrained_model(self):
|
||||
return EncoderDecoderModel.from_encoder_decoder_pretrained("bert-base-uncased", "bert-base-uncased")
|
||||
return EncoderDecoderModel.from_encoder_decoder_pretrained(
|
||||
"google-bert/bert-base-uncased", "google-bert/bert-base-uncased"
|
||||
)
|
||||
|
||||
def get_decoder_config(self):
|
||||
config = AutoConfig.from_pretrained("bert-base-uncased")
|
||||
config = AutoConfig.from_pretrained("google-bert/bert-base-uncased")
|
||||
config.is_decoder = True
|
||||
config.add_cross_attention = True
|
||||
return config
|
||||
@@ -1143,8 +1153,10 @@ class EncoderDecoderModelTest(unittest.TestCase):
|
||||
return EncoderDecoderModel.from_pretrained("patrickvonplaten/bert2bert-cnn_dailymail-fp16")
|
||||
|
||||
def get_encoder_decoder_models(self):
|
||||
encoder_model = BertModel.from_pretrained("bert-base-uncased")
|
||||
decoder_model = BertLMHeadModel.from_pretrained("bert-base-uncased", config=self.get_decoder_config())
|
||||
encoder_model = BertModel.from_pretrained("google-bert/bert-base-uncased")
|
||||
decoder_model = BertLMHeadModel.from_pretrained(
|
||||
"google-bert/bert-base-uncased", config=self.get_decoder_config()
|
||||
)
|
||||
return {"encoder": encoder_model, "decoder": decoder_model}
|
||||
|
||||
def _check_configuration_tie(self, model):
|
||||
|
||||
@@ -483,12 +483,14 @@ class FlaxGPT2EncoderDecoderModelTest(FlaxEncoderDecoderMixin, unittest.TestCase
|
||||
}
|
||||
|
||||
def get_pretrained_model(self):
|
||||
return FlaxEncoderDecoderModel.from_encoder_decoder_pretrained("bert-base-cased", "gpt2")
|
||||
return FlaxEncoderDecoderModel.from_encoder_decoder_pretrained(
|
||||
"google-bert/bert-base-cased", "openai-community/gpt2"
|
||||
)
|
||||
|
||||
@slow
|
||||
def test_bert2gpt2_summarization(self):
|
||||
tokenizer_in = AutoTokenizer.from_pretrained("bert-base-cased")
|
||||
tokenizer_out = AutoTokenizer.from_pretrained("gpt2")
|
||||
tokenizer_in = AutoTokenizer.from_pretrained("google-bert/bert-base-cased")
|
||||
tokenizer_out = AutoTokenizer.from_pretrained("openai-community/gpt2")
|
||||
|
||||
model = FlaxEncoderDecoderModel.from_pretrained(
|
||||
"patrickvonplaten/bert2gpt2-cnn_dailymail-fp16", pad_token_id=tokenizer_out.eos_token_id
|
||||
@@ -539,7 +541,9 @@ class FlaxBartEncoderDecoderModelTest(FlaxEncoderDecoderMixin, unittest.TestCase
|
||||
}
|
||||
|
||||
def get_pretrained_model(self):
|
||||
return FlaxEncoderDecoderModel.from_encoder_decoder_pretrained("bert-base-cased", "facebook/bart-base")
|
||||
return FlaxEncoderDecoderModel.from_encoder_decoder_pretrained(
|
||||
"google-bert/bert-base-cased", "facebook/bart-base"
|
||||
)
|
||||
|
||||
|
||||
@require_flax
|
||||
@@ -576,13 +580,17 @@ class FlaxBertEncoderDecoderModelTest(FlaxEncoderDecoderMixin, unittest.TestCase
|
||||
}
|
||||
|
||||
def get_pretrained_model(self):
|
||||
return FlaxEncoderDecoderModel.from_encoder_decoder_pretrained("bert-base-cased", "bert-base-cased")
|
||||
return FlaxEncoderDecoderModel.from_encoder_decoder_pretrained(
|
||||
"google-bert/bert-base-cased", "google-bert/bert-base-cased"
|
||||
)
|
||||
|
||||
|
||||
@require_flax
|
||||
class FlaxEncoderDecoderModelTest(unittest.TestCase):
|
||||
def get_from_encoderdecoder_pretrained_model(self):
|
||||
return FlaxEncoderDecoderModel.from_encoder_decoder_pretrained("bert-base-cased", "gpt2")
|
||||
return FlaxEncoderDecoderModel.from_encoder_decoder_pretrained(
|
||||
"google-bert/bert-base-cased", "openai-community/gpt2"
|
||||
)
|
||||
|
||||
def _check_configuration_tie(self, model):
|
||||
module = model.module.bind(model.params)
|
||||
|
||||
@@ -764,7 +764,7 @@ class TFBertEncoderDecoderModelTest(TFEncoderDecoderMixin, unittest.TestCase):
|
||||
def test_bert2bert_summarization(self):
|
||||
from transformers import EncoderDecoderModel
|
||||
|
||||
tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
|
||||
tokenizer = AutoTokenizer.from_pretrained("google-bert/bert-base-uncased")
|
||||
|
||||
"""Not working, because pt checkpoint has `encoder.encoder.layer...` while tf model has `encoder.bert.encoder.layer...`.
|
||||
(For Bert decoder, there is no issue, because `BertModel` is wrapped into `decoder` as `bert`)
|
||||
@@ -864,8 +864,8 @@ class TFGPT2EncoderDecoderModelTest(TFEncoderDecoderMixin, unittest.TestCase):
|
||||
def test_bert2gpt2_summarization(self):
|
||||
from transformers import EncoderDecoderModel
|
||||
|
||||
tokenizer_in = AutoTokenizer.from_pretrained("bert-base-cased")
|
||||
tokenizer_out = AutoTokenizer.from_pretrained("gpt2")
|
||||
tokenizer_in = AutoTokenizer.from_pretrained("google-bert/bert-base-cased")
|
||||
tokenizer_out = AutoTokenizer.from_pretrained("openai-community/gpt2")
|
||||
|
||||
"""Not working, because pt checkpoint has `encoder.encoder.layer...` while tf model has `encoder.bert.encoder.layer...`.
|
||||
(For GPT2 decoder, there is no issue)
|
||||
@@ -1016,10 +1016,12 @@ class TFRembertEncoderDecoderModelTest(TFEncoderDecoderMixin, unittest.TestCase)
|
||||
@require_tf
|
||||
class TFEncoderDecoderModelTest(unittest.TestCase):
|
||||
def get_from_encoderdecoder_pretrained_model(self):
|
||||
return TFEncoderDecoderModel.from_encoder_decoder_pretrained("bert-base-cased", "bert-base-cased")
|
||||
return TFEncoderDecoderModel.from_encoder_decoder_pretrained(
|
||||
"google-bert/bert-base-cased", "google-bert/bert-base-cased"
|
||||
)
|
||||
|
||||
def get_decoder_config(self):
|
||||
config = AutoConfig.from_pretrained("bert-base-cased")
|
||||
config = AutoConfig.from_pretrained("google-bert/bert-base-cased")
|
||||
config.is_decoder = True
|
||||
config.add_cross_attention = True
|
||||
return config
|
||||
@@ -1028,9 +1030,9 @@ class TFEncoderDecoderModelTest(unittest.TestCase):
|
||||
return TFEncoderDecoderModel.from_pretrained("patrickvonplaten/bert2bert-cnn_dailymail-fp16")
|
||||
|
||||
def get_encoder_decoder_models(self):
|
||||
encoder_model = TFBertModel.from_pretrained("bert-base-cased", name="encoder")
|
||||
encoder_model = TFBertModel.from_pretrained("google-bert/bert-base-cased", name="encoder")
|
||||
decoder_model = TFBertLMHeadModel.from_pretrained(
|
||||
"bert-base-cased", config=self.get_decoder_config(), name="decoder"
|
||||
"google-bert/bert-base-cased", config=self.get_decoder_config(), name="decoder"
|
||||
)
|
||||
return {"encoder": encoder_model, "decoder": decoder_model}
|
||||
|
||||
@@ -1055,8 +1057,10 @@ class TFEncoderDecoderModelTest(unittest.TestCase):
|
||||
@require_tf
|
||||
class TFEncoderDecoderModelSaveLoadTests(unittest.TestCase):
|
||||
def get_encoder_decoder_config(self):
|
||||
encoder_config = AutoConfig.from_pretrained("bert-base-uncased")
|
||||
decoder_config = AutoConfig.from_pretrained("bert-base-uncased", is_decoder=True, add_cross_attention=True)
|
||||
encoder_config = AutoConfig.from_pretrained("google-bert/bert-base-uncased")
|
||||
decoder_config = AutoConfig.from_pretrained(
|
||||
"google-bert/bert-base-uncased", is_decoder=True, add_cross_attention=True
|
||||
)
|
||||
return EncoderDecoderConfig.from_encoder_decoder_configs(encoder_config, decoder_config)
|
||||
|
||||
def get_encoder_decoder_config_small(self):
|
||||
@@ -1160,8 +1164,8 @@ class TFEncoderDecoderModelSaveLoadTests(unittest.TestCase):
|
||||
load_weight_prefix = TFEncoderDecoderModel.load_weight_prefix
|
||||
|
||||
config = self.get_encoder_decoder_config()
|
||||
encoder_tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
|
||||
decoder_tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
|
||||
encoder_tokenizer = AutoTokenizer.from_pretrained("google-bert/bert-base-uncased")
|
||||
decoder_tokenizer = AutoTokenizer.from_pretrained("google-bert/bert-base-uncased")
|
||||
|
||||
input_ids = encoder_tokenizer("who sings does he love me with reba", return_tensors="tf").input_ids
|
||||
decoder_input_ids = decoder_tokenizer("Linda Davis", return_tensors="tf").input_ids
|
||||
@@ -1173,10 +1177,10 @@ class TFEncoderDecoderModelSaveLoadTests(unittest.TestCase):
|
||||
# So we create pretrained models (without `load_weight_prefix`), save them, and later,
|
||||
# we load them using `from_pretrained`.
|
||||
# (we don't need to do this for encoder, but let's make the code more similar between encoder/decoder)
|
||||
encoder = TFAutoModel.from_pretrained("bert-base-uncased", name="encoder")
|
||||
encoder = TFAutoModel.from_pretrained("google-bert/bert-base-uncased", name="encoder")
|
||||
# It's necessary to specify `add_cross_attention=True` here.
|
||||
decoder = TFAutoModelForCausalLM.from_pretrained(
|
||||
"bert-base-uncased", is_decoder=True, add_cross_attention=True, name="decoder"
|
||||
"google-bert/bert-base-uncased", is_decoder=True, add_cross_attention=True, name="decoder"
|
||||
)
|
||||
pretrained_encoder_dir = os.path.join(tmp_dirname, "pretrained_encoder")
|
||||
pretrained_decoder_dir = os.path.join(tmp_dirname, "pretrained_decoder")
|
||||
|
||||
@@ -237,10 +237,10 @@ class FlaxGPT2ModelTest(FlaxModelTesterMixin, FlaxGenerationTesterMixin, unittes
|
||||
|
||||
@slow
|
||||
def test_batch_generation(self):
|
||||
tokenizer = GPT2Tokenizer.from_pretrained("gpt2", pad_token="</s>", padding_side="left")
|
||||
tokenizer = GPT2Tokenizer.from_pretrained("openai-community/gpt2", pad_token="</s>", padding_side="left")
|
||||
inputs = tokenizer(["Hello this is a long string", "Hey"], return_tensors="np", padding=True, truncation=True)
|
||||
|
||||
model = FlaxGPT2LMHeadModel.from_pretrained("gpt2")
|
||||
model = FlaxGPT2LMHeadModel.from_pretrained("openai-community/gpt2")
|
||||
model.do_sample = False
|
||||
model.config.pad_token_id = model.config.eos_token_id
|
||||
|
||||
@@ -359,6 +359,6 @@ class FlaxGPT2ModelTest(FlaxModelTesterMixin, FlaxGenerationTesterMixin, unittes
|
||||
@slow
|
||||
def test_model_from_pretrained(self):
|
||||
for model_class_name in self.all_model_classes:
|
||||
model = model_class_name.from_pretrained("gpt2", from_pt=True)
|
||||
model = model_class_name.from_pretrained("openai-community/gpt2", from_pt=True)
|
||||
outputs = model(np.ones((1, 1)))
|
||||
self.assertIsNotNone(outputs)
|
||||
|
||||
@@ -98,7 +98,7 @@ class GPT2ModelTester:
|
||||
self.pad_token_id = vocab_size - 1
|
||||
|
||||
def get_large_model_config(self):
|
||||
return GPT2Config.from_pretrained("gpt2")
|
||||
return GPT2Config.from_pretrained("openai-community/gpt2")
|
||||
|
||||
def prepare_config_and_inputs(
|
||||
self, gradient_checkpointing=False, scale_attn_by_inverse_layer_idx=False, reorder_and_upcast_attn=False
|
||||
@@ -582,9 +582,9 @@ class GPT2ModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMixin
|
||||
|
||||
@slow
|
||||
def test_batch_generation(self):
|
||||
model = GPT2LMHeadModel.from_pretrained("gpt2")
|
||||
model = GPT2LMHeadModel.from_pretrained("openai-community/gpt2")
|
||||
model.to(torch_device)
|
||||
tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
|
||||
tokenizer = GPT2Tokenizer.from_pretrained("openai-community/gpt2")
|
||||
|
||||
tokenizer.padding_side = "left"
|
||||
|
||||
@@ -641,9 +641,9 @@ class GPT2ModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMixin
|
||||
|
||||
@slow
|
||||
def test_batch_generation_2heads(self):
|
||||
model = GPT2DoubleHeadsModel.from_pretrained("gpt2")
|
||||
model = GPT2DoubleHeadsModel.from_pretrained("openai-community/gpt2")
|
||||
model.to(torch_device)
|
||||
tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
|
||||
tokenizer = GPT2Tokenizer.from_pretrained("openai-community/gpt2")
|
||||
|
||||
tokenizer.padding_side = "left"
|
||||
|
||||
@@ -722,7 +722,7 @@ class GPT2ModelLanguageGenerationTest(unittest.TestCase):
|
||||
verify_outputs=True,
|
||||
):
|
||||
model = GPT2LMHeadModel.from_pretrained(
|
||||
"gpt2",
|
||||
"openai-community/gpt2",
|
||||
reorder_and_upcast_attn=reorder_and_upcast_attn,
|
||||
scale_attn_by_inverse_layer_idx=scale_attn_by_inverse_layer_idx,
|
||||
)
|
||||
@@ -759,8 +759,8 @@ class GPT2ModelLanguageGenerationTest(unittest.TestCase):
|
||||
|
||||
@slow
|
||||
def test_gpt2_sample(self):
|
||||
tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
|
||||
model = GPT2LMHeadModel.from_pretrained("gpt2")
|
||||
tokenizer = GPT2Tokenizer.from_pretrained("openai-community/gpt2")
|
||||
model = GPT2LMHeadModel.from_pretrained("openai-community/gpt2")
|
||||
model.to(torch_device)
|
||||
|
||||
torch.manual_seed(0)
|
||||
@@ -787,8 +787,8 @@ class GPT2ModelLanguageGenerationTest(unittest.TestCase):
|
||||
|
||||
@slow
|
||||
def test_gpt2_sample_max_time(self):
|
||||
tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
|
||||
model = GPT2LMHeadModel.from_pretrained("gpt2")
|
||||
tokenizer = GPT2Tokenizer.from_pretrained("openai-community/gpt2")
|
||||
model = GPT2LMHeadModel.from_pretrained("openai-community/gpt2")
|
||||
model.to(torch_device)
|
||||
|
||||
torch.manual_seed(0)
|
||||
@@ -833,8 +833,8 @@ class GPT2ModelLanguageGenerationTest(unittest.TestCase):
|
||||
"laboratory founded in 2010. DeepMind was acquired by Google in 2014. The company is based"
|
||||
)
|
||||
|
||||
gpt2_tokenizer = GPT2Tokenizer.from_pretrained("gpt2-large")
|
||||
gpt2_model = GPT2LMHeadModel.from_pretrained("gpt2-large").to(torch_device)
|
||||
gpt2_tokenizer = GPT2Tokenizer.from_pretrained("openai-community/gpt2-large")
|
||||
gpt2_model = GPT2LMHeadModel.from_pretrained("openai-community/gpt2-large").to(torch_device)
|
||||
input_ids = gpt2_tokenizer(article, return_tensors="pt").input_ids.to(torch_device)
|
||||
|
||||
outputs = gpt2_model.generate(input_ids, penalty_alpha=0.6, top_k=4, max_length=256)
|
||||
|
||||
@@ -461,8 +461,8 @@ class TFGPT2ModelTest(TFModelTesterMixin, TFCoreModelTesterMixin, PipelineTester
|
||||
class TFGPT2ModelLanguageGenerationTest(unittest.TestCase):
|
||||
@slow
|
||||
def test_lm_generate_greedy_distilgpt2_batch_special(self):
|
||||
model = TFGPT2LMHeadModel.from_pretrained("distilgpt2")
|
||||
tokenizer = GPT2Tokenizer.from_pretrained("distilgpt2")
|
||||
model = TFGPT2LMHeadModel.from_pretrained("distilbert/distilgpt2")
|
||||
tokenizer = GPT2Tokenizer.from_pretrained("distilbert/distilgpt2")
|
||||
|
||||
tokenizer.pad_token = tokenizer.eos_token
|
||||
tokenizer.padding_side = "left"
|
||||
@@ -488,8 +488,8 @@ class TFGPT2ModelLanguageGenerationTest(unittest.TestCase):
|
||||
|
||||
@slow
|
||||
def test_lm_generate_sample_distilgpt2_batch_special(self):
|
||||
model = TFGPT2LMHeadModel.from_pretrained("distilgpt2")
|
||||
tokenizer = GPT2Tokenizer.from_pretrained("distilgpt2")
|
||||
model = TFGPT2LMHeadModel.from_pretrained("distilbert/distilgpt2")
|
||||
tokenizer = GPT2Tokenizer.from_pretrained("distilbert/distilgpt2")
|
||||
|
||||
tokenizer.pad_token = tokenizer.eos_token
|
||||
tokenizer.padding_side = "left"
|
||||
@@ -522,8 +522,8 @@ class TFGPT2ModelLanguageGenerationTest(unittest.TestCase):
|
||||
|
||||
@slow
|
||||
def test_lm_generate_greedy_distilgpt2_beam_search_special(self):
|
||||
model = TFGPT2LMHeadModel.from_pretrained("distilgpt2")
|
||||
tokenizer = GPT2Tokenizer.from_pretrained("distilgpt2")
|
||||
model = TFGPT2LMHeadModel.from_pretrained("distilbert/distilgpt2")
|
||||
tokenizer = GPT2Tokenizer.from_pretrained("distilbert/distilgpt2")
|
||||
|
||||
tokenizer.pad_token = tokenizer.eos_token
|
||||
tokenizer.padding_side = "left"
|
||||
@@ -550,8 +550,8 @@ class TFGPT2ModelLanguageGenerationTest(unittest.TestCase):
|
||||
@slow
|
||||
def test_lm_generate_distilgpt2_left_padding(self):
|
||||
"""Tests that the generated text is the same, regardless of left padding"""
|
||||
model = TFGPT2LMHeadModel.from_pretrained("distilgpt2")
|
||||
tokenizer = GPT2Tokenizer.from_pretrained("distilgpt2")
|
||||
model = TFGPT2LMHeadModel.from_pretrained("distilbert/distilgpt2")
|
||||
tokenizer = GPT2Tokenizer.from_pretrained("distilbert/distilgpt2")
|
||||
|
||||
tokenizer.pad_token = tokenizer.eos_token
|
||||
tokenizer.padding_side = "left"
|
||||
@@ -582,8 +582,8 @@ class TFGPT2ModelLanguageGenerationTest(unittest.TestCase):
|
||||
|
||||
@slow
|
||||
def test_lm_generate_gpt2_greedy_xla(self):
|
||||
model = TFGPT2LMHeadModel.from_pretrained("gpt2")
|
||||
tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
|
||||
model = TFGPT2LMHeadModel.from_pretrained("openai-community/gpt2")
|
||||
tokenizer = GPT2Tokenizer.from_pretrained("openai-community/gpt2")
|
||||
|
||||
tokenizer.pad_token = tokenizer.eos_token
|
||||
tokenizer.padding_side = "left"
|
||||
@@ -612,8 +612,8 @@ class TFGPT2ModelLanguageGenerationTest(unittest.TestCase):
|
||||
|
||||
# forces the generation to happen on CPU, to avoid GPU-related quirks
|
||||
with tf.device(":/CPU:0"):
|
||||
model = TFGPT2LMHeadModel.from_pretrained("gpt2")
|
||||
tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
|
||||
model = TFGPT2LMHeadModel.from_pretrained("openai-community/gpt2")
|
||||
tokenizer = GPT2Tokenizer.from_pretrained("openai-community/gpt2")
|
||||
|
||||
tokenizer.pad_token = tokenizer.eos_token
|
||||
tokenizer.padding_side = "left"
|
||||
@@ -642,8 +642,8 @@ class TFGPT2ModelLanguageGenerationTest(unittest.TestCase):
|
||||
|
||||
@slow
|
||||
def test_lm_generate_gpt2_beam_search_xla(self):
|
||||
model = TFGPT2LMHeadModel.from_pretrained("gpt2")
|
||||
tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
|
||||
model = TFGPT2LMHeadModel.from_pretrained("openai-community/gpt2")
|
||||
tokenizer = GPT2Tokenizer.from_pretrained("openai-community/gpt2")
|
||||
|
||||
tokenizer.pad_token = tokenizer.eos_token
|
||||
tokenizer.padding_side = "left"
|
||||
@@ -671,8 +671,8 @@ class TFGPT2ModelLanguageGenerationTest(unittest.TestCase):
|
||||
"laboratory founded in 2010. DeepMind was acquired by Google in 2014. The company is based"
|
||||
)
|
||||
|
||||
gpt2_tokenizer = GPT2Tokenizer.from_pretrained("gpt2-large")
|
||||
gpt2_model = TFGPT2LMHeadModel.from_pretrained("gpt2-large")
|
||||
gpt2_tokenizer = GPT2Tokenizer.from_pretrained("openai-community/gpt2-large")
|
||||
gpt2_model = TFGPT2LMHeadModel.from_pretrained("openai-community/gpt2-large")
|
||||
input_ids = gpt2_tokenizer(article, return_tensors="tf")
|
||||
|
||||
outputs = gpt2_model.generate(**input_ids, penalty_alpha=0.6, top_k=4, max_length=256)
|
||||
@@ -705,8 +705,8 @@ class TFGPT2ModelLanguageGenerationTest(unittest.TestCase):
|
||||
"laboratory founded in 2010. DeepMind was acquired by Google in 2014. The company is based"
|
||||
)
|
||||
|
||||
gpt2_tokenizer = GPT2Tokenizer.from_pretrained("gpt2-large")
|
||||
gpt2_model = TFGPT2LMHeadModel.from_pretrained("gpt2-large")
|
||||
gpt2_tokenizer = GPT2Tokenizer.from_pretrained("openai-community/gpt2-large")
|
||||
gpt2_model = TFGPT2LMHeadModel.from_pretrained("openai-community/gpt2-large")
|
||||
input_ids = gpt2_tokenizer(article, return_tensors="tf")
|
||||
|
||||
xla_generate = tf.function(gpt2_model.generate, jit_compile=True)
|
||||
|
||||
@@ -15,8 +15,8 @@ if is_keras_nlp_available():
|
||||
from transformers.models.gpt2 import TFGPT2Tokenizer
|
||||
|
||||
|
||||
TOKENIZER_CHECKPOINTS = ["gpt2"]
|
||||
TINY_MODEL_CHECKPOINT = "gpt2"
|
||||
TOKENIZER_CHECKPOINTS = ["openai-community/gpt2"]
|
||||
TINY_MODEL_CHECKPOINT = "openai-community/gpt2"
|
||||
|
||||
if is_tf_available():
|
||||
|
||||
|
||||
@@ -202,7 +202,9 @@ class FlaxGPTNeoModelTest(FlaxModelTesterMixin, FlaxGenerationTesterMixin, unitt
|
||||
|
||||
@slow
|
||||
def test_batch_generation(self):
|
||||
tokenizer = GPT2Tokenizer.from_pretrained("gpt2", pad_token="<|endoftext|>", padding_side="left")
|
||||
tokenizer = GPT2Tokenizer.from_pretrained(
|
||||
"openai-community/gpt2", pad_token="<|endoftext|>", padding_side="left"
|
||||
)
|
||||
inputs = tokenizer(["Hello this is a long string", "Hey"], return_tensors="np", padding=True, truncation=True)
|
||||
|
||||
model = FlaxGPTNeoForCausalLM.from_pretrained("EleutherAI/gpt-neo-125M")
|
||||
|
||||
@@ -199,7 +199,9 @@ class FlaxGPTJModelTest(FlaxModelTesterMixin, FlaxGenerationTesterMixin, unittes
|
||||
|
||||
@tooslow
|
||||
def test_batch_generation(self):
|
||||
tokenizer = GPT2Tokenizer.from_pretrained("gpt2", pad_token="<|endoftext|>", padding_side="left")
|
||||
tokenizer = GPT2Tokenizer.from_pretrained(
|
||||
"openai-community/gpt2", pad_token="<|endoftext|>", padding_side="left"
|
||||
)
|
||||
inputs = tokenizer(["Hello this is a long string", "Hey"], return_tensors="np", padding=True, truncation=True)
|
||||
|
||||
model = FlaxGPTJForCausalLM.from_pretrained("EleutherAI/gpt-j-6B")
|
||||
|
||||
@@ -28,7 +28,7 @@ from ...test_tokenization_common import TokenizerTesterMixin
|
||||
|
||||
|
||||
@require_tokenizers
|
||||
# Copied from tests.models.roberta.test_tokenization_roberta.RobertaTokenizationTest with roberta-base->allenai/longformer-base-4096,Roberta->Longformer,roberta->longformer,
|
||||
# Copied from tests.models.roberta.test_tokenization_roberta.RobertaTokenizationTest with FacebookAI/roberta-base->allenai/longformer-base-4096,Roberta->Longformer,roberta->longformer,
|
||||
class LongformerTokenizationTest(TokenizerTesterMixin, unittest.TestCase):
|
||||
# Ignore copy
|
||||
tokenizer_class = LongformerTokenizer
|
||||
|
||||
@@ -1373,7 +1373,7 @@ class MarkupLMTokenizationTest(TokenizerTesterMixin, unittest.TestCase):
|
||||
inputs = new_tokenizer(text, xpaths=xpaths)
|
||||
self.assertEqual(len(inputs["input_ids"]), 2)
|
||||
decoded_input = new_tokenizer.decode(inputs["input_ids"][0], skip_special_tokens=True)
|
||||
expected_result = ( # original expected result "this is the" seems contradicts to roberta-based tokenizer
|
||||
expected_result = ( # original expected result "this is the" seems contradicts to FacebookAI/roberta-based tokenizer
|
||||
"thisisthe"
|
||||
)
|
||||
|
||||
|
||||
@@ -258,7 +258,7 @@ class MobileBERTTokenizationTest(TokenizerTesterMixin, unittest.TestCase):
|
||||
)
|
||||
|
||||
@slow
|
||||
# Copied from tests.models.bert.test_tokenization_bert.BertTokenizationTest.test_sequence_builders with bert-base-uncased->google/mobilebert-uncased
|
||||
# Copied from tests.models.bert.test_tokenization_bert.BertTokenizationTest.test_sequence_builders with google-bert/bert-base-uncased->google/mobilebert-uncased
|
||||
def test_sequence_builders(self):
|
||||
tokenizer = self.tokenizer_class.from_pretrained("google/mobilebert-uncased")
|
||||
|
||||
|
||||
@@ -104,7 +104,7 @@ class MT5ModelTester:
|
||||
self.decoder_layers = decoder_layers
|
||||
|
||||
def get_large_model_config(self):
|
||||
return MT5Config.from_pretrained("t5-base")
|
||||
return MT5Config.from_pretrained("google-t5/t5-base")
|
||||
|
||||
def prepare_config_and_inputs(self):
|
||||
input_ids = ids_tensor([self.batch_size, self.encoder_seq_length], self.vocab_size).clamp(2)
|
||||
@@ -940,7 +940,7 @@ class MT5EncoderOnlyModelTester:
|
||||
self.is_training = is_training
|
||||
|
||||
def get_large_model_config(self):
|
||||
return MT5Config.from_pretrained("t5-base")
|
||||
return MT5Config.from_pretrained("google-t5/t5-base")
|
||||
|
||||
def prepare_config_and_inputs(self):
|
||||
input_ids = ids_tensor([self.batch_size, self.encoder_seq_length], self.vocab_size)
|
||||
|
||||
@@ -279,7 +279,7 @@ class OpenAIGPTModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTester
|
||||
class OPENAIGPTModelLanguageGenerationTest(unittest.TestCase):
|
||||
@slow
|
||||
def test_lm_generate_openai_gpt(self):
|
||||
model = OpenAIGPTLMHeadModel.from_pretrained("openai-gpt")
|
||||
model = OpenAIGPTLMHeadModel.from_pretrained("openai-community/openai-gpt")
|
||||
model.to(torch_device)
|
||||
input_ids = torch.tensor([[481, 4735, 544]], dtype=torch.long, device=torch_device) # the president is
|
||||
expected_output_ids = [
|
||||
|
||||
@@ -262,7 +262,7 @@ class TFOpenAIGPTModelTest(TFModelTesterMixin, PipelineTesterMixin, unittest.Tes
|
||||
class TFOPENAIGPTModelLanguageGenerationTest(unittest.TestCase):
|
||||
@slow
|
||||
def test_lm_generate_openai_gpt(self):
|
||||
model = TFOpenAIGPTLMHeadModel.from_pretrained("openai-gpt")
|
||||
model = TFOpenAIGPTLMHeadModel.from_pretrained("openai-community/openai-gpt")
|
||||
input_ids = tf.convert_to_tensor([[481, 4735, 544]], dtype=tf.int32) # the president is
|
||||
expected_output_ids = [
|
||||
481,
|
||||
|
||||
@@ -41,7 +41,7 @@ class Pix2StructProcessorTest(unittest.TestCase):
|
||||
self.tmpdirname = tempfile.mkdtemp()
|
||||
|
||||
image_processor = Pix2StructImageProcessor()
|
||||
tokenizer = T5Tokenizer.from_pretrained("t5-small")
|
||||
tokenizer = T5Tokenizer.from_pretrained("google-t5/t5-small")
|
||||
|
||||
processor = Pix2StructProcessor(image_processor, tokenizer)
|
||||
|
||||
|
||||
@@ -563,7 +563,7 @@ class QDQBertModelIntegrationTest(unittest.TestCase):
|
||||
quant_nn.QuantLinear.set_default_quant_desc_input(input_desc)
|
||||
quant_nn.QuantLinear.set_default_quant_desc_weight(weight_desc)
|
||||
|
||||
model = QDQBertModel.from_pretrained("bert-base-uncased")
|
||||
model = QDQBertModel.from_pretrained("google-bert/bert-base-uncased")
|
||||
input_ids = torch.tensor([[0, 345, 232, 328, 740, 140, 1695, 69, 6078, 1588, 2]])
|
||||
attention_mask = torch.tensor([[0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]])
|
||||
output = model(input_ids, attention_mask=attention_mask)[0]
|
||||
|
||||
@@ -236,7 +236,7 @@ class RealmTokenizationTest(TokenizerTesterMixin, unittest.TestCase):
|
||||
|
||||
@slow
|
||||
def test_sequence_builders(self):
|
||||
tokenizer = self.tokenizer_class.from_pretrained("bert-base-uncased")
|
||||
tokenizer = self.tokenizer_class.from_pretrained("google-bert/bert-base-uncased")
|
||||
|
||||
text = tokenizer.encode("sequence builders", add_special_tokens=False)
|
||||
text_2 = tokenizer.encode("multi-sequence build", add_special_tokens=False)
|
||||
|
||||
@@ -154,6 +154,6 @@ class FlaxRobertaModelTest(FlaxModelTesterMixin, unittest.TestCase):
|
||||
@slow
|
||||
def test_model_from_pretrained(self):
|
||||
for model_class_name in self.all_model_classes:
|
||||
model = model_class_name.from_pretrained("roberta-base", from_pt=True)
|
||||
model = model_class_name.from_pretrained("FacebookAI/roberta-base", from_pt=True)
|
||||
outputs = model(np.ones((1, 1)))
|
||||
self.assertIsNotNone(outputs)
|
||||
|
||||
@@ -527,7 +527,7 @@ class RobertaModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMi
|
||||
class RobertaModelIntegrationTest(TestCasePlus):
|
||||
@slow
|
||||
def test_inference_masked_lm(self):
|
||||
model = RobertaForMaskedLM.from_pretrained("roberta-base")
|
||||
model = RobertaForMaskedLM.from_pretrained("FacebookAI/roberta-base")
|
||||
|
||||
input_ids = torch.tensor([[0, 31414, 232, 328, 740, 1140, 12695, 69, 46078, 1588, 2]])
|
||||
with torch.no_grad():
|
||||
@@ -547,7 +547,7 @@ class RobertaModelIntegrationTest(TestCasePlus):
|
||||
|
||||
@slow
|
||||
def test_inference_no_head(self):
|
||||
model = RobertaModel.from_pretrained("roberta-base")
|
||||
model = RobertaModel.from_pretrained("FacebookAI/roberta-base")
|
||||
|
||||
input_ids = torch.tensor([[0, 31414, 232, 328, 740, 1140, 12695, 69, 46078, 1588, 2]])
|
||||
with torch.no_grad():
|
||||
@@ -565,7 +565,7 @@ class RobertaModelIntegrationTest(TestCasePlus):
|
||||
|
||||
@slow
|
||||
def test_inference_classification_head(self):
|
||||
model = RobertaForSequenceClassification.from_pretrained("roberta-large-mnli")
|
||||
model = RobertaForSequenceClassification.from_pretrained("FacebookAI/roberta-large-mnli")
|
||||
|
||||
input_ids = torch.tensor([[0, 31414, 232, 328, 740, 1140, 12695, 69, 46078, 1588, 2]])
|
||||
with torch.no_grad():
|
||||
|
||||
@@ -666,7 +666,7 @@ class TFRobertaModelTest(TFModelTesterMixin, PipelineTesterMixin, unittest.TestC
|
||||
class TFRobertaModelIntegrationTest(unittest.TestCase):
|
||||
@slow
|
||||
def test_inference_masked_lm(self):
|
||||
model = TFRobertaForMaskedLM.from_pretrained("roberta-base")
|
||||
model = TFRobertaForMaskedLM.from_pretrained("FacebookAI/roberta-base")
|
||||
|
||||
input_ids = tf.constant([[0, 31414, 232, 328, 740, 1140, 12695, 69, 46078, 1588, 2]])
|
||||
output = model(input_ids)[0]
|
||||
@@ -680,7 +680,7 @@ class TFRobertaModelIntegrationTest(unittest.TestCase):
|
||||
|
||||
@slow
|
||||
def test_inference_no_head(self):
|
||||
model = TFRobertaModel.from_pretrained("roberta-base")
|
||||
model = TFRobertaModel.from_pretrained("FacebookAI/roberta-base")
|
||||
|
||||
input_ids = tf.constant([[0, 31414, 232, 328, 740, 1140, 12695, 69, 46078, 1588, 2]])
|
||||
output = model(input_ids)[0]
|
||||
@@ -692,7 +692,7 @@ class TFRobertaModelIntegrationTest(unittest.TestCase):
|
||||
|
||||
@slow
|
||||
def test_inference_classification_head(self):
|
||||
model = TFRobertaForSequenceClassification.from_pretrained("roberta-large-mnli")
|
||||
model = TFRobertaForSequenceClassification.from_pretrained("FacebookAI/roberta-large-mnli")
|
||||
|
||||
input_ids = tf.constant([[0, 31414, 232, 328, 740, 1140, 12695, 69, 46078, 1588, 2]])
|
||||
output = model(input_ids)[0]
|
||||
|
||||
@@ -105,7 +105,7 @@ class RobertaTokenizationTest(TokenizerTesterMixin, unittest.TestCase):
|
||||
|
||||
@slow
|
||||
def test_sequence_builders(self):
|
||||
tokenizer = self.tokenizer_class.from_pretrained("roberta-base")
|
||||
tokenizer = self.tokenizer_class.from_pretrained("FacebookAI/roberta-base")
|
||||
|
||||
text = tokenizer.encode("sequence builders", add_special_tokens=False)
|
||||
text_2 = tokenizer.encode("multi-sequence build", add_special_tokens=False)
|
||||
|
||||
@@ -134,7 +134,7 @@ class FlaxRobertaPreLayerNormModelTester(unittest.TestCase):
|
||||
|
||||
|
||||
@require_flax
|
||||
# Copied from tests.models.roberta.test_modeling_flax_roberta.FlaxRobertaModelTest with ROBERTA->ROBERTA_PRELAYERNORM,Roberta->RobertaPreLayerNorm,roberta-base->andreasmadsen/efficient_mlm_m0.40
|
||||
# Copied from tests.models.roberta.test_modeling_flax_roberta.FlaxRobertaModelTest with ROBERTA->ROBERTA_PRELAYERNORM,Roberta->RobertaPreLayerNorm,FacebookAI/roberta-base->andreasmadsen/efficient_mlm_m0.40
|
||||
class FlaxRobertaPreLayerNormModelTest(FlaxModelTesterMixin, unittest.TestCase):
|
||||
test_head_masking = True
|
||||
|
||||
|
||||
@@ -578,7 +578,7 @@ class FlaxEncoderDecoderMixin:
|
||||
class FlaxWav2Vec2GPT2ModelTest(FlaxEncoderDecoderMixin, unittest.TestCase):
|
||||
def get_pretrained_model_and_inputs(self):
|
||||
model = FlaxSpeechEncoderDecoderModel.from_encoder_decoder_pretrained(
|
||||
"facebook/wav2vec2-large-lv60", "gpt2-medium"
|
||||
"facebook/wav2vec2-large-lv60", "openai-community/gpt2-medium"
|
||||
)
|
||||
batch_size = 13
|
||||
input_values = floats_tensor([batch_size, 512], scale=1.0)
|
||||
@@ -812,7 +812,7 @@ class FlaxWav2Vec2BartModelTest(FlaxEncoderDecoderMixin, unittest.TestCase):
|
||||
class FlaxWav2Vec2BertModelTest(FlaxEncoderDecoderMixin, unittest.TestCase):
|
||||
def get_pretrained_model_and_inputs(self):
|
||||
model = FlaxSpeechEncoderDecoderModel.from_encoder_decoder_pretrained(
|
||||
"facebook/wav2vec2-large-lv60", "bert-large-uncased"
|
||||
"facebook/wav2vec2-large-lv60", "google-bert/bert-large-uncased"
|
||||
)
|
||||
batch_size = 13
|
||||
input_values = floats_tensor([batch_size, 512], model.config.encoder.vocab_size)
|
||||
|
||||
@@ -445,7 +445,7 @@ class EncoderDecoderMixin:
|
||||
class Wav2Vec2BertModelTest(EncoderDecoderMixin, unittest.TestCase):
|
||||
def get_pretrained_model_and_inputs(self):
|
||||
model = SpeechEncoderDecoderModel.from_encoder_decoder_pretrained(
|
||||
"facebook/wav2vec2-base-960h", "bert-base-cased"
|
||||
"facebook/wav2vec2-base-960h", "google-bert/bert-base-cased"
|
||||
)
|
||||
batch_size = 13
|
||||
input_values = floats_tensor([batch_size, 512], scale=1.0)
|
||||
@@ -509,7 +509,7 @@ class Wav2Vec2BertModelTest(EncoderDecoderMixin, unittest.TestCase):
|
||||
class Speech2TextBertModelTest(EncoderDecoderMixin, unittest.TestCase):
|
||||
def get_pretrained_model_and_inputs(self):
|
||||
model = SpeechEncoderDecoderModel.from_encoder_decoder_pretrained(
|
||||
"facebook/s2t-small-librispeech-asr", "bert-base-cased"
|
||||
"facebook/s2t-small-librispeech-asr", "google-bert/bert-base-cased"
|
||||
)
|
||||
batch_size = 13
|
||||
input_features = floats_tensor([batch_size, 7, 80], scale=1.0)
|
||||
|
||||
@@ -1065,7 +1065,7 @@ class SwitchTransformerModelIntegrationTests(unittest.TestCase):
|
||||
model = SwitchTransformersForConditionalGeneration.from_pretrained(
|
||||
"google/switch-base-8", torch_dtype=torch.bfloat16
|
||||
).eval()
|
||||
tokenizer = AutoTokenizer.from_pretrained("t5-small", use_fast=False, legacy=False)
|
||||
tokenizer = AutoTokenizer.from_pretrained("google-t5/t5-small", use_fast=False, legacy=False)
|
||||
model = model.to(torch_device)
|
||||
|
||||
input_ids = tokenizer(
|
||||
@@ -1093,7 +1093,7 @@ class SwitchTransformerModelIntegrationTests(unittest.TestCase):
|
||||
model = SwitchTransformersForConditionalGeneration.from_pretrained(
|
||||
"google/switch-base-8", torch_dtype=torch.bfloat16
|
||||
).eval()
|
||||
tokenizer = AutoTokenizer.from_pretrained("t5-small", use_fast=False, legacy=False)
|
||||
tokenizer = AutoTokenizer.from_pretrained("google-t5/t5-small", use_fast=False, legacy=False)
|
||||
|
||||
inputs = [
|
||||
"A <extra_id_0> walks into a bar and orders a <extra_id_1> with <extra_id_2> pinch of <extra_id_3>."
|
||||
|
||||
@@ -773,8 +773,8 @@ class FlaxT5ModelIntegrationTests(unittest.TestCase):
|
||||
>>> score = t5_model.score(inputs=["Hello there"], targets=["Hi I am"], vocabulary=vocab)
|
||||
"""
|
||||
|
||||
model = FlaxT5ForConditionalGeneration.from_pretrained("t5-small")
|
||||
tokenizer = T5Tokenizer.from_pretrained("t5-small")
|
||||
model = FlaxT5ForConditionalGeneration.from_pretrained("google-t5/t5-small")
|
||||
tokenizer = T5Tokenizer.from_pretrained("google-t5/t5-small")
|
||||
|
||||
input_ids = tokenizer("Hello there", return_tensors="np").input_ids
|
||||
labels = tokenizer("Hi I am", return_tensors="np").input_ids
|
||||
@@ -849,11 +849,11 @@ class FlaxT5ModelIntegrationTests(unittest.TestCase):
|
||||
|
||||
@slow
|
||||
def test_small_generation(self):
|
||||
model = FlaxT5ForConditionalGeneration.from_pretrained("t5-small")
|
||||
model = FlaxT5ForConditionalGeneration.from_pretrained("google-t5/t5-small")
|
||||
model.config.max_length = 8
|
||||
model.config.num_beams = 1
|
||||
model.config.do_sample = False
|
||||
tokenizer = T5Tokenizer.from_pretrained("t5-small")
|
||||
tokenizer = T5Tokenizer.from_pretrained("google-t5/t5-small")
|
||||
|
||||
input_ids = tokenizer("summarize: Hello there", return_tensors="np").input_ids
|
||||
|
||||
@@ -864,11 +864,11 @@ class FlaxT5ModelIntegrationTests(unittest.TestCase):
|
||||
|
||||
@slow
|
||||
def test_small_generation_bfloat16(self):
|
||||
model = FlaxT5ForConditionalGeneration.from_pretrained("t5-small", dtype=jnp.bfloat16)
|
||||
model = FlaxT5ForConditionalGeneration.from_pretrained("google-t5/t5-small", dtype=jnp.bfloat16)
|
||||
model.config.max_length = 8
|
||||
model.config.num_beams = 1
|
||||
model.config.do_sample = False
|
||||
tokenizer = T5Tokenizer.from_pretrained("t5-small")
|
||||
tokenizer = T5Tokenizer.from_pretrained("google-t5/t5-small")
|
||||
|
||||
input_ids = tokenizer("summarize: Hello there", return_tensors="np").input_ids
|
||||
|
||||
@@ -879,8 +879,8 @@ class FlaxT5ModelIntegrationTests(unittest.TestCase):
|
||||
|
||||
@slow
|
||||
def test_summarization(self):
|
||||
model = FlaxT5ForConditionalGeneration.from_pretrained("t5-base")
|
||||
tok = T5Tokenizer.from_pretrained("t5-base")
|
||||
model = FlaxT5ForConditionalGeneration.from_pretrained("google-t5/t5-base")
|
||||
tok = T5Tokenizer.from_pretrained("google-t5/t5-base")
|
||||
|
||||
FRANCE_ARTICLE = ( # @noqa
|
||||
"Marseille, France (CNN)The French prosecutor leading an investigation into the crash of Germanwings"
|
||||
|
||||
@@ -108,7 +108,7 @@ class T5ModelTester:
|
||||
self.decoder_layers = decoder_layers
|
||||
|
||||
def get_large_model_config(self):
|
||||
return T5Config.from_pretrained("t5-base")
|
||||
return T5Config.from_pretrained("google-t5/t5-base")
|
||||
|
||||
def prepare_config_and_inputs(self):
|
||||
input_ids = ids_tensor([self.batch_size, self.encoder_seq_length], self.vocab_size).clamp(2)
|
||||
@@ -942,7 +942,7 @@ class T5EncoderOnlyModelTester:
|
||||
self.is_training = is_training
|
||||
|
||||
def get_large_model_config(self):
|
||||
return T5Config.from_pretrained("t5-base")
|
||||
return T5Config.from_pretrained("google-t5/t5-base")
|
||||
|
||||
def prepare_config_and_inputs(self):
|
||||
input_ids = ids_tensor([self.batch_size, self.encoder_seq_length], self.vocab_size)
|
||||
@@ -1096,36 +1096,40 @@ class T5ModelFp16Tests(unittest.TestCase):
|
||||
with unittest.mock.patch("builtins.__import__", side_effect=import_accelerate_mock):
|
||||
accelerate_available = False
|
||||
|
||||
model = T5ForConditionalGeneration.from_pretrained("t5-small", torch_dtype=torch.float16)
|
||||
model = T5ForConditionalGeneration.from_pretrained("google-t5/t5-small", torch_dtype=torch.float16)
|
||||
self.assertTrue(model.decoder.block[0].layer[2].DenseReluDense.wo.weight.dtype == torch.float32)
|
||||
self.assertTrue(model.decoder.block[0].layer[2].DenseReluDense.wi.weight.dtype == torch.float16)
|
||||
|
||||
# Load without using `accelerate` in bf16
|
||||
model = T5ForConditionalGeneration.from_pretrained("t5-small", torch_dtype=torch.bfloat16)
|
||||
model = T5ForConditionalGeneration.from_pretrained("google-t5/t5-small", torch_dtype=torch.bfloat16)
|
||||
self.assertTrue(model.decoder.block[0].layer[2].DenseReluDense.wo.weight.dtype == torch.bfloat16)
|
||||
self.assertTrue(model.decoder.block[0].layer[2].DenseReluDense.wi.weight.dtype == torch.bfloat16)
|
||||
|
||||
# Load using `accelerate` in bf16
|
||||
model = T5ForConditionalGeneration.from_pretrained("t5-small", torch_dtype=torch.bfloat16, device_map="auto")
|
||||
model = T5ForConditionalGeneration.from_pretrained(
|
||||
"google-t5/t5-small", torch_dtype=torch.bfloat16, device_map="auto"
|
||||
)
|
||||
self.assertTrue(model.decoder.block[0].layer[2].DenseReluDense.wo.weight.dtype == torch.bfloat16)
|
||||
self.assertTrue(model.decoder.block[0].layer[2].DenseReluDense.wi.weight.dtype == torch.bfloat16)
|
||||
|
||||
# Load using `accelerate` in bf16
|
||||
model = T5ForConditionalGeneration.from_pretrained(
|
||||
"t5-small", torch_dtype=torch.bfloat16, low_cpu_mem_usage=True
|
||||
"google-t5/t5-small", torch_dtype=torch.bfloat16, low_cpu_mem_usage=True
|
||||
)
|
||||
self.assertTrue(model.decoder.block[0].layer[2].DenseReluDense.wo.weight.dtype == torch.bfloat16)
|
||||
self.assertTrue(model.decoder.block[0].layer[2].DenseReluDense.wi.weight.dtype == torch.bfloat16)
|
||||
|
||||
# Load without using `accelerate`
|
||||
model = T5ForConditionalGeneration.from_pretrained(
|
||||
"t5-small", torch_dtype=torch.float16, low_cpu_mem_usage=True
|
||||
"google-t5/t5-small", torch_dtype=torch.float16, low_cpu_mem_usage=True
|
||||
)
|
||||
self.assertTrue(model.decoder.block[0].layer[2].DenseReluDense.wo.weight.dtype == torch.float32)
|
||||
self.assertTrue(model.decoder.block[0].layer[2].DenseReluDense.wi.weight.dtype == torch.float16)
|
||||
|
||||
# Load using `accelerate`
|
||||
model = T5ForConditionalGeneration.from_pretrained("t5-small", torch_dtype=torch.float16, device_map="auto")
|
||||
model = T5ForConditionalGeneration.from_pretrained(
|
||||
"google-t5/t5-small", torch_dtype=torch.float16, device_map="auto"
|
||||
)
|
||||
self.assertTrue(model.decoder.block[0].layer[2].DenseReluDense.wo.weight.dtype == torch.float32)
|
||||
self.assertTrue(model.decoder.block[0].layer[2].DenseReluDense.wi.weight.dtype == torch.float16)
|
||||
|
||||
@@ -1136,11 +1140,11 @@ class T5ModelFp16Tests(unittest.TestCase):
|
||||
class T5ModelIntegrationTests(unittest.TestCase):
|
||||
@cached_property
|
||||
def model(self):
|
||||
return T5ForConditionalGeneration.from_pretrained("t5-base").to(torch_device)
|
||||
return T5ForConditionalGeneration.from_pretrained("google-t5/t5-base").to(torch_device)
|
||||
|
||||
@cached_property
|
||||
def tokenizer(self):
|
||||
return T5Tokenizer.from_pretrained("t5-base")
|
||||
return T5Tokenizer.from_pretrained("google-t5/t5-base")
|
||||
|
||||
@slow
|
||||
def test_torch_quant(self):
|
||||
@@ -1157,11 +1161,11 @@ class T5ModelIntegrationTests(unittest.TestCase):
|
||||
|
||||
@slow
|
||||
def test_small_generation(self):
|
||||
model = T5ForConditionalGeneration.from_pretrained("t5-small").to(torch_device)
|
||||
model = T5ForConditionalGeneration.from_pretrained("google-t5/t5-small").to(torch_device)
|
||||
model.config.max_length = 8
|
||||
model.config.num_beams = 1
|
||||
model.config.do_sample = False
|
||||
tokenizer = T5Tokenizer.from_pretrained("t5-small")
|
||||
tokenizer = T5Tokenizer.from_pretrained("google-t5/t5-small")
|
||||
|
||||
input_ids = tokenizer("summarize: Hello there", return_tensors="pt").input_ids.to(torch_device)
|
||||
|
||||
@@ -1184,8 +1188,8 @@ class T5ModelIntegrationTests(unittest.TestCase):
|
||||
>>> score = t5_model.score(inputs=["Hello there"], targets=["Hi I am"], vocabulary=vocab)
|
||||
"""
|
||||
|
||||
model = T5ForConditionalGeneration.from_pretrained("t5-small").to(torch_device)
|
||||
tokenizer = T5Tokenizer.from_pretrained("t5-small")
|
||||
model = T5ForConditionalGeneration.from_pretrained("google-t5/t5-small").to(torch_device)
|
||||
tokenizer = T5Tokenizer.from_pretrained("google-t5/t5-small")
|
||||
|
||||
input_ids = tokenizer("Hello there", return_tensors="pt").input_ids
|
||||
labels = tokenizer("Hi I am", return_tensors="pt").input_ids
|
||||
@@ -1501,7 +1505,7 @@ class T5ModelIntegrationTests(unittest.TestCase):
|
||||
|
||||
@slow
|
||||
def test_translation_en_to_fr(self):
|
||||
model = self.model # t5-base
|
||||
model = self.model # google-t5/t5-base
|
||||
tok = self.tokenizer
|
||||
use_task_specific_params(model, "translation_en_to_fr")
|
||||
|
||||
|
||||
@@ -302,7 +302,7 @@ class TFT5ModelTest(TFModelTesterMixin, PipelineTesterMixin, unittest.TestCase):
|
||||
|
||||
@slow
|
||||
def test_model_from_pretrained(self):
|
||||
model = TFT5Model.from_pretrained("t5-small")
|
||||
model = TFT5Model.from_pretrained("google-t5/t5-small")
|
||||
self.assertIsNotNone(model)
|
||||
|
||||
def test_generate_with_headmasking(self):
|
||||
@@ -448,8 +448,8 @@ class TFT5EncoderOnlyModelTest(TFModelTesterMixin, unittest.TestCase):
|
||||
class TFT5GenerationIntegrationTests(unittest.TestCase):
|
||||
@slow
|
||||
def test_greedy_xla_generate_simple(self):
|
||||
model = TFT5ForConditionalGeneration.from_pretrained("t5-small")
|
||||
tokenizer = T5Tokenizer.from_pretrained("t5-small")
|
||||
model = TFT5ForConditionalGeneration.from_pretrained("google-t5/t5-small")
|
||||
tokenizer = T5Tokenizer.from_pretrained("google-t5/t5-small")
|
||||
|
||||
# two examples with different lengths to confirm that attention masks are operational in XLA
|
||||
sentences = [
|
||||
@@ -476,8 +476,8 @@ class TFT5GenerationIntegrationTests(unittest.TestCase):
|
||||
|
||||
@slow
|
||||
def test_greedy_generate(self):
|
||||
model = TFT5ForConditionalGeneration.from_pretrained("t5-small")
|
||||
tokenizer = T5Tokenizer.from_pretrained("t5-small")
|
||||
model = TFT5ForConditionalGeneration.from_pretrained("google-t5/t5-small")
|
||||
tokenizer = T5Tokenizer.from_pretrained("google-t5/t5-small")
|
||||
|
||||
sentences = ["Yesterday, my name was", "Today is a beautiful day and"]
|
||||
input_ids = tokenizer(sentences, return_tensors="tf", padding=True).input_ids
|
||||
@@ -505,8 +505,8 @@ class TFT5GenerationIntegrationTests(unittest.TestCase):
|
||||
|
||||
# forces the generation to happen on CPU, to avoid GPU-related quirks
|
||||
with tf.device(":/CPU:0"):
|
||||
model = TFT5ForConditionalGeneration.from_pretrained("t5-small")
|
||||
tokenizer = T5Tokenizer.from_pretrained("t5-small")
|
||||
model = TFT5ForConditionalGeneration.from_pretrained("google-t5/t5-small")
|
||||
tokenizer = T5Tokenizer.from_pretrained("google-t5/t5-small")
|
||||
|
||||
sentence = "Translate English to German: I have two bananas"
|
||||
input_ids = tokenizer(sentence, return_tensors="tf", padding=True).input_ids
|
||||
@@ -526,8 +526,8 @@ class TFT5GenerationIntegrationTests(unittest.TestCase):
|
||||
|
||||
@slow
|
||||
def test_sample_generate(self):
|
||||
model = TFT5ForConditionalGeneration.from_pretrained("t5-small")
|
||||
tokenizer = T5Tokenizer.from_pretrained("t5-small")
|
||||
model = TFT5ForConditionalGeneration.from_pretrained("google-t5/t5-small")
|
||||
tokenizer = T5Tokenizer.from_pretrained("google-t5/t5-small")
|
||||
|
||||
sentences = ["I really love my", "Translate English to German: the transformers are truly amazing"]
|
||||
input_ids = tokenizer(sentences, return_tensors="tf", padding=True).input_ids
|
||||
@@ -557,8 +557,8 @@ class TFT5GenerationIntegrationTests(unittest.TestCase):
|
||||
@unittest.skip("Skip for now as TF 2.13 breaks it on GPU")
|
||||
@slow
|
||||
def test_beam_search_xla_generate_simple(self):
|
||||
model = TFT5ForConditionalGeneration.from_pretrained("t5-small")
|
||||
tokenizer = T5Tokenizer.from_pretrained("t5-small")
|
||||
model = TFT5ForConditionalGeneration.from_pretrained("google-t5/t5-small")
|
||||
tokenizer = T5Tokenizer.from_pretrained("google-t5/t5-small")
|
||||
|
||||
# tests XLA with task specific arguments
|
||||
task_specific_config = getattr(model.config, "task_specific_params", {})
|
||||
@@ -590,8 +590,8 @@ class TFT5GenerationIntegrationTests(unittest.TestCase):
|
||||
|
||||
@slow
|
||||
def test_beam_search_generate(self):
|
||||
model = TFT5ForConditionalGeneration.from_pretrained("t5-small")
|
||||
tokenizer = T5Tokenizer.from_pretrained("t5-small")
|
||||
model = TFT5ForConditionalGeneration.from_pretrained("google-t5/t5-small")
|
||||
tokenizer = T5Tokenizer.from_pretrained("google-t5/t5-small")
|
||||
|
||||
sentences = ["I really love my", "Translate English to German: the transformers are truly amazing"]
|
||||
input_ids = tokenizer(sentences, return_tensors="tf", padding=True).input_ids
|
||||
@@ -622,7 +622,7 @@ class TFT5GenerationIntegrationTests(unittest.TestCase):
|
||||
class TFT5ModelIntegrationTests(unittest.TestCase):
|
||||
@cached_property
|
||||
def model(self):
|
||||
return TFT5ForConditionalGeneration.from_pretrained("t5-base")
|
||||
return TFT5ForConditionalGeneration.from_pretrained("google-t5/t5-base")
|
||||
|
||||
@slow
|
||||
def test_small_integration_test(self):
|
||||
@@ -638,8 +638,8 @@ class TFT5ModelIntegrationTests(unittest.TestCase):
|
||||
>>> score = t5_model.score(inputs=["Hello there"], targets=["Hi I am"], vocabulary=vocab)
|
||||
"""
|
||||
|
||||
model = TFT5ForConditionalGeneration.from_pretrained("t5-small")
|
||||
tokenizer = T5Tokenizer.from_pretrained("t5-small")
|
||||
model = TFT5ForConditionalGeneration.from_pretrained("google-t5/t5-small")
|
||||
tokenizer = T5Tokenizer.from_pretrained("google-t5/t5-small")
|
||||
|
||||
input_ids = tokenizer("Hello there", return_tensors="tf").input_ids
|
||||
labels = tokenizer("Hi I am", return_tensors="tf").input_ids
|
||||
@@ -703,7 +703,7 @@ class TFT5ModelIntegrationTests(unittest.TestCase):
|
||||
@slow
|
||||
def test_summarization(self):
|
||||
model = self.model
|
||||
tok = T5Tokenizer.from_pretrained("t5-base")
|
||||
tok = T5Tokenizer.from_pretrained("google-t5/t5-base")
|
||||
|
||||
FRANCE_ARTICLE = ( # @noqa
|
||||
"Marseille, France (CNN)The French prosecutor leading an investigation into the crash of Germanwings"
|
||||
@@ -948,7 +948,7 @@ class TFT5ModelIntegrationTests(unittest.TestCase):
|
||||
|
||||
@slow
|
||||
def test_translation_en_to_de(self):
|
||||
tok = T5Tokenizer.from_pretrained("t5-base")
|
||||
tok = T5Tokenizer.from_pretrained("google-t5/t5-base")
|
||||
model = self.model
|
||||
|
||||
task_specific_config = getattr(model.config, "task_specific_params", {})
|
||||
@@ -978,7 +978,7 @@ class TFT5ModelIntegrationTests(unittest.TestCase):
|
||||
@slow
|
||||
def test_translation_en_to_fr(self):
|
||||
model = self.model
|
||||
tok = T5Tokenizer.from_pretrained("t5-base")
|
||||
tok = T5Tokenizer.from_pretrained("google-t5/t5-base")
|
||||
|
||||
task_specific_config = getattr(model.config, "task_specific_params", {})
|
||||
translation_config = task_specific_config.get("translation_en_to_fr", {})
|
||||
@@ -1015,7 +1015,7 @@ class TFT5ModelIntegrationTests(unittest.TestCase):
|
||||
@slow
|
||||
def test_translation_en_to_ro(self):
|
||||
model = self.model
|
||||
tok = T5Tokenizer.from_pretrained("t5-base")
|
||||
tok = T5Tokenizer.from_pretrained("google-t5/t5-base")
|
||||
|
||||
task_specific_config = getattr(model.config, "task_specific_params", {})
|
||||
translation_config = task_specific_config.get("translation_en_to_ro", {})
|
||||
|
||||
@@ -138,11 +138,11 @@ class T5TokenizationTest(TokenizerTesterMixin, unittest.TestCase):
|
||||
|
||||
@cached_property
|
||||
def t5_base_tokenizer(self):
|
||||
return T5Tokenizer.from_pretrained("t5-base")
|
||||
return T5Tokenizer.from_pretrained("google-t5/t5-base")
|
||||
|
||||
@cached_property
|
||||
def t5_base_tokenizer_fast(self):
|
||||
return T5TokenizerFast.from_pretrained("t5-base")
|
||||
return T5TokenizerFast.from_pretrained("google-t5/t5-base")
|
||||
|
||||
def get_tokenizer(self, **kwargs) -> T5Tokenizer:
|
||||
return self.tokenizer_class.from_pretrained(self.tmpdirname, **kwargs)
|
||||
@@ -373,7 +373,7 @@ class T5TokenizationTest(TokenizerTesterMixin, unittest.TestCase):
|
||||
|
||||
self.tokenizer_integration_test_util(
|
||||
expected_encoding=expected_encoding,
|
||||
model_name="t5-base",
|
||||
model_name="google-t5/t5-base",
|
||||
revision="5a7ff2d8f5117c194c7e32ec1ccbf04642cca99b",
|
||||
)
|
||||
|
||||
@@ -400,7 +400,7 @@ class T5TokenizationTest(TokenizerTesterMixin, unittest.TestCase):
|
||||
self.assertListEqual(sorted(tokenizer.get_sentinel_token_ids()), sorted(range(1000, 1010)))
|
||||
|
||||
def test_some_edge_cases(self):
|
||||
tokenizer = T5Tokenizer.from_pretrained("t5-base", legacy=False)
|
||||
tokenizer = T5Tokenizer.from_pretrained("google-t5/t5-base", legacy=False)
|
||||
|
||||
sp_tokens = tokenizer.sp_model.encode("</s>>", out_type=str)
|
||||
self.assertEqual(sp_tokens, ["<", "/", "s", ">", ">"])
|
||||
@@ -426,8 +426,8 @@ class T5TokenizationTest(TokenizerTesterMixin, unittest.TestCase):
|
||||
|
||||
def test_fast_slow_edge_cases(self):
|
||||
# We are testing spaces before and spaces after special tokens + space transformations
|
||||
slow_tokenizer = T5Tokenizer.from_pretrained("t5-base", legacy=False)
|
||||
fast_tokenizer = T5TokenizerFast.from_pretrained("t5-base", legacy=False, from_slow=True)
|
||||
slow_tokenizer = T5Tokenizer.from_pretrained("google-t5/t5-base", legacy=False)
|
||||
fast_tokenizer = T5TokenizerFast.from_pretrained("google-t5/t5-base", legacy=False, from_slow=True)
|
||||
slow_tokenizer.add_tokens(AddedToken("<new_token_test_>", rstrip=False, lstrip=False, normalized=False))
|
||||
fast_tokenizer.add_tokens(AddedToken("<new_token_test_>", rstrip=False, lstrip=False, normalized=False))
|
||||
|
||||
@@ -445,7 +445,7 @@ class T5TokenizationTest(TokenizerTesterMixin, unittest.TestCase):
|
||||
with self.subTest(f"fast {edge_case} normalized = False"):
|
||||
self.assertEqual(fast_tokenizer.tokenize(hard_case), EXPECTED_SLOW)
|
||||
|
||||
fast_tokenizer = T5TokenizerFast.from_pretrained("t5-base", legacy=False, from_slow=True)
|
||||
fast_tokenizer = T5TokenizerFast.from_pretrained("google-t5/t5-base", legacy=False, from_slow=True)
|
||||
fast_tokenizer.add_tokens(AddedToken("<new_token_test_>", rstrip=False, lstrip=False, normalized=True))
|
||||
|
||||
# `normalized=True` is the default normalization scheme when adding a token. Normalize -> don't strip the space.
|
||||
@@ -604,7 +604,7 @@ class CommonSpmIntegrationTests(unittest.TestCase):
|
||||
)
|
||||
|
||||
# Test with T5
|
||||
hf_tokenizer = T5Tokenizer.from_pretrained("t5-small")
|
||||
hf_tokenizer = T5Tokenizer.from_pretrained("google-t5/t5-small")
|
||||
vocab_path = "gs://t5-data/vocabs/cc_all.32000/sentencepiece.model"
|
||||
t5x_tokenizer = SentencePieceVocabulary(vocab_path, extra_ids=300)
|
||||
for text in input_texts:
|
||||
|
||||
@@ -603,7 +603,7 @@ class UMT5EncoderOnlyModelTester:
|
||||
self.is_training = is_training
|
||||
|
||||
def get_large_model_config(self):
|
||||
return UMT5Config.from_pretrained("t5-base")
|
||||
return UMT5Config.from_pretrained("google-t5/t5-base")
|
||||
|
||||
def prepare_config_and_inputs(self):
|
||||
input_ids = ids_tensor([self.batch_size, self.encoder_seq_length], self.vocab_size)
|
||||
|
||||
@@ -426,7 +426,7 @@ class FlaxViT2GPT2EncoderDecoderModelTest(FlaxEncoderDecoderMixin, unittest.Test
|
||||
|
||||
def get_pretrained_model(self):
|
||||
return FlaxVisionEncoderDecoderModel.from_encoder_decoder_pretrained(
|
||||
"google/vit-base-patch16-224-in21k", "gpt2"
|
||||
"google/vit-base-patch16-224-in21k", "openai-community/gpt2"
|
||||
)
|
||||
|
||||
|
||||
@@ -434,7 +434,7 @@ class FlaxViT2GPT2EncoderDecoderModelTest(FlaxEncoderDecoderMixin, unittest.Test
|
||||
class FlaxVisionEncoderDecoderModelTest(unittest.TestCase):
|
||||
def get_from_encoderdecoder_pretrained_model(self):
|
||||
return FlaxVisionEncoderDecoderModel.from_encoder_decoder_pretrained(
|
||||
"google/vit-base-patch16-224-in21k", "gpt2"
|
||||
"google/vit-base-patch16-224-in21k", "openai-community/gpt2"
|
||||
)
|
||||
|
||||
def _check_configuration_tie(self, model):
|
||||
|
||||
@@ -627,7 +627,9 @@ class TFVisionEncoderDecoderMixin:
|
||||
@require_tf
|
||||
class TFViT2GPT2EncoderDecoderModelTest(TFVisionEncoderDecoderMixin, unittest.TestCase):
|
||||
def get_pretrained_model(self):
|
||||
return TFVisionEncoderDecoderModel.from_encoder_decoder_pretrained("google/vit-base-patch16-224-in21k", "gpt2")
|
||||
return TFVisionEncoderDecoderModel.from_encoder_decoder_pretrained(
|
||||
"google/vit-base-patch16-224-in21k", "openai-community/gpt2"
|
||||
)
|
||||
|
||||
def get_encoder_decoder_model(self, config, decoder_config):
|
||||
encoder_model = TFViTModel(config, name="encoder")
|
||||
@@ -672,10 +674,12 @@ class TFViT2GPT2EncoderDecoderModelTest(TFVisionEncoderDecoderMixin, unittest.Te
|
||||
@require_tf
|
||||
class TFVisionEncoderDecoderModelTest(unittest.TestCase):
|
||||
def get_from_encoderdecoder_pretrained_model(self):
|
||||
return TFVisionEncoderDecoderModel.from_encoder_decoder_pretrained("google/vit-base-patch16-224-in21k", "gpt2")
|
||||
return TFVisionEncoderDecoderModel.from_encoder_decoder_pretrained(
|
||||
"google/vit-base-patch16-224-in21k", "openai-community/gpt2"
|
||||
)
|
||||
|
||||
def get_decoder_config(self):
|
||||
config = AutoConfig.from_pretrained("gpt2")
|
||||
config = AutoConfig.from_pretrained("openai-community/gpt2")
|
||||
config.is_decoder = True
|
||||
config.add_cross_attention = True
|
||||
return config
|
||||
@@ -685,7 +689,9 @@ class TFVisionEncoderDecoderModelTest(unittest.TestCase):
|
||||
|
||||
def get_encoder_decoder_models(self):
|
||||
encoder_model = TFViTModel.from_pretrained("google/vit-base-patch16-224-in21k", name="encoder")
|
||||
decoder_model = TFGPT2LMHeadModel.from_pretrained("gpt2", config=self.get_decoder_config(), name="decoder")
|
||||
decoder_model = TFGPT2LMHeadModel.from_pretrained(
|
||||
"openai-community/gpt2", config=self.get_decoder_config(), name="decoder"
|
||||
)
|
||||
return {"encoder": encoder_model, "decoder": decoder_model}
|
||||
|
||||
def _check_configuration_tie(self, model):
|
||||
@@ -714,7 +720,7 @@ def prepare_img():
|
||||
class TFVisionEncoderDecoderModelSaveLoadTests(unittest.TestCase):
|
||||
def get_encoder_decoder_config(self):
|
||||
encoder_config = AutoConfig.from_pretrained("google/vit-base-patch16-224-in21k")
|
||||
decoder_config = AutoConfig.from_pretrained("gpt2", is_decoder=True, add_cross_attention=True)
|
||||
decoder_config = AutoConfig.from_pretrained("openai-community/gpt2", is_decoder=True, add_cross_attention=True)
|
||||
return VisionEncoderDecoderConfig.from_encoder_decoder_configs(encoder_config, decoder_config)
|
||||
|
||||
def get_encoder_decoder_config_small(self):
|
||||
@@ -829,7 +835,7 @@ class TFVisionEncoderDecoderModelSaveLoadTests(unittest.TestCase):
|
||||
|
||||
config = self.get_encoder_decoder_config()
|
||||
image_processor = AutoImageProcessor.from_pretrained("google/vit-base-patch16-224-in21k")
|
||||
decoder_tokenizer = AutoTokenizer.from_pretrained("gpt2")
|
||||
decoder_tokenizer = AutoTokenizer.from_pretrained("openai-community/gpt2")
|
||||
|
||||
img = prepare_img()
|
||||
pixel_values = image_processor(images=img, return_tensors="tf").pixel_values
|
||||
@@ -845,7 +851,7 @@ class TFVisionEncoderDecoderModelSaveLoadTests(unittest.TestCase):
|
||||
encoder = TFAutoModel.from_pretrained("google/vit-base-patch16-224-in21k", name="encoder")
|
||||
# It's necessary to specify `add_cross_attention=True` here.
|
||||
decoder = TFAutoModelForCausalLM.from_pretrained(
|
||||
"gpt2", is_decoder=True, add_cross_attention=True, name="decoder"
|
||||
"openai-community/gpt2", is_decoder=True, add_cross_attention=True, name="decoder"
|
||||
)
|
||||
pretrained_encoder_dir = os.path.join(tmp_dirname, "pretrained_encoder")
|
||||
pretrained_decoder_dir = os.path.join(tmp_dirname, "pretrained_decoder")
|
||||
|
||||
@@ -369,7 +369,7 @@ class TFXLMModelTest(TFModelTesterMixin, PipelineTesterMixin, unittest.TestCase)
|
||||
class TFXLMModelLanguageGenerationTest(unittest.TestCase):
|
||||
@slow
|
||||
def test_lm_generate_xlm_mlm_en_2048(self):
|
||||
model = TFXLMWithLMHeadModel.from_pretrained("xlm-mlm-en-2048")
|
||||
model = TFXLMWithLMHeadModel.from_pretrained("FacebookAI/xlm-mlm-en-2048")
|
||||
input_ids = tf.convert_to_tensor([[14, 447]], dtype=tf.int32) # the president
|
||||
expected_output_ids = [
|
||||
14,
|
||||
|
||||
@@ -514,7 +514,7 @@ class XLMModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMixin,
|
||||
class XLMModelLanguageGenerationTest(unittest.TestCase):
|
||||
@slow
|
||||
def test_lm_generate_xlm_mlm_en_2048(self):
|
||||
model = XLMWithLMHeadModel.from_pretrained("xlm-mlm-en-2048")
|
||||
model = XLMWithLMHeadModel.from_pretrained("FacebookAI/xlm-mlm-en-2048")
|
||||
model.to(torch_device)
|
||||
input_ids = torch.tensor([[14, 447]], dtype=torch.long, device=torch_device) # the president
|
||||
expected_output_ids = [
|
||||
|
||||
@@ -85,7 +85,7 @@ class XLMTokenizationTest(TokenizerTesterMixin, unittest.TestCase):
|
||||
|
||||
@slow
|
||||
def test_sequence_builders(self):
|
||||
tokenizer = XLMTokenizer.from_pretrained("xlm-mlm-en-2048")
|
||||
tokenizer = XLMTokenizer.from_pretrained("FacebookAI/xlm-mlm-en-2048")
|
||||
|
||||
text = tokenizer.encode("sequence builders", add_special_tokens=False)
|
||||
text_2 = tokenizer.encode("multi-sequence build", add_special_tokens=False)
|
||||
|
||||
@@ -32,8 +32,8 @@ if is_flax_available():
|
||||
class FlaxXLMRobertaModelIntegrationTest(unittest.TestCase):
|
||||
@slow
|
||||
def test_flax_xlm_roberta_base(self):
|
||||
model = FlaxXLMRobertaModel.from_pretrained("xlm-roberta-base")
|
||||
tokenizer = AutoTokenizer.from_pretrained("xlm-roberta-base")
|
||||
model = FlaxXLMRobertaModel.from_pretrained("FacebookAI/xlm-roberta-base")
|
||||
tokenizer = AutoTokenizer.from_pretrained("FacebookAI/xlm-roberta-base")
|
||||
text = "The dog is cute and lives in the garden house"
|
||||
input_ids = jnp.array([tokenizer.encode(text)])
|
||||
|
||||
|
||||
@@ -32,7 +32,7 @@ if is_torch_available():
|
||||
class XLMRobertaModelIntegrationTest(unittest.TestCase):
|
||||
@slow
|
||||
def test_xlm_roberta_base(self):
|
||||
model = XLMRobertaModel.from_pretrained("xlm-roberta-base")
|
||||
model = XLMRobertaModel.from_pretrained("FacebookAI/xlm-roberta-base")
|
||||
input_ids = torch.tensor([[0, 581, 10269, 83, 99942, 136, 60742, 23, 70, 80583, 18276, 2]])
|
||||
# The dog is cute and lives in the garden house
|
||||
|
||||
@@ -51,7 +51,7 @@ class XLMRobertaModelIntegrationTest(unittest.TestCase):
|
||||
|
||||
@slow
|
||||
def test_xlm_roberta_large(self):
|
||||
model = XLMRobertaModel.from_pretrained("xlm-roberta-large")
|
||||
model = XLMRobertaModel.from_pretrained("FacebookAI/xlm-roberta-large")
|
||||
input_ids = torch.tensor([[0, 581, 10269, 83, 99942, 136, 60742, 23, 70, 80583, 18276, 2]])
|
||||
# The dog is cute and lives in the garden house
|
||||
|
||||
|
||||
@@ -212,7 +212,7 @@ class XLMRobertaTokenizationTest(TokenizerTesterMixin, unittest.TestCase):
|
||||
|
||||
@cached_property
|
||||
def big_tokenizer(self):
|
||||
return XLMRobertaTokenizer.from_pretrained("xlm-roberta-base")
|
||||
return XLMRobertaTokenizer.from_pretrained("FacebookAI/xlm-roberta-base")
|
||||
|
||||
def test_picklable_without_disk(self):
|
||||
with tempfile.NamedTemporaryFile() as f:
|
||||
@@ -338,6 +338,6 @@ class XLMRobertaTokenizationTest(TokenizerTesterMixin, unittest.TestCase):
|
||||
|
||||
self.tokenizer_integration_test_util(
|
||||
expected_encoding=expected_encoding,
|
||||
model_name="xlm-roberta-base",
|
||||
model_name="FacebookAI/xlm-roberta-base",
|
||||
revision="d9d8a8ea5eb94b1c6654ae9249df7793cd2933d3",
|
||||
)
|
||||
|
||||
@@ -491,7 +491,7 @@ class TFXLNetModelTest(TFModelTesterMixin, PipelineTesterMixin, unittest.TestCas
|
||||
class TFXLNetModelLanguageGenerationTest(unittest.TestCase):
|
||||
@slow
|
||||
def test_lm_generate_xlnet_base_cased(self):
|
||||
model = TFXLNetLMHeadModel.from_pretrained("xlnet-base-cased")
|
||||
model = TFXLNetLMHeadModel.from_pretrained("xlnet/xlnet-base-cased")
|
||||
# fmt: off
|
||||
input_ids = tf.convert_to_tensor(
|
||||
[
|
||||
|
||||
@@ -694,7 +694,7 @@ class XLNetModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMixi
|
||||
class XLNetModelLanguageGenerationTest(unittest.TestCase):
|
||||
@slow
|
||||
def test_lm_generate_xlnet_base_cased(self):
|
||||
model = XLNetLMHeadModel.from_pretrained("xlnet-base-cased")
|
||||
model = XLNetLMHeadModel.from_pretrained("xlnet/xlnet-base-cased")
|
||||
model.to(torch_device)
|
||||
# fmt: off
|
||||
input_ids = torch.tensor(
|
||||
|
||||
@@ -186,7 +186,7 @@ class XLNetTokenizationTest(TokenizerTesterMixin, unittest.TestCase):
|
||||
|
||||
@slow
|
||||
def test_sequence_builders(self):
|
||||
tokenizer = XLNetTokenizer.from_pretrained("xlnet-base-cased")
|
||||
tokenizer = XLNetTokenizer.from_pretrained("xlnet/xlnet-base-cased")
|
||||
|
||||
text = tokenizer.encode("sequence builders", add_special_tokens=False)
|
||||
text_2 = tokenizer.encode("multi-sequence build", add_special_tokens=False)
|
||||
@@ -203,6 +203,6 @@ class XLNetTokenizationTest(TokenizerTesterMixin, unittest.TestCase):
|
||||
|
||||
self.tokenizer_integration_test_util(
|
||||
expected_encoding=expected_encoding,
|
||||
model_name="xlnet-base-cased",
|
||||
model_name="xlnet/xlnet-base-cased",
|
||||
revision="c841166438c31ec7ca9a106dee7bb312b73ae511",
|
||||
)
|
||||
|
||||
@@ -630,7 +630,7 @@ class XmodModelIntegrationTest(unittest.TestCase):
|
||||
|
||||
@slow
|
||||
def test_end_to_end_mask_fill(self):
|
||||
tokenizer = XLMRobertaTokenizer.from_pretrained("xlm-roberta-base")
|
||||
tokenizer = XLMRobertaTokenizer.from_pretrained("FacebookAI/xlm-roberta-base")
|
||||
model = XmodForMaskedLM.from_pretrained("facebook/xmod-base", default_language="en_XX")
|
||||
model.to(torch_device)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user