Tf model outputs (#6247)
* TF outputs and test on BERT
* Albert to DistilBert
* All remaining TF models except T5
* Documentation
* One file forgotten
* TF outputs and test on BERT
* Albert to DistilBert
* All remaining TF models except T5
* Documentation
* One file forgotten
* Add new models and fix issues
* Quality improvements
* Add T5
* A bit of cleanup
* Fix for slow tests
* Style
This commit is contained in:
@@ -116,6 +116,7 @@ class TFAlbertModelTester:
|
||||
max_position_embeddings=self.max_position_embeddings,
|
||||
type_vocab_size=self.type_vocab_size,
|
||||
initializer_range=self.initializer_range,
|
||||
return_dict=True,
|
||||
)
|
||||
|
||||
return config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
|
||||
@@ -129,21 +130,17 @@ class TFAlbertModelTester:
|
||||
# 'token_type_ids': token_type_ids}
|
||||
# sequence_output, pooled_output = model(**inputs)
|
||||
inputs = {"input_ids": input_ids, "attention_mask": input_mask, "token_type_ids": token_type_ids}
|
||||
sequence_output, pooled_output = model(inputs)
|
||||
result = model(inputs)
|
||||
|
||||
inputs = [input_ids, input_mask]
|
||||
sequence_output, pooled_output = model(inputs)
|
||||
result = model(inputs)
|
||||
|
||||
sequence_output, pooled_output = model(input_ids)
|
||||
result = model(input_ids)
|
||||
|
||||
result = {
|
||||
"sequence_output": sequence_output.numpy(),
|
||||
"pooled_output": pooled_output.numpy(),
|
||||
}
|
||||
self.parent.assertListEqual(
|
||||
list(result["sequence_output"].shape), [self.batch_size, self.seq_length, self.hidden_size]
|
||||
list(result["last_hidden_state"].shape), [self.batch_size, self.seq_length, self.hidden_size]
|
||||
)
|
||||
self.parent.assertListEqual(list(result["pooled_output"].shape), [self.batch_size, self.hidden_size])
|
||||
self.parent.assertListEqual(list(result["pooler_output"].shape), [self.batch_size, self.hidden_size])
|
||||
|
||||
def create_and_check_albert_for_pretraining(
|
||||
self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
|
||||
@@ -151,28 +148,19 @@ class TFAlbertModelTester:
|
||||
config.num_labels = self.num_labels
|
||||
model = TFAlbertForPreTraining(config=config)
|
||||
inputs = {"input_ids": input_ids, "attention_mask": input_mask, "token_type_ids": token_type_ids}
|
||||
prediction_scores, sop_scores = model(inputs)
|
||||
result = {
|
||||
"prediction_scores": prediction_scores.numpy(),
|
||||
"sop_scores": sop_scores.numpy(),
|
||||
}
|
||||
result = model(inputs)
|
||||
self.parent.assertListEqual(
|
||||
list(result["prediction_scores"].shape), [self.batch_size, self.seq_length, self.vocab_size]
|
||||
list(result["prediction_logits"].shape), [self.batch_size, self.seq_length, self.vocab_size]
|
||||
)
|
||||
self.parent.assertListEqual(list(result["sop_scores"].shape), [self.batch_size, self.num_labels])
|
||||
self.parent.assertListEqual(list(result["sop_logits"].shape), [self.batch_size, self.num_labels])
|
||||
|
||||
def create_and_check_albert_for_masked_lm(
|
||||
self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
|
||||
):
|
||||
model = TFAlbertForMaskedLM(config=config)
|
||||
inputs = {"input_ids": input_ids, "attention_mask": input_mask, "token_type_ids": token_type_ids}
|
||||
(prediction_scores,) = model(inputs)
|
||||
result = {
|
||||
"prediction_scores": prediction_scores.numpy(),
|
||||
}
|
||||
self.parent.assertListEqual(
|
||||
list(result["prediction_scores"].shape), [self.batch_size, self.seq_length, self.vocab_size]
|
||||
)
|
||||
result = model(inputs)
|
||||
self.parent.assertListEqual(list(result["logits"].shape), [self.batch_size, self.seq_length, self.vocab_size])
|
||||
|
||||
def create_and_check_albert_for_sequence_classification(
|
||||
self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
|
||||
@@ -180,10 +168,7 @@ class TFAlbertModelTester:
|
||||
config.num_labels = self.num_labels
|
||||
model = TFAlbertForSequenceClassification(config=config)
|
||||
inputs = {"input_ids": input_ids, "attention_mask": input_mask, "token_type_ids": token_type_ids}
|
||||
(logits,) = model(inputs)
|
||||
result = {
|
||||
"logits": logits.numpy(),
|
||||
}
|
||||
result = model(inputs)
|
||||
self.parent.assertListEqual(list(result["logits"].shape), [self.batch_size, self.num_labels])
|
||||
|
||||
def create_and_check_albert_for_question_answering(
|
||||
@@ -191,11 +176,7 @@ class TFAlbertModelTester:
|
||||
):
|
||||
model = TFAlbertForQuestionAnswering(config=config)
|
||||
inputs = {"input_ids": input_ids, "attention_mask": input_mask, "token_type_ids": token_type_ids}
|
||||
start_logits, end_logits = model(inputs)
|
||||
result = {
|
||||
"start_logits": start_logits.numpy(),
|
||||
"end_logits": end_logits.numpy(),
|
||||
}
|
||||
result = model(inputs)
|
||||
self.parent.assertListEqual(list(result["start_logits"].shape), [self.batch_size, self.seq_length])
|
||||
self.parent.assertListEqual(list(result["end_logits"].shape), [self.batch_size, self.seq_length])
|
||||
|
||||
|
||||
@@ -118,6 +118,7 @@ class TFBertModelTester:
|
||||
max_position_embeddings=self.max_position_embeddings,
|
||||
type_vocab_size=self.type_vocab_size,
|
||||
initializer_range=self.initializer_range,
|
||||
return_dict=True,
|
||||
)
|
||||
|
||||
return config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
|
||||
@@ -130,18 +131,14 @@ class TFBertModelTester:
|
||||
sequence_output, pooled_output = model(inputs)
|
||||
|
||||
inputs = [input_ids, input_mask]
|
||||
sequence_output, pooled_output = model(inputs)
|
||||
result = model(inputs)
|
||||
|
||||
sequence_output, pooled_output = model(input_ids)
|
||||
result = model(input_ids)
|
||||
|
||||
result = {
|
||||
"sequence_output": sequence_output.numpy(),
|
||||
"pooled_output": pooled_output.numpy(),
|
||||
}
|
||||
self.parent.assertListEqual(
|
||||
list(result["sequence_output"].shape), [self.batch_size, self.seq_length, self.hidden_size]
|
||||
list(result["last_hidden_state"].shape), [self.batch_size, self.seq_length, self.hidden_size]
|
||||
)
|
||||
self.parent.assertListEqual(list(result["pooled_output"].shape), [self.batch_size, self.hidden_size])
|
||||
self.parent.assertListEqual(list(result["pooler_output"].shape), [self.batch_size, self.hidden_size])
|
||||
|
||||
def create_and_check_bert_lm_head(
|
||||
self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
|
||||
@@ -153,7 +150,7 @@ class TFBertModelTester:
|
||||
"attention_mask": input_mask,
|
||||
"token_type_ids": token_type_ids,
|
||||
}
|
||||
(prediction_scores,) = model(inputs)
|
||||
prediction_scores = model(inputs)["logits"]
|
||||
self.parent.assertListEqual(
|
||||
list(prediction_scores.numpy().shape), [self.batch_size, self.seq_length, self.vocab_size]
|
||||
)
|
||||
@@ -167,39 +164,27 @@ class TFBertModelTester:
|
||||
"attention_mask": input_mask,
|
||||
"token_type_ids": token_type_ids,
|
||||
}
|
||||
(prediction_scores,) = model(inputs)
|
||||
result = {
|
||||
"prediction_scores": prediction_scores.numpy(),
|
||||
}
|
||||
self.parent.assertListEqual(
|
||||
list(result["prediction_scores"].shape), [self.batch_size, self.seq_length, self.vocab_size]
|
||||
)
|
||||
result = model(inputs)
|
||||
self.parent.assertListEqual(list(result["logits"].shape), [self.batch_size, self.seq_length, self.vocab_size])
|
||||
|
||||
def create_and_check_bert_for_next_sequence_prediction(
|
||||
self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
|
||||
):
|
||||
model = TFBertForNextSentencePrediction(config=config)
|
||||
inputs = {"input_ids": input_ids, "attention_mask": input_mask, "token_type_ids": token_type_ids}
|
||||
(seq_relationship_score,) = model(inputs)
|
||||
result = {
|
||||
"seq_relationship_score": seq_relationship_score.numpy(),
|
||||
}
|
||||
self.parent.assertListEqual(list(result["seq_relationship_score"].shape), [self.batch_size, 2])
|
||||
result = model(inputs)
|
||||
self.parent.assertListEqual(list(result["logits"].shape), [self.batch_size, 2])
|
||||
|
||||
def create_and_check_bert_for_pretraining(
|
||||
self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
|
||||
):
|
||||
model = TFBertForPreTraining(config=config)
|
||||
inputs = {"input_ids": input_ids, "attention_mask": input_mask, "token_type_ids": token_type_ids}
|
||||
prediction_scores, seq_relationship_score = model(inputs)
|
||||
result = {
|
||||
"prediction_scores": prediction_scores.numpy(),
|
||||
"seq_relationship_score": seq_relationship_score.numpy(),
|
||||
}
|
||||
result = model(inputs)
|
||||
self.parent.assertListEqual(
|
||||
list(result["prediction_scores"].shape), [self.batch_size, self.seq_length, self.vocab_size]
|
||||
list(result["prediction_logits"].shape), [self.batch_size, self.seq_length, self.vocab_size]
|
||||
)
|
||||
self.parent.assertListEqual(list(result["seq_relationship_score"].shape), [self.batch_size, 2])
|
||||
self.parent.assertListEqual(list(result["seq_relationship_logits"].shape), [self.batch_size, 2])
|
||||
|
||||
def create_and_check_bert_for_sequence_classification(
|
||||
self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
|
||||
@@ -212,8 +197,7 @@ class TFBertModelTester:
|
||||
"token_type_ids": token_type_ids,
|
||||
}
|
||||
|
||||
(logits,) = model(inputs)
|
||||
result = {"logits": logits.numpy()}
|
||||
result = model(inputs)
|
||||
self.parent.assertListEqual(list(result["logits"].shape), [self.batch_size, self.num_labels])
|
||||
|
||||
def create_and_check_bert_for_multiple_choice(
|
||||
@@ -229,8 +213,7 @@ class TFBertModelTester:
|
||||
"attention_mask": multiple_choice_input_mask,
|
||||
"token_type_ids": multiple_choice_token_type_ids,
|
||||
}
|
||||
(logits,) = model(inputs)
|
||||
result = {"logits": logits.numpy()}
|
||||
result = model(inputs)
|
||||
self.parent.assertListEqual(list(result["logits"].shape), [self.batch_size, self.num_choices])
|
||||
|
||||
def create_and_check_bert_for_token_classification(
|
||||
@@ -243,10 +226,7 @@ class TFBertModelTester:
|
||||
"attention_mask": input_mask,
|
||||
"token_type_ids": token_type_ids,
|
||||
}
|
||||
(logits,) = model(inputs)
|
||||
result = {
|
||||
"logits": logits.numpy(),
|
||||
}
|
||||
result = model(inputs)
|
||||
self.parent.assertListEqual(list(result["logits"].shape), [self.batch_size, self.seq_length, self.num_labels])
|
||||
|
||||
def create_and_check_bert_for_question_answering(
|
||||
@@ -259,8 +239,7 @@ class TFBertModelTester:
|
||||
"token_type_ids": token_type_ids,
|
||||
}
|
||||
|
||||
start_logits, end_logits = model(inputs)
|
||||
result = {"start_logits": start_logits.numpy(), "end_logits": end_logits.numpy()}
|
||||
result = model(inputs)
|
||||
self.parent.assertListEqual(list(result["start_logits"].shape), [self.batch_size, self.seq_length])
|
||||
self.parent.assertListEqual(list(result["end_logits"].shape), [self.batch_size, self.seq_length])
|
||||
|
||||
|
||||
@@ -35,7 +35,7 @@ class TFCamembertModelIntegrationTest(unittest.TestCase):
|
||||
[[5, 121, 11, 660, 16, 730, 25543, 110, 83, 6]], dtype=tf.int32,
|
||||
) # J'aime le camembert !"
|
||||
|
||||
output = model(input_ids)[0]
|
||||
output = model(input_ids)["last_hidden_state"]
|
||||
expected_shape = tf.TensorShape((1, 10, 768))
|
||||
self.assertEqual(output.shape, expected_shape)
|
||||
# compare the actual values for a slice.
|
||||
|
||||
@@ -146,7 +146,8 @@ class TFModelTesterMixin:
|
||||
tf.saved_model.save(model, tmpdirname)
|
||||
model = tf.keras.models.load_model(tmpdirname)
|
||||
outputs = model(inputs_dict)
|
||||
hidden_states = [t.numpy() for t in outputs[-1]]
|
||||
output = outputs[list(outputs.keys())[-1]] if isinstance(outputs, dict) else outputs[-1]
|
||||
hidden_states = [t.numpy() for t in output]
|
||||
self.assertEqual(len(outputs), num_out)
|
||||
self.assertEqual(len(hidden_states), self.model_tester.num_hidden_layers + 1)
|
||||
self.assertListEqual(
|
||||
@@ -177,7 +178,8 @@ class TFModelTesterMixin:
|
||||
tf.saved_model.save(model, tmpdirname)
|
||||
model = tf.keras.models.load_model(tmpdirname)
|
||||
outputs = model(inputs_dict)
|
||||
attentions = [t.numpy() for t in outputs[-1]]
|
||||
output = outputs[list(outputs.keys())[-1]] if isinstance(outputs, dict) else outputs[-1]
|
||||
attentions = [t.numpy() for t in output]
|
||||
self.assertEqual(len(outputs), num_out)
|
||||
self.assertEqual(len(attentions), self.model_tester.num_hidden_layers)
|
||||
self.assertListEqual(
|
||||
@@ -238,6 +240,8 @@ class TFModelTesterMixin:
|
||||
# Make sure we don't have nans
|
||||
if isinstance(after_outputs, tf.Tensor):
|
||||
out_1 = after_outputs.numpy()
|
||||
elif isinstance(after_outputs, dict):
|
||||
out_1 = after_outputs[list(after_outputs.keys())[0]]
|
||||
else:
|
||||
out_1 = after_outputs[0].numpy()
|
||||
out_2 = outputs[0].numpy()
|
||||
|
||||
@@ -89,9 +89,10 @@ class TFCTRLModelTester(object):
|
||||
# hidden_dropout_prob=self.hidden_dropout_prob,
|
||||
# attention_probs_dropout_prob=self.attention_probs_dropout_prob,
|
||||
n_positions=self.max_position_embeddings,
|
||||
n_ctx=self.max_position_embeddings
|
||||
n_ctx=self.max_position_embeddings,
|
||||
# type_vocab_size=self.type_vocab_size,
|
||||
# initializer_range=self.initializer_range
|
||||
# initializer_range=self.initializer_range,
|
||||
return_dict=True,
|
||||
)
|
||||
|
||||
head_mask = ids_tensor([self.num_hidden_layers, self.num_attention_heads], 2)
|
||||
@@ -111,30 +112,22 @@ class TFCTRLModelTester(object):
|
||||
def create_and_check_ctrl_model(self, config, input_ids, input_mask, head_mask, token_type_ids, *args):
|
||||
model = TFCTRLModel(config=config)
|
||||
inputs = {"input_ids": input_ids, "attention_mask": input_mask, "token_type_ids": token_type_ids}
|
||||
sequence_output = model(inputs)[0]
|
||||
result = model(inputs)
|
||||
|
||||
inputs = [input_ids, None, input_mask] # None is the input for 'past'
|
||||
sequence_output = model(inputs)[0]
|
||||
result = model(inputs)
|
||||
|
||||
sequence_output = model(input_ids)[0]
|
||||
result = model(input_ids)
|
||||
|
||||
result = {
|
||||
"sequence_output": sequence_output.numpy(),
|
||||
}
|
||||
self.parent.assertListEqual(
|
||||
list(result["sequence_output"].shape), [self.batch_size, self.seq_length, self.hidden_size]
|
||||
list(result["last_hidden_state"].shape), [self.batch_size, self.seq_length, self.hidden_size]
|
||||
)
|
||||
|
||||
def create_and_check_ctrl_lm_head(self, config, input_ids, input_mask, head_mask, token_type_ids, *args):
|
||||
model = TFCTRLLMHeadModel(config=config)
|
||||
inputs = {"input_ids": input_ids, "attention_mask": input_mask, "token_type_ids": token_type_ids}
|
||||
prediction_scores = model(inputs)[0]
|
||||
result = {
|
||||
"prediction_scores": prediction_scores.numpy(),
|
||||
}
|
||||
self.parent.assertListEqual(
|
||||
list(result["prediction_scores"].shape), [self.batch_size, self.seq_length, self.vocab_size]
|
||||
)
|
||||
result = model(inputs)
|
||||
self.parent.assertListEqual(list(result["logits"].shape), [self.batch_size, self.seq_length, self.vocab_size])
|
||||
|
||||
def prepare_config_and_inputs_for_common(self):
|
||||
config_and_inputs = self.prepare_config_and_inputs()
|
||||
|
||||
@@ -89,6 +89,7 @@ class TFDistilBertModelTester:
|
||||
attention_dropout=self.attention_probs_dropout_prob,
|
||||
max_position_embeddings=self.max_position_embeddings,
|
||||
initializer_range=self.initializer_range,
|
||||
return_dict=True,
|
||||
)
|
||||
|
||||
return config, input_ids, input_mask, sequence_labels, token_labels, choice_labels
|
||||
@@ -99,18 +100,14 @@ class TFDistilBertModelTester:
|
||||
model = TFDistilBertModel(config=config)
|
||||
inputs = {"input_ids": input_ids, "attention_mask": input_mask}
|
||||
|
||||
outputs = model(inputs)
|
||||
sequence_output = outputs[0]
|
||||
result = model(inputs)
|
||||
|
||||
inputs = [input_ids, input_mask]
|
||||
|
||||
(sequence_output,) = model(inputs)
|
||||
result = model(inputs)
|
||||
|
||||
result = {
|
||||
"sequence_output": sequence_output.numpy(),
|
||||
}
|
||||
self.parent.assertListEqual(
|
||||
list(result["sequence_output"].shape), [self.batch_size, self.seq_length, self.hidden_size]
|
||||
list(result["last_hidden_state"].shape), [self.batch_size, self.seq_length, self.hidden_size]
|
||||
)
|
||||
|
||||
def create_and_check_distilbert_for_masked_lm(
|
||||
@@ -118,11 +115,8 @@ class TFDistilBertModelTester:
|
||||
):
|
||||
model = TFDistilBertForMaskedLM(config=config)
|
||||
inputs = {"input_ids": input_ids, "attention_mask": input_mask}
|
||||
(prediction_scores,) = model(inputs)
|
||||
result = {"prediction_scores": prediction_scores.numpy()}
|
||||
self.parent.assertListEqual(
|
||||
list(result["prediction_scores"].shape), [self.batch_size, self.seq_length, self.vocab_size]
|
||||
)
|
||||
result = model(inputs)
|
||||
self.parent.assertListEqual(list(result["logits"].shape), [self.batch_size, self.seq_length, self.vocab_size])
|
||||
|
||||
def create_and_check_distilbert_for_question_answering(
|
||||
self, config, input_ids, input_mask, sequence_labels, token_labels, choice_labels
|
||||
@@ -132,8 +126,7 @@ class TFDistilBertModelTester:
|
||||
"input_ids": input_ids,
|
||||
"attention_mask": input_mask,
|
||||
}
|
||||
start_logits, end_logits = model(inputs)
|
||||
result = {"start_logits": start_logits.numpy(), "end_logits": end_logits.numpy()}
|
||||
result = model(inputs)
|
||||
self.parent.assertListEqual(list(result["start_logits"].shape), [self.batch_size, self.seq_length])
|
||||
self.parent.assertListEqual(list(result["end_logits"].shape), [self.batch_size, self.seq_length])
|
||||
|
||||
@@ -143,8 +136,7 @@ class TFDistilBertModelTester:
|
||||
config.num_labels = self.num_labels
|
||||
model = TFDistilBertForSequenceClassification(config)
|
||||
inputs = {"input_ids": input_ids, "attention_mask": input_mask}
|
||||
(logits,) = model(inputs)
|
||||
result = {"logits": logits.numpy()}
|
||||
result = model(inputs)
|
||||
self.parent.assertListEqual(list(result["logits"].shape), [self.batch_size, self.num_labels])
|
||||
|
||||
def create_and_check_distilbert_for_multiple_choice(
|
||||
@@ -158,8 +150,7 @@ class TFDistilBertModelTester:
|
||||
"input_ids": multiple_choice_inputs_ids,
|
||||
"attention_mask": multiple_choice_input_mask,
|
||||
}
|
||||
(logits,) = model(inputs)
|
||||
result = {"logits": logits.numpy()}
|
||||
result = model(inputs)
|
||||
self.parent.assertListEqual(list(result["logits"].shape), [self.batch_size, self.num_choices])
|
||||
|
||||
def create_and_check_distilbert_for_token_classification(
|
||||
@@ -168,10 +159,7 @@ class TFDistilBertModelTester:
|
||||
config.num_labels = self.num_labels
|
||||
model = TFDistilBertForTokenClassification(config)
|
||||
inputs = {"input_ids": input_ids, "attention_mask": input_mask}
|
||||
(logits,) = model(inputs)
|
||||
result = {
|
||||
"logits": logits.numpy(),
|
||||
}
|
||||
result = model(inputs)
|
||||
self.parent.assertListEqual(list(result["logits"].shape), [self.batch_size, self.seq_length, self.num_labels])
|
||||
|
||||
def prepare_config_and_inputs_for_common(self):
|
||||
|
||||
@@ -95,6 +95,7 @@ class TFElectraModelTester:
|
||||
max_position_embeddings=self.max_position_embeddings,
|
||||
type_vocab_size=self.type_vocab_size,
|
||||
initializer_range=self.initializer_range,
|
||||
return_dict=True,
|
||||
)
|
||||
|
||||
return config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
|
||||
@@ -104,18 +105,15 @@ class TFElectraModelTester:
|
||||
):
|
||||
model = TFElectraModel(config=config)
|
||||
inputs = {"input_ids": input_ids, "attention_mask": input_mask, "token_type_ids": token_type_ids}
|
||||
(sequence_output,) = model(inputs)
|
||||
result = model(inputs)
|
||||
|
||||
inputs = [input_ids, input_mask]
|
||||
(sequence_output,) = model(inputs)
|
||||
result = model(inputs)
|
||||
|
||||
(sequence_output,) = model(input_ids)
|
||||
result = model(input_ids)
|
||||
|
||||
result = {
|
||||
"sequence_output": sequence_output.numpy(),
|
||||
}
|
||||
self.parent.assertListEqual(
|
||||
list(result["sequence_output"].shape), [self.batch_size, self.seq_length, self.hidden_size]
|
||||
list(result["last_hidden_state"].shape), [self.batch_size, self.seq_length, self.hidden_size]
|
||||
)
|
||||
|
||||
def create_and_check_electra_for_masked_lm(
|
||||
@@ -123,24 +121,16 @@ class TFElectraModelTester:
|
||||
):
|
||||
model = TFElectraForMaskedLM(config=config)
|
||||
inputs = {"input_ids": input_ids, "attention_mask": input_mask, "token_type_ids": token_type_ids}
|
||||
(prediction_scores,) = model(inputs)
|
||||
result = {
|
||||
"prediction_scores": prediction_scores.numpy(),
|
||||
}
|
||||
self.parent.assertListEqual(
|
||||
list(result["prediction_scores"].shape), [self.batch_size, self.seq_length, self.vocab_size]
|
||||
)
|
||||
result = model(inputs)
|
||||
self.parent.assertListEqual(list(result["logits"].shape), [self.batch_size, self.seq_length, self.vocab_size])
|
||||
|
||||
def create_and_check_electra_for_pretraining(
|
||||
self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
|
||||
):
|
||||
model = TFElectraForPreTraining(config=config)
|
||||
inputs = {"input_ids": input_ids, "attention_mask": input_mask, "token_type_ids": token_type_ids}
|
||||
(prediction_scores,) = model(inputs)
|
||||
result = {
|
||||
"prediction_scores": prediction_scores.numpy(),
|
||||
}
|
||||
self.parent.assertListEqual(list(result["prediction_scores"].shape), [self.batch_size, self.seq_length])
|
||||
result = model(inputs)
|
||||
self.parent.assertListEqual(list(result["logits"].shape), [self.batch_size, self.seq_length])
|
||||
|
||||
def create_and_check_electra_for_sequence_classification(
|
||||
self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
|
||||
@@ -148,10 +138,7 @@ class TFElectraModelTester:
|
||||
config.num_labels = self.num_labels
|
||||
model = TFElectraForSequenceClassification(config=config)
|
||||
inputs = {"input_ids": input_ids, "attention_mask": input_mask, "token_type_ids": token_type_ids}
|
||||
(logits,) = model(inputs)
|
||||
result = {
|
||||
"logits": logits.numpy(),
|
||||
}
|
||||
result = model(inputs)
|
||||
self.parent.assertListEqual(list(result["logits"].shape), [self.batch_size, self.num_labels])
|
||||
|
||||
def create_and_check_electra_for_multiple_choice(
|
||||
@@ -167,8 +154,7 @@ class TFElectraModelTester:
|
||||
"attention_mask": multiple_choice_input_mask,
|
||||
"token_type_ids": multiple_choice_token_type_ids,
|
||||
}
|
||||
(logits,) = model(inputs)
|
||||
result = {"logits": logits.numpy()}
|
||||
result = model(inputs)
|
||||
self.parent.assertListEqual(list(result["logits"].shape), [self.batch_size, self.num_choices])
|
||||
|
||||
def create_and_check_electra_for_question_answering(
|
||||
@@ -176,11 +162,7 @@ class TFElectraModelTester:
|
||||
):
|
||||
model = TFElectraForQuestionAnswering(config=config)
|
||||
inputs = {"input_ids": input_ids, "attention_mask": input_mask, "token_type_ids": token_type_ids}
|
||||
start_logits, end_logits = model(inputs)
|
||||
result = {
|
||||
"start_logits": start_logits.numpy(),
|
||||
"end_logits": end_logits.numpy(),
|
||||
}
|
||||
result = model(inputs)
|
||||
self.parent.assertListEqual(list(result["start_logits"].shape), [self.batch_size, self.seq_length])
|
||||
self.parent.assertListEqual(list(result["end_logits"].shape), [self.batch_size, self.seq_length])
|
||||
|
||||
@@ -190,10 +172,7 @@ class TFElectraModelTester:
|
||||
config.num_labels = self.num_labels
|
||||
model = TFElectraForTokenClassification(config=config)
|
||||
inputs = {"input_ids": input_ids, "attention_mask": input_mask, "token_type_ids": token_type_ids}
|
||||
(logits,) = model(inputs)
|
||||
result = {
|
||||
"logits": logits.numpy(),
|
||||
}
|
||||
result = model(inputs)
|
||||
self.parent.assertListEqual(list(result["logits"].shape), [self.batch_size, self.seq_length, self.num_labels])
|
||||
|
||||
def prepare_config_and_inputs_for_common(self):
|
||||
|
||||
@@ -113,6 +113,7 @@ class TFFlaubertModelTester:
|
||||
summary_type=self.summary_type,
|
||||
use_proj=self.use_proj,
|
||||
bos_token_id=self.bos_token_id,
|
||||
return_dict=True,
|
||||
)
|
||||
|
||||
return (
|
||||
@@ -141,16 +142,12 @@ class TFFlaubertModelTester:
|
||||
):
|
||||
model = TFFlaubertModel(config=config)
|
||||
inputs = {"input_ids": input_ids, "lengths": input_lengths, "langs": token_type_ids}
|
||||
outputs = model(inputs)
|
||||
result = model(inputs)
|
||||
|
||||
inputs = [input_ids, input_mask]
|
||||
outputs = model(inputs)
|
||||
sequence_output = outputs[0]
|
||||
result = {
|
||||
"sequence_output": sequence_output.numpy(),
|
||||
}
|
||||
result = model(inputs)
|
||||
self.parent.assertListEqual(
|
||||
list(result["sequence_output"].shape), [self.batch_size, self.seq_length, self.hidden_size]
|
||||
list(result["last_hidden_state"].shape), [self.batch_size, self.seq_length, self.hidden_size]
|
||||
)
|
||||
|
||||
def create_and_check_flaubert_lm_head(
|
||||
@@ -168,13 +165,7 @@ class TFFlaubertModelTester:
|
||||
model = TFFlaubertWithLMHeadModel(config)
|
||||
|
||||
inputs = {"input_ids": input_ids, "lengths": input_lengths, "langs": token_type_ids}
|
||||
outputs = model(inputs)
|
||||
|
||||
logits = outputs[0]
|
||||
|
||||
result = {
|
||||
"logits": logits.numpy(),
|
||||
}
|
||||
result = model(inputs)
|
||||
|
||||
self.parent.assertListEqual(list(result["logits"].shape), [self.batch_size, self.seq_length, self.vocab_size])
|
||||
|
||||
@@ -194,12 +185,7 @@ class TFFlaubertModelTester:
|
||||
|
||||
inputs = {"input_ids": input_ids, "lengths": input_lengths}
|
||||
|
||||
start_logits, end_logits = model(inputs)
|
||||
|
||||
result = {
|
||||
"start_logits": start_logits.numpy(),
|
||||
"end_logits": end_logits.numpy(),
|
||||
}
|
||||
result = model(inputs)
|
||||
|
||||
self.parent.assertListEqual(list(result["start_logits"].shape), [self.batch_size, self.seq_length])
|
||||
self.parent.assertListEqual(list(result["end_logits"].shape), [self.batch_size, self.seq_length])
|
||||
@@ -220,11 +206,7 @@ class TFFlaubertModelTester:
|
||||
|
||||
inputs = {"input_ids": input_ids, "lengths": input_lengths}
|
||||
|
||||
(logits,) = model(inputs)
|
||||
|
||||
result = {
|
||||
"logits": logits.numpy(),
|
||||
}
|
||||
result = model(inputs)
|
||||
|
||||
self.parent.assertListEqual(list(result["logits"].shape), [self.batch_size, self.type_sequence_label_size])
|
||||
|
||||
@@ -243,10 +225,7 @@ class TFFlaubertModelTester:
|
||||
config.num_labels = self.num_labels
|
||||
model = TFFlaubertForTokenClassification(config=config)
|
||||
inputs = {"input_ids": input_ids, "attention_mask": input_mask, "token_type_ids": token_type_ids}
|
||||
(logits,) = model(inputs)
|
||||
result = {
|
||||
"logits": logits.numpy(),
|
||||
}
|
||||
result = model(inputs)
|
||||
self.parent.assertListEqual(list(result["logits"].shape), [self.batch_size, self.seq_length, self.num_labels])
|
||||
|
||||
def create_and_check_flaubert_for_multiple_choice(
|
||||
@@ -271,8 +250,7 @@ class TFFlaubertModelTester:
|
||||
"attention_mask": multiple_choice_input_mask,
|
||||
"token_type_ids": multiple_choice_token_type_ids,
|
||||
}
|
||||
(logits,) = model(inputs)
|
||||
result = {"logits": logits.numpy()}
|
||||
result = model(inputs)
|
||||
self.parent.assertListEqual(list(result["logits"].shape), [self.batch_size, self.num_choices])
|
||||
|
||||
def prepare_config_and_inputs_for_common(self):
|
||||
|
||||
@@ -102,6 +102,7 @@ class TFGPT2ModelTester:
|
||||
# initializer_range=self.initializer_range
|
||||
bos_token_id=self.bos_token_id,
|
||||
eos_token_id=self.eos_token_id,
|
||||
return_dict=True,
|
||||
)
|
||||
|
||||
head_mask = ids_tensor([self.num_hidden_layers, self.num_attention_heads], 2)
|
||||
@@ -125,18 +126,15 @@ class TFGPT2ModelTester:
|
||||
"attention_mask": input_mask,
|
||||
"token_type_ids": token_type_ids,
|
||||
}
|
||||
sequence_output = model(inputs)[0]
|
||||
result = model(inputs)
|
||||
|
||||
inputs = [input_ids, None, input_mask] # None is the input for 'past'
|
||||
sequence_output = model(inputs)[0]
|
||||
result = model(inputs)
|
||||
|
||||
sequence_output = model(input_ids)[0]
|
||||
result = model(input_ids)
|
||||
|
||||
result = {
|
||||
"sequence_output": sequence_output.numpy(),
|
||||
}
|
||||
self.parent.assertListEqual(
|
||||
list(result["sequence_output"].shape), [self.batch_size, self.seq_length, self.hidden_size],
|
||||
list(result["last_hidden_state"].shape), [self.batch_size, self.seq_length, self.hidden_size],
|
||||
)
|
||||
|
||||
def create_and_check_gpt2_model_past(self, config, input_ids, input_mask, head_mask, token_type_ids, *args):
|
||||
@@ -150,7 +148,7 @@ class TFGPT2ModelTester:
|
||||
self.parent.assertTrue(len(outputs) == len(outputs_use_cache_conf))
|
||||
self.parent.assertTrue(len(outputs) == len(outputs_no_past) + 1)
|
||||
|
||||
output, past = outputs
|
||||
output, past = outputs.to_tuple()
|
||||
|
||||
# create hypothetical next token and extend to next_input_ids
|
||||
next_tokens = ids_tensor((self.batch_size, 1), config.vocab_size)
|
||||
@@ -160,8 +158,8 @@ class TFGPT2ModelTester:
|
||||
next_input_ids = tf.concat([input_ids, next_tokens], axis=-1)
|
||||
next_token_type_ids = tf.concat([token_type_ids, next_token_types], axis=-1)
|
||||
|
||||
output_from_no_past, _ = model(next_input_ids, token_type_ids=next_token_type_ids)
|
||||
output_from_past, _ = model(next_tokens, token_type_ids=next_token_types, past=past)
|
||||
output_from_no_past = model(next_input_ids, token_type_ids=next_token_type_ids)["last_hidden_state"]
|
||||
output_from_past = model(next_tokens, token_type_ids=next_token_types, past=past)["last_hidden_state"]
|
||||
|
||||
# select random slice
|
||||
random_slice_idx = int(ids_tensor((1,), shape_list(output_from_past)[-1]))
|
||||
@@ -183,7 +181,7 @@ class TFGPT2ModelTester:
|
||||
attn_mask = tf.concat([attn_mask_begin, attn_mask_end], axis=1)
|
||||
|
||||
# first forward pass
|
||||
output, past = model(input_ids, attention_mask=attn_mask)
|
||||
output, past = model(input_ids, attention_mask=attn_mask).to_tuple()
|
||||
|
||||
# create hypothetical next token and extend to next_input_ids
|
||||
next_tokens = ids_tensor((self.batch_size, 1), config.vocab_size)
|
||||
@@ -202,8 +200,8 @@ class TFGPT2ModelTester:
|
||||
attn_mask = tf.concat([attn_mask, tf.ones((shape_list(attn_mask)[0], 1), dtype=tf.int32)], axis=1)
|
||||
|
||||
# get two different outputs
|
||||
output_from_no_past, _ = model(next_input_ids, attention_mask=attn_mask)
|
||||
output_from_past, _ = model(next_tokens, past=past, attention_mask=attn_mask)
|
||||
output_from_no_past = model(next_input_ids, attention_mask=attn_mask)["last_hidden_state"]
|
||||
output_from_past = model(next_tokens, past=past, attention_mask=attn_mask)["last_hidden_state"]
|
||||
|
||||
# select random slice
|
||||
random_slice_idx = int(ids_tensor((1,), shape_list(output_from_past)[-1]))
|
||||
@@ -220,12 +218,9 @@ class TFGPT2ModelTester:
|
||||
"attention_mask": input_mask,
|
||||
"token_type_ids": token_type_ids,
|
||||
}
|
||||
prediction_scores = model(inputs)[0]
|
||||
result = {
|
||||
"prediction_scores": prediction_scores.numpy(),
|
||||
}
|
||||
result = model(inputs)
|
||||
self.parent.assertListEqual(
|
||||
list(result["prediction_scores"].shape), [self.batch_size, self.seq_length, self.vocab_size],
|
||||
list(result["logits"].shape), [self.batch_size, self.seq_length, self.vocab_size],
|
||||
)
|
||||
|
||||
def create_and_check_gpt2_double_head(
|
||||
@@ -243,8 +238,7 @@ class TFGPT2ModelTester:
|
||||
"attention_mask": multiple_choice_input_mask,
|
||||
"token_type_ids": multiple_choice_token_type_ids,
|
||||
}
|
||||
lm_logits, mc_logits = model(inputs)[:2]
|
||||
result = {"lm_logits": lm_logits.numpy(), "mc_logits": mc_logits.numpy()}
|
||||
result = model(inputs)
|
||||
self.parent.assertListEqual(
|
||||
list(result["lm_logits"].shape), [self.batch_size, self.num_choices, self.seq_length, self.vocab_size],
|
||||
)
|
||||
|
||||
@@ -138,6 +138,7 @@ class TFMobileBertModelTest(TFModelTesterMixin, unittest.TestCase):
|
||||
type_vocab_size=self.type_vocab_size,
|
||||
initializer_range=self.initializer_range,
|
||||
embedding_size=self.embedding_size,
|
||||
return_dict=True,
|
||||
)
|
||||
|
||||
return config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
|
||||
@@ -147,33 +148,26 @@ class TFMobileBertModelTest(TFModelTesterMixin, unittest.TestCase):
|
||||
):
|
||||
model = TFMobileBertModel(config=config)
|
||||
inputs = {"input_ids": input_ids, "attention_mask": input_mask, "token_type_ids": token_type_ids}
|
||||
sequence_output, pooled_output = model(inputs)
|
||||
result = model(inputs)
|
||||
|
||||
inputs = [input_ids, input_mask]
|
||||
sequence_output, pooled_output = model(inputs)
|
||||
result = model(inputs)
|
||||
|
||||
sequence_output, pooled_output = model(input_ids)
|
||||
result = model(input_ids)
|
||||
|
||||
result = {
|
||||
"sequence_output": sequence_output.numpy(),
|
||||
"pooled_output": pooled_output.numpy(),
|
||||
}
|
||||
self.parent.assertListEqual(
|
||||
list(result["sequence_output"].shape), [self.batch_size, self.seq_length, self.hidden_size]
|
||||
list(result["last_hidden_state"].shape), [self.batch_size, self.seq_length, self.hidden_size]
|
||||
)
|
||||
self.parent.assertListEqual(list(result["pooled_output"].shape), [self.batch_size, self.hidden_size])
|
||||
self.parent.assertListEqual(list(result["pooler_output"].shape), [self.batch_size, self.hidden_size])
|
||||
|
||||
def create_and_check_mobilebert_for_masked_lm(
|
||||
self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
|
||||
):
|
||||
model = TFMobileBertForMaskedLM(config=config)
|
||||
inputs = {"input_ids": input_ids, "attention_mask": input_mask, "token_type_ids": token_type_ids}
|
||||
(prediction_scores,) = model(inputs)
|
||||
result = {
|
||||
"prediction_scores": prediction_scores.numpy(),
|
||||
}
|
||||
result = model(inputs)
|
||||
self.parent.assertListEqual(
|
||||
list(result["prediction_scores"].shape), [self.batch_size, self.seq_length, self.vocab_size]
|
||||
list(result["logits"].shape), [self.batch_size, self.seq_length, self.vocab_size]
|
||||
)
|
||||
|
||||
def create_and_check_mobilebert_for_next_sequence_prediction(
|
||||
@@ -181,26 +175,19 @@ class TFMobileBertModelTest(TFModelTesterMixin, unittest.TestCase):
|
||||
):
|
||||
model = TFMobileBertForNextSentencePrediction(config=config)
|
||||
inputs = {"input_ids": input_ids, "attention_mask": input_mask, "token_type_ids": token_type_ids}
|
||||
(seq_relationship_score,) = model(inputs)
|
||||
result = {
|
||||
"seq_relationship_score": seq_relationship_score.numpy(),
|
||||
}
|
||||
self.parent.assertListEqual(list(result["seq_relationship_score"].shape), [self.batch_size, 2])
|
||||
result = model(inputs)
|
||||
self.parent.assertListEqual(list(result["logits"].shape), [self.batch_size, 2])
|
||||
|
||||
def create_and_check_mobilebert_for_pretraining(
|
||||
self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
|
||||
):
|
||||
model = TFMobileBertForPreTraining(config=config)
|
||||
inputs = {"input_ids": input_ids, "attention_mask": input_mask, "token_type_ids": token_type_ids}
|
||||
prediction_scores, seq_relationship_score = model(inputs)
|
||||
result = {
|
||||
"prediction_scores": prediction_scores.numpy(),
|
||||
"seq_relationship_score": seq_relationship_score.numpy(),
|
||||
}
|
||||
result = model(inputs)
|
||||
self.parent.assertListEqual(
|
||||
list(result["prediction_scores"].shape), [self.batch_size, self.seq_length, self.vocab_size]
|
||||
list(result["prediction_logits"].shape), [self.batch_size, self.seq_length, self.vocab_size]
|
||||
)
|
||||
self.parent.assertListEqual(list(result["seq_relationship_score"].shape), [self.batch_size, 2])
|
||||
self.parent.assertListEqual(list(result["seq_relationship_logits"].shape), [self.batch_size, 2])
|
||||
|
||||
def create_and_check_mobilebert_for_sequence_classification(
|
||||
self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
|
||||
@@ -208,10 +195,7 @@ class TFMobileBertModelTest(TFModelTesterMixin, unittest.TestCase):
|
||||
config.num_labels = self.num_labels
|
||||
model = TFMobileBertForSequenceClassification(config=config)
|
||||
inputs = {"input_ids": input_ids, "attention_mask": input_mask, "token_type_ids": token_type_ids}
|
||||
(logits,) = model(inputs)
|
||||
result = {
|
||||
"logits": logits.numpy(),
|
||||
}
|
||||
result = model(inputs)
|
||||
self.parent.assertListEqual(list(result["logits"].shape), [self.batch_size, self.num_labels])
|
||||
|
||||
def create_and_check_mobilebert_for_multiple_choice(
|
||||
@@ -227,10 +211,7 @@ class TFMobileBertModelTest(TFModelTesterMixin, unittest.TestCase):
|
||||
"attention_mask": multiple_choice_input_mask,
|
||||
"token_type_ids": multiple_choice_token_type_ids,
|
||||
}
|
||||
(logits,) = model(inputs)
|
||||
result = {
|
||||
"logits": logits.numpy(),
|
||||
}
|
||||
result = model(inputs)
|
||||
self.parent.assertListEqual(list(result["logits"].shape), [self.batch_size, self.num_choices])
|
||||
|
||||
def create_and_check_mobilebert_for_token_classification(
|
||||
@@ -239,10 +220,7 @@ class TFMobileBertModelTest(TFModelTesterMixin, unittest.TestCase):
|
||||
config.num_labels = self.num_labels
|
||||
model = TFMobileBertForTokenClassification(config=config)
|
||||
inputs = {"input_ids": input_ids, "attention_mask": input_mask, "token_type_ids": token_type_ids}
|
||||
(logits,) = model(inputs)
|
||||
result = {
|
||||
"logits": logits.numpy(),
|
||||
}
|
||||
result = model(inputs)
|
||||
self.parent.assertListEqual(
|
||||
list(result["logits"].shape), [self.batch_size, self.seq_length, self.num_labels]
|
||||
)
|
||||
@@ -252,11 +230,7 @@ class TFMobileBertModelTest(TFModelTesterMixin, unittest.TestCase):
|
||||
):
|
||||
model = TFMobileBertForQuestionAnswering(config=config)
|
||||
inputs = {"input_ids": input_ids, "attention_mask": input_mask, "token_type_ids": token_type_ids}
|
||||
start_logits, end_logits = model(inputs)
|
||||
result = {
|
||||
"start_logits": start_logits.numpy(),
|
||||
"end_logits": end_logits.numpy(),
|
||||
}
|
||||
result = model(inputs)
|
||||
self.parent.assertListEqual(list(result["start_logits"].shape), [self.batch_size, self.seq_length])
|
||||
self.parent.assertListEqual(list(result["end_logits"].shape), [self.batch_size, self.seq_length])
|
||||
|
||||
|
||||
@@ -94,9 +94,10 @@ class TFOpenAIGPTModelTester:
|
||||
# hidden_dropout_prob=self.hidden_dropout_prob,
|
||||
# attention_probs_dropout_prob=self.attention_probs_dropout_prob,
|
||||
n_positions=self.max_position_embeddings,
|
||||
n_ctx=self.max_position_embeddings
|
||||
n_ctx=self.max_position_embeddings,
|
||||
# type_vocab_size=self.type_vocab_size,
|
||||
# initializer_range=self.initializer_range
|
||||
# initializer_range=self.initializer_range,
|
||||
return_dict=True,
|
||||
)
|
||||
|
||||
head_mask = ids_tensor([self.num_hidden_layers, self.num_attention_heads], 2)
|
||||
@@ -116,30 +117,22 @@ class TFOpenAIGPTModelTester:
|
||||
def create_and_check_openai_gpt_model(self, config, input_ids, input_mask, head_mask, token_type_ids, *args):
|
||||
model = TFOpenAIGPTModel(config=config)
|
||||
inputs = {"input_ids": input_ids, "attention_mask": input_mask, "token_type_ids": token_type_ids}
|
||||
sequence_output = model(inputs)[0]
|
||||
result = model(inputs)
|
||||
|
||||
inputs = [input_ids, input_mask]
|
||||
sequence_output = model(inputs)[0]
|
||||
result = model(inputs)
|
||||
|
||||
sequence_output = model(input_ids)[0]
|
||||
result = model(input_ids)
|
||||
|
||||
result = {
|
||||
"sequence_output": sequence_output.numpy(),
|
||||
}
|
||||
self.parent.assertListEqual(
|
||||
list(result["sequence_output"].shape), [self.batch_size, self.seq_length, self.hidden_size]
|
||||
list(result["last_hidden_state"].shape), [self.batch_size, self.seq_length, self.hidden_size]
|
||||
)
|
||||
|
||||
def create_and_check_openai_gpt_lm_head(self, config, input_ids, input_mask, head_mask, token_type_ids, *args):
|
||||
model = TFOpenAIGPTLMHeadModel(config=config)
|
||||
inputs = {"input_ids": input_ids, "attention_mask": input_mask, "token_type_ids": token_type_ids}
|
||||
prediction_scores = model(inputs)[0]
|
||||
result = {
|
||||
"prediction_scores": prediction_scores.numpy(),
|
||||
}
|
||||
self.parent.assertListEqual(
|
||||
list(result["prediction_scores"].shape), [self.batch_size, self.seq_length, self.vocab_size]
|
||||
)
|
||||
result = model(inputs)
|
||||
self.parent.assertListEqual(list(result["logits"].shape), [self.batch_size, self.seq_length, self.vocab_size])
|
||||
|
||||
def create_and_check_openai_gpt_double_head(
|
||||
self, config, input_ids, input_mask, head_mask, token_type_ids, mc_token_ids, *args
|
||||
@@ -156,8 +149,7 @@ class TFOpenAIGPTModelTester:
|
||||
"attention_mask": multiple_choice_input_mask,
|
||||
"token_type_ids": multiple_choice_token_type_ids,
|
||||
}
|
||||
lm_logits, mc_logits = model(inputs)[:2]
|
||||
result = {"lm_logits": lm_logits.numpy(), "mc_logits": mc_logits.numpy()}
|
||||
result = model(inputs)
|
||||
self.parent.assertListEqual(
|
||||
list(result["lm_logits"].shape), [self.batch_size, self.num_choices, self.seq_length, self.vocab_size]
|
||||
)
|
||||
|
||||
@@ -95,6 +95,7 @@ class TFRobertaModelTester:
|
||||
max_position_embeddings=self.max_position_embeddings,
|
||||
type_vocab_size=self.type_vocab_size,
|
||||
initializer_range=self.initializer_range,
|
||||
return_dict=True,
|
||||
)
|
||||
|
||||
return config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
|
||||
@@ -104,31 +105,23 @@ class TFRobertaModelTester:
|
||||
):
|
||||
model = TFRobertaModel(config=config)
|
||||
inputs = {"input_ids": input_ids, "attention_mask": input_mask, "token_type_ids": token_type_ids}
|
||||
sequence_output = model(inputs)[0]
|
||||
result = model(inputs)
|
||||
|
||||
inputs = [input_ids, input_mask]
|
||||
sequence_output = model(inputs)[0]
|
||||
result = model(inputs)
|
||||
|
||||
sequence_output = model(input_ids)[0]
|
||||
result = model(input_ids)
|
||||
|
||||
result = {
|
||||
"sequence_output": sequence_output.numpy(),
|
||||
}
|
||||
self.parent.assertListEqual(
|
||||
list(result["sequence_output"].shape), [self.batch_size, self.seq_length, self.hidden_size]
|
||||
list(result["last_hidden_state"].shape), [self.batch_size, self.seq_length, self.hidden_size]
|
||||
)
|
||||
|
||||
def create_and_check_roberta_for_masked_lm(
|
||||
self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
|
||||
):
|
||||
model = TFRobertaForMaskedLM(config=config)
|
||||
prediction_scores = model([input_ids, input_mask, token_type_ids])[0]
|
||||
result = {
|
||||
"prediction_scores": prediction_scores.numpy(),
|
||||
}
|
||||
self.parent.assertListEqual(
|
||||
list(result["prediction_scores"].shape), [self.batch_size, self.seq_length, self.vocab_size]
|
||||
)
|
||||
result = model([input_ids, input_mask, token_type_ids])
|
||||
self.parent.assertListEqual(list(result["logits"].shape), [self.batch_size, self.seq_length, self.vocab_size])
|
||||
|
||||
def create_and_check_roberta_for_token_classification(
|
||||
self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
|
||||
@@ -136,10 +129,7 @@ class TFRobertaModelTester:
|
||||
config.num_labels = self.num_labels
|
||||
model = TFRobertaForTokenClassification(config=config)
|
||||
inputs = {"input_ids": input_ids, "attention_mask": input_mask, "token_type_ids": token_type_ids}
|
||||
(logits,) = model(inputs)
|
||||
result = {
|
||||
"logits": logits.numpy(),
|
||||
}
|
||||
result = model(inputs)
|
||||
self.parent.assertListEqual(list(result["logits"].shape), [self.batch_size, self.seq_length, self.num_labels])
|
||||
|
||||
def create_and_check_roberta_for_question_answering(
|
||||
@@ -147,11 +137,7 @@ class TFRobertaModelTester:
|
||||
):
|
||||
model = TFRobertaForQuestionAnswering(config=config)
|
||||
inputs = {"input_ids": input_ids, "attention_mask": input_mask, "token_type_ids": token_type_ids}
|
||||
start_logits, end_logits = model(inputs)
|
||||
result = {
|
||||
"start_logits": start_logits.numpy(),
|
||||
"end_logits": end_logits.numpy(),
|
||||
}
|
||||
result = model(inputs)
|
||||
self.parent.assertListEqual(list(result["start_logits"].shape), [self.batch_size, self.seq_length])
|
||||
self.parent.assertListEqual(list(result["end_logits"].shape), [self.batch_size, self.seq_length])
|
||||
|
||||
@@ -168,10 +154,7 @@ class TFRobertaModelTester:
|
||||
"attention_mask": multiple_choice_input_mask,
|
||||
"token_type_ids": multiple_choice_token_type_ids,
|
||||
}
|
||||
(logits,) = model(inputs)
|
||||
result = {
|
||||
"logits": logits.numpy(),
|
||||
}
|
||||
result = model(inputs)
|
||||
self.parent.assertListEqual(list(result["logits"].shape), [self.batch_size, self.num_choices])
|
||||
|
||||
def prepare_config_and_inputs_for_common(self):
|
||||
|
||||
@@ -78,6 +78,7 @@ class TFT5ModelTester:
|
||||
bos_token_id=self.pad_token_id,
|
||||
pad_token_id=self.pad_token_id,
|
||||
decoder_start_token_id=self.pad_token_id,
|
||||
return_dict=True,
|
||||
)
|
||||
|
||||
return (config, input_ids, input_mask, token_labels)
|
||||
@@ -89,22 +90,14 @@ class TFT5ModelTester:
|
||||
"decoder_input_ids": input_ids,
|
||||
"decoder_attention_mask": input_mask,
|
||||
}
|
||||
decoder_output, decoder_past, encoder_output = model(inputs)
|
||||
result = model(inputs)
|
||||
|
||||
decoder_output, decoder_past, encoder_output = model(
|
||||
input_ids, decoder_attention_mask=input_mask, decoder_input_ids=input_ids
|
||||
)
|
||||
result = {
|
||||
"encoder_output": encoder_output.numpy(),
|
||||
"decoder_past": decoder_past,
|
||||
"decoder_output": decoder_output.numpy(),
|
||||
}
|
||||
self.parent.assertListEqual(
|
||||
list(result["encoder_output"].shape), [self.batch_size, self.seq_length, self.hidden_size]
|
||||
)
|
||||
self.parent.assertListEqual(
|
||||
list(result["decoder_output"].shape), [self.batch_size, self.seq_length, self.hidden_size]
|
||||
)
|
||||
result = model(input_ids, decoder_attention_mask=input_mask, decoder_input_ids=input_ids)
|
||||
decoder_output = result["last_hidden_state"]
|
||||
decoder_past = result["decoder_past_key_values"]
|
||||
encoder_output = result["encoder_last_hidden_state"]
|
||||
self.parent.assertListEqual(list(encoder_output.shape), [self.batch_size, self.seq_length, self.hidden_size])
|
||||
self.parent.assertListEqual(list(decoder_output.shape), [self.batch_size, self.seq_length, self.hidden_size])
|
||||
self.parent.assertEqual(len(decoder_past), 2)
|
||||
# decoder_past[0] should correspond to encoder output
|
||||
self.parent.assertTrue(tf.reduce_all(tf.math.equal(decoder_past[0][0], encoder_output)))
|
||||
@@ -121,14 +114,9 @@ class TFT5ModelTester:
|
||||
"decoder_attention_mask": input_mask,
|
||||
}
|
||||
|
||||
prediction_scores, _, _ = model(inputs_dict)
|
||||
result = model(inputs_dict)
|
||||
|
||||
result = {
|
||||
"prediction_scores": prediction_scores.numpy(),
|
||||
}
|
||||
self.parent.assertListEqual(
|
||||
list(result["prediction_scores"].shape), [self.batch_size, self.seq_length, self.vocab_size]
|
||||
)
|
||||
self.parent.assertListEqual(list(result["logits"].shape), [self.batch_size, self.seq_length, self.vocab_size])
|
||||
|
||||
def create_and_check_t5_decoder_model_past(self, config, input_ids, decoder_input_ids, attention_mask):
|
||||
model = TFT5Model(config=config).get_decoder()
|
||||
|
||||
@@ -79,6 +79,7 @@ class TFTransfoXLModelTester:
|
||||
div_val=self.div_val,
|
||||
n_layer=self.num_hidden_layers,
|
||||
eos_token_id=self.eos_token_id,
|
||||
return_dict=True,
|
||||
)
|
||||
|
||||
return (config, input_ids_1, input_ids_2, lm_labels)
|
||||
@@ -90,11 +91,11 @@ class TFTransfoXLModelTester:
|
||||
def create_and_check_transfo_xl_model(self, config, input_ids_1, input_ids_2, lm_labels):
|
||||
model = TFTransfoXLModel(config)
|
||||
|
||||
hidden_states_1, mems_1 = model(input_ids_1)
|
||||
hidden_states_1, mems_1 = model(input_ids_1).to_tuple()
|
||||
|
||||
inputs = {"input_ids": input_ids_2, "mems": mems_1}
|
||||
|
||||
hidden_states_2, mems_2 = model(inputs)
|
||||
hidden_states_2, mems_2 = model(inputs).to_tuple()
|
||||
|
||||
result = {
|
||||
"hidden_states_1": hidden_states_1.numpy(),
|
||||
@@ -121,16 +122,16 @@ class TFTransfoXLModelTester:
|
||||
def create_and_check_transfo_xl_lm_head(self, config, input_ids_1, input_ids_2, lm_labels):
|
||||
model = TFTransfoXLLMHeadModel(config)
|
||||
|
||||
lm_logits_1, mems_1 = model(input_ids_1)
|
||||
lm_logits_1, mems_1 = model(input_ids_1).to_tuple()
|
||||
|
||||
inputs = {"input_ids": input_ids_1, "labels": lm_labels}
|
||||
_, mems_1 = model(inputs)
|
||||
_, mems_1 = model(inputs).to_tuple()
|
||||
|
||||
lm_logits_2, mems_2 = model([input_ids_2, mems_1])
|
||||
lm_logits_2, mems_2 = model([input_ids_2, mems_1]).to_tuple()
|
||||
|
||||
inputs = {"input_ids": input_ids_1, "mems": mems_1, "labels": lm_labels}
|
||||
|
||||
_, mems_2 = model(inputs)
|
||||
_, mems_2 = model(inputs).to_tuple()
|
||||
|
||||
result = {
|
||||
"mems_1": [mem.numpy() for mem in mems_1],
|
||||
|
||||
@@ -112,6 +112,7 @@ class TFXLMModelTester:
|
||||
summary_type=self.summary_type,
|
||||
use_proj=self.use_proj,
|
||||
bos_token_id=self.bos_token_id,
|
||||
return_dict=True,
|
||||
)
|
||||
|
||||
return (
|
||||
@@ -140,16 +141,12 @@ class TFXLMModelTester:
|
||||
):
|
||||
model = TFXLMModel(config=config)
|
||||
inputs = {"input_ids": input_ids, "lengths": input_lengths, "langs": token_type_ids}
|
||||
outputs = model(inputs)
|
||||
result = model(inputs)
|
||||
|
||||
inputs = [input_ids, input_mask]
|
||||
outputs = model(inputs)
|
||||
sequence_output = outputs[0]
|
||||
result = {
|
||||
"sequence_output": sequence_output.numpy(),
|
||||
}
|
||||
result = model(inputs)
|
||||
self.parent.assertListEqual(
|
||||
list(result["sequence_output"].shape), [self.batch_size, self.seq_length, self.hidden_size]
|
||||
list(result["last_hidden_state"].shape), [self.batch_size, self.seq_length, self.hidden_size]
|
||||
)
|
||||
|
||||
def create_and_check_xlm_lm_head(
|
||||
@@ -169,11 +166,7 @@ class TFXLMModelTester:
|
||||
inputs = {"input_ids": input_ids, "lengths": input_lengths, "langs": token_type_ids}
|
||||
outputs = model(inputs)
|
||||
|
||||
logits = outputs[0]
|
||||
|
||||
result = {
|
||||
"logits": logits.numpy(),
|
||||
}
|
||||
result = outputs
|
||||
|
||||
self.parent.assertListEqual(list(result["logits"].shape), [self.batch_size, self.seq_length, self.vocab_size])
|
||||
|
||||
@@ -193,12 +186,7 @@ class TFXLMModelTester:
|
||||
|
||||
inputs = {"input_ids": input_ids, "lengths": input_lengths}
|
||||
|
||||
start_logits, end_logits = model(inputs)
|
||||
|
||||
result = {
|
||||
"start_logits": start_logits.numpy(),
|
||||
"end_logits": end_logits.numpy(),
|
||||
}
|
||||
result = model(inputs)
|
||||
|
||||
self.parent.assertListEqual(list(result["start_logits"].shape), [self.batch_size, self.seq_length])
|
||||
self.parent.assertListEqual(list(result["end_logits"].shape), [self.batch_size, self.seq_length])
|
||||
@@ -219,11 +207,7 @@ class TFXLMModelTester:
|
||||
|
||||
inputs = {"input_ids": input_ids, "lengths": input_lengths}
|
||||
|
||||
(logits,) = model(inputs)
|
||||
|
||||
result = {
|
||||
"logits": logits.numpy(),
|
||||
}
|
||||
result = model(inputs)
|
||||
|
||||
self.parent.assertListEqual(list(result["logits"].shape), [self.batch_size, self.type_sequence_label_size])
|
||||
|
||||
@@ -242,10 +226,7 @@ class TFXLMModelTester:
|
||||
config.num_labels = self.num_labels
|
||||
model = TFXLMForTokenClassification(config=config)
|
||||
inputs = {"input_ids": input_ids, "attention_mask": input_mask, "token_type_ids": token_type_ids}
|
||||
(logits,) = model(inputs)
|
||||
result = {
|
||||
"logits": logits.numpy(),
|
||||
}
|
||||
result = model(inputs)
|
||||
self.parent.assertListEqual(list(result["logits"].shape), [self.batch_size, self.seq_length, self.num_labels])
|
||||
|
||||
def create_and_check_xlm_for_multiple_choice(
|
||||
@@ -270,8 +251,7 @@ class TFXLMModelTester:
|
||||
"attention_mask": multiple_choice_input_mask,
|
||||
"token_type_ids": multiple_choice_token_type_ids,
|
||||
}
|
||||
(logits,) = model(inputs)
|
||||
result = {"logits": logits.numpy()}
|
||||
result = model(inputs)
|
||||
self.parent.assertListEqual(list(result["logits"].shape), [self.batch_size, self.num_choices])
|
||||
|
||||
def prepare_config_and_inputs_for_common(self):
|
||||
|
||||
@@ -36,7 +36,7 @@ class TFFlaubertModelIntegrationTest(unittest.TestCase):
|
||||
"attention_mask": tf.convert_to_tensor([[1, 1, 1, 1, 1, 1]], dtype=tf.int32),
|
||||
}
|
||||
|
||||
output = model(features)[0]
|
||||
output = model(features)["last_hidden_state"]
|
||||
expected_shape = tf.TensorShape((1, 6, 768))
|
||||
self.assertEqual(output.shape, expected_shape)
|
||||
# compare the actual values for a slice.
|
||||
|
||||
@@ -110,6 +110,7 @@ class TFXLNetModelTester:
|
||||
bos_token_id=self.bos_token_id,
|
||||
pad_token_id=self.pad_token_id,
|
||||
eos_token_id=self.eos_token_id,
|
||||
return_dict=True,
|
||||
)
|
||||
|
||||
return (
|
||||
@@ -147,17 +148,10 @@ class TFXLNetModelTester:
|
||||
model = TFXLNetModel(config)
|
||||
|
||||
inputs = {"input_ids": input_ids_1, "input_mask": input_mask, "token_type_ids": segment_ids}
|
||||
|
||||
_, _ = model(inputs)
|
||||
result = model(inputs)
|
||||
|
||||
inputs = [input_ids_1, input_mask]
|
||||
|
||||
outputs, mems_1 = model(inputs)
|
||||
|
||||
result = {
|
||||
"mems_1": [mem.numpy() for mem in mems_1],
|
||||
"outputs": outputs.numpy(),
|
||||
}
|
||||
result = model(inputs)
|
||||
|
||||
config.mem_len = 0
|
||||
model = TFXLNetModel(config)
|
||||
@@ -165,10 +159,10 @@ class TFXLNetModelTester:
|
||||
self.parent.assertEqual(len(no_mems_outputs), 1)
|
||||
|
||||
self.parent.assertListEqual(
|
||||
list(result["outputs"].shape), [self.batch_size, self.seq_length, self.hidden_size]
|
||||
list(result["last_hidden_state"].shape), [self.batch_size, self.seq_length, self.hidden_size]
|
||||
)
|
||||
self.parent.assertListEqual(
|
||||
list(list(mem.shape) for mem in result["mems_1"]),
|
||||
list(list(mem.shape) for mem in result["mems"]),
|
||||
[[self.seq_length, self.batch_size, self.hidden_size]] * self.num_hidden_layers,
|
||||
)
|
||||
|
||||
@@ -189,16 +183,13 @@ class TFXLNetModelTester:
|
||||
model = TFXLNetLMHeadModel(config)
|
||||
|
||||
inputs_1 = {"input_ids": input_ids_1, "token_type_ids": segment_ids}
|
||||
|
||||
all_logits_1, mems_1 = model(inputs_1)
|
||||
all_logits_1, mems_1 = model(inputs_1).to_tuple()
|
||||
|
||||
inputs_2 = {"input_ids": input_ids_2, "mems": mems_1, "token_type_ids": segment_ids}
|
||||
|
||||
all_logits_2, mems_2 = model(inputs_2)
|
||||
all_logits_2, mems_2 = model(inputs_2).to_tuple()
|
||||
|
||||
inputs_3 = {"input_ids": input_ids_q, "perm_mask": perm_mask, "target_mapping": target_mapping}
|
||||
|
||||
logits, _ = model(inputs_3)
|
||||
logits, _ = model(inputs_3).to_tuple()
|
||||
|
||||
result = {
|
||||
"mems_1": [mem.numpy() for mem in mems_1],
|
||||
@@ -240,13 +231,7 @@ class TFXLNetModelTester:
|
||||
model = TFXLNetForQuestionAnsweringSimple(config)
|
||||
|
||||
inputs = {"input_ids": input_ids_1, "attention_mask": input_mask, "token_type_ids": segment_ids}
|
||||
start_logits, end_logits, mems = model(inputs)
|
||||
|
||||
result = {
|
||||
"start_logits": start_logits.numpy(),
|
||||
"end_logits": end_logits.numpy(),
|
||||
"mems": [m.numpy() for m in mems],
|
||||
}
|
||||
result = model(inputs)
|
||||
|
||||
self.parent.assertListEqual(list(result["start_logits"].shape), [self.batch_size, self.seq_length])
|
||||
self.parent.assertListEqual(list(result["end_logits"].shape), [self.batch_size, self.seq_length])
|
||||
@@ -271,16 +256,11 @@ class TFXLNetModelTester:
|
||||
):
|
||||
model = TFXLNetForSequenceClassification(config)
|
||||
|
||||
logits, mems_1 = model(input_ids_1)
|
||||
|
||||
result = {
|
||||
"mems_1": [mem.numpy() for mem in mems_1],
|
||||
"logits": logits.numpy(),
|
||||
}
|
||||
result = model(input_ids_1)
|
||||
|
||||
self.parent.assertListEqual(list(result["logits"].shape), [self.batch_size, self.type_sequence_label_size])
|
||||
self.parent.assertListEqual(
|
||||
list(list(mem.shape) for mem in result["mems_1"]),
|
||||
list(list(mem.shape) for mem in result["mems"]),
|
||||
[[self.seq_length, self.batch_size, self.hidden_size]] * self.num_hidden_layers,
|
||||
)
|
||||
|
||||
@@ -305,16 +285,12 @@ class TFXLNetModelTester:
|
||||
"attention_mask": input_mask,
|
||||
# 'token_type_ids': token_type_ids
|
||||
}
|
||||
logits, mems_1 = model(inputs)
|
||||
result = {
|
||||
"mems_1": [mem.numpy() for mem in mems_1],
|
||||
"logits": logits.numpy(),
|
||||
}
|
||||
result = model(inputs)
|
||||
self.parent.assertListEqual(
|
||||
list(result["logits"].shape), [self.batch_size, self.seq_length, config.num_labels]
|
||||
)
|
||||
self.parent.assertListEqual(
|
||||
list(list(mem.shape) for mem in result["mems_1"]),
|
||||
list(list(mem.shape) for mem in result["mems"]),
|
||||
[[self.seq_length, self.batch_size, self.hidden_size]] * self.num_hidden_layers,
|
||||
)
|
||||
|
||||
@@ -342,15 +318,11 @@ class TFXLNetModelTester:
|
||||
"attention_mask": multiple_choice_input_mask,
|
||||
"token_type_ids": multiple_choice_token_type_ids,
|
||||
}
|
||||
(logits, mems_1) = model(inputs)
|
||||
result = {
|
||||
"mems_1": [mem.numpy() for mem in mems_1],
|
||||
"logits": logits.numpy(),
|
||||
}
|
||||
result = model(inputs)
|
||||
|
||||
self.parent.assertListEqual(list(result["logits"].shape), [self.batch_size, self.num_choices])
|
||||
self.parent.assertListEqual(
|
||||
list(list(mem.shape) for mem in result["mems_1"]),
|
||||
list(list(mem.shape) for mem in result["mems"]),
|
||||
[[self.seq_length, self.batch_size * self.num_choices, self.hidden_size]] * self.num_hidden_layers,
|
||||
)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user