[No merge] TF integration testing (#7621)
* stash
* TF Integration testing for ELECTRA, BERT, Longformer
* Trigger slow tests
* Apply suggestions from code review
This commit is contained in:
@@ -514,7 +514,7 @@ class ElectraDiscriminatorPredictions(nn.Module):
|
|||||||
def forward(self, discriminator_hidden_states):
|
def forward(self, discriminator_hidden_states):
|
||||||
hidden_states = self.dense(discriminator_hidden_states)
|
hidden_states = self.dense(discriminator_hidden_states)
|
||||||
hidden_states = get_activation(self.config.hidden_act)(hidden_states)
|
hidden_states = get_activation(self.config.hidden_act)(hidden_states)
|
||||||
logits = self.dense_prediction(hidden_states).squeeze()
|
logits = self.dense_prediction(hidden_states).squeeze(-1)
|
||||||
|
|
||||||
return logits
|
return logits
|
||||||
|
|
||||||
|
|||||||
@@ -425,7 +425,7 @@ class TFElectraDiscriminatorPredictions(tf.keras.layers.Layer):
|
|||||||
def call(self, discriminator_hidden_states, training=False):
|
def call(self, discriminator_hidden_states, training=False):
|
||||||
hidden_states = self.dense(discriminator_hidden_states)
|
hidden_states = self.dense(discriminator_hidden_states)
|
||||||
hidden_states = get_tf_activation(self.config.hidden_act)(hidden_states)
|
hidden_states = get_tf_activation(self.config.hidden_act)(hidden_states)
|
||||||
logits = tf.squeeze(self.dense_prediction(hidden_states))
|
logits = tf.squeeze(self.dense_prediction(hidden_states), -1)
|
||||||
|
|
||||||
return logits
|
return logits
|
||||||
|
|
||||||
|
|||||||
@@ -17,7 +17,7 @@
|
|||||||
import unittest
|
import unittest
|
||||||
|
|
||||||
from transformers import BertConfig, is_tf_available
|
from transformers import BertConfig, is_tf_available
|
||||||
from transformers.testing_utils import require_tf
|
from transformers.testing_utils import require_tf, slow
|
||||||
|
|
||||||
from .test_configuration_common import ConfigTester
|
from .test_configuration_common import ConfigTester
|
||||||
from .test_modeling_tf_common import TFModelTesterMixin, ids_tensor
|
from .test_modeling_tf_common import TFModelTesterMixin, ids_tensor
|
||||||
@@ -328,3 +328,27 @@ class TFBertModelTest(TFModelTesterMixin, unittest.TestCase):
|
|||||||
self.assertEqual(sorted(output_loading_info["unexpected_keys"]), ["mlm___cls", "nsp___cls"])
|
self.assertEqual(sorted(output_loading_info["unexpected_keys"]), ["mlm___cls", "nsp___cls"])
|
||||||
for layer in output_loading_info["missing_keys"]:
|
for layer in output_loading_info["missing_keys"]:
|
||||||
self.assertTrue(layer.split("_")[0] in ["dropout", "classifier"])
|
self.assertTrue(layer.split("_")[0] in ["dropout", "classifier"])
|
||||||
|
|
||||||
|
|
||||||
|
class TFBertModelIntegrationTest(unittest.TestCase):
|
||||||
|
@slow
|
||||||
|
def test_inference_masked_lm(self):
|
||||||
|
model = TFBertForPreTraining.from_pretrained("lysandre/tiny-bert-random")
|
||||||
|
input_ids = tf.constant([[0, 1, 2, 3, 4, 5]])
|
||||||
|
output = model(input_ids)[0]
|
||||||
|
|
||||||
|
expected_shape = [1, 6, 10]
|
||||||
|
self.assertEqual(output.shape, expected_shape)
|
||||||
|
|
||||||
|
print(output[:, :3, :3])
|
||||||
|
|
||||||
|
expected_slice = tf.constant(
|
||||||
|
[
|
||||||
|
[
|
||||||
|
[0.03706957, 0.10124919, 0.03616843],
|
||||||
|
[-0.06099961, 0.02266058, 0.00601412],
|
||||||
|
[-0.06066202, 0.05684517, 0.02038802],
|
||||||
|
]
|
||||||
|
]
|
||||||
|
)
|
||||||
|
tf.debugging.assert_near(output[:, :3, :3], expected_slice, atol=1e-4)
|
||||||
|
|||||||
@@ -248,3 +248,19 @@ class TFElectraModelTest(TFModelTesterMixin, unittest.TestCase):
|
|||||||
for model_name in ["google/electra-small-discriminator"]:
|
for model_name in ["google/electra-small-discriminator"]:
|
||||||
model = TFElectraModel.from_pretrained(model_name)
|
model = TFElectraModel.from_pretrained(model_name)
|
||||||
self.assertIsNotNone(model)
|
self.assertIsNotNone(model)
|
||||||
|
|
||||||
|
|
||||||
|
class TFElectraModelIntegrationTest(unittest.TestCase):
|
||||||
|
@slow
|
||||||
|
def test_inference_masked_lm(self):
|
||||||
|
model = TFElectraForPreTraining.from_pretrained("lysandre/tiny-electra-random")
|
||||||
|
input_ids = tf.constant([[0, 1, 2, 3, 4, 5]])
|
||||||
|
output = model(input_ids)[0]
|
||||||
|
|
||||||
|
expected_shape = [1, 6]
|
||||||
|
self.assertEqual(output.shape, expected_shape)
|
||||||
|
|
||||||
|
print(output[:, :3])
|
||||||
|
|
||||||
|
expected_slice = tf.constant([[-0.24651965, 0.8835437, 1.823782]])
|
||||||
|
tf.debugging.assert_near(output[:, :3], expected_slice, atol=1e-4)
|
||||||
|
|||||||
@@ -622,3 +622,25 @@ class TFLongformerModelIntegrationTest(unittest.TestCase):
|
|||||||
tf.debugging.assert_near(tf.reduce_mean(loss), expected_loss, rtol=1e-4)
|
tf.debugging.assert_near(tf.reduce_mean(loss), expected_loss, rtol=1e-4)
|
||||||
tf.debugging.assert_near(tf.reduce_sum(prediction_scores), expected_prediction_scores_sum, rtol=1e-4)
|
tf.debugging.assert_near(tf.reduce_sum(prediction_scores), expected_prediction_scores_sum, rtol=1e-4)
|
||||||
tf.debugging.assert_near(tf.reduce_mean(prediction_scores), expected_prediction_scores_mean, rtol=1e-4)
|
tf.debugging.assert_near(tf.reduce_mean(prediction_scores), expected_prediction_scores_mean, rtol=1e-4)
|
||||||
|
|
||||||
|
@slow
|
||||||
|
def test_inference_masked_lm(self):
|
||||||
|
model = TFLongformerForMaskedLM.from_pretrained("lysandre/tiny-longformer-random")
|
||||||
|
input_ids = tf.constant([[0, 1, 2, 3, 4, 5]])
|
||||||
|
output = model(input_ids)[0]
|
||||||
|
|
||||||
|
expected_shape = [1, 6, 10]
|
||||||
|
self.assertEqual(output.shape, expected_shape)
|
||||||
|
|
||||||
|
print(output[:, :3, :3])
|
||||||
|
|
||||||
|
expected_slice = tf.constant(
|
||||||
|
[
|
||||||
|
[
|
||||||
|
[-0.04926379, 0.0367098, 0.02099686],
|
||||||
|
[0.03940692, 0.01547744, -0.01448723],
|
||||||
|
[0.03495252, -0.05900355, -0.01675752],
|
||||||
|
]
|
||||||
|
]
|
||||||
|
)
|
||||||
|
tf.debugging.assert_near(output[:, :3, :3], expected_slice, atol=1e-4)
|
||||||
|
|||||||
Reference in New Issue
Block a user