From 9fd1f56236d057dbd06e4c67019ca8f4ac231908 Mon Sep 17 00:00:00 2001
From: Lysandre Debut
Date: Tue, 10 Nov 2020 14:02:33 -0500
Subject: [PATCH] [No merge] TF integration testing (#7621)

* stash

* TF Integration testing for ELECTRA, BERT, Longformer

* Trigger slow tests

* Apply suggestions from code review
---
NOTE(review): this text sits after the `---` separator, so `git am` ignores it.
- Each of the three new tests still contains a `print(...)` debugging call.
- The BERT and ELECTRA tests are named `test_inference_masked_lm` but load
  `TFBertForPreTraining` / `TFElectraForPreTraining`.

 src/transformers/modeling_electra.py    |  2 +-
 src/transformers/modeling_tf_electra.py |  2 +-
 tests/test_modeling_tf_bert.py          | 26 ++++++++++++++++++++++++-
 tests/test_modeling_tf_electra.py       | 16 +++++++++++++++
 tests/test_modeling_tf_longformer.py    | 22 +++++++++++++++++++++
 5 files changed, 65 insertions(+), 3 deletions(-)

diff --git a/src/transformers/modeling_electra.py b/src/transformers/modeling_electra.py
index e244ac1c55..69dffa59b3 100644
--- a/src/transformers/modeling_electra.py
+++ b/src/transformers/modeling_electra.py
@@ -514,7 +514,7 @@ class ElectraDiscriminatorPredictions(nn.Module):
     def forward(self, discriminator_hidden_states):
         hidden_states = self.dense(discriminator_hidden_states)
         hidden_states = get_activation(self.config.hidden_act)(hidden_states)
-        logits = self.dense_prediction(hidden_states).squeeze()
+        logits = self.dense_prediction(hidden_states).squeeze(-1)
 
         return logits
 
diff --git a/src/transformers/modeling_tf_electra.py b/src/transformers/modeling_tf_electra.py
index 0f5ec71236..4fc3492378 100644
--- a/src/transformers/modeling_tf_electra.py
+++ b/src/transformers/modeling_tf_electra.py
@@ -425,7 +425,7 @@ class TFElectraDiscriminatorPredictions(tf.keras.layers.Layer):
     def call(self, discriminator_hidden_states, training=False):
         hidden_states = self.dense(discriminator_hidden_states)
         hidden_states = get_tf_activation(self.config.hidden_act)(hidden_states)
-        logits = tf.squeeze(self.dense_prediction(hidden_states))
+        logits = tf.squeeze(self.dense_prediction(hidden_states), -1)
 
         return logits
 
diff --git a/tests/test_modeling_tf_bert.py b/tests/test_modeling_tf_bert.py
index 6fda686aea..0541637499 100644
--- a/tests/test_modeling_tf_bert.py
+++ b/tests/test_modeling_tf_bert.py
@@ -17,7 +17,7 @@
 import unittest
 
 from transformers import BertConfig, is_tf_available
-from transformers.testing_utils import require_tf
+from transformers.testing_utils import require_tf, slow
 
 from .test_configuration_common import ConfigTester
 from .test_modeling_tf_common import TFModelTesterMixin, ids_tensor
@@ -328,3 +328,27 @@ class TFBertModelTest(TFModelTesterMixin, unittest.TestCase):
         self.assertEqual(sorted(output_loading_info["unexpected_keys"]), ["mlm___cls", "nsp___cls"])
         for layer in output_loading_info["missing_keys"]:
             self.assertTrue(layer.split("_")[0] in ["dropout", "classifier"])
+
+
+class TFBertModelIntegrationTest(unittest.TestCase):
+    @slow
+    def test_inference_masked_lm(self):
+        model = TFBertForPreTraining.from_pretrained("lysandre/tiny-bert-random")
+        input_ids = tf.constant([[0, 1, 2, 3, 4, 5]])
+        output = model(input_ids)[0]
+
+        expected_shape = [1, 6, 10]
+        self.assertEqual(output.shape, expected_shape)
+
+        print(output[:, :3, :3])
+
+        expected_slice = tf.constant(
+            [
+                [
+                    [0.03706957, 0.10124919, 0.03616843],
+                    [-0.06099961, 0.02266058, 0.00601412],
+                    [-0.06066202, 0.05684517, 0.02038802],
+                ]
+            ]
+        )
+        tf.debugging.assert_near(output[:, :3, :3], expected_slice, atol=1e-4)
diff --git a/tests/test_modeling_tf_electra.py b/tests/test_modeling_tf_electra.py
index 2c1daf4557..95a570a6a5 100644
--- a/tests/test_modeling_tf_electra.py
+++ b/tests/test_modeling_tf_electra.py
@@ -248,3 +248,19 @@ class TFElectraModelTest(TFModelTesterMixin, unittest.TestCase):
         for model_name in ["google/electra-small-discriminator"]:
             model = TFElectraModel.from_pretrained(model_name)
             self.assertIsNotNone(model)
+
+
+class TFElectraModelIntegrationTest(unittest.TestCase):
+    @slow
+    def test_inference_masked_lm(self):
+        model = TFElectraForPreTraining.from_pretrained("lysandre/tiny-electra-random")
+        input_ids = tf.constant([[0, 1, 2, 3, 4, 5]])
+        output = model(input_ids)[0]
+
+        expected_shape = [1, 6]
+        self.assertEqual(output.shape, expected_shape)
+
+        print(output[:, :3])
+
+        expected_slice = tf.constant([[-0.24651965, 0.8835437, 1.823782]])
+        tf.debugging.assert_near(output[:, :3], expected_slice, atol=1e-4)
diff --git a/tests/test_modeling_tf_longformer.py b/tests/test_modeling_tf_longformer.py
index 0fa0bb68a8..d9f6d93d61 100644
--- a/tests/test_modeling_tf_longformer.py
+++ b/tests/test_modeling_tf_longformer.py
@@ -622,3 +622,25 @@ class TFLongformerModelIntegrationTest(unittest.TestCase):
         tf.debugging.assert_near(tf.reduce_mean(loss), expected_loss, rtol=1e-4)
         tf.debugging.assert_near(tf.reduce_sum(prediction_scores), expected_prediction_scores_sum, rtol=1e-4)
         tf.debugging.assert_near(tf.reduce_mean(prediction_scores), expected_prediction_scores_mean, rtol=1e-4)
+
+    @slow
+    def test_inference_masked_lm(self):
+        model = TFLongformerForMaskedLM.from_pretrained("lysandre/tiny-longformer-random")
+        input_ids = tf.constant([[0, 1, 2, 3, 4, 5]])
+        output = model(input_ids)[0]
+
+        expected_shape = [1, 6, 10]
+        self.assertEqual(output.shape, expected_shape)
+
+        print(output[:, :3, :3])
+
+        expected_slice = tf.constant(
+            [
+                [
+                    [-0.04926379, 0.0367098, 0.02099686],
+                    [0.03940692, 0.01547744, -0.01448723],
+                    [0.03495252, -0.05900355, -0.01675752],
+                ]
+            ]
+        )
+        tf.debugging.assert_near(output[:, :3, :3], expected_slice, atol=1e-4)