TF Model train and eval step metrics for seq2seq models. (#14009)

* TF Model train and eval step metrics for seq2seq models. When using a model with a seq2seq output, compute metrics against logits. * Removing vestigial code. Co-authored-by: matt <rocketknight1@gmail.com>
2021-10-19 13:14:21 +02:00
parent fde4867f97
commit 122c2f81b7
2 changed files with 40 additions and 8 deletions
--- a/tests/test_modeling_tf_t5.py
+++ b/tests/test_modeling_tf_t5.py
@@ -666,3 +666,33 @@ class TFT5ModelIntegrationTests(unittest.TestCase):
        translation = tok.decode(output[0], skip_special_tokens=True, clean_up_tokenization_spaces=False)

        self.assertEqual(translation, expected_translation)
+
+    def test_finetune_keras_trainer(self):
+        """Ensure that the model can be fine-tuned via the keras API and
+        that metrics work as expected.
+        """
+
+        # Called with the logits output: argmax over the last axis is compared to the label id
+        def _accuracy(y_true, y_pred):
+            return tf.keras.metrics.sparse_categorical_accuracy(y_true[:, 0], y_pred[:, 0])
+
+        # measure the accuracy of the first token (mean of the per-example 0/1 matches)
+        class FirstTokenAccuracy(tf.keras.metrics.MeanMetricWrapper):
+            def __init__(self, name="accuracy", **kwargs):
+                super().__init__(_accuracy, name=name, **kwargs)
+
+        model = self.model
+        model.compile("adam", metrics=FirstTokenAccuracy())
+        tokenizer = T5Tokenizer.from_pretrained("t5-small")
+
+        examples = [
+            ("sentiment: Everything is awesome!", "positive"),
+            ("sentiment: Tensorflow datasets are hard to use", "negative"),
+        ]
+
+        inputs = dict(tokenizer([x[0] for x in examples], padding=True, return_tensors="tf"))
+        inputs["labels"] = tokenizer([x[1] for x in examples], padding=True, return_tensors="tf").input_ids
+
+        model.fit(inputs)
+        m = model.evaluate(inputs)
+        self.assertEqual(len(m), 2)  # expected: the loss plus the single compiled metric