[Almost all TF models] TF clean up: add missing CLM / MLM loss; fix T5 naming and keras compile (#5395)

* add first version of clm tf
* make style
* add more tests for bert
* update tf clm loss
* fix tests
* correct tf ner script
* add mlm loss
* delete bogus file
* clean tf auto model + add tests
* finish adding clm loss everywhere
* fix training in distilbert
* fix flake8
* save intermediate
* fix tf t5 naming
* remove prints
* finish up
* up
* fix tf gpt2
* fix new test utils import
* fix flake8
* keep backward compatibility
* Update src/transformers/modeling_tf_albert.py
  Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com>
* Update src/transformers/modeling_tf_auto.py
  Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com>
* Update src/transformers/modeling_tf_electra.py
  Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com>
* Update src/transformers/modeling_tf_roberta.py
  Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com>
* Update src/transformers/modeling_tf_mobilebert.py
  Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com>
* Update src/transformers/modeling_tf_auto.py
  Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com>
* Update src/transformers/modeling_tf_bert.py
  Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com>
* Update src/transformers/modeling_tf_distilbert.py
  Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com>
* apply sylvains suggestions
  Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com>
2020-07-07 18:15:53 +02:00
parent 33e43edddc
commit 4dc65591b5
23 changed files with 1516 additions and 315 deletions
--- a/tests/test_modeling_tf_distilbert.py
+++ b/tests/test_modeling_tf_distilbert.py
@@ -17,7 +17,7 @@
 import unittest

 from transformers import DistilBertConfig, is_tf_available
-from transformers.testing_utils import require_tf
+from transformers.testing_utils import require_tf, slow

 from .test_configuration_common import ConfigTester
 from .test_modeling_tf_common import TFModelTesterMixin, ids_tensor
@@ -32,6 +32,7 @@ if is_tf_available():
        TFDistilBertForSequenceClassification,
        TFDistilBertForTokenClassification,
        TFDistilBertForMultipleChoice,
+        TF_DISTILBERT_PRETRAINED_MODEL_ARCHIVE_LIST,
    )


@@ -118,9 +119,7 @@ class TFDistilBertModelTester:
        model = TFDistilBertForMaskedLM(config=config)
        inputs = {"input_ids": input_ids, "attention_mask": input_mask}
        (prediction_scores,) = model(inputs)
-        result = {
-            "prediction_scores": prediction_scores.numpy(),
-        }
+        result = {"prediction_scores": prediction_scores.numpy()}
        self.parent.assertListEqual(
            list(result["prediction_scores"].shape), [self.batch_size, self.seq_length, self.vocab_size]
        )
@@ -129,12 +128,12 @@ class TFDistilBertModelTester:
        self, config, input_ids, input_mask, sequence_labels, token_labels, choice_labels
    ):
        model = TFDistilBertForQuestionAnswering(config=config)
-        inputs = {"input_ids": input_ids, "attention_mask": input_mask}
-        start_logits, end_logits = model(inputs)
-        result = {
-            "start_logits": start_logits.numpy(),
-            "end_logits": end_logits.numpy(),
+        inputs = {
+            "input_ids": input_ids,
+            "attention_mask": input_mask,
        }
+        start_logits, end_logits = model(inputs)
+        result = {"start_logits": start_logits.numpy(), "end_logits": end_logits.numpy()}
        self.parent.assertListEqual(list(result["start_logits"].shape), [self.batch_size, self.seq_length])
        self.parent.assertListEqual(list(result["end_logits"].shape), [self.batch_size, self.seq_length])

@@ -145,9 +144,7 @@ class TFDistilBertModelTester:
        model = TFDistilBertForSequenceClassification(config)
        inputs = {"input_ids": input_ids, "attention_mask": input_mask}
        (logits,) = model(inputs)
-        result = {
-            "logits": logits.numpy(),
-        }
+        result = {"logits": logits.numpy()}
        self.parent.assertListEqual(list(result["logits"].shape), [self.batch_size, self.num_labels])

    def create_and_check_distilbert_for_multiple_choice(
@@ -162,9 +159,7 @@ class TFDistilBertModelTester:
            "attention_mask": multiple_choice_input_mask,
        }
        (logits,) = model(inputs)
-        result = {
-            "logits": logits.numpy(),
-        }
+        result = {"logits": logits.numpy()}
        self.parent.assertListEqual(list(result["logits"].shape), [self.batch_size, self.num_choices])

    def create_and_check_distilbert_for_token_classification(
@@ -236,8 +231,8 @@ class TFDistilBertModelTest(TFModelTesterMixin, unittest.TestCase):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_distilbert_for_token_classification(*config_and_inputs)

-    # @slow
-    # def test_model_from_pretrained(self):
-    #     for model_name in list(DISTILBERT_PRETRAINED_MODEL_ARCHIVE_LIST[:1]:
-    #         model = DistilBertModesss.from_pretrained(model_name)
-    #         self.assertIsNotNone(model)
+    @slow
+    def test_model_from_pretrained(self):
+        for model_name in list(TF_DISTILBERT_PRETRAINED_MODEL_ARCHIVE_LIST[:1]):
+            model = TFDistilBertModel.from_pretrained(model_name)
+            self.assertIsNotNone(model)