[Almost all TF models] TF clean up: add missing CLM / MLM loss; fix T5 naming and keras compile (#5395)

* add first version of clm tf * make style * add more tests for bert * update tf clm loss * fix tests * correct tf ner script * add mlm loss * delete bogus file * clean tf auto model + add tests * finish adding clm loss everywhere * fix training in distilbert * fix flake8 * save intermediate * fix tf t5 naming * remove prints * finish up * up * fix tf gpt2 * fix new test utils import * fix flake8 * keep backward compatibility * Update src/transformers/modeling_tf_albert.py Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com> * Update src/transformers/modeling_tf_auto.py Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com> * Update src/transformers/modeling_tf_electra.py Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com> * Update src/transformers/modeling_tf_roberta.py Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com> * Update src/transformers/modeling_tf_mobilebert.py Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com> * Update src/transformers/modeling_tf_auto.py Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com> * Update src/transformers/modeling_tf_bert.py Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com> * Update src/transformers/modeling_tf_distilbert.py Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com> * apply sylvains suggestions Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com>
2020-07-07 18:15:53 +02:00
parent 33e43edddc
commit 4dc65591b5
23 changed files with 1516 additions and 315 deletions
--- a/tests/test_modeling_tf_t5.py
+++ b/tests/test_modeling_tf_t5.py
@@ -77,6 +77,7 @@ class TFT5ModelTester:
            eos_token_id=self.eos_token_id,
            bos_token_id=self.pad_token_id,
            pad_token_id=self.pad_token_id,
+            decoder_start_token_id=self.pad_token_id,
        )

        return (config, input_ids, input_mask, token_labels)
@@ -84,7 +85,7 @@ class TFT5ModelTester:
    def create_and_check_t5_model(self, config, input_ids, input_mask, token_labels):
        model = TFT5Model(config=config)
        inputs = {
-            "inputs": input_ids,
+            "input_ids": input_ids,
            "decoder_input_ids": input_ids,
            "decoder_attention_mask": input_mask,
        }
@@ -115,7 +116,7 @@ class TFT5ModelTester:
    def create_and_check_t5_with_lm_head(self, config, input_ids, input_mask, token_labels):
        model = TFT5ForConditionalGeneration(config=config)
        inputs_dict = {
-            "inputs": input_ids,
+            "input_ids": input_ids,
            "decoder_input_ids": input_ids,
            "decoder_attention_mask": input_mask,
        }
@@ -209,7 +210,7 @@ class TFT5ModelTester:
        config_and_inputs = self.prepare_config_and_inputs()
        (config, input_ids, input_mask, token_labels) = config_and_inputs
        inputs_dict = {
-            "inputs": input_ids,
+            "input_ids": input_ids,
            "decoder_input_ids": input_ids,
            "decoder_attention_mask": input_mask,
            "use_cache": tf.convert_to_tensor([False]),