Tensorflow improvements (#4530)

* Better None gradients handling * Apply Style * Apply Style * Create a loss class per task to compute its respective loss * Add loss classes to the ALBERT TF models * Add loss classes to the BERT TF models * Add question answering and multiple choice to TF Camembert * Remove prints * Add multiple choice model to TF DistilBERT + loss computation * Add question answering model to TF Electra + loss computation * Add token classification, question answering and multiple choice models to TF Flaubert * Add multiple choice model to TF Roberta + loss computation * Add multiple choice model to TF XLM + loss computation * Add multiple choice and question answering models to TF XLM-Roberta * Add multiple choice model to TF XLNet + loss computation * Remove unused parameters * Add task loss classes * Reorder TF imports + add new model classes * Add new model classes * Bugfix in TF T5 model * Bugfix for TF T5 tests * Bugfix in TF T5 model * Fix TF T5 model tests * Fix T5 tests + some renaming * Fix inheritance issue in the AutoX tests * Add tests for TF Flaubert and TF XLM Roberta * Add tests for TF Flaubert and TF XLM Roberta * Remove unused piece of code in the TF trainer * bugfix and remove unused code * Bugfix for TF 2.2 * Apply Style * Divide TFSequenceClassificationAndMultipleChoiceLoss into their two respective name * Apply style * Mirror the PT Trainer in the TF one: fp16, optimizers and tb_writer as class parameter and better dataset handling * Fix TF optimizations tests and apply style * Remove useless parameter * Bugfix and apply style * Fix TF Trainer prediction * Now the TF models return the loss such as their PyTorch couterparts * Apply Style * Ignore some tests output * Take into account the SQuAD cls_index, p_mask and is_impossible parameters for the QuestionAnswering task models. * Fix names for SQuAD data * Apply Style * Fix conflicts with 2.11 release * Fix conflicts with 2.11 * Fix wrongname * Add better documentation on the new create_optimizer function * Fix isort * logging_dir: use same default as PyTorch Co-authored-by: Julien Chaumond <chaumond@gmail.com>
2020-06-05 01:45:53 +02:00
parent ccd26c2862
commit f9414f7553
27 changed files with 2380 additions and 558 deletions
--- a/tests/test_modeling_tf_common.py
+++ b/tests/test_modeling_tf_common.py
@@ -30,7 +30,7 @@ if is_tf_available():
    import tensorflow as tf
    import numpy as np

-    from transformers import tf_top_k_top_p_filtering, TFAdaptiveEmbedding
+    from transformers import tf_top_k_top_p_filtering, TFAdaptiveEmbedding, TFSharedEmbeddings

    if _tf_gpu_memory_limit is not None:
        gpus = tf.config.list_physical_devices("GPU")
@@ -107,26 +107,45 @@ class TFModelTesterMixin:
            and getattr(module_member, "_keras_serializable", False)
        )
        for main_layer_class in tf_main_layer_classes:
-            main_layer = main_layer_class(config)
+            # T5MainLayer needs an embed_tokens parameter when called without the inputs_embeds parameter
+            if "T5" in main_layer_class.__name__:
+                # Take the same values than in TFT5ModelTester for this shared layer
+                shared = TFSharedEmbeddings(99, 32, name="shared")
+                main_layer = main_layer_class(config, embed_tokens=shared)
+            else:
+                main_layer = main_layer_class(config)
            symbolic_inputs = {
                name: tf.keras.Input(tensor.shape[1:], dtype=tensor.dtype) for name, tensor in inputs_dict.items()
            }
+
            model = tf.keras.Model(symbolic_inputs, outputs=main_layer(symbolic_inputs))
            outputs = model(inputs_dict)

            with tempfile.TemporaryDirectory() as tmpdirname:
                filepath = os.path.join(tmpdirname, "keras_model.h5")
                model.save(filepath)
-                model = tf.keras.models.load_model(
-                    filepath, custom_objects={main_layer_class.__name__: main_layer_class}
-                )
+                if "T5" in main_layer_class.__name__:
+                    model = tf.keras.models.load_model(
+                        filepath,
+                        custom_objects={
+                            main_layer_class.__name__: main_layer_class,
+                            "TFSharedEmbeddings": TFSharedEmbeddings,
+                        },
+                    )
+                else:
+                    model = tf.keras.models.load_model(
+                        filepath, custom_objects={main_layer_class.__name__: main_layer_class}
+                    )
                assert isinstance(model, tf.keras.Model)
                after_outputs = model(inputs_dict)
                self.assert_outputs_same(after_outputs, outputs)

    def assert_outputs_same(self, after_outputs, outputs):
        # Make sure we don't have nans
-        out_1 = after_outputs[0].numpy()
+        if isinstance(after_outputs, tf.Tensor):
+            out_1 = after_outputs.numpy()
+        else:
+            out_1 = after_outputs[0].numpy()
        out_2 = outputs[0].numpy()
        self.assertEqual(out_1.shape, out_2.shape)
        out_1 = out_1[~np.isnan(out_1)]
@@ -269,7 +288,6 @@ class TFModelTesterMixin:
            inputs_keywords = copy.deepcopy(inputs_dict)
            input_ids = inputs_keywords.pop("input_ids" if not self.is_encoder_decoder else "inputs", None,)
            outputs_keywords = model(input_ids, **inputs_keywords)
-
            output_dict = outputs_dict[0].numpy()
            output_keywords = outputs_keywords[0].numpy()