Tensorflow improvements (#4530)

* Better None gradients handling * Apply Style * Apply Style * Create a loss class per task to compute its respective loss * Add loss classes to the ALBERT TF models * Add loss classes to the BERT TF models * Add question answering and multiple choice to TF Camembert * Remove prints * Add multiple choice model to TF DistilBERT + loss computation * Add question answering model to TF Electra + loss computation * Add token classification, question answering and multiple choice models to TF Flaubert * Add multiple choice model to TF Roberta + loss computation * Add multiple choice model to TF XLM + loss computation * Add multiple choice and question answering models to TF XLM-Roberta * Add multiple choice model to TF XLNet + loss computation * Remove unused parameters * Add task loss classes * Reorder TF imports + add new model classes * Add new model classes * Bugfix in TF T5 model * Bugfix for TF T5 tests * Bugfix in TF T5 model * Fix TF T5 model tests * Fix T5 tests + some renaming * Fix inheritance issue in the AutoX tests * Add tests for TF Flaubert and TF XLM Roberta * Add tests for TF Flaubert and TF XLM Roberta * Remove unused piece of code in the TF trainer * bugfix and remove unused code * Bugfix for TF 2.2 * Apply Style * Divide TFSequenceClassificationAndMultipleChoiceLoss into its two respective names * Apply style * Mirror the PT Trainer in the TF one: fp16, optimizers and tb_writer as class parameter and better dataset handling * Fix TF optimizations tests and apply style * Remove useless parameter * Bugfix and apply style * Fix TF Trainer prediction * Now the TF models return the loss like their PyTorch counterparts * Apply Style * Ignore some tests output * Take into account the SQuAD cls_index, p_mask and is_impossible parameters for the QuestionAnswering task models.
* Fix names for SQuAD data * Apply Style * Fix conflicts with 2.11 release * Fix conflicts with 2.11 * Fix wrong name * Add better documentation on the new create_optimizer function * Fix isort * logging_dir: use same default as PyTorch Co-authored-by: Julien Chaumond <chaumond@gmail.com>
2020-06-05 01:45:53 +02:00
parent ccd26c2862
commit f9414f7553
27 changed files with 2380 additions and 558 deletions
--- a/tests/test_modeling_tf_common.py
+++ b/tests/test_modeling_tf_common.py
@@ -30,7 +30,7 @@ if is_tf_available():
    import tensorflow as tf
    import numpy as np

-    from transformers import tf_top_k_top_p_filtering, TFAdaptiveEmbedding
+    from transformers import tf_top_k_top_p_filtering, TFAdaptiveEmbedding, TFSharedEmbeddings

    if _tf_gpu_memory_limit is not None:
        gpus = tf.config.list_physical_devices("GPU")
@@ -107,26 +107,45 @@ class TFModelTesterMixin:
            and getattr(module_member, "_keras_serializable", False)
        )
        for main_layer_class in tf_main_layer_classes:
-            main_layer = main_layer_class(config)
+            # T5MainLayer needs an embed_tokens parameter when called without the inputs_embeds parameter
+            if "T5" in main_layer_class.__name__:
+                # Use the same values as in TFT5ModelTester for this shared layer
+                shared = TFSharedEmbeddings(99, 32, name="shared")
+                main_layer = main_layer_class(config, embed_tokens=shared)
+            else:
+                main_layer = main_layer_class(config)
            symbolic_inputs = {
                name: tf.keras.Input(tensor.shape[1:], dtype=tensor.dtype) for name, tensor in inputs_dict.items()
            }
+
            model = tf.keras.Model(symbolic_inputs, outputs=main_layer(symbolic_inputs))
            outputs = model(inputs_dict)

            with tempfile.TemporaryDirectory() as tmpdirname:
                filepath = os.path.join(tmpdirname, "keras_model.h5")
                model.save(filepath)
-                model = tf.keras.models.load_model(
-                    filepath, custom_objects={main_layer_class.__name__: main_layer_class}
-                )
+                if "T5" in main_layer_class.__name__:
+                    model = tf.keras.models.load_model(
+                        filepath,
+                        custom_objects={
+                            main_layer_class.__name__: main_layer_class,
+                            "TFSharedEmbeddings": TFSharedEmbeddings,
+                        },
+                    )
+                else:
+                    model = tf.keras.models.load_model(
+                        filepath, custom_objects={main_layer_class.__name__: main_layer_class}
+                    )
                assert isinstance(model, tf.keras.Model)
                after_outputs = model(inputs_dict)
                self.assert_outputs_same(after_outputs, outputs)

    def assert_outputs_same(self, after_outputs, outputs):
        # Make sure we don't have nans
-        out_1 = after_outputs[0].numpy()
+        if isinstance(after_outputs, tf.Tensor):
+            out_1 = after_outputs.numpy()
+        else:
+            out_1 = after_outputs[0].numpy()
        out_2 = outputs[0].numpy()
        self.assertEqual(out_1.shape, out_2.shape)
        out_1 = out_1[~np.isnan(out_1)]
@@ -269,7 +288,6 @@ class TFModelTesterMixin:
            inputs_keywords = copy.deepcopy(inputs_dict)
            input_ids = inputs_keywords.pop("input_ids" if not self.is_encoder_decoder else "inputs", None,)
            outputs_keywords = model(input_ids, **inputs_keywords)
-
            output_dict = outputs_dict[0].numpy()
            output_keywords = outputs_keywords[0].numpy()

--- a/tests/test_modeling_tf_flaubert.py
+++ b/tests/test_modeling_tf_flaubert.py
@@ -0,0 +1,54 @@
+# coding=utf-8
+# Copyright 2018 The Google AI Language Team Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import unittest
+
+from transformers import is_tf_available
+
+from .utils import require_tf, slow
+
+
+if is_tf_available():
+    import tensorflow as tf
+    import numpy as np
+    from transformers import TFFlaubertModel
+
+
+@require_tf
+class TFFlaubertModelIntegrationTest(unittest.TestCase):  # checks TFFlaubertModel output against stored reference values
+    @slow
+    def test_output_embeds_base_model(self):
+        model = TFFlaubertModel.from_pretrained("jplu/tf-flaubert-small-cased")
+
+        input_ids = tf.convert_to_tensor(
+            [[0, 158, 735, 2592, 1424, 6727, 82, 1]], dtype=tf.int32,
+        )  # "J'aime flaubert !"
+
+        output = model(input_ids)[0]  # first element of the model's return value
+        expected_shape = tf.TensorShape((1, 8, 512))
+        self.assertEqual(output.shape, expected_shape)
+        # Compare the leading 3x3 slice over the last two axes with reference values (atol=1e-4).
+        expected_slice = tf.convert_to_tensor(
+            [
+                [
+                    [-1.8768773, -1.566555, 0.27072418],
+                    [-1.6920038, -0.5873505, 1.9329599],
+                    [-2.9563985, -1.6993835, 1.7972052],
+                ]
+            ],
+            dtype=tf.float32,
+        )
+
+        self.assertTrue(np.allclose(output[:, :3, :3].numpy(), expected_slice.numpy(), atol=1e-4))
--- a/tests/test_modeling_tf_xlm_roberta.py
+++ b/tests/test_modeling_tf_xlm_roberta.py
@@ -0,0 +1,55 @@
+# coding=utf-8
+# Copyright 2018 The Google AI Language Team Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import unittest
+
+from transformers import is_tf_available
+
+from .utils import require_tf, slow
+
+
+if is_tf_available():
+    import tensorflow as tf
+    import numpy as np
+    from transformers import TFXLMRobertaModel
+
+
+@require_tf
+class TFXLMRobertaModelIntegrationTest(unittest.TestCase):  # checks TFXLMRobertaModel output against stored reference values
+    @slow
+    def test_output_embeds_base_model(self):
+        model = TFXLMRobertaModel.from_pretrained("jplu/tf-xlm-roberta-base")
+
+        features = {
+            "input_ids": tf.convert_to_tensor([[0, 2646, 10269, 83, 99942, 2]], dtype=tf.int32),  # "My dog is cute"
+            "attention_mask": tf.convert_to_tensor([[1, 1, 1, 1, 1, 1]], dtype=tf.int32),
+        }
+
+        output = model(features)[0]  # first element of the model's return value
+        expected_shape = tf.TensorShape((1, 6, 768))
+        self.assertEqual(output.shape, expected_shape)
+        # Compare the leading 3x3 slice over the last two axes with reference values (atol=1e-4).
+        expected_slice = tf.convert_to_tensor(
+            [
+                [
+                    [0.0681762, 0.10894451, 0.06772504],
+                    [-0.06423668, 0.02366615, 0.04329344],
+                    [-0.06057295, 0.09974135, -0.00070584],
+                ]
+            ],
+            dtype=tf.float32,
+        )
+
+        self.assertTrue(np.allclose(output[:, :3, :3].numpy(), expected_slice.numpy(), atol=1e-4))
--- a/tests/test_optimization_tf.py
+++ b/tests/test_optimization_tf.py
@@ -47,7 +47,7 @@ class OptimizationFTest(unittest.TestCase):
        with strategy.scope():
            accumulator = GradientAccumulator()
            variable = tf.Variable([4.0, 3.0])
-            optimizer = create_optimizer(5e-5, 10, 5)
+            optimizer, _ = create_optimizer(5e-5, 10, 5)
            gradient_placeholder = tf.Variable([0.0, 0.0], trainable=False)

        def accumulate_on_replica(gradient):