Fix T5 and BART for TF (#9063)

* Fix T5 for graphe compilation+execution * Fix BART * Fix import * Fix naming * fix attribute name * Oops * fix import * fix tests * fix tests * Update test * Add mising import * Address Patrick's comments * Style * Address Patrick's comment
2020-12-14 18:47:00 +01:00
parent a9c8bff724
commit df3f4d2aef
8 changed files with 151 additions and 166 deletions
--- a/tests/test_modeling_tf_bart.py
+++ b/tests/test_modeling_tf_bart.py
@@ -118,14 +118,6 @@ class TFBartModelTest(TFModelTesterMixin, unittest.TestCase):
        # inputs_embeds not supported
        pass

-    def test_saved_model_with_hidden_states_output(self):
-        # Should be uncommented during patrick TF refactor
-        pass
-
-    def test_saved_model_with_attentions_output(self):
-        # Should be uncommented during patrick TF refactor
-        pass
-
    def test_model_common_attributes(self):
        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()

--- a/tests/test_modeling_tf_common.py
+++ b/tests/test_modeling_tf_common.py
@@ -171,6 +171,11 @@ class TFModelTesterMixin:

        for model_class in self.all_model_classes:
            class_inputs_dict = self._prepare_for_class(inputs_dict, model_class)
+            # A saved model is always executed in graph mode, since we merged the PR #8777
+            # the booleans in graph mode are always the ones in the config, then we update
+            # the use_cache property if it exists in order to have similar booleans with the inputs
+            if "use_cache" in class_inputs_dict:
+                config.use_cache = class_inputs_dict.pop("use_cache")
            model = model_class(config)
            num_out = len(model(class_inputs_dict))
            model._saved_model_inputs_spec = None
@@ -207,6 +212,11 @@ class TFModelTesterMixin:

        for model_class in self.all_model_classes:
            class_inputs_dict = self._prepare_for_class(inputs_dict, model_class)
+            # A saved model is always executed in graph mode, since we merged the PR #8777
+            # the booleans in graph mode are always the ones in the config, then we update
+            # the use_cache property if it exists in order to have similar booleans with the inputs
+            if "use_cache" in class_inputs_dict:
+                config.use_cache = class_inputs_dict.pop("use_cache")
            model = model_class(config)
            num_out = len(model(class_inputs_dict))
            model._saved_model_inputs_spec = None
@@ -249,10 +259,11 @@ class TFModelTesterMixin:
            if "T5" in main_layer_class.__name__:
                # Take the same values than in TFT5ModelTester for this shared layer
                shared = TFSharedEmbeddings(99, 32, name="shared")
-                config.use_cache = False
+                config.use_cache = inputs_dict.pop("use_cache", None)
                main_layer = main_layer_class(config, embed_tokens=shared)
            else:
                main_layer = main_layer_class(config)
+
            symbolic_inputs = {
                name: tf.keras.Input(tensor.shape[1:], dtype=tensor.dtype) for name, tensor in inputs_dict.items()
            }
@@ -321,10 +332,13 @@ class TFModelTesterMixin:

            # Check predictions on first output (logits/hidden-states) are close enought given low-level computational differences
            pt_model.eval()
-            pt_inputs_dict = dict(
-                (name, torch.from_numpy(key.numpy()).to(torch.long))
-                for name, key in self._prepare_for_class(inputs_dict, model_class).items()
-            )
+            pt_inputs_dict = {}
+            for name, key in self._prepare_for_class(inputs_dict, model_class).items():
+                if type(key) == bool:
+                    pt_inputs_dict[name] = key
+                else:
+                    pt_inputs_dict[name] = torch.from_numpy(key.numpy()).to(torch.long)
+
            # need to rename encoder-decoder "inputs" for PyTorch
            if "inputs" in pt_inputs_dict and self.is_encoder_decoder:
                pt_inputs_dict["input_ids"] = pt_inputs_dict.pop("inputs")
@@ -358,10 +372,13 @@ class TFModelTesterMixin:

            # Check predictions on first output (logits/hidden-states) are close enought given low-level computational differences
            pt_model.eval()
-            pt_inputs_dict = dict(
-                (name, torch.from_numpy(key.numpy()).to(torch.long))
-                for name, key in self._prepare_for_class(inputs_dict, model_class).items()
-            )
+            pt_inputs_dict = {}
+            for name, key in self._prepare_for_class(inputs_dict, model_class).items():
+                if type(key) == bool:
+                    key = np.array(key, dtype=bool)
+                    pt_inputs_dict[name] = torch.from_numpy(key).to(torch.long)
+                else:
+                    pt_inputs_dict[name] = torch.from_numpy(key.numpy()).to(torch.long)
            # need to rename encoder-decoder "inputs" for PyTorch
            if "inputs" in pt_inputs_dict and self.is_encoder_decoder:
                pt_inputs_dict["input_ids"] = pt_inputs_dict.pop("inputs")
@@ -574,13 +591,29 @@ class TFModelTesterMixin:
                self.model_tester, "expected_num_hidden_layers", self.model_tester.num_hidden_layers + 1
            )

-            hidden_states = outputs[-1]
-            self.assertEqual(config.output_attentions, False)
-            self.assertEqual(len(hidden_states), expected_num_layers)
-            self.assertListEqual(
-                list(hidden_states[0].shape[-2:]),
-                [self.model_tester.seq_length, self.model_tester.hidden_size],
-            )
+            if model.config.is_encoder_decoder:
+                encoder_hidden_states = outputs.encoder_hidden_states
+                decoder_hidden_states = outputs.decoder_hidden_states
+
+                self.assertEqual(config.output_attentions, False)
+                self.assertEqual(len(encoder_hidden_states), expected_num_layers)
+                self.assertListEqual(
+                    list(encoder_hidden_states[0].shape[-2:]),
+                    [self.model_tester.seq_length, self.model_tester.hidden_size],
+                )
+                self.assertEqual(len(decoder_hidden_states), expected_num_layers)
+                self.assertListEqual(
+                    list(decoder_hidden_states[0].shape[-2:]),
+                    [self.model_tester.seq_length, self.model_tester.hidden_size],
+                )
+            else:
+                hidden_states = outputs.hidden_states
+                self.assertEqual(config.output_attentions, False)
+                self.assertEqual(len(hidden_states), expected_num_layers)
+                self.assertListEqual(
+                    list(hidden_states[0].shape[-2:]),
+                    [self.model_tester.seq_length, self.model_tester.hidden_size],
+                )

        for model_class in self.all_model_classes:
            inputs_dict["output_hidden_states"] = True
@@ -796,7 +829,7 @@ class TFModelTesterMixin:

    def test_lm_head_model_random_beam_search_generate(self):
        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
-        input_ids = inputs_dict["input_ids"] if "input_ids" in inputs_dict else inputs_dict["inputs"]
+        input_ids = inputs_dict["input_ids"]

        for model_class in self.all_generative_model_classes:
            model = model_class(config)
--- a/tests/test_modeling_tf_t5.py
+++ b/tests/test_modeling_tf_t5.py
@@ -133,8 +133,6 @@ class TFT5ModelTester:
        self.parent.assertTrue(len(outputs) == len(outputs_use_cache_conf))
        self.parent.assertTrue(len(outputs) == len(outputs_no_past) + 1)

-        output, past_key_values = outputs
-
        # create hypothetical next token and extent to next_input_ids
        next_tokens = ids_tensor((self.batch_size, 1), config.vocab_size)

@@ -142,7 +140,7 @@ class TFT5ModelTester:
        next_input_ids = tf.concat([input_ids, next_tokens], axis=-1)

        output_from_no_past = model(next_input_ids)[0]
-        output_from_past = model(next_tokens, past_key_values=past_key_values)[0]
+        output_from_past = model(next_tokens, past_key_values=outputs.past_key_values)[0]

        # select random slice
        random_slice_idx = int(ids_tensor((1,), output_from_past.shape[-1]))
@@ -164,7 +162,7 @@ class TFT5ModelTester:
        attn_mask = tf.concat([attn_mask_begin, attn_mask_end], axis=1)

        # first forward pass
-        _, past_key_values = model(input_ids, attention_mask=attn_mask, use_cache=True)
+        outputs = model(input_ids, attention_mask=attn_mask, use_cache=True)

        # create hypothetical next token and extent to next_input_ids
        next_tokens = ids_tensor((self.batch_size, 1), config.vocab_size)
@@ -187,7 +185,7 @@ class TFT5ModelTester:

        # get two different outputs
        output_from_no_past = model(next_input_ids, attention_mask=attn_mask)[0]
-        output_from_past = model(next_tokens, past_key_values=past_key_values, attention_mask=attn_mask)[0]
+        output_from_past = model(next_tokens, past_key_values=outputs.past_key_values, attention_mask=attn_mask)[0]

        # select random slice
        random_slice_idx = ids_tensor((1,), output_from_past.shape[-1]).numpy().item()
@@ -208,8 +206,6 @@ class TFT5ModelTester:
        # first forward pass
        outputs = model(input_ids, use_cache=True)

-        output, past_key_values = outputs
-
        # create hypothetical next token and extent to next_input_ids
        next_tokens = ids_tensor((self.batch_size, 3), config.vocab_size)

@@ -217,7 +213,7 @@ class TFT5ModelTester:
        next_input_ids = tf.concat([input_ids, next_tokens], axis=-1)

        output_from_no_past = model(next_input_ids)[0]
-        output_from_past = model(next_tokens, past_key_values=past_key_values)[0]
+        output_from_past = model(next_tokens, past_key_values=outputs.past_key_values)[0]

        self.parent.assertEqual(next_tokens.shape[1], output_from_past.shape[1])

@@ -236,7 +232,7 @@ class TFT5ModelTester:
            "input_ids": input_ids,
            "decoder_input_ids": input_ids,
            "decoder_attention_mask": input_mask,
-            "use_cache": tf.convert_to_tensor([False]),
+            "use_cache": False,
        }
        return config, inputs_dict

@@ -298,14 +294,6 @@ class TFT5ModelTest(TFModelTesterMixin, unittest.TestCase):
        model = TFT5Model.from_pretrained("t5-small")
        self.assertIsNotNone(model)

-    @slow
-    def test_saved_model_with_attentions_output(self):
-        pass
-
-    @slow
-    def test_saved_model_with_hidden_states_output(self):
-        pass
-

 class TFT5EncoderOnlyModelTester:
    def __init__(
@@ -411,6 +399,7 @@ class TFT5EncoderOnlyModelTester:


 class TFT5EncoderOnlyModelTest(TFModelTesterMixin, unittest.TestCase):
+    is_encoder_decoder = False
    all_model_classes = (TFT5EncoderModel,) if is_tf_available() else ()

    def setUp(self):