[cleanup] Hoist ModelTester objects to top level (#4939)

Co-authored-by: Sam Shleifer <sshleifer@gmail.com>
2020-06-16 17:33:43 +05:30
parent 0c55a384f8
commit c852036b4a
25 changed files with 4721 additions and 5212 deletions
--- a/tests/test_modeling_tf_t5.py
+++ b/tests/test_modeling_tf_t5.py
@@ -28,6 +28,186 @@ if is_tf_available():
    from transformers import TFT5Model, TFT5ForConditionalGeneration, T5Tokenizer


+class TFT5ModelTester:
+    def __init__(
+        self, parent,
+    ):
+        self.parent = parent
+        self.batch_size = 13
+        self.seq_length = 7
+        self.is_training = True
+        self.use_input_mask = True
+        self.use_labels = True
+        self.vocab_size = 99
+        self.n_positions = 14
+        self.hidden_size = 32
+        self.num_hidden_layers = 5
+        self.num_attention_heads = 4
+        self.d_ff = 37
+        self.relative_attention_num_buckets = 8
+        self.dropout_rate = 0.1
+        self.initializer_factor = 0.002
+        self.eos_token_id = 1
+        self.pad_token_id = 0
+        self.scope = None
+
+    def prepare_config_and_inputs(self):
+        input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)
+
+        input_mask = None
+        if self.use_input_mask:
+            input_mask = ids_tensor([self.batch_size, self.seq_length], vocab_size=2)
+
+        token_labels = None
+        if self.use_labels:
+            token_labels = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)
+
+        config = T5Config(
+            vocab_size=self.vocab_size,
+            n_positions=self.n_positions,
+            d_model=self.hidden_size,
+            d_ff=self.d_ff,
+            d_kv=self.hidden_size // self.num_attention_heads,
+            num_layers=self.num_hidden_layers,
+            num_heads=self.num_attention_heads,
+            relative_attention_num_buckets=self.relative_attention_num_buckets,
+            dropout_rate=self.dropout_rate,
+            initializer_factor=self.initializer_factor,
+            eos_token_id=self.eos_token_id,
+            bos_token_id=self.pad_token_id,
+            pad_token_id=self.pad_token_id,
+        )
+
+        return (config, input_ids, input_mask, token_labels)
+
+    def create_and_check_t5_model(self, config, input_ids, input_mask, token_labels):
+        model = TFT5Model(config=config)
+        inputs = {
+            "inputs": input_ids,
+            "decoder_input_ids": input_ids,
+            "decoder_attention_mask": input_mask,
+        }
+        decoder_output, decoder_past, encoder_output = model(inputs)
+
+        decoder_output, decoder_past, encoder_output = model(
+            input_ids, decoder_attention_mask=input_mask, decoder_input_ids=input_ids
+        )
+        result = {
+            "encoder_output": encoder_output.numpy(),
+            "decoder_past": decoder_past,
+            "decoder_output": decoder_output.numpy(),
+        }
+        self.parent.assertListEqual(
+            list(result["encoder_output"].shape), [self.batch_size, self.seq_length, self.hidden_size]
+        )
+        self.parent.assertListEqual(
+            list(result["decoder_output"].shape), [self.batch_size, self.seq_length, self.hidden_size]
+        )
+        self.parent.assertEqual(len(decoder_past), 2)
+        # decoder_past[0] should correspond to encoder output
+        self.parent.assertTrue(tf.reduce_all(tf.math.equal(decoder_past[0][0], encoder_output)))
+        # There should be `num_layers` key value embeddings stored in decoder_past[1]
+        self.parent.assertEqual(len(decoder_past[1]), config.num_layers)
+        # There should be a self attn key, a self attn value, a cross attn key and a cross attn value stored in each decoder_past[1] tuple
+        self.parent.assertEqual(len(decoder_past[1][0]), 4)
+
+    def create_and_check_t5_with_lm_head(self, config, input_ids, input_mask, token_labels):
+        model = TFT5ForConditionalGeneration(config=config)
+        inputs_dict = {
+            "inputs": input_ids,
+            "decoder_input_ids": input_ids,
+            "decoder_attention_mask": input_mask,
+        }
+
+        prediction_scores, _, _ = model(inputs_dict)
+
+        result = {
+            "prediction_scores": prediction_scores.numpy(),
+        }
+        self.parent.assertListEqual(
+            list(result["prediction_scores"].shape), [self.batch_size, self.seq_length, self.vocab_size]
+        )
+
+    def create_and_check_t5_decoder_model_past(self, config, input_ids, decoder_input_ids, attention_mask):
+        model = TFT5Model(config=config).get_decoder()
+
+        input_ids = input_ids[:1, :]
+        self.batch_size = 1
+
+        # first forward pass
+        _, past_key_value_states = model(input_ids, use_cache=True)
+
+        # create hypothetical next token and extend to next_input_ids
+        next_tokens = ids_tensor((self.batch_size, 1), config.vocab_size)
+
+        # append next_tokens to input_ids
+        next_input_ids = tf.concat([input_ids, next_tokens], axis=-1)
+
+        output_from_no_past = model(next_input_ids)[0]
+        output_from_past = model(next_tokens, past_key_value_states=past_key_value_states)[0]
+
+        # select random slice
+        random_slice_idx = int(ids_tensor((1,), output_from_past.shape[-1]))
+        output_from_no_past_slice = output_from_no_past[:, -1, random_slice_idx]
+        output_from_past_slice = output_from_past[:, 0, random_slice_idx]
+
+        # test that outputs are equal for slice
+        tf.debugging.assert_near(output_from_past_slice, output_from_no_past_slice, rtol=1e-3)
+
+    def create_and_check_t5_decoder_model_attention_mask_past(
+        self, config, input_ids, decoder_input_ids, attention_mask
+    ):
+        model = TFT5Model(config=config).get_decoder()
+
+        # create attention mask
+        half_seq_length = self.seq_length // 2
+        attn_mask_begin = tf.ones((self.batch_size, half_seq_length), dtype=tf.int32)
+        attn_mask_end = tf.zeros((self.batch_size, self.seq_length - half_seq_length), dtype=tf.int32)
+        attn_mask = tf.concat([attn_mask_begin, attn_mask_end], axis=1)
+
+        # first forward pass
+        _, past_key_value_states = model(input_ids, attention_mask=attn_mask, use_cache=True)
+
+        # create hypothetical next token and extend to next_input_ids
+        next_tokens = ids_tensor((self.batch_size, 1), config.vocab_size)
+
+        # change a random masked slice from input_ids
+        random_seq_idx_to_change = ids_tensor((1,), half_seq_length).numpy() + 1
+        random_other_next_tokens = ids_tensor((self.batch_size, self.seq_length), config.vocab_size)
+        vector_condition = tf.range(self.seq_length) == (self.seq_length - random_seq_idx_to_change)
+        condition = tf.transpose(
+            tf.broadcast_to(tf.expand_dims(vector_condition, -1), (self.seq_length, self.batch_size))
+        )
+        input_ids = tf.where(condition, random_other_next_tokens, input_ids)
+
+        # append to next input_ids and attn_mask
+        next_input_ids = tf.concat([input_ids, next_tokens], axis=-1)
+        attn_mask = tf.concat([attn_mask, tf.ones((attn_mask.shape[0], 1), dtype=tf.int32)], axis=1,)
+
+        # get two different outputs
+        output_from_no_past = model(next_input_ids, attention_mask=attn_mask)[0]
+        output_from_past = model(next_tokens, past_key_value_states=past_key_value_states, attention_mask=attn_mask)[0]
+
+        # select random slice
+        random_slice_idx = ids_tensor((1,), output_from_past.shape[-1]).numpy().item()
+        output_from_no_past_slice = output_from_no_past[:, -1, random_slice_idx]
+        output_from_past_slice = output_from_past[:, 0, random_slice_idx]
+
+        # test that outputs are equal for slice
+        tf.debugging.assert_near(output_from_past_slice, output_from_no_past_slice, rtol=1e-3)
+
+    def prepare_config_and_inputs_for_common(self):
+        config_and_inputs = self.prepare_config_and_inputs()
+        (config, input_ids, input_mask, token_labels) = config_and_inputs
+        inputs_dict = {
+            "inputs": input_ids,
+            "decoder_input_ids": input_ids,
+            "decoder_attention_mask": input_mask,
+            "use_cache": tf.convert_to_tensor([False]),
+        }
+        return config, inputs_dict
+
+
@require_tf
 class TFT5ModelTest(TFModelTesterMixin, unittest.TestCase):

@@ -35,207 +215,8 @@ class TFT5ModelTest(TFModelTesterMixin, unittest.TestCase):
    all_model_classes = (TFT5Model, TFT5ForConditionalGeneration) if is_tf_available() else ()
    all_generative_model_classes = (TFT5ForConditionalGeneration,) if is_tf_available() else ()

-    class TFT5ModelTester(object):
-        def __init__(
-            self,
-            parent,
-            batch_size=13,
-            seq_length=7,
-            is_training=True,
-            use_input_mask=True,
-            use_labels=True,
-            vocab_size=99,
-            n_positions=14,
-            hidden_size=32,
-            num_hidden_layers=5,
-            num_attention_heads=4,
-            d_ff=37,
-            relative_attention_num_buckets=8,
-            dropout_rate=0.1,
-            initializer_factor=0.002,
-            eos_token_id=1,
-            pad_token_id=0,
-            scope=None,
-        ):
-            self.parent = parent
-            self.batch_size = batch_size
-            self.seq_length = seq_length
-            self.is_training = is_training
-            self.use_input_mask = use_input_mask
-            self.use_labels = use_labels
-            self.vocab_size = vocab_size
-            self.n_positions = n_positions
-            self.hidden_size = hidden_size
-            self.num_hidden_layers = num_hidden_layers
-            self.num_attention_heads = num_attention_heads
-            self.d_ff = d_ff
-            self.relative_attention_num_buckets = relative_attention_num_buckets
-            self.dropout_rate = dropout_rate
-            self.initializer_factor = initializer_factor
-            self.eos_token_id = eos_token_id
-            self.pad_token_id = pad_token_id
-            self.scope = scope
-
-        def prepare_config_and_inputs(self):
-            input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)
-
-            input_mask = None
-            if self.use_input_mask:
-                input_mask = ids_tensor([self.batch_size, self.seq_length], vocab_size=2)
-
-            token_labels = None
-            if self.use_labels:
-                token_labels = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)
-
-            config = T5Config(
-                vocab_size=self.vocab_size,
-                n_positions=self.n_positions,
-                d_model=self.hidden_size,
-                d_ff=self.d_ff,
-                d_kv=self.hidden_size // self.num_attention_heads,
-                num_layers=self.num_hidden_layers,
-                num_heads=self.num_attention_heads,
-                relative_attention_num_buckets=self.relative_attention_num_buckets,
-                dropout_rate=self.dropout_rate,
-                initializer_factor=self.initializer_factor,
-                eos_token_id=self.eos_token_id,
-                bos_token_id=self.pad_token_id,
-                pad_token_id=self.pad_token_id,
-            )
-
-            return (config, input_ids, input_mask, token_labels)
-
-        def create_and_check_t5_model(self, config, input_ids, input_mask, token_labels):
-            model = TFT5Model(config=config)
-            inputs = {
-                "inputs": input_ids,
-                "decoder_input_ids": input_ids,
-                "decoder_attention_mask": input_mask,
-            }
-            decoder_output, decoder_past, encoder_output = model(inputs)
-
-            decoder_output, decoder_past, encoder_output = model(
-                input_ids, decoder_attention_mask=input_mask, decoder_input_ids=input_ids
-            )
-            result = {
-                "encoder_output": encoder_output.numpy(),
-                "decoder_past": decoder_past,
-                "decoder_output": decoder_output.numpy(),
-            }
-            self.parent.assertListEqual(
-                list(result["encoder_output"].shape), [self.batch_size, self.seq_length, self.hidden_size]
-            )
-            self.parent.assertListEqual(
-                list(result["decoder_output"].shape), [self.batch_size, self.seq_length, self.hidden_size]
-            )
-            self.parent.assertEqual(len(decoder_past), 2)
-            # decoder_past[0] should correspond to encoder output
-            self.parent.assertTrue(tf.reduce_all(tf.math.equal(decoder_past[0][0], encoder_output)))
-            # There should be `num_layers` key value embeddings stored in decoder_past[1]
-            self.parent.assertEqual(len(decoder_past[1]), config.num_layers)
-            # There should be a self attn key, a self attn value, a cross attn key and a cross attn value stored in each decoder_past[1] tuple
-            self.parent.assertEqual(len(decoder_past[1][0]), 4)
-
-        def create_and_check_t5_with_lm_head(self, config, input_ids, input_mask, token_labels):
-            model = TFT5ForConditionalGeneration(config=config)
-            inputs_dict = {
-                "inputs": input_ids,
-                "decoder_input_ids": input_ids,
-                "decoder_attention_mask": input_mask,
-            }
-
-            prediction_scores, _, _ = model(inputs_dict)
-
-            result = {
-                "prediction_scores": prediction_scores.numpy(),
-            }
-            self.parent.assertListEqual(
-                list(result["prediction_scores"].shape), [self.batch_size, self.seq_length, self.vocab_size]
-            )
-
-        def create_and_check_t5_decoder_model_past(self, config, input_ids, decoder_input_ids, attention_mask):
-            model = TFT5Model(config=config).get_decoder()
-
-            input_ids = input_ids[:1, :]
-            self.batch_size = 1
-
-            # first forward pass
-            _, past_key_value_states = model(input_ids, use_cache=True)
-
-            # create hypothetical next token and extent to next_input_ids
-            next_tokens = ids_tensor((self.batch_size, 1), config.vocab_size)
-
-            # append to next input_ids and
-            next_input_ids = tf.concat([input_ids, next_tokens], axis=-1)
-
-            output_from_no_past = model(next_input_ids)[0]
-            output_from_past = model(next_tokens, past_key_value_states=past_key_value_states)[0]
-
-            # select random slice
-            random_slice_idx = int(ids_tensor((1,), output_from_past.shape[-1]))
-            output_from_no_past_slice = output_from_no_past[:, -1, random_slice_idx]
-            output_from_past_slice = output_from_past[:, 0, random_slice_idx]
-
-            # test that outputs are equal for slice
-            tf.debugging.assert_near(output_from_past_slice, output_from_no_past_slice, rtol=1e-3)
-
-        def create_and_check_t5_decoder_model_attention_mask_past(
-            self, config, input_ids, decoder_input_ids, attention_mask
-        ):
-            model = TFT5Model(config=config).get_decoder()
-
-            # create attention mask
-            half_seq_length = self.seq_length // 2
-            attn_mask_begin = tf.ones((self.batch_size, half_seq_length), dtype=tf.int32)
-            attn_mask_end = tf.zeros((self.batch_size, self.seq_length - half_seq_length), dtype=tf.int32)
-            attn_mask = tf.concat([attn_mask_begin, attn_mask_end], axis=1)
-
-            # first forward pass
-            _, past_key_value_states = model(input_ids, attention_mask=attn_mask, use_cache=True)
-
-            # create hypothetical next token and extent to next_input_ids
-            next_tokens = ids_tensor((self.batch_size, 1), config.vocab_size)
-
-            # change a random masked slice from input_ids
-            random_seq_idx_to_change = ids_tensor((1,), half_seq_length).numpy() + 1
-            random_other_next_tokens = ids_tensor((self.batch_size, self.seq_length), config.vocab_size)
-            vector_condition = tf.range(self.seq_length) == (self.seq_length - random_seq_idx_to_change)
-            condition = tf.transpose(
-                tf.broadcast_to(tf.expand_dims(vector_condition, -1), (self.seq_length, self.batch_size))
-            )
-            input_ids = tf.where(condition, random_other_next_tokens, input_ids)
-
-            # append to next input_ids and attn_mask
-            next_input_ids = tf.concat([input_ids, next_tokens], axis=-1)
-            attn_mask = tf.concat([attn_mask, tf.ones((attn_mask.shape[0], 1), dtype=tf.int32)], axis=1,)
-
-            # get two different outputs
-            output_from_no_past = model(next_input_ids, attention_mask=attn_mask)[0]
-            output_from_past = model(
-                next_tokens, past_key_value_states=past_key_value_states, attention_mask=attn_mask
-            )[0]
-
-            # select random slice
-            random_slice_idx = ids_tensor((1,), output_from_past.shape[-1]).numpy().item()
-            output_from_no_past_slice = output_from_no_past[:, -1, random_slice_idx]
-            output_from_past_slice = output_from_past[:, 0, random_slice_idx]
-
-            # test that outputs are equal for slice
-            tf.debugging.assert_near(output_from_past_slice, output_from_no_past_slice, rtol=1e-3)
-
-        def prepare_config_and_inputs_for_common(self):
-            config_and_inputs = self.prepare_config_and_inputs()
-            (config, input_ids, input_mask, token_labels) = config_and_inputs
-            inputs_dict = {
-                "inputs": input_ids,
-                "decoder_input_ids": input_ids,
-                "decoder_attention_mask": input_mask,
-                "use_cache": tf.convert_to_tensor([False]),
-            }
-            return config, inputs_dict
-
    def setUp(self):
-        self.model_tester = TFT5ModelTest.TFT5ModelTester(self)
+        self.model_tester = TFT5ModelTester(self)
        self.config_tester = ConfigTester(self, config_class=T5Config, d_model=37)

    def test_config(self):