From f69eb24b5a88f057a70f6ca95e9374e4ed599959 Mon Sep 17 00:00:00 2001 From: Yih-Dar <2521628+ydshieh@users.noreply.github.com> Date: Wed, 2 Nov 2022 17:38:44 +0100 Subject: [PATCH] Improve model tester (#19984) * part 1 * part 2 * part 3 * fix * For CANINE * For ESMFold Co-authored-by: ydshieh --- tests/models/albert/test_modeling_albert.py | 69 ++++++++++------ tests/models/canine/test_modeling_canine.py | 6 +- tests/models/ctrl/test_modeling_ctrl.py | 66 ++++++++++----- .../data2vec/test_modeling_data2vec_text.py | 63 ++++++++++----- tests/models/electra/test_modeling_electra.py | 63 ++++++++++----- tests/models/esm/test_modeling_esm.py | 63 ++++++++++----- tests/models/esm/test_modeling_esmfold.py | 63 ++++++++++----- .../models/flaubert/test_modeling_flaubert.py | 81 ++++++++++++------- tests/models/fsmt/test_modeling_fsmt.py | 54 ++++++++----- tests/models/ibert/test_modeling_ibert.py | 63 ++++++++++----- .../longformer/test_modeling_longformer.py | 66 ++++++++++----- tests/models/openai/test_modeling_openai.py | 60 +++++++++----- tests/models/roberta/test_modeling_roberta.py | 63 ++++++++++----- .../transfo_xl/test_modeling_transfo_xl.py | 57 ++++++++----- tests/models/xlm/test_modeling_xlm.py | 81 ++++++++++++------- .../test_modeling_xlm_roberta_xl.py | 63 ++++++++++----- 16 files changed, 654 insertions(+), 327 deletions(-) diff --git a/tests/models/albert/test_modeling_albert.py b/tests/models/albert/test_modeling_albert.py index 77496699d4..9acb5ba997 100644 --- a/tests/models/albert/test_modeling_albert.py +++ b/tests/models/albert/test_modeling_albert.py @@ -44,31 +44,54 @@ class AlbertModelTester: def __init__( self, parent, + batch_size=13, + seq_length=7, + is_training=True, + use_input_mask=True, + use_token_type_ids=True, + use_labels=True, + vocab_size=99, + embedding_size=16, + hidden_size=36, + num_hidden_layers=6, + num_hidden_groups=6, + num_attention_heads=6, + intermediate_size=37, + hidden_act="gelu", + hidden_dropout_prob=0.1, + attention_probs_dropout_prob=0.1, + max_position_embeddings=512, + type_vocab_size=16, + type_sequence_label_size=2, + initializer_range=0.02, + num_labels=3, + num_choices=4, + scope=None, ): self.parent = parent - self.batch_size = 13 - self.seq_length = 7 - self.is_training = True - self.use_input_mask = True - self.use_token_type_ids = True - self.use_labels = True - self.vocab_size = 99 - self.embedding_size = 16 - self.hidden_size = 36 - self.num_hidden_layers = 6 - self.num_hidden_groups = 6 - self.num_attention_heads = 6 - self.intermediate_size = 37 - self.hidden_act = "gelu" - self.hidden_dropout_prob = 0.1 - self.attention_probs_dropout_prob = 0.1 - self.max_position_embeddings = 512 - self.type_vocab_size = 16 - self.type_sequence_label_size = 2 - self.initializer_range = 0.02 - self.num_labels = 3 - self.num_choices = 4 - self.scope = None + self.batch_size = batch_size + self.seq_length = seq_length + self.is_training = is_training + self.use_input_mask = use_input_mask + self.use_token_type_ids = use_token_type_ids + self.use_labels = use_labels + self.vocab_size = vocab_size + self.embedding_size = embedding_size + self.hidden_size = hidden_size + self.num_hidden_layers = num_hidden_layers + self.num_hidden_groups = num_hidden_groups + self.num_attention_heads = num_attention_heads + self.intermediate_size = intermediate_size + self.hidden_act = hidden_act + self.hidden_dropout_prob = hidden_dropout_prob + self.attention_probs_dropout_prob = attention_probs_dropout_prob + self.max_position_embeddings = max_position_embeddings + self.type_vocab_size = type_vocab_size + self.type_sequence_label_size = type_sequence_label_size + self.initializer_range = initializer_range + self.num_labels = num_labels + self.num_choices = num_choices + self.scope = scope def prepare_config_and_inputs(self): input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size) diff --git a/tests/models/canine/test_modeling_canine.py b/tests/models/canine/test_modeling_canine.py index a4d13f0efa..cf45f10a83 100644 --- a/tests/models/canine/test_modeling_canine.py +++ b/tests/models/canine/test_modeling_canine.py @@ -48,6 +48,8 @@ class CanineModelTester: use_input_mask=True, use_token_type_ids=True, use_labels=True, + # let's use a vocab size that's way bigger than BERT's one + vocab_size=100000, hidden_size=32, num_hidden_layers=5, num_attention_heads=4, @@ -70,6 +72,7 @@ class CanineModelTester: self.use_input_mask = use_input_mask self.use_token_type_ids = use_token_type_ids self.use_labels = use_labels + self.vocab_size = vocab_size self.hidden_size = hidden_size self.num_hidden_layers = num_hidden_layers self.num_attention_heads = num_attention_heads @@ -86,8 +89,7 @@ class CanineModelTester: self.scope = scope def prepare_config_and_inputs(self): - # let's use a vocab size that's way bigger than BERT's one - input_ids = ids_tensor([self.batch_size, self.seq_length], 100000) + input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size) input_mask = None if self.use_input_mask: diff --git a/tests/models/ctrl/test_modeling_ctrl.py b/tests/models/ctrl/test_modeling_ctrl.py index ad6652f882..b28b44cce5 100644 --- a/tests/models/ctrl/test_modeling_ctrl.py +++ b/tests/models/ctrl/test_modeling_ctrl.py @@ -39,30 +39,52 @@ class CTRLModelTester: def __init__( self, parent, + batch_size=14, + seq_length=7, + is_training=True, + use_token_type_ids=True, + use_input_mask=True, + use_labels=True, + use_mc_token_ids=True, + vocab_size=99, + hidden_size=32, + num_hidden_layers=5, + num_attention_heads=4, + intermediate_size=37, + hidden_act="gelu", + hidden_dropout_prob=0.1, + attention_probs_dropout_prob=0.1, + max_position_embeddings=512, + type_vocab_size=16, + type_sequence_label_size=2, + initializer_range=0.02, + num_labels=3, + num_choices=4, + scope=None, ): self.parent = parent - self.batch_size = 14 - self.seq_length = 7 - self.is_training = True - self.use_token_type_ids = True - self.use_input_mask = True - self.use_labels = True - self.use_mc_token_ids = True - self.vocab_size = 99 - self.hidden_size = 32 - self.num_hidden_layers = 5 - self.num_attention_heads = 4 - self.intermediate_size = 37 - self.hidden_act = "gelu" - self.hidden_dropout_prob = 0.1 - self.attention_probs_dropout_prob = 0.1 - self.max_position_embeddings = 512 - self.type_vocab_size = 16 - self.type_sequence_label_size = 2 - self.initializer_range = 0.02 - self.num_labels = 3 - self.num_choices = 4 - self.scope = None + self.batch_size = batch_size + self.seq_length = seq_length + self.is_training = is_training + self.use_token_type_ids = use_token_type_ids + self.use_input_mask = use_input_mask + self.use_labels = use_labels + self.use_mc_token_ids = use_mc_token_ids + self.vocab_size = vocab_size + self.hidden_size = hidden_size + self.num_hidden_layers = num_hidden_layers + self.num_attention_heads = num_attention_heads + self.intermediate_size = intermediate_size + self.hidden_act = hidden_act + self.hidden_dropout_prob = hidden_dropout_prob + self.attention_probs_dropout_prob = attention_probs_dropout_prob + self.max_position_embeddings = max_position_embeddings + self.type_vocab_size = type_vocab_size + self.type_sequence_label_size = type_sequence_label_size + self.initializer_range = initializer_range + self.num_labels = num_labels + self.num_choices = num_choices + self.scope = scope self.pad_token_id = self.vocab_size - 1 def prepare_config_and_inputs(self): diff --git a/tests/models/data2vec/test_modeling_data2vec_text.py b/tests/models/data2vec/test_modeling_data2vec_text.py index f37d64044a..cdca7d7d68 100644 --- a/tests/models/data2vec/test_modeling_data2vec_text.py +++ b/tests/models/data2vec/test_modeling_data2vec_text.py @@ -48,29 +48,50 @@ class Data2VecTextModelTester: def __init__( self, parent, + batch_size=13, + seq_length=7, + is_training=True, + use_input_mask=True, + use_token_type_ids=True, + use_labels=True, + vocab_size=99, + hidden_size=32, + num_hidden_layers=5, + num_attention_heads=4, + intermediate_size=37, + hidden_act="gelu", + hidden_dropout_prob=0.1, + attention_probs_dropout_prob=0.1, + max_position_embeddings=512, + type_vocab_size=16, + type_sequence_label_size=2, + initializer_range=0.02, + num_labels=3, + num_choices=4, + scope=None, ): self.parent = parent - self.batch_size = 13 - self.seq_length = 7 - self.is_training = True - self.use_input_mask = True - self.use_token_type_ids = True - self.use_labels = True - self.vocab_size = 99 - self.hidden_size = 32 - self.num_hidden_layers = 5 - self.num_attention_heads = 4 - self.intermediate_size = 37 - self.hidden_act = "gelu" - self.hidden_dropout_prob = 0.1 - self.attention_probs_dropout_prob = 0.1 - self.max_position_embeddings = 512 - self.type_vocab_size = 16 - self.type_sequence_label_size = 2 - self.initializer_range = 0.02 - self.num_labels = 3 - self.num_choices = 4 - self.scope = None + self.batch_size = batch_size + self.seq_length = seq_length + self.is_training = is_training + self.use_input_mask = use_input_mask + self.use_token_type_ids = use_token_type_ids + self.use_labels = use_labels + self.vocab_size = vocab_size + self.hidden_size = hidden_size + self.num_hidden_layers = num_hidden_layers + self.num_attention_heads = num_attention_heads + self.intermediate_size = intermediate_size + self.hidden_act = hidden_act + self.hidden_dropout_prob = hidden_dropout_prob + self.attention_probs_dropout_prob = attention_probs_dropout_prob + self.max_position_embeddings = max_position_embeddings + self.type_vocab_size = type_vocab_size + self.type_sequence_label_size = type_sequence_label_size + self.initializer_range = initializer_range + self.num_labels = num_labels + self.num_choices = num_choices + self.scope = scope def prepare_config_and_inputs(self): input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size) diff --git a/tests/models/electra/test_modeling_electra.py b/tests/models/electra/test_modeling_electra.py index 9a6ba063ea..bdc0715f74 100644 --- a/tests/models/electra/test_modeling_electra.py +++ b/tests/models/electra/test_modeling_electra.py @@ -45,29 +45,50 @@ class ElectraModelTester: def __init__( self, parent, + batch_size=13, + seq_length=7, + is_training=True, + use_input_mask=True, + use_token_type_ids=True, + use_labels=True, + vocab_size=99, + hidden_size=32, + num_hidden_layers=5, + num_attention_heads=4, + intermediate_size=37, + hidden_act="gelu", + hidden_dropout_prob=0.1, + attention_probs_dropout_prob=0.1, + max_position_embeddings=512, + type_vocab_size=16, + type_sequence_label_size=2, + initializer_range=0.02, + num_labels=3, + num_choices=4, + scope=None, ): self.parent = parent - self.batch_size = 13 - self.seq_length = 7 - self.is_training = True - self.use_input_mask = True - self.use_token_type_ids = True - self.use_labels = True - self.vocab_size = 99 - self.hidden_size = 32 - self.num_hidden_layers = 5 - self.num_attention_heads = 4 - self.intermediate_size = 37 - self.hidden_act = "gelu" - self.hidden_dropout_prob = 0.1 - self.attention_probs_dropout_prob = 0.1 - self.max_position_embeddings = 512 - self.type_vocab_size = 16 - self.type_sequence_label_size = 2 - self.initializer_range = 0.02 - self.num_labels = 3 - self.num_choices = 4 - self.scope = None + self.batch_size = batch_size + self.seq_length = seq_length + self.is_training = is_training + self.use_input_mask = use_input_mask + self.use_token_type_ids = use_token_type_ids + self.use_labels = use_labels + self.vocab_size = vocab_size + self.hidden_size = hidden_size + self.num_hidden_layers = num_hidden_layers + self.num_attention_heads = num_attention_heads + self.intermediate_size = intermediate_size + self.hidden_act = hidden_act + self.hidden_dropout_prob = hidden_dropout_prob + self.attention_probs_dropout_prob = attention_probs_dropout_prob + self.max_position_embeddings = max_position_embeddings + self.type_vocab_size = type_vocab_size + self.type_sequence_label_size = type_sequence_label_size + self.initializer_range = initializer_range + self.num_labels = num_labels + self.num_choices = num_choices + self.scope = scope def prepare_config_and_inputs(self): input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size) diff --git a/tests/models/esm/test_modeling_esm.py b/tests/models/esm/test_modeling_esm.py index d8ec7929f5..1b6e024958 100644 --- a/tests/models/esm/test_modeling_esm.py +++ b/tests/models/esm/test_modeling_esm.py @@ -40,29 +40,50 @@ class EsmModelTester: def __init__( self, parent, + batch_size=13, + seq_length=7, + is_training=False, + use_input_mask=True, + use_token_type_ids=False, + use_labels=True, + vocab_size=33, + hidden_size=32, + num_hidden_layers=5, + num_attention_heads=4, + intermediate_size=37, + hidden_act="gelu", + hidden_dropout_prob=0.1, + attention_probs_dropout_prob=0.1, + max_position_embeddings=512, + type_vocab_size=16, + type_sequence_label_size=2, + initializer_range=0.02, + num_labels=3, + num_choices=4, + scope=None, ): self.parent = parent - self.batch_size = 13 - self.seq_length = 7 - self.is_training = False - self.use_input_mask = True - self.use_token_type_ids = False - self.use_labels = True - self.vocab_size = 33 - self.hidden_size = 32 - self.num_hidden_layers = 5 - self.num_attention_heads = 4 - self.intermediate_size = 37 - self.hidden_act = "gelu" - self.hidden_dropout_prob = 0.1 - self.attention_probs_dropout_prob = 0.1 - self.max_position_embeddings = 512 - self.type_vocab_size = 16 - self.type_sequence_label_size = 2 - self.initializer_range = 0.02 - self.num_labels = 3 - self.num_choices = 4 - self.scope = None + self.batch_size = batch_size + self.seq_length = seq_length + self.is_training = is_training + self.use_input_mask = use_input_mask + self.use_token_type_ids = use_token_type_ids + self.use_labels = use_labels + self.vocab_size = vocab_size + self.hidden_size = hidden_size + self.num_hidden_layers = num_hidden_layers + self.num_attention_heads = num_attention_heads + self.intermediate_size = intermediate_size + self.hidden_act = hidden_act + self.hidden_dropout_prob = hidden_dropout_prob + self.attention_probs_dropout_prob = attention_probs_dropout_prob + self.max_position_embeddings = max_position_embeddings + self.type_vocab_size = type_vocab_size + self.type_sequence_label_size = type_sequence_label_size + self.initializer_range = initializer_range + self.num_labels = num_labels + self.num_choices = num_choices + self.scope = scope def prepare_config_and_inputs(self): input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size) diff --git a/tests/models/esm/test_modeling_esmfold.py b/tests/models/esm/test_modeling_esmfold.py index 5813e78297..c6dd7c5655 100644 --- a/tests/models/esm/test_modeling_esmfold.py +++ b/tests/models/esm/test_modeling_esmfold.py @@ -34,29 +34,50 @@ class EsmFoldModelTester: def __init__( self, parent, + batch_size=13, + seq_length=7, + is_training=False, + use_input_mask=True, + use_token_type_ids=False, + use_labels=False, + vocab_size=19, + hidden_size=32, + num_hidden_layers=5, + num_attention_heads=4, + intermediate_size=37, + hidden_act="gelu", + hidden_dropout_prob=0.1, + attention_probs_dropout_prob=0.1, + max_position_embeddings=512, + type_vocab_size=16, + type_sequence_label_size=2, + initializer_range=0.02, + num_labels=3, + num_choices=4, + scope=None, ): self.parent = parent - self.batch_size = 13 - self.seq_length = 7 - self.is_training = False - self.use_input_mask = True - self.use_token_type_ids = False - self.use_labels = False - self.vocab_size = 19 - self.hidden_size = 32 - self.num_hidden_layers = 5 - self.num_attention_heads = 4 - self.intermediate_size = 37 - self.hidden_act = "gelu" - self.hidden_dropout_prob = 0.1 - self.attention_probs_dropout_prob = 0.1 - self.max_position_embeddings = 512 - self.type_vocab_size = 16 - self.type_sequence_label_size = 2 - self.initializer_range = 0.02 - self.num_labels = 3 - self.num_choices = 4 - self.scope = None + self.batch_size = batch_size + self.seq_length = seq_length + self.is_training = is_training + self.use_input_mask = use_input_mask + self.use_token_type_ids = use_token_type_ids + self.use_labels = use_labels + self.vocab_size = vocab_size + self.hidden_size = hidden_size + self.num_hidden_layers = num_hidden_layers + self.num_attention_heads = num_attention_heads + self.intermediate_size = intermediate_size + self.hidden_act = hidden_act + self.hidden_dropout_prob = hidden_dropout_prob + self.attention_probs_dropout_prob = attention_probs_dropout_prob + self.max_position_embeddings = max_position_embeddings + self.type_vocab_size = type_vocab_size + self.type_sequence_label_size = type_sequence_label_size + self.initializer_range = initializer_range + self.num_labels = num_labels + self.num_choices = num_choices + self.scope = scope def prepare_config_and_inputs(self): input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size) diff --git a/tests/models/flaubert/test_modeling_flaubert.py b/tests/models/flaubert/test_modeling_flaubert.py index b4150a6582..2cd204ebc3 100644 --- a/tests/models/flaubert/test_modeling_flaubert.py +++ b/tests/models/flaubert/test_modeling_flaubert.py @@ -42,35 +42,62 @@ class FlaubertModelTester(object): def __init__( self, parent, + batch_size=13, + seq_length=7, + is_training=True, + use_input_lengths=True, + use_token_type_ids=True, + use_labels=True, + gelu_activation=True, + sinusoidal_embeddings=False, + causal=False, + asm=False, + n_langs=2, + vocab_size=99, + n_special=0, + hidden_size=32, + num_hidden_layers=5, + num_attention_heads=4, + hidden_dropout_prob=0.1, + attention_probs_dropout_prob=0.1, + max_position_embeddings=512, + type_vocab_size=12, + type_sequence_label_size=2, + initializer_range=0.02, + num_labels=3, + num_choices=4, + summary_type="last", + use_proj=None, + scope=None, ): self.parent = parent - self.batch_size = 13 - self.seq_length = 7 - self.is_training = True - self.use_input_lengths = True - self.use_token_type_ids = True - self.use_labels = True - self.gelu_activation = True - self.sinusoidal_embeddings = False - self.causal = False - self.asm = False - self.n_langs = 2 - self.vocab_size = 99 - self.n_special = 0 - self.hidden_size = 32 - self.num_hidden_layers = 5 - self.num_attention_heads = 4 - self.hidden_dropout_prob = 0.1 - self.attention_probs_dropout_prob = 0.1 - self.max_position_embeddings = 512 - self.type_vocab_size = 12 - self.type_sequence_label_size = 2 - self.initializer_range = 0.02 - self.num_labels = 3 - self.num_choices = 4 - self.summary_type = "last" - self.use_proj = None - self.scope = None + self.batch_size = batch_size + self.seq_length = seq_length + self.is_training = is_training + self.use_input_lengths = use_input_lengths + self.use_token_type_ids = use_token_type_ids + self.use_labels = use_labels + self.gelu_activation = gelu_activation + self.sinusoidal_embeddings = sinusoidal_embeddings + self.causal = causal + self.asm = asm + self.n_langs = n_langs + self.vocab_size = vocab_size + self.n_special = n_special + self.hidden_size = hidden_size + self.num_hidden_layers = num_hidden_layers + self.num_attention_heads = num_attention_heads + self.hidden_dropout_prob = hidden_dropout_prob + self.attention_probs_dropout_prob = attention_probs_dropout_prob + self.max_position_embeddings = max_position_embeddings + self.type_vocab_size = type_vocab_size + self.type_sequence_label_size = type_sequence_label_size + self.initializer_range = initializer_range + self.num_labels = num_labels + self.num_choices = num_choices + self.summary_type = summary_type + self.use_proj = use_proj + self.scope = scope def prepare_config_and_inputs(self): input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size) diff --git a/tests/models/fsmt/test_modeling_fsmt.py b/tests/models/fsmt/test_modeling_fsmt.py index 4cc4055a69..cb1d783c73 100644 --- a/tests/models/fsmt/test_modeling_fsmt.py +++ b/tests/models/fsmt/test_modeling_fsmt.py @@ -46,26 +46,44 @@ class FSMTModelTester: def __init__( self, parent, + src_vocab_size=99, + tgt_vocab_size=99, + langs=["ru", "en"], + batch_size=13, + seq_length=7, + is_training=False, + use_labels=False, + hidden_size=16, + num_hidden_layers=2, + num_attention_heads=4, + intermediate_size=4, + hidden_act="relu", + hidden_dropout_prob=0.1, + attention_probs_dropout_prob=0.1, + max_position_embeddings=20, + bos_token_id=0, + pad_token_id=1, + eos_token_id=2, ): self.parent = parent - self.src_vocab_size = 99 - self.tgt_vocab_size = 99 - self.langs = ["ru", "en"] - self.batch_size = 13 - self.seq_length = 7 - self.is_training = False - self.use_labels = False - self.hidden_size = 16 - self.num_hidden_layers = 2 - self.num_attention_heads = 4 - self.intermediate_size = 4 - self.hidden_act = "relu" - self.hidden_dropout_prob = 0.1 - self.attention_probs_dropout_prob = 0.1 - self.max_position_embeddings = 20 - self.bos_token_id = 0 - self.pad_token_id = 1 - self.eos_token_id = 2 + self.src_vocab_size = src_vocab_size + self.tgt_vocab_size = tgt_vocab_size + self.langs = langs + self.batch_size = batch_size + self.seq_length = seq_length + self.is_training = is_training + self.use_labels = use_labels + self.hidden_size = hidden_size + self.num_hidden_layers = num_hidden_layers + self.num_attention_heads = num_attention_heads + self.intermediate_size = intermediate_size + self.hidden_act = hidden_act + self.hidden_dropout_prob = hidden_dropout_prob + self.attention_probs_dropout_prob = attention_probs_dropout_prob + self.max_position_embeddings = max_position_embeddings + self.bos_token_id = bos_token_id + self.pad_token_id = pad_token_id + self.eos_token_id = eos_token_id torch.manual_seed(0) # hack needed for modeling_common tests - despite not really having this attribute in this model diff --git a/tests/models/ibert/test_modeling_ibert.py b/tests/models/ibert/test_modeling_ibert.py index 78ba4d4604..c8ca026688 100644 --- a/tests/models/ibert/test_modeling_ibert.py +++ b/tests/models/ibert/test_modeling_ibert.py @@ -53,29 +53,50 @@ class IBertModelTester: def __init__( self, parent, + batch_size=13, + seq_length=7, + is_training=True, + use_input_mask=True, + use_token_type_ids=True, + use_labels=True, + vocab_size=99, + hidden_size=32, + num_hidden_layers=5, + num_attention_heads=4, + intermediate_size=37, + hidden_act="gelu", + hidden_dropout_prob=0.1, + attention_probs_dropout_prob=0.1, + max_position_embeddings=512, + type_vocab_size=16, + type_sequence_label_size=2, + initializer_range=0.02, + num_labels=3, + num_choices=4, + scope=None, ): self.parent = parent - self.batch_size = 13 - self.seq_length = 7 - self.is_training = True - self.use_input_mask = True - self.use_token_type_ids = True - self.use_labels = True - self.vocab_size = 99 - self.hidden_size = 32 - self.num_hidden_layers = 5 - self.num_attention_heads = 4 - self.intermediate_size = 37 - self.hidden_act = "gelu" - self.hidden_dropout_prob = 0.1 - self.attention_probs_dropout_prob = 0.1 - self.max_position_embeddings = 512 - self.type_vocab_size = 16 - self.type_sequence_label_size = 2 - self.initializer_range = 0.02 - self.num_labels = 3 - self.num_choices = 4 - self.scope = None + self.batch_size = batch_size + self.seq_length = seq_length + self.is_training = is_training + self.use_input_mask = use_input_mask + self.use_token_type_ids = use_token_type_ids + self.use_labels = use_labels + self.vocab_size = vocab_size + self.hidden_size = hidden_size + self.num_hidden_layers = num_hidden_layers + self.num_attention_heads = num_attention_heads + self.intermediate_size = intermediate_size + self.hidden_act = hidden_act + self.hidden_dropout_prob = hidden_dropout_prob + self.attention_probs_dropout_prob = attention_probs_dropout_prob + self.max_position_embeddings = max_position_embeddings + self.type_vocab_size = type_vocab_size + self.type_sequence_label_size = type_sequence_label_size + self.initializer_range = initializer_range + self.num_labels = num_labels + self.num_choices = num_choices + self.scope = scope def prepare_config_and_inputs(self): input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size) diff --git a/tests/models/longformer/test_modeling_longformer.py b/tests/models/longformer/test_modeling_longformer.py index c1839d67d3..6bef4cbea1 100644 --- a/tests/models/longformer/test_modeling_longformer.py +++ b/tests/models/longformer/test_modeling_longformer.py @@ -41,30 +41,52 @@ class LongformerModelTester: def __init__( self, parent, + batch_size=13, + seq_length=7, + is_training=True, + use_input_mask=True, + use_token_type_ids=True, + use_labels=True, + vocab_size=99, + hidden_size=32, + num_hidden_layers=5, + num_attention_heads=4, + intermediate_size=37, + hidden_act="gelu", + hidden_dropout_prob=0.1, + attention_probs_dropout_prob=0.1, + max_position_embeddings=512, + type_vocab_size=16, + type_sequence_label_size=2, + initializer_range=0.02, + num_labels=3, + num_choices=4, + scope=None, + attention_window=4, ): self.parent = parent - self.batch_size = 13 - self.seq_length = 7 - self.is_training = True - self.use_input_mask = True - self.use_token_type_ids = True - self.use_labels = True - self.vocab_size = 99 - self.hidden_size = 32 - self.num_hidden_layers = 5 - self.num_attention_heads = 4 - self.intermediate_size = 37 - self.hidden_act = "gelu" - self.hidden_dropout_prob = 0.1 - self.attention_probs_dropout_prob = 0.1 - self.max_position_embeddings = 512 - self.type_vocab_size = 16 - self.type_sequence_label_size = 2 - self.initializer_range = 0.02 - self.num_labels = 3 - self.num_choices = 4 - self.scope = None - self.attention_window = 4 + self.batch_size = batch_size + self.seq_length = seq_length + self.is_training = is_training + self.use_input_mask = use_input_mask + self.use_token_type_ids = use_token_type_ids + self.use_labels = use_labels + self.vocab_size = vocab_size + self.hidden_size = hidden_size + self.num_hidden_layers = num_hidden_layers + self.num_attention_heads = num_attention_heads + self.intermediate_size = intermediate_size + self.hidden_act = hidden_act + self.hidden_dropout_prob = hidden_dropout_prob + self.attention_probs_dropout_prob = attention_probs_dropout_prob + self.max_position_embeddings = max_position_embeddings + self.type_vocab_size = type_vocab_size + self.type_sequence_label_size = type_sequence_label_size + self.initializer_range = initializer_range + self.num_labels = num_labels + self.num_choices = num_choices + self.scope = scope + self.attention_window = attention_window # `ModelTesterMixin.test_attention_outputs` is expecting attention tensors to be of size # [num_attention_heads, encoder_seq_length, encoder_key_length], but LongformerSelfAttention diff --git a/tests/models/openai/test_modeling_openai.py b/tests/models/openai/test_modeling_openai.py index 2ff935eef5..eb594e620c 100644 --- a/tests/models/openai/test_modeling_openai.py +++ b/tests/models/openai/test_modeling_openai.py @@ -41,28 +41,48 @@ class OpenAIGPTModelTester: def __init__( self, parent, + batch_size=13, + seq_length=7, + is_training=True, + use_token_type_ids=True, + use_labels=True, + vocab_size=99, + hidden_size=32, + num_hidden_layers=5, + num_attention_heads=4, + intermediate_size=37, + hidden_act="gelu", + hidden_dropout_prob=0.1, + attention_probs_dropout_prob=0.1, + max_position_embeddings=512, + type_vocab_size=16, + type_sequence_label_size=2, + initializer_range=0.02, + num_labels=3, + num_choices=4, + scope=None, ): self.parent = parent - self.batch_size = 13 - self.seq_length = 7 - self.is_training = True - self.use_token_type_ids = True - self.use_labels = True - self.vocab_size = 99 - self.hidden_size = 32 - self.num_hidden_layers = 5 - self.num_attention_heads = 4 - self.intermediate_size = 37 - self.hidden_act = "gelu" - self.hidden_dropout_prob = 0.1 - self.attention_probs_dropout_prob = 0.1 - self.max_position_embeddings = 512 - self.type_vocab_size = 16 - self.type_sequence_label_size = 2 - self.initializer_range = 0.02 - self.num_labels = 3 - self.num_choices = 4 - self.scope = None + self.batch_size = batch_size + self.seq_length = seq_length + self.is_training = is_training + self.use_token_type_ids = use_token_type_ids + self.use_labels = use_labels + self.vocab_size = vocab_size + self.hidden_size = hidden_size + self.num_hidden_layers = num_hidden_layers + self.num_attention_heads = num_attention_heads + self.intermediate_size = intermediate_size + self.hidden_act = hidden_act + self.hidden_dropout_prob = hidden_dropout_prob + self.attention_probs_dropout_prob = attention_probs_dropout_prob + self.max_position_embeddings = max_position_embeddings + self.type_vocab_size = type_vocab_size + self.type_sequence_label_size = type_sequence_label_size + self.initializer_range = initializer_range + self.num_labels = num_labels + self.num_choices = num_choices + self.scope = scope self.pad_token_id = self.vocab_size - 1 def prepare_config_and_inputs(self): diff --git a/tests/models/roberta/test_modeling_roberta.py b/tests/models/roberta/test_modeling_roberta.py index 7163a35702..72acf78c3c 100644 --- a/tests/models/roberta/test_modeling_roberta.py +++ b/tests/models/roberta/test_modeling_roberta.py @@ -50,29 +50,50 @@ class RobertaModelTester: def __init__( self, parent, + batch_size=13, + seq_length=7, + is_training=True, + use_input_mask=True, + use_token_type_ids=True, + use_labels=True, + vocab_size=99, + hidden_size=32, + num_hidden_layers=5, + num_attention_heads=4, + intermediate_size=37, + hidden_act="gelu", + hidden_dropout_prob=0.1, + attention_probs_dropout_prob=0.1, + max_position_embeddings=512, + type_vocab_size=16, + type_sequence_label_size=2, + initializer_range=0.02, + num_labels=3, + num_choices=4, + scope=None, ): self.parent = parent - self.batch_size = 13 - self.seq_length = 7 - self.is_training = True - self.use_input_mask = True - self.use_token_type_ids = True - self.use_labels = True - self.vocab_size = 99 - self.hidden_size = 32 - self.num_hidden_layers = 5 - self.num_attention_heads = 4 - self.intermediate_size = 37 - self.hidden_act = "gelu" - self.hidden_dropout_prob = 0.1 - self.attention_probs_dropout_prob = 0.1 - self.max_position_embeddings = 512 - self.type_vocab_size = 16 - self.type_sequence_label_size = 2 - self.initializer_range = 0.02 - self.num_labels = 3 - self.num_choices = 4 - self.scope = None + self.batch_size = batch_size + self.seq_length = seq_length + self.is_training = is_training + self.use_input_mask = use_input_mask + self.use_token_type_ids = use_token_type_ids + self.use_labels = use_labels + self.vocab_size = vocab_size + self.hidden_size = hidden_size + self.num_hidden_layers = num_hidden_layers + self.num_attention_heads = num_attention_heads + self.intermediate_size = intermediate_size + self.hidden_act = hidden_act + self.hidden_dropout_prob = hidden_dropout_prob + self.attention_probs_dropout_prob = attention_probs_dropout_prob + self.max_position_embeddings = max_position_embeddings + self.type_vocab_size = type_vocab_size + self.type_sequence_label_size = type_sequence_label_size + self.initializer_range = initializer_range + self.num_labels = num_labels + self.num_choices = num_choices + self.scope = scope def prepare_config_and_inputs(self): input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size) diff --git a/tests/models/transfo_xl/test_modeling_transfo_xl.py b/tests/models/transfo_xl/test_modeling_transfo_xl.py index 309811efb4..08206c0df1 100644 --- a/tests/models/transfo_xl/test_modeling_transfo_xl.py +++ b/tests/models/transfo_xl/test_modeling_transfo_xl.py @@ -37,28 +37,47 @@ class TransfoXLModelTester: def __init__( self, parent, + batch_size=14, + seq_length=7, + mem_len=30, + clamp_len=15, + is_training=False, + use_labels=True, + vocab_size=99, + cutoffs=[10, 50, 80], + hidden_size=32, + d_embed=32, + num_attention_heads=4, + d_head=8, + d_inner=128, + div_val=2, + num_hidden_layers=5, + scope=None, + seed=1, + eos_token_id=0, + num_labels=3, ): self.parent = parent - self.batch_size = 14 - self.seq_length = 7 - self.mem_len = 30 + self.batch_size = batch_size + self.seq_length = seq_length + self.mem_len = mem_len self.key_length = self.seq_length + self.mem_len - self.clamp_len = 15 - self.is_training = False - self.use_labels = True - self.vocab_size = 99 - self.cutoffs = [10, 50, 80] - self.hidden_size = 32 - self.d_embed = 32 - self.num_attention_heads = 4 - self.d_head = 8 - self.d_inner = 128 - self.div_val = 2 - self.num_hidden_layers = 5 - self.scope = None - self.seed = 1 - self.eos_token_id = 0 - self.num_labels = 3 + self.clamp_len = clamp_len + self.is_training = is_training + self.use_labels = use_labels + self.vocab_size = vocab_size + self.cutoffs = cutoffs + self.hidden_size = hidden_size + self.d_embed = d_embed + self.num_attention_heads = num_attention_heads + self.d_head = d_head + self.d_inner = d_inner + self.div_val = div_val + self.num_hidden_layers = num_hidden_layers + self.scope = scope + self.seed = seed + self.eos_token_id = eos_token_id + self.num_labels = num_labels self.pad_token_id = self.vocab_size - 1 def prepare_config_and_inputs(self): diff --git a/tests/models/xlm/test_modeling_xlm.py b/tests/models/xlm/test_modeling_xlm.py index 8f56ed8472..c5ab503131 100644 --- a/tests/models/xlm/test_modeling_xlm.py +++ b/tests/models/xlm/test_modeling_xlm.py @@ -42,35 +42,62 @@ class XLMModelTester: def __init__( self, parent, + batch_size=13, + seq_length=7, + is_training=True, + use_input_lengths=True, + use_token_type_ids=True, + use_labels=True, + gelu_activation=True, + sinusoidal_embeddings=False, + causal=False, + asm=False, + n_langs=2, + vocab_size=99, + n_special=0, + hidden_size=32, + num_hidden_layers=5, + num_attention_heads=4, + hidden_dropout_prob=0.1, + attention_probs_dropout_prob=0.1, + max_position_embeddings=512, + type_sequence_label_size=2, + initializer_range=0.02, + num_labels=2, + num_choices=4, + summary_type="last", + use_proj=True, + scope=None, + bos_token_id=0, ): self.parent = parent - self.batch_size = 13 - self.seq_length = 7 - self.is_training = True - self.use_input_lengths = True - self.use_token_type_ids = True - self.use_labels = True - self.gelu_activation = True - self.sinusoidal_embeddings = False - self.causal = False - self.asm = False - self.n_langs = 2 - self.vocab_size = 99 - self.n_special = 0 - self.hidden_size = 32 - self.num_hidden_layers = 5 - self.num_attention_heads = 4 - self.hidden_dropout_prob = 0.1 - self.attention_probs_dropout_prob = 0.1 - self.max_position_embeddings = 512 - self.type_sequence_label_size = 2 - self.initializer_range = 0.02 - self.num_labels = 2 - self.num_choices = 4 - self.summary_type = "last" - self.use_proj = True - self.scope = None - self.bos_token_id = 0 + self.batch_size = batch_size + self.seq_length = seq_length + self.is_training = is_training + self.use_input_lengths = use_input_lengths + self.use_token_type_ids = use_token_type_ids + self.use_labels = use_labels + self.gelu_activation = gelu_activation + self.sinusoidal_embeddings = sinusoidal_embeddings + self.causal = causal + self.asm = asm + self.n_langs = n_langs + self.vocab_size = vocab_size + self.n_special = n_special + self.hidden_size = hidden_size + self.num_hidden_layers = num_hidden_layers + self.num_attention_heads = num_attention_heads + self.hidden_dropout_prob = hidden_dropout_prob + self.attention_probs_dropout_prob = attention_probs_dropout_prob + self.max_position_embeddings = max_position_embeddings + self.type_sequence_label_size = type_sequence_label_size + self.initializer_range = initializer_range + self.num_labels = num_labels + self.num_choices = num_choices + self.summary_type = summary_type + self.use_proj = use_proj + self.scope = scope + self.bos_token_id = bos_token_id def prepare_config_and_inputs(self): input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size) diff --git a/tests/models/xlm_roberta_xl/test_modeling_xlm_roberta_xl.py b/tests/models/xlm_roberta_xl/test_modeling_xlm_roberta_xl.py index b889753f66..75e60bc941 100644 --- a/tests/models/xlm_roberta_xl/test_modeling_xlm_roberta_xl.py +++ b/tests/models/xlm_roberta_xl/test_modeling_xlm_roberta_xl.py @@ -46,29 +46,50 @@ class XLMRobertaXLModelTester: def __init__( self, parent, + batch_size=13, + seq_length=7, + is_training=True, + use_input_mask=True, + use_token_type_ids=True, + use_labels=True, + vocab_size=99, + hidden_size=32, + num_hidden_layers=5, + num_attention_heads=4, + intermediate_size=37, + hidden_act="gelu", + hidden_dropout_prob=0.1, + attention_probs_dropout_prob=0.1, + max_position_embeddings=512, + type_vocab_size=16, + type_sequence_label_size=2, + initializer_range=0.02, + num_labels=3, + num_choices=4, + scope=None, ): self.parent = parent - self.batch_size = 13 - self.seq_length = 7 - self.is_training = True - self.use_input_mask = True - self.use_token_type_ids = True - self.use_labels = True - self.vocab_size = 99 - self.hidden_size = 32 - self.num_hidden_layers = 5 - self.num_attention_heads = 4 - self.intermediate_size = 37 - self.hidden_act = "gelu" - self.hidden_dropout_prob = 0.1 - self.attention_probs_dropout_prob = 0.1 - self.max_position_embeddings = 512 - self.type_vocab_size = 16 - self.type_sequence_label_size = 2 - self.initializer_range = 0.02 - self.num_labels = 3 - self.num_choices = 4 - self.scope = None + self.batch_size = batch_size + self.seq_length = seq_length + self.is_training = is_training + self.use_input_mask = use_input_mask + self.use_token_type_ids = use_token_type_ids + self.use_labels = use_labels + self.vocab_size = vocab_size + self.hidden_size = hidden_size + self.num_hidden_layers = num_hidden_layers + self.num_attention_heads = num_attention_heads + self.intermediate_size = intermediate_size + self.hidden_act = hidden_act + self.hidden_dropout_prob = hidden_dropout_prob + self.attention_probs_dropout_prob = attention_probs_dropout_prob + self.max_position_embeddings = max_position_embeddings + self.type_vocab_size = type_vocab_size + self.type_sequence_label_size = type_sequence_label_size + self.initializer_range = initializer_range + self.num_labels = num_labels + self.num_choices = num_choices + self.scope = scope def prepare_config_and_inputs(self): input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)