Improve model tester (#19984)
* part 1
* part 2
* part 3
* fix
* For CANINE
* For ESMFold

Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
This commit is contained in:
@@ -44,31 +44,54 @@ class AlbertModelTester:
|
|||||||
def __init__(
|
def __init__(
|
||||||
self,
|
self,
|
||||||
parent,
|
parent,
|
||||||
|
batch_size=13,
|
||||||
|
seq_length=7,
|
||||||
|
is_training=True,
|
||||||
|
use_input_mask=True,
|
||||||
|
use_token_type_ids=True,
|
||||||
|
use_labels=True,
|
||||||
|
vocab_size=99,
|
||||||
|
embedding_size=16,
|
||||||
|
hidden_size=36,
|
||||||
|
num_hidden_layers=6,
|
||||||
|
num_hidden_groups=6,
|
||||||
|
num_attention_heads=6,
|
||||||
|
intermediate_size=37,
|
||||||
|
hidden_act="gelu",
|
||||||
|
hidden_dropout_prob=0.1,
|
||||||
|
attention_probs_dropout_prob=0.1,
|
||||||
|
max_position_embeddings=512,
|
||||||
|
type_vocab_size=16,
|
||||||
|
type_sequence_label_size=2,
|
||||||
|
initializer_range=0.02,
|
||||||
|
num_labels=3,
|
||||||
|
num_choices=4,
|
||||||
|
scope=None,
|
||||||
):
|
):
|
||||||
self.parent = parent
|
self.parent = parent
|
||||||
self.batch_size = 13
|
self.batch_size = batch_size
|
||||||
self.seq_length = 7
|
self.seq_length = seq_length
|
||||||
self.is_training = True
|
self.is_training = is_training
|
||||||
self.use_input_mask = True
|
self.use_input_mask = use_input_mask
|
||||||
self.use_token_type_ids = True
|
self.use_token_type_ids = use_token_type_ids
|
||||||
self.use_labels = True
|
self.use_labels = use_labels
|
||||||
self.vocab_size = 99
|
self.vocab_size = vocab_size
|
||||||
self.embedding_size = 16
|
self.embedding_size = embedding_size
|
||||||
self.hidden_size = 36
|
self.hidden_size = hidden_size
|
||||||
self.num_hidden_layers = 6
|
self.num_hidden_layers = num_hidden_layers
|
||||||
self.num_hidden_groups = 6
|
self.num_hidden_groups = num_hidden_groups
|
||||||
self.num_attention_heads = 6
|
self.num_attention_heads = num_attention_heads
|
||||||
self.intermediate_size = 37
|
self.intermediate_size = intermediate_size
|
||||||
self.hidden_act = "gelu"
|
self.hidden_act = hidden_act
|
||||||
self.hidden_dropout_prob = 0.1
|
self.hidden_dropout_prob = hidden_dropout_prob
|
||||||
self.attention_probs_dropout_prob = 0.1
|
self.attention_probs_dropout_prob = attention_probs_dropout_prob
|
||||||
self.max_position_embeddings = 512
|
self.max_position_embeddings = max_position_embeddings
|
||||||
self.type_vocab_size = 16
|
self.type_vocab_size = type_vocab_size
|
||||||
self.type_sequence_label_size = 2
|
self.type_sequence_label_size = type_sequence_label_size
|
||||||
self.initializer_range = 0.02
|
self.initializer_range = initializer_range
|
||||||
self.num_labels = 3
|
self.num_labels = num_labels
|
||||||
self.num_choices = 4
|
self.num_choices = num_choices
|
||||||
self.scope = None
|
self.scope = scope
|
||||||
|
|
||||||
def prepare_config_and_inputs(self):
|
def prepare_config_and_inputs(self):
|
||||||
input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)
|
input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)
|
||||||
|
|||||||
@@ -48,6 +48,8 @@ class CanineModelTester:
|
|||||||
use_input_mask=True,
|
use_input_mask=True,
|
||||||
use_token_type_ids=True,
|
use_token_type_ids=True,
|
||||||
use_labels=True,
|
use_labels=True,
|
||||||
|
# let's use a vocab size that's way bigger than BERT's one
|
||||||
|
vocab_size=100000,
|
||||||
hidden_size=32,
|
hidden_size=32,
|
||||||
num_hidden_layers=5,
|
num_hidden_layers=5,
|
||||||
num_attention_heads=4,
|
num_attention_heads=4,
|
||||||
@@ -70,6 +72,7 @@ class CanineModelTester:
|
|||||||
self.use_input_mask = use_input_mask
|
self.use_input_mask = use_input_mask
|
||||||
self.use_token_type_ids = use_token_type_ids
|
self.use_token_type_ids = use_token_type_ids
|
||||||
self.use_labels = use_labels
|
self.use_labels = use_labels
|
||||||
|
self.vocab_size = vocab_size
|
||||||
self.hidden_size = hidden_size
|
self.hidden_size = hidden_size
|
||||||
self.num_hidden_layers = num_hidden_layers
|
self.num_hidden_layers = num_hidden_layers
|
||||||
self.num_attention_heads = num_attention_heads
|
self.num_attention_heads = num_attention_heads
|
||||||
@@ -86,8 +89,7 @@ class CanineModelTester:
|
|||||||
self.scope = scope
|
self.scope = scope
|
||||||
|
|
||||||
def prepare_config_and_inputs(self):
|
def prepare_config_and_inputs(self):
|
||||||
# let's use a vocab size that's way bigger than BERT's one
|
input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)
|
||||||
input_ids = ids_tensor([self.batch_size, self.seq_length], 100000)
|
|
||||||
|
|
||||||
input_mask = None
|
input_mask = None
|
||||||
if self.use_input_mask:
|
if self.use_input_mask:
|
||||||
|
|||||||
@@ -39,30 +39,52 @@ class CTRLModelTester:
|
|||||||
def __init__(
|
def __init__(
|
||||||
self,
|
self,
|
||||||
parent,
|
parent,
|
||||||
|
batch_size=14,
|
||||||
|
seq_length=7,
|
||||||
|
is_training=True,
|
||||||
|
use_token_type_ids=True,
|
||||||
|
use_input_mask=True,
|
||||||
|
use_labels=True,
|
||||||
|
use_mc_token_ids=True,
|
||||||
|
vocab_size=99,
|
||||||
|
hidden_size=32,
|
||||||
|
num_hidden_layers=5,
|
||||||
|
num_attention_heads=4,
|
||||||
|
intermediate_size=37,
|
||||||
|
hidden_act="gelu",
|
||||||
|
hidden_dropout_prob=0.1,
|
||||||
|
attention_probs_dropout_prob=0.1,
|
||||||
|
max_position_embeddings=512,
|
||||||
|
type_vocab_size=16,
|
||||||
|
type_sequence_label_size=2,
|
||||||
|
initializer_range=0.02,
|
||||||
|
num_labels=3,
|
||||||
|
num_choices=4,
|
||||||
|
scope=None,
|
||||||
):
|
):
|
||||||
self.parent = parent
|
self.parent = parent
|
||||||
self.batch_size = 14
|
self.batch_size = batch_size
|
||||||
self.seq_length = 7
|
self.seq_length = seq_length
|
||||||
self.is_training = True
|
self.is_training = is_training
|
||||||
self.use_token_type_ids = True
|
self.use_token_type_ids = use_token_type_ids
|
||||||
self.use_input_mask = True
|
self.use_input_mask = use_input_mask
|
||||||
self.use_labels = True
|
self.use_labels = use_labels
|
||||||
self.use_mc_token_ids = True
|
self.use_mc_token_ids = use_mc_token_ids
|
||||||
self.vocab_size = 99
|
self.vocab_size = vocab_size
|
||||||
self.hidden_size = 32
|
self.hidden_size = hidden_size
|
||||||
self.num_hidden_layers = 5
|
self.num_hidden_layers = num_hidden_layers
|
||||||
self.num_attention_heads = 4
|
self.num_attention_heads = num_attention_heads
|
||||||
self.intermediate_size = 37
|
self.intermediate_size = intermediate_size
|
||||||
self.hidden_act = "gelu"
|
self.hidden_act = hidden_act
|
||||||
self.hidden_dropout_prob = 0.1
|
self.hidden_dropout_prob = hidden_dropout_prob
|
||||||
self.attention_probs_dropout_prob = 0.1
|
self.attention_probs_dropout_prob = attention_probs_dropout_prob
|
||||||
self.max_position_embeddings = 512
|
self.max_position_embeddings = max_position_embeddings
|
||||||
self.type_vocab_size = 16
|
self.type_vocab_size = type_vocab_size
|
||||||
self.type_sequence_label_size = 2
|
self.type_sequence_label_size = type_sequence_label_size
|
||||||
self.initializer_range = 0.02
|
self.initializer_range = initializer_range
|
||||||
self.num_labels = 3
|
self.num_labels = num_labels
|
||||||
self.num_choices = 4
|
self.num_choices = num_choices
|
||||||
self.scope = None
|
self.scope = scope
|
||||||
self.pad_token_id = self.vocab_size - 1
|
self.pad_token_id = self.vocab_size - 1
|
||||||
|
|
||||||
def prepare_config_and_inputs(self):
|
def prepare_config_and_inputs(self):
|
||||||
|
|||||||
@@ -48,29 +48,50 @@ class Data2VecTextModelTester:
|
|||||||
def __init__(
|
def __init__(
|
||||||
self,
|
self,
|
||||||
parent,
|
parent,
|
||||||
|
batch_size=13,
|
||||||
|
seq_length=7,
|
||||||
|
is_training=True,
|
||||||
|
use_input_mask=True,
|
||||||
|
use_token_type_ids=True,
|
||||||
|
use_labels=True,
|
||||||
|
vocab_size=99,
|
||||||
|
hidden_size=32,
|
||||||
|
num_hidden_layers=5,
|
||||||
|
num_attention_heads=4,
|
||||||
|
intermediate_size=37,
|
||||||
|
hidden_act="gelu",
|
||||||
|
hidden_dropout_prob=0.1,
|
||||||
|
attention_probs_dropout_prob=0.1,
|
||||||
|
max_position_embeddings=512,
|
||||||
|
type_vocab_size=16,
|
||||||
|
type_sequence_label_size=2,
|
||||||
|
initializer_range=0.02,
|
||||||
|
num_labels=3,
|
||||||
|
num_choices=4,
|
||||||
|
scope=None,
|
||||||
):
|
):
|
||||||
self.parent = parent
|
self.parent = parent
|
||||||
self.batch_size = 13
|
self.batch_size = batch_size
|
||||||
self.seq_length = 7
|
self.seq_length = seq_length
|
||||||
self.is_training = True
|
self.is_training = is_training
|
||||||
self.use_input_mask = True
|
self.use_input_mask = use_input_mask
|
||||||
self.use_token_type_ids = True
|
self.use_token_type_ids = use_token_type_ids
|
||||||
self.use_labels = True
|
self.use_labels = use_labels
|
||||||
self.vocab_size = 99
|
self.vocab_size = vocab_size
|
||||||
self.hidden_size = 32
|
self.hidden_size = hidden_size
|
||||||
self.num_hidden_layers = 5
|
self.num_hidden_layers = num_hidden_layers
|
||||||
self.num_attention_heads = 4
|
self.num_attention_heads = num_attention_heads
|
||||||
self.intermediate_size = 37
|
self.intermediate_size = intermediate_size
|
||||||
self.hidden_act = "gelu"
|
self.hidden_act = hidden_act
|
||||||
self.hidden_dropout_prob = 0.1
|
self.hidden_dropout_prob = hidden_dropout_prob
|
||||||
self.attention_probs_dropout_prob = 0.1
|
self.attention_probs_dropout_prob = attention_probs_dropout_prob
|
||||||
self.max_position_embeddings = 512
|
self.max_position_embeddings = max_position_embeddings
|
||||||
self.type_vocab_size = 16
|
self.type_vocab_size = type_vocab_size
|
||||||
self.type_sequence_label_size = 2
|
self.type_sequence_label_size = type_sequence_label_size
|
||||||
self.initializer_range = 0.02
|
self.initializer_range = initializer_range
|
||||||
self.num_labels = 3
|
self.num_labels = num_labels
|
||||||
self.num_choices = 4
|
self.num_choices = num_choices
|
||||||
self.scope = None
|
self.scope = scope
|
||||||
|
|
||||||
def prepare_config_and_inputs(self):
|
def prepare_config_and_inputs(self):
|
||||||
input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)
|
input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)
|
||||||
|
|||||||
@@ -45,29 +45,50 @@ class ElectraModelTester:
|
|||||||
def __init__(
|
def __init__(
|
||||||
self,
|
self,
|
||||||
parent,
|
parent,
|
||||||
|
batch_size=13,
|
||||||
|
seq_length=7,
|
||||||
|
is_training=True,
|
||||||
|
use_input_mask=True,
|
||||||
|
use_token_type_ids=True,
|
||||||
|
use_labels=True,
|
||||||
|
vocab_size=99,
|
||||||
|
hidden_size=32,
|
||||||
|
num_hidden_layers=5,
|
||||||
|
num_attention_heads=4,
|
||||||
|
intermediate_size=37,
|
||||||
|
hidden_act="gelu",
|
||||||
|
hidden_dropout_prob=0.1,
|
||||||
|
attention_probs_dropout_prob=0.1,
|
||||||
|
max_position_embeddings=512,
|
||||||
|
type_vocab_size=16,
|
||||||
|
type_sequence_label_size=2,
|
||||||
|
initializer_range=0.02,
|
||||||
|
num_labels=3,
|
||||||
|
num_choices=4,
|
||||||
|
scope=None,
|
||||||
):
|
):
|
||||||
self.parent = parent
|
self.parent = parent
|
||||||
self.batch_size = 13
|
self.batch_size = batch_size
|
||||||
self.seq_length = 7
|
self.seq_length = seq_length
|
||||||
self.is_training = True
|
self.is_training = is_training
|
||||||
self.use_input_mask = True
|
self.use_input_mask = use_input_mask
|
||||||
self.use_token_type_ids = True
|
self.use_token_type_ids = use_token_type_ids
|
||||||
self.use_labels = True
|
self.use_labels = use_labels
|
||||||
self.vocab_size = 99
|
self.vocab_size = vocab_size
|
||||||
self.hidden_size = 32
|
self.hidden_size = hidden_size
|
||||||
self.num_hidden_layers = 5
|
self.num_hidden_layers = num_hidden_layers
|
||||||
self.num_attention_heads = 4
|
self.num_attention_heads = num_attention_heads
|
||||||
self.intermediate_size = 37
|
self.intermediate_size = intermediate_size
|
||||||
self.hidden_act = "gelu"
|
self.hidden_act = hidden_act
|
||||||
self.hidden_dropout_prob = 0.1
|
self.hidden_dropout_prob = hidden_dropout_prob
|
||||||
self.attention_probs_dropout_prob = 0.1
|
self.attention_probs_dropout_prob = attention_probs_dropout_prob
|
||||||
self.max_position_embeddings = 512
|
self.max_position_embeddings = max_position_embeddings
|
||||||
self.type_vocab_size = 16
|
self.type_vocab_size = type_vocab_size
|
||||||
self.type_sequence_label_size = 2
|
self.type_sequence_label_size = type_sequence_label_size
|
||||||
self.initializer_range = 0.02
|
self.initializer_range = initializer_range
|
||||||
self.num_labels = 3
|
self.num_labels = num_labels
|
||||||
self.num_choices = 4
|
self.num_choices = num_choices
|
||||||
self.scope = None
|
self.scope = scope
|
||||||
|
|
||||||
def prepare_config_and_inputs(self):
|
def prepare_config_and_inputs(self):
|
||||||
input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)
|
input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)
|
||||||
|
|||||||
@@ -40,29 +40,50 @@ class EsmModelTester:
|
|||||||
def __init__(
|
def __init__(
|
||||||
self,
|
self,
|
||||||
parent,
|
parent,
|
||||||
|
batch_size=13,
|
||||||
|
seq_length=7,
|
||||||
|
is_training=False,
|
||||||
|
use_input_mask=True,
|
||||||
|
use_token_type_ids=False,
|
||||||
|
use_labels=True,
|
||||||
|
vocab_size=33,
|
||||||
|
hidden_size=32,
|
||||||
|
num_hidden_layers=5,
|
||||||
|
num_attention_heads=4,
|
||||||
|
intermediate_size=37,
|
||||||
|
hidden_act="gelu",
|
||||||
|
hidden_dropout_prob=0.1,
|
||||||
|
attention_probs_dropout_prob=0.1,
|
||||||
|
max_position_embeddings=512,
|
||||||
|
type_vocab_size=16,
|
||||||
|
type_sequence_label_size=2,
|
||||||
|
initializer_range=0.02,
|
||||||
|
num_labels=3,
|
||||||
|
num_choices=4,
|
||||||
|
scope=None,
|
||||||
):
|
):
|
||||||
self.parent = parent
|
self.parent = parent
|
||||||
self.batch_size = 13
|
self.batch_size = batch_size
|
||||||
self.seq_length = 7
|
self.seq_length = seq_length
|
||||||
self.is_training = False
|
self.is_training = is_training
|
||||||
self.use_input_mask = True
|
self.use_input_mask = use_input_mask
|
||||||
self.use_token_type_ids = False
|
self.use_token_type_ids = use_token_type_ids
|
||||||
self.use_labels = True
|
self.use_labels = use_labels
|
||||||
self.vocab_size = 33
|
self.vocab_size = vocab_size
|
||||||
self.hidden_size = 32
|
self.hidden_size = hidden_size
|
||||||
self.num_hidden_layers = 5
|
self.num_hidden_layers = num_hidden_layers
|
||||||
self.num_attention_heads = 4
|
self.num_attention_heads = num_attention_heads
|
||||||
self.intermediate_size = 37
|
self.intermediate_size = intermediate_size
|
||||||
self.hidden_act = "gelu"
|
self.hidden_act = hidden_act
|
||||||
self.hidden_dropout_prob = 0.1
|
self.hidden_dropout_prob = hidden_dropout_prob
|
||||||
self.attention_probs_dropout_prob = 0.1
|
self.attention_probs_dropout_prob = attention_probs_dropout_prob
|
||||||
self.max_position_embeddings = 512
|
self.max_position_embeddings = max_position_embeddings
|
||||||
self.type_vocab_size = 16
|
self.type_vocab_size = type_vocab_size
|
||||||
self.type_sequence_label_size = 2
|
self.type_sequence_label_size = type_sequence_label_size
|
||||||
self.initializer_range = 0.02
|
self.initializer_range = initializer_range
|
||||||
self.num_labels = 3
|
self.num_labels = num_labels
|
||||||
self.num_choices = 4
|
self.num_choices = num_choices
|
||||||
self.scope = None
|
self.scope = scope
|
||||||
|
|
||||||
def prepare_config_and_inputs(self):
|
def prepare_config_and_inputs(self):
|
||||||
input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)
|
input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)
|
||||||
|
|||||||
@@ -34,29 +34,50 @@ class EsmFoldModelTester:
|
|||||||
def __init__(
|
def __init__(
|
||||||
self,
|
self,
|
||||||
parent,
|
parent,
|
||||||
|
batch_size=13,
|
||||||
|
seq_length=7,
|
||||||
|
is_training=False,
|
||||||
|
use_input_mask=True,
|
||||||
|
use_token_type_ids=False,
|
||||||
|
use_labels=False,
|
||||||
|
vocab_size=19,
|
||||||
|
hidden_size=32,
|
||||||
|
num_hidden_layers=5,
|
||||||
|
num_attention_heads=4,
|
||||||
|
intermediate_size=37,
|
||||||
|
hidden_act="gelu",
|
||||||
|
hidden_dropout_prob=0.1,
|
||||||
|
attention_probs_dropout_prob=0.1,
|
||||||
|
max_position_embeddings=512,
|
||||||
|
type_vocab_size=16,
|
||||||
|
type_sequence_label_size=2,
|
||||||
|
initializer_range=0.02,
|
||||||
|
num_labels=3,
|
||||||
|
num_choices=4,
|
||||||
|
scope=None,
|
||||||
):
|
):
|
||||||
self.parent = parent
|
self.parent = parent
|
||||||
self.batch_size = 13
|
self.batch_size = batch_size
|
||||||
self.seq_length = 7
|
self.seq_length = seq_length
|
||||||
self.is_training = False
|
self.is_training = is_training
|
||||||
self.use_input_mask = True
|
self.use_input_mask = use_input_mask
|
||||||
self.use_token_type_ids = False
|
self.use_token_type_ids = use_token_type_ids
|
||||||
self.use_labels = False
|
self.use_labels = use_labels
|
||||||
self.vocab_size = 19
|
self.vocab_size = vocab_size
|
||||||
self.hidden_size = 32
|
self.hidden_size = hidden_size
|
||||||
self.num_hidden_layers = 5
|
self.num_hidden_layers = num_hidden_layers
|
||||||
self.num_attention_heads = 4
|
self.num_attention_heads = num_attention_heads
|
||||||
self.intermediate_size = 37
|
self.intermediate_size = intermediate_size
|
||||||
self.hidden_act = "gelu"
|
self.hidden_act = hidden_act
|
||||||
self.hidden_dropout_prob = 0.1
|
self.hidden_dropout_prob = hidden_dropout_prob
|
||||||
self.attention_probs_dropout_prob = 0.1
|
self.attention_probs_dropout_prob = attention_probs_dropout_prob
|
||||||
self.max_position_embeddings = 512
|
self.max_position_embeddings = max_position_embeddings
|
||||||
self.type_vocab_size = 16
|
self.type_vocab_size = type_vocab_size
|
||||||
self.type_sequence_label_size = 2
|
self.type_sequence_label_size = type_sequence_label_size
|
||||||
self.initializer_range = 0.02
|
self.initializer_range = initializer_range
|
||||||
self.num_labels = 3
|
self.num_labels = num_labels
|
||||||
self.num_choices = 4
|
self.num_choices = num_choices
|
||||||
self.scope = None
|
self.scope = scope
|
||||||
|
|
||||||
def prepare_config_and_inputs(self):
|
def prepare_config_and_inputs(self):
|
||||||
input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)
|
input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)
|
||||||
|
|||||||
@@ -42,35 +42,62 @@ class FlaubertModelTester(object):
|
|||||||
def __init__(
|
def __init__(
|
||||||
self,
|
self,
|
||||||
parent,
|
parent,
|
||||||
|
batch_size=13,
|
||||||
|
seq_length=7,
|
||||||
|
is_training=True,
|
||||||
|
use_input_lengths=True,
|
||||||
|
use_token_type_ids=True,
|
||||||
|
use_labels=True,
|
||||||
|
gelu_activation=True,
|
||||||
|
sinusoidal_embeddings=False,
|
||||||
|
causal=False,
|
||||||
|
asm=False,
|
||||||
|
n_langs=2,
|
||||||
|
vocab_size=99,
|
||||||
|
n_special=0,
|
||||||
|
hidden_size=32,
|
||||||
|
num_hidden_layers=5,
|
||||||
|
num_attention_heads=4,
|
||||||
|
hidden_dropout_prob=0.1,
|
||||||
|
attention_probs_dropout_prob=0.1,
|
||||||
|
max_position_embeddings=512,
|
||||||
|
type_vocab_size=12,
|
||||||
|
type_sequence_label_size=2,
|
||||||
|
initializer_range=0.02,
|
||||||
|
num_labels=3,
|
||||||
|
num_choices=4,
|
||||||
|
summary_type="last",
|
||||||
|
use_proj=None,
|
||||||
|
scope=None,
|
||||||
):
|
):
|
||||||
self.parent = parent
|
self.parent = parent
|
||||||
self.batch_size = 13
|
self.batch_size = batch_size
|
||||||
self.seq_length = 7
|
self.seq_length = seq_length
|
||||||
self.is_training = True
|
self.is_training = is_training
|
||||||
self.use_input_lengths = True
|
self.use_input_lengths = use_input_lengths
|
||||||
self.use_token_type_ids = True
|
self.use_token_type_ids = use_token_type_ids
|
||||||
self.use_labels = True
|
self.use_labels = use_labels
|
||||||
self.gelu_activation = True
|
self.gelu_activation = gelu_activation
|
||||||
self.sinusoidal_embeddings = False
|
self.sinusoidal_embeddings = sinusoidal_embeddings
|
||||||
self.causal = False
|
self.causal = causal
|
||||||
self.asm = False
|
self.asm = asm
|
||||||
self.n_langs = 2
|
self.n_langs = n_langs
|
||||||
self.vocab_size = 99
|
self.vocab_size = vocab_size
|
||||||
self.n_special = 0
|
self.n_special = n_special
|
||||||
self.hidden_size = 32
|
self.hidden_size = hidden_size
|
||||||
self.num_hidden_layers = 5
|
self.num_hidden_layers = num_hidden_layers
|
||||||
self.num_attention_heads = 4
|
self.num_attention_heads = num_attention_heads
|
||||||
self.hidden_dropout_prob = 0.1
|
self.hidden_dropout_prob = hidden_dropout_prob
|
||||||
self.attention_probs_dropout_prob = 0.1
|
self.attention_probs_dropout_prob = attention_probs_dropout_prob
|
||||||
self.max_position_embeddings = 512
|
self.max_position_embeddings = max_position_embeddings
|
||||||
self.type_vocab_size = 12
|
self.type_vocab_size = type_vocab_size
|
||||||
self.type_sequence_label_size = 2
|
self.type_sequence_label_size = type_sequence_label_size
|
||||||
self.initializer_range = 0.02
|
self.initializer_range = initializer_range
|
||||||
self.num_labels = 3
|
self.num_labels = num_labels
|
||||||
self.num_choices = 4
|
self.num_choices = num_choices
|
||||||
self.summary_type = "last"
|
self.summary_type = summary_type
|
||||||
self.use_proj = None
|
self.use_proj = use_proj
|
||||||
self.scope = None
|
self.scope = scope
|
||||||
|
|
||||||
def prepare_config_and_inputs(self):
|
def prepare_config_and_inputs(self):
|
||||||
input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)
|
input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)
|
||||||
|
|||||||
@@ -46,26 +46,44 @@ class FSMTModelTester:
|
|||||||
def __init__(
|
def __init__(
|
||||||
self,
|
self,
|
||||||
parent,
|
parent,
|
||||||
|
src_vocab_size=99,
|
||||||
|
tgt_vocab_size=99,
|
||||||
|
langs=["ru", "en"],
|
||||||
|
batch_size=13,
|
||||||
|
seq_length=7,
|
||||||
|
is_training=False,
|
||||||
|
use_labels=False,
|
||||||
|
hidden_size=16,
|
||||||
|
num_hidden_layers=2,
|
||||||
|
num_attention_heads=4,
|
||||||
|
intermediate_size=4,
|
||||||
|
hidden_act="relu",
|
||||||
|
hidden_dropout_prob=0.1,
|
||||||
|
attention_probs_dropout_prob=0.1,
|
||||||
|
max_position_embeddings=20,
|
||||||
|
bos_token_id=0,
|
||||||
|
pad_token_id=1,
|
||||||
|
eos_token_id=2,
|
||||||
):
|
):
|
||||||
self.parent = parent
|
self.parent = parent
|
||||||
self.src_vocab_size = 99
|
self.src_vocab_size = src_vocab_size
|
||||||
self.tgt_vocab_size = 99
|
self.tgt_vocab_size = tgt_vocab_size
|
||||||
self.langs = ["ru", "en"]
|
self.langs = langs
|
||||||
self.batch_size = 13
|
self.batch_size = batch_size
|
||||||
self.seq_length = 7
|
self.seq_length = seq_length
|
||||||
self.is_training = False
|
self.is_training = is_training
|
||||||
self.use_labels = False
|
self.use_labels = use_labels
|
||||||
self.hidden_size = 16
|
self.hidden_size = hidden_size
|
||||||
self.num_hidden_layers = 2
|
self.num_hidden_layers = num_hidden_layers
|
||||||
self.num_attention_heads = 4
|
self.num_attention_heads = num_attention_heads
|
||||||
self.intermediate_size = 4
|
self.intermediate_size = intermediate_size
|
||||||
self.hidden_act = "relu"
|
self.hidden_act = hidden_act
|
||||||
self.hidden_dropout_prob = 0.1
|
self.hidden_dropout_prob = hidden_dropout_prob
|
||||||
self.attention_probs_dropout_prob = 0.1
|
self.attention_probs_dropout_prob = attention_probs_dropout_prob
|
||||||
self.max_position_embeddings = 20
|
self.max_position_embeddings = max_position_embeddings
|
||||||
self.bos_token_id = 0
|
self.bos_token_id = bos_token_id
|
||||||
self.pad_token_id = 1
|
self.pad_token_id = pad_token_id
|
||||||
self.eos_token_id = 2
|
self.eos_token_id = eos_token_id
|
||||||
torch.manual_seed(0)
|
torch.manual_seed(0)
|
||||||
|
|
||||||
# hack needed for modeling_common tests - despite not really having this attribute in this model
|
# hack needed for modeling_common tests - despite not really having this attribute in this model
|
||||||
|
|||||||
@@ -53,29 +53,50 @@ class IBertModelTester:
|
|||||||
def __init__(
|
def __init__(
|
||||||
self,
|
self,
|
||||||
parent,
|
parent,
|
||||||
|
batch_size=13,
|
||||||
|
seq_length=7,
|
||||||
|
is_training=True,
|
||||||
|
use_input_mask=True,
|
||||||
|
use_token_type_ids=True,
|
||||||
|
use_labels=True,
|
||||||
|
vocab_size=99,
|
||||||
|
hidden_size=32,
|
||||||
|
num_hidden_layers=5,
|
||||||
|
num_attention_heads=4,
|
||||||
|
intermediate_size=37,
|
||||||
|
hidden_act="gelu",
|
||||||
|
hidden_dropout_prob=0.1,
|
||||||
|
attention_probs_dropout_prob=0.1,
|
||||||
|
max_position_embeddings=512,
|
||||||
|
type_vocab_size=16,
|
||||||
|
type_sequence_label_size=2,
|
||||||
|
initializer_range=0.02,
|
||||||
|
num_labels=3,
|
||||||
|
num_choices=4,
|
||||||
|
scope=None,
|
||||||
):
|
):
|
||||||
self.parent = parent
|
self.parent = parent
|
||||||
self.batch_size = 13
|
self.batch_size = batch_size
|
||||||
self.seq_length = 7
|
self.seq_length = seq_length
|
||||||
self.is_training = True
|
self.is_training = is_training
|
||||||
self.use_input_mask = True
|
self.use_input_mask = use_input_mask
|
||||||
self.use_token_type_ids = True
|
self.use_token_type_ids = use_token_type_ids
|
||||||
self.use_labels = True
|
self.use_labels = use_labels
|
||||||
self.vocab_size = 99
|
self.vocab_size = vocab_size
|
||||||
self.hidden_size = 32
|
self.hidden_size = hidden_size
|
||||||
self.num_hidden_layers = 5
|
self.num_hidden_layers = num_hidden_layers
|
||||||
self.num_attention_heads = 4
|
self.num_attention_heads = num_attention_heads
|
||||||
self.intermediate_size = 37
|
self.intermediate_size = intermediate_size
|
||||||
self.hidden_act = "gelu"
|
self.hidden_act = hidden_act
|
||||||
self.hidden_dropout_prob = 0.1
|
self.hidden_dropout_prob = hidden_dropout_prob
|
||||||
self.attention_probs_dropout_prob = 0.1
|
self.attention_probs_dropout_prob = attention_probs_dropout_prob
|
||||||
self.max_position_embeddings = 512
|
self.max_position_embeddings = max_position_embeddings
|
||||||
self.type_vocab_size = 16
|
self.type_vocab_size = type_vocab_size
|
||||||
self.type_sequence_label_size = 2
|
self.type_sequence_label_size = type_sequence_label_size
|
||||||
self.initializer_range = 0.02
|
self.initializer_range = initializer_range
|
||||||
self.num_labels = 3
|
self.num_labels = num_labels
|
||||||
self.num_choices = 4
|
self.num_choices = num_choices
|
||||||
self.scope = None
|
self.scope = scope
|
||||||
|
|
||||||
def prepare_config_and_inputs(self):
|
def prepare_config_and_inputs(self):
|
||||||
input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)
|
input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)
|
||||||
|
|||||||
@@ -41,30 +41,52 @@ class LongformerModelTester:
|
|||||||
def __init__(
|
def __init__(
|
||||||
self,
|
self,
|
||||||
parent,
|
parent,
|
||||||
|
batch_size=13,
|
||||||
|
seq_length=7,
|
||||||
|
is_training=True,
|
||||||
|
use_input_mask=True,
|
||||||
|
use_token_type_ids=True,
|
||||||
|
use_labels=True,
|
||||||
|
vocab_size=99,
|
||||||
|
hidden_size=32,
|
||||||
|
num_hidden_layers=5,
|
||||||
|
num_attention_heads=4,
|
||||||
|
intermediate_size=37,
|
||||||
|
hidden_act="gelu",
|
||||||
|
hidden_dropout_prob=0.1,
|
||||||
|
attention_probs_dropout_prob=0.1,
|
||||||
|
max_position_embeddings=512,
|
||||||
|
type_vocab_size=16,
|
||||||
|
type_sequence_label_size=2,
|
||||||
|
initializer_range=0.02,
|
||||||
|
num_labels=3,
|
||||||
|
num_choices=4,
|
||||||
|
scope=None,
|
||||||
|
attention_window=4,
|
||||||
):
|
):
|
||||||
self.parent = parent
|
self.parent = parent
|
||||||
self.batch_size = 13
|
self.batch_size = batch_size
|
||||||
self.seq_length = 7
|
self.seq_length = seq_length
|
||||||
self.is_training = True
|
self.is_training = is_training
|
||||||
self.use_input_mask = True
|
self.use_input_mask = use_input_mask
|
||||||
self.use_token_type_ids = True
|
self.use_token_type_ids = use_token_type_ids
|
||||||
self.use_labels = True
|
self.use_labels = use_labels
|
||||||
self.vocab_size = 99
|
self.vocab_size = vocab_size
|
||||||
self.hidden_size = 32
|
self.hidden_size = hidden_size
|
||||||
self.num_hidden_layers = 5
|
self.num_hidden_layers = num_hidden_layers
|
||||||
self.num_attention_heads = 4
|
self.num_attention_heads = num_attention_heads
|
||||||
self.intermediate_size = 37
|
self.intermediate_size = intermediate_size
|
||||||
self.hidden_act = "gelu"
|
self.hidden_act = hidden_act
|
||||||
self.hidden_dropout_prob = 0.1
|
self.hidden_dropout_prob = hidden_dropout_prob
|
||||||
self.attention_probs_dropout_prob = 0.1
|
self.attention_probs_dropout_prob = attention_probs_dropout_prob
|
||||||
self.max_position_embeddings = 512
|
self.max_position_embeddings = max_position_embeddings
|
||||||
self.type_vocab_size = 16
|
self.type_vocab_size = type_vocab_size
|
||||||
self.type_sequence_label_size = 2
|
self.type_sequence_label_size = type_sequence_label_size
|
||||||
self.initializer_range = 0.02
|
self.initializer_range = initializer_range
|
||||||
self.num_labels = 3
|
self.num_labels = num_labels
|
||||||
self.num_choices = 4
|
self.num_choices = num_choices
|
||||||
self.scope = None
|
self.scope = scope
|
||||||
self.attention_window = 4
|
self.attention_window = attention_window
|
||||||
|
|
||||||
# `ModelTesterMixin.test_attention_outputs` is expecting attention tensors to be of size
|
# `ModelTesterMixin.test_attention_outputs` is expecting attention tensors to be of size
|
||||||
# [num_attention_heads, encoder_seq_length, encoder_key_length], but LongformerSelfAttention
|
# [num_attention_heads, encoder_seq_length, encoder_key_length], but LongformerSelfAttention
|
||||||
|
|||||||
@@ -41,28 +41,48 @@ class OpenAIGPTModelTester:
|
|||||||
def __init__(
|
def __init__(
|
||||||
self,
|
self,
|
||||||
parent,
|
parent,
|
||||||
|
batch_size=13,
|
||||||
|
seq_length=7,
|
||||||
|
is_training=True,
|
||||||
|
use_token_type_ids=True,
|
||||||
|
use_labels=True,
|
||||||
|
vocab_size=99,
|
||||||
|
hidden_size=32,
|
||||||
|
num_hidden_layers=5,
|
||||||
|
num_attention_heads=4,
|
||||||
|
intermediate_size=37,
|
||||||
|
hidden_act="gelu",
|
||||||
|
hidden_dropout_prob=0.1,
|
||||||
|
attention_probs_dropout_prob=0.1,
|
||||||
|
max_position_embeddings=512,
|
||||||
|
type_vocab_size=16,
|
||||||
|
type_sequence_label_size=2,
|
||||||
|
initializer_range=0.02,
|
||||||
|
num_labels=3,
|
||||||
|
num_choices=4,
|
||||||
|
scope=None,
|
||||||
):
|
):
|
||||||
self.parent = parent
|
self.parent = parent
|
||||||
self.batch_size = 13
|
self.batch_size = batch_size
|
||||||
self.seq_length = 7
|
self.seq_length = seq_length
|
||||||
self.is_training = True
|
self.is_training = is_training
|
||||||
self.use_token_type_ids = True
|
self.use_token_type_ids = use_token_type_ids
|
||||||
self.use_labels = True
|
self.use_labels = use_labels
|
||||||
self.vocab_size = 99
|
self.vocab_size = vocab_size
|
||||||
self.hidden_size = 32
|
self.hidden_size = hidden_size
|
||||||
self.num_hidden_layers = 5
|
self.num_hidden_layers = num_hidden_layers
|
||||||
self.num_attention_heads = 4
|
self.num_attention_heads = num_attention_heads
|
||||||
self.intermediate_size = 37
|
self.intermediate_size = intermediate_size
|
||||||
self.hidden_act = "gelu"
|
self.hidden_act = hidden_act
|
||||||
self.hidden_dropout_prob = 0.1
|
self.hidden_dropout_prob = hidden_dropout_prob
|
||||||
self.attention_probs_dropout_prob = 0.1
|
self.attention_probs_dropout_prob = attention_probs_dropout_prob
|
||||||
self.max_position_embeddings = 512
|
self.max_position_embeddings = max_position_embeddings
|
||||||
self.type_vocab_size = 16
|
self.type_vocab_size = type_vocab_size
|
||||||
self.type_sequence_label_size = 2
|
self.type_sequence_label_size = type_sequence_label_size
|
||||||
self.initializer_range = 0.02
|
self.initializer_range = initializer_range
|
||||||
self.num_labels = 3
|
self.num_labels = num_labels
|
||||||
self.num_choices = 4
|
self.num_choices = num_choices
|
||||||
self.scope = None
|
self.scope = scope
|
||||||
self.pad_token_id = self.vocab_size - 1
|
self.pad_token_id = self.vocab_size - 1
|
||||||
|
|
||||||
def prepare_config_and_inputs(self):
|
def prepare_config_and_inputs(self):
|
||||||
|
|||||||
@@ -50,29 +50,50 @@ class RobertaModelTester:
|
|||||||
def __init__(
|
def __init__(
|
||||||
self,
|
self,
|
||||||
parent,
|
parent,
|
||||||
|
batch_size=13,
|
||||||
|
seq_length=7,
|
||||||
|
is_training=True,
|
||||||
|
use_input_mask=True,
|
||||||
|
use_token_type_ids=True,
|
||||||
|
use_labels=True,
|
||||||
|
vocab_size=99,
|
||||||
|
hidden_size=32,
|
||||||
|
num_hidden_layers=5,
|
||||||
|
num_attention_heads=4,
|
||||||
|
intermediate_size=37,
|
||||||
|
hidden_act="gelu",
|
||||||
|
hidden_dropout_prob=0.1,
|
||||||
|
attention_probs_dropout_prob=0.1,
|
||||||
|
max_position_embeddings=512,
|
||||||
|
type_vocab_size=16,
|
||||||
|
type_sequence_label_size=2,
|
||||||
|
initializer_range=0.02,
|
||||||
|
num_labels=3,
|
||||||
|
num_choices=4,
|
||||||
|
scope=None,
|
||||||
):
|
):
|
||||||
self.parent = parent
|
self.parent = parent
|
||||||
self.batch_size = 13
|
self.batch_size = batch_size
|
||||||
self.seq_length = 7
|
self.seq_length = seq_length
|
||||||
self.is_training = True
|
self.is_training = is_training
|
||||||
self.use_input_mask = True
|
self.use_input_mask = use_input_mask
|
||||||
self.use_token_type_ids = True
|
self.use_token_type_ids = use_token_type_ids
|
||||||
self.use_labels = True
|
self.use_labels = use_labels
|
||||||
self.vocab_size = 99
|
self.vocab_size = vocab_size
|
||||||
self.hidden_size = 32
|
self.hidden_size = hidden_size
|
||||||
self.num_hidden_layers = 5
|
self.num_hidden_layers = num_hidden_layers
|
||||||
self.num_attention_heads = 4
|
self.num_attention_heads = num_attention_heads
|
||||||
self.intermediate_size = 37
|
self.intermediate_size = intermediate_size
|
||||||
self.hidden_act = "gelu"
|
self.hidden_act = hidden_act
|
||||||
self.hidden_dropout_prob = 0.1
|
self.hidden_dropout_prob = hidden_dropout_prob
|
||||||
self.attention_probs_dropout_prob = 0.1
|
self.attention_probs_dropout_prob = attention_probs_dropout_prob
|
||||||
self.max_position_embeddings = 512
|
self.max_position_embeddings = max_position_embeddings
|
||||||
self.type_vocab_size = 16
|
self.type_vocab_size = type_vocab_size
|
||||||
self.type_sequence_label_size = 2
|
self.type_sequence_label_size = type_sequence_label_size
|
||||||
self.initializer_range = 0.02
|
self.initializer_range = initializer_range
|
||||||
self.num_labels = 3
|
self.num_labels = num_labels
|
||||||
self.num_choices = 4
|
self.num_choices = num_choices
|
||||||
self.scope = None
|
self.scope = scope
|
||||||
|
|
||||||
def prepare_config_and_inputs(self):
|
def prepare_config_and_inputs(self):
|
||||||
input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)
|
input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)
|
||||||
|
|||||||
@@ -37,28 +37,47 @@ class TransfoXLModelTester:
|
|||||||
def __init__(
|
def __init__(
|
||||||
self,
|
self,
|
||||||
parent,
|
parent,
|
||||||
|
batch_size=14,
|
||||||
|
seq_length=7,
|
||||||
|
mem_len=30,
|
||||||
|
clamp_len=15,
|
||||||
|
is_training=False,
|
||||||
|
use_labels=True,
|
||||||
|
vocab_size=99,
|
||||||
|
cutoffs=[10, 50, 80],
|
||||||
|
hidden_size=32,
|
||||||
|
d_embed=32,
|
||||||
|
num_attention_heads=4,
|
||||||
|
d_head=8,
|
||||||
|
d_inner=128,
|
||||||
|
div_val=2,
|
||||||
|
num_hidden_layers=5,
|
||||||
|
scope=None,
|
||||||
|
seed=1,
|
||||||
|
eos_token_id=0,
|
||||||
|
num_labels=3,
|
||||||
):
|
):
|
||||||
self.parent = parent
|
self.parent = parent
|
||||||
self.batch_size = 14
|
self.batch_size = batch_size
|
||||||
self.seq_length = 7
|
self.seq_length = seq_length
|
||||||
self.mem_len = 30
|
self.mem_len = mem_len
|
||||||
self.key_length = self.seq_length + self.mem_len
|
self.key_length = self.seq_length + self.mem_len
|
||||||
self.clamp_len = 15
|
self.clamp_len = clamp_len
|
||||||
self.is_training = False
|
self.is_training = is_training
|
||||||
self.use_labels = True
|
self.use_labels = use_labels
|
||||||
self.vocab_size = 99
|
self.vocab_size = vocab_size
|
||||||
self.cutoffs = [10, 50, 80]
|
self.cutoffs = cutoffs
|
||||||
self.hidden_size = 32
|
self.hidden_size = hidden_size
|
||||||
self.d_embed = 32
|
self.d_embed = d_embed
|
||||||
self.num_attention_heads = 4
|
self.num_attention_heads = num_attention_heads
|
||||||
self.d_head = 8
|
self.d_head = d_head
|
||||||
self.d_inner = 128
|
self.d_inner = d_inner
|
||||||
self.div_val = 2
|
self.div_val = div_val
|
||||||
self.num_hidden_layers = 5
|
self.num_hidden_layers = num_hidden_layers
|
||||||
self.scope = None
|
self.scope = scope
|
||||||
self.seed = 1
|
self.seed = seed
|
||||||
self.eos_token_id = 0
|
self.eos_token_id = eos_token_id
|
||||||
self.num_labels = 3
|
self.num_labels = num_labels
|
||||||
self.pad_token_id = self.vocab_size - 1
|
self.pad_token_id = self.vocab_size - 1
|
||||||
|
|
||||||
def prepare_config_and_inputs(self):
|
def prepare_config_and_inputs(self):
|
||||||
|
|||||||
@@ -42,35 +42,62 @@ class XLMModelTester:
|
|||||||
def __init__(
|
def __init__(
|
||||||
self,
|
self,
|
||||||
parent,
|
parent,
|
||||||
|
batch_size=13,
|
||||||
|
seq_length=7,
|
||||||
|
is_training=True,
|
||||||
|
use_input_lengths=True,
|
||||||
|
use_token_type_ids=True,
|
||||||
|
use_labels=True,
|
||||||
|
gelu_activation=True,
|
||||||
|
sinusoidal_embeddings=False,
|
||||||
|
causal=False,
|
||||||
|
asm=False,
|
||||||
|
n_langs=2,
|
||||||
|
vocab_size=99,
|
||||||
|
n_special=0,
|
||||||
|
hidden_size=32,
|
||||||
|
num_hidden_layers=5,
|
||||||
|
num_attention_heads=4,
|
||||||
|
hidden_dropout_prob=0.1,
|
||||||
|
attention_probs_dropout_prob=0.1,
|
||||||
|
max_position_embeddings=512,
|
||||||
|
type_sequence_label_size=2,
|
||||||
|
initializer_range=0.02,
|
||||||
|
num_labels=2,
|
||||||
|
num_choices=4,
|
||||||
|
summary_type="last",
|
||||||
|
use_proj=True,
|
||||||
|
scope=None,
|
||||||
|
bos_token_id=0,
|
||||||
):
|
):
|
||||||
self.parent = parent
|
self.parent = parent
|
||||||
self.batch_size = 13
|
self.batch_size = batch_size
|
||||||
self.seq_length = 7
|
self.seq_length = seq_length
|
||||||
self.is_training = True
|
self.is_training = is_training
|
||||||
self.use_input_lengths = True
|
self.use_input_lengths = use_input_lengths
|
||||||
self.use_token_type_ids = True
|
self.use_token_type_ids = use_token_type_ids
|
||||||
self.use_labels = True
|
self.use_labels = use_labels
|
||||||
self.gelu_activation = True
|
self.gelu_activation = gelu_activation
|
||||||
self.sinusoidal_embeddings = False
|
self.sinusoidal_embeddings = sinusoidal_embeddings
|
||||||
self.causal = False
|
self.causal = causal
|
||||||
self.asm = False
|
self.asm = asm
|
||||||
self.n_langs = 2
|
self.n_langs = n_langs
|
||||||
self.vocab_size = 99
|
self.vocab_size = vocab_size
|
||||||
self.n_special = 0
|
self.n_special = n_special
|
||||||
self.hidden_size = 32
|
self.hidden_size = hidden_size
|
||||||
self.num_hidden_layers = 5
|
self.num_hidden_layers = num_hidden_layers
|
||||||
self.num_attention_heads = 4
|
self.num_attention_heads = num_attention_heads
|
||||||
self.hidden_dropout_prob = 0.1
|
self.hidden_dropout_prob = hidden_dropout_prob
|
||||||
self.attention_probs_dropout_prob = 0.1
|
self.attention_probs_dropout_prob = attention_probs_dropout_prob
|
||||||
self.max_position_embeddings = 512
|
self.max_position_embeddings = max_position_embeddings
|
||||||
self.type_sequence_label_size = 2
|
self.type_sequence_label_size = type_sequence_label_size
|
||||||
self.initializer_range = 0.02
|
self.initializer_range = initializer_range
|
||||||
self.num_labels = 2
|
self.num_labels = num_labels
|
||||||
self.num_choices = 4
|
self.num_choices = num_choices
|
||||||
self.summary_type = "last"
|
self.summary_type = summary_type
|
||||||
self.use_proj = True
|
self.use_proj = use_proj
|
||||||
self.scope = None
|
self.scope = scope
|
||||||
self.bos_token_id = 0
|
self.bos_token_id = bos_token_id
|
||||||
|
|
||||||
def prepare_config_and_inputs(self):
|
def prepare_config_and_inputs(self):
|
||||||
input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)
|
input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)
|
||||||
|
|||||||
@@ -46,29 +46,50 @@ class XLMRobertaXLModelTester:
|
|||||||
def __init__(
|
def __init__(
|
||||||
self,
|
self,
|
||||||
parent,
|
parent,
|
||||||
|
batch_size=13,
|
||||||
|
seq_length=7,
|
||||||
|
is_training=True,
|
||||||
|
use_input_mask=True,
|
||||||
|
use_token_type_ids=True,
|
||||||
|
use_labels=True,
|
||||||
|
vocab_size=99,
|
||||||
|
hidden_size=32,
|
||||||
|
num_hidden_layers=5,
|
||||||
|
num_attention_heads=4,
|
||||||
|
intermediate_size=37,
|
||||||
|
hidden_act="gelu",
|
||||||
|
hidden_dropout_prob=0.1,
|
||||||
|
attention_probs_dropout_prob=0.1,
|
||||||
|
max_position_embeddings=512,
|
||||||
|
type_vocab_size=16,
|
||||||
|
type_sequence_label_size=2,
|
||||||
|
initializer_range=0.02,
|
||||||
|
num_labels=3,
|
||||||
|
num_choices=4,
|
||||||
|
scope=None,
|
||||||
):
|
):
|
||||||
self.parent = parent
|
self.parent = parent
|
||||||
self.batch_size = 13
|
self.batch_size = batch_size
|
||||||
self.seq_length = 7
|
self.seq_length = seq_length
|
||||||
self.is_training = True
|
self.is_training = is_training
|
||||||
self.use_input_mask = True
|
self.use_input_mask = use_input_mask
|
||||||
self.use_token_type_ids = True
|
self.use_token_type_ids = use_token_type_ids
|
||||||
self.use_labels = True
|
self.use_labels = use_labels
|
||||||
self.vocab_size = 99
|
self.vocab_size = vocab_size
|
||||||
self.hidden_size = 32
|
self.hidden_size = hidden_size
|
||||||
self.num_hidden_layers = 5
|
self.num_hidden_layers = num_hidden_layers
|
||||||
self.num_attention_heads = 4
|
self.num_attention_heads = num_attention_heads
|
||||||
self.intermediate_size = 37
|
self.intermediate_size = intermediate_size
|
||||||
self.hidden_act = "gelu"
|
self.hidden_act = hidden_act
|
||||||
self.hidden_dropout_prob = 0.1
|
self.hidden_dropout_prob = hidden_dropout_prob
|
||||||
self.attention_probs_dropout_prob = 0.1
|
self.attention_probs_dropout_prob = attention_probs_dropout_prob
|
||||||
self.max_position_embeddings = 512
|
self.max_position_embeddings = max_position_embeddings
|
||||||
self.type_vocab_size = 16
|
self.type_vocab_size = type_vocab_size
|
||||||
self.type_sequence_label_size = 2
|
self.type_sequence_label_size = type_sequence_label_size
|
||||||
self.initializer_range = 0.02
|
self.initializer_range = initializer_range
|
||||||
self.num_labels = 3
|
self.num_labels = num_labels
|
||||||
self.num_choices = 4
|
self.num_choices = num_choices
|
||||||
self.scope = None
|
self.scope = scope
|
||||||
|
|
||||||
def prepare_config_and_inputs(self):
|
def prepare_config_and_inputs(self):
|
||||||
input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)
|
input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)
|
||||||
|
|||||||
Reference in New Issue
Block a user