From 47f0e3cfb7df192ab80215cea9096791fce08694 Mon Sep 17 00:00:00 2001 From: thomwolf Date: Fri, 13 Dec 2019 14:33:24 +0100 Subject: [PATCH] cleaning up configuration classes --- .../summarization/configuration_bertabs.py | 10 +-- .../adding_a_new_model/configuration_xxx.py | 12 +-- .../tests/modeling_tf_xxx_test.py | 2 +- .../tests/modeling_xxx_test.py | 2 +- transformers/configuration_albert.py | 6 +- transformers/configuration_bert.py | 38 +++----- transformers/configuration_ctrl.py | 23 +---- transformers/configuration_distilbert.py | 40 ++++----- transformers/configuration_gpt2.py | 55 ++++-------- transformers/configuration_openai.py | 57 +++++------- transformers/configuration_transfo_xl.py | 26 ++---- transformers/configuration_utils.py | 27 ++++-- transformers/configuration_xlm.py | 88 ++++++++----------- transformers/configuration_xlnet.py | 81 +++++++---------- ..._original_pytorch_checkpoint_to_pytorch.py | 2 +- transformers/modeling_gpt2.py | 1 + transformers/modeling_tf_gpt2.py | 1 + transformers/modeling_tf_transfo_xl.py | 6 +- .../modeling_tf_transfo_xl_utilities.py | 12 +-- transformers/modeling_tf_xlnet.py | 2 +- transformers/modeling_transfo_xl.py | 10 +-- transformers/modeling_xlnet.py | 4 +- transformers/tests/modeling_albert_test.py | 2 +- transformers/tests/modeling_bert_test.py | 2 +- transformers/tests/modeling_common_test.py | 2 +- transformers/tests/modeling_ctrl_test.py | 2 +- .../tests/modeling_distilbert_test.py | 2 +- transformers/tests/modeling_gpt2_test.py | 2 +- transformers/tests/modeling_openai_test.py | 2 +- transformers/tests/modeling_roberta_test.py | 2 +- transformers/tests/modeling_tf_albert_test.py | 2 +- transformers/tests/modeling_tf_bert_test.py | 2 +- transformers/tests/modeling_tf_ctrl_test.py | 2 +- .../tests/modeling_tf_distilbert_test.py | 2 +- transformers/tests/modeling_tf_gpt2_test.py | 2 +- .../tests/modeling_tf_openai_gpt_test.py | 2 +- .../tests/modeling_tf_roberta_test.py | 2 +- .../tests/modeling_tf_transfo_xl_test.py | 2 +- transformers/tests/modeling_tf_xlm_test.py | 2 +- transformers/tests/modeling_tf_xlnet_test.py | 5 +- .../tests/modeling_transfo_xl_test.py | 2 +- transformers/tests/modeling_xlm_test.py | 2 +- transformers/tests/modeling_xlnet_test.py | 5 +- 43 files changed, 224 insertions(+), 329 deletions(-) diff --git a/examples/summarization/configuration_bertabs.py b/examples/summarization/configuration_bertabs.py index 5bcb65b423..054763ea93 100644 --- a/examples/summarization/configuration_bertabs.py +++ b/examples/summarization/configuration_bertabs.py @@ -65,7 +65,7 @@ class BertAbsConfig(PretrainedConfig): def __init__( self, - vocab_size_or_config_json_file=30522, + vocab_size=30522, max_pos=512, enc_layers=6, enc_hidden_size=512, @@ -81,14 +81,14 @@ class BertAbsConfig(PretrainedConfig): ): super(BertAbsConfig, self).__init__(**kwargs) - if self._input_is_path_to_json(vocab_size_or_config_json_file): - path_to_json = vocab_size_or_config_json_file + if self._input_is_path_to_json(vocab_size): + path_to_json = vocab_size with open(path_to_json, "r", encoding="utf-8") as reader: json_config = json.loads(reader.read()) for key, value in json_config.items(): self.__dict__[key] = value - elif isinstance(vocab_size_or_config_json_file, int): - self.vocab_size = vocab_size_or_config_json_file + elif isinstance(vocab_size, int): + self.vocab_size = vocab_size self.max_pos = max_pos self.enc_layers = enc_layers diff --git a/templates/adding_a_new_model/configuration_xxx.py b/templates/adding_a_new_model/configuration_xxx.py index b1614e71af..ca9e0d554b 100644 --- a/templates/adding_a_new_model/configuration_xxx.py +++ b/templates/adding_a_new_model/configuration_xxx.py @@ -39,7 +39,7 @@ class XxxConfig(PretrainedConfig): Arguments: - vocab_size_or_config_json_file: Vocabulary size of `inputs_ids` in `XxxModel`. + vocab_size: Vocabulary size of `inputs_ids` in `XxxModel`. hidden_size: Size of the encoder layers and the pooler layer. num_hidden_layers: Number of hidden layers in the Transformer encoder. num_attention_heads: Number of attention heads for each attention layer in @@ -64,7 +64,7 @@ class XxxConfig(PretrainedConfig): pretrained_config_archive_map = XXX_PRETRAINED_CONFIG_ARCHIVE_MAP def __init__(self, - vocab_size_or_config_json_file=50257, + vocab_size=50257, n_positions=1024, n_ctx=1024, n_embd=768, @@ -84,7 +84,7 @@ class XxxConfig(PretrainedConfig): summary_first_dropout=0.1, **kwargs): super(XxxConfig, self).__init__(**kwargs) - self.vocab_size = vocab_size_or_config_json_file if isinstance(vocab_size_or_config_json_file, six.string_types) else -1 + self.vocab_size = vocab_size if isinstance(vocab_size, six.string_types) else -1 self.n_ctx = n_ctx self.n_positions = n_positions self.n_embd = n_embd @@ -102,12 +102,12 @@ class XxxConfig(PretrainedConfig): self.summary_activation = summary_activation self.summary_first_dropout = summary_first_dropout self.summary_proj_to_labels = summary_proj_to_labels - if isinstance(vocab_size_or_config_json_file, six.string_types): - with open(vocab_size_or_config_json_file, "r", encoding="utf-8") as reader: + if isinstance(vocab_size, six.string_types): + with open(vocab_size, "r", encoding="utf-8") as reader: json_config = json.loads(reader.read()) for key, value in json_config.items(): self.__dict__[key] = value - elif not isinstance(vocab_size_or_config_json_file, int): + elif not isinstance(vocab_size, int): raise ValueError( "First argument must be either a vocabulary size (int)" "or the path to a pretrained model config file (str)" diff --git a/templates/adding_a_new_model/tests/modeling_tf_xxx_test.py b/templates/adding_a_new_model/tests/modeling_tf_xxx_test.py index d7e576bf8b..912a4aa340 100644 --- a/templates/adding_a_new_model/tests/modeling_tf_xxx_test.py +++ b/templates/adding_a_new_model/tests/modeling_tf_xxx_test.py @@ -111,7 +111,7 @@ class TFXxxModelTest(TFCommonTestCases.TFCommonModelTester): choice_labels = ids_tensor([self.batch_size], self.num_choices) config = XxxConfig( - vocab_size_or_config_json_file=self.vocab_size, + vocab_size=self.vocab_size, hidden_size=self.hidden_size, num_hidden_layers=self.num_hidden_layers, num_attention_heads=self.num_attention_heads, diff --git a/templates/adding_a_new_model/tests/modeling_xxx_test.py b/templates/adding_a_new_model/tests/modeling_xxx_test.py index bfc70921cd..30e614b3f2 100644 --- a/templates/adding_a_new_model/tests/modeling_xxx_test.py +++ b/templates/adding_a_new_model/tests/modeling_xxx_test.py @@ -109,7 +109,7 @@ class XxxModelTest(CommonTestCases.CommonModelTester): choice_labels = ids_tensor([self.batch_size], self.num_choices) config = XxxConfig( - vocab_size_or_config_json_file=self.vocab_size, + vocab_size=self.vocab_size, hidden_size=self.hidden_size, num_hidden_layers=self.num_hidden_layers, num_attention_heads=self.num_attention_heads, diff --git a/transformers/configuration_albert.py b/transformers/configuration_albert.py index de665c9b1c..6a1ef78dd5 100644 --- a/transformers/configuration_albert.py +++ b/transformers/configuration_albert.py @@ -37,7 +37,7 @@ class AlbertConfig(PretrainedConfig): pretrained_config_archive_map = ALBERT_PRETRAINED_CONFIG_ARCHIVE_MAP def __init__(self, - vocab_size_or_config_json_file=30000, + vocab_size=30000, embedding_size=128, hidden_size=4096, num_hidden_layers=12, @@ -83,7 +83,7 @@ class AlbertConfig(PretrainedConfig): """ super(AlbertConfig, self).__init__(**kwargs) - self.vocab_size = vocab_size_or_config_json_file + self.vocab_size = vocab_size self.embedding_size = embedding_size self.hidden_size = hidden_size self.num_hidden_layers = num_hidden_layers @@ -97,4 +97,4 @@ class AlbertConfig(PretrainedConfig): self.max_position_embeddings = max_position_embeddings self.type_vocab_size = type_vocab_size self.initializer_range = initializer_range - self.layer_norm_eps = layer_norm_eps \ No newline at end of file + self.layer_norm_eps = layer_norm_eps diff --git a/transformers/configuration_bert.py b/transformers/configuration_bert.py index 01fcd88cb8..9072820bce 100644 --- a/transformers/configuration_bert.py +++ b/transformers/configuration_bert.py @@ -56,7 +56,7 @@ class BertConfig(PretrainedConfig): Arguments: - vocab_size_or_config_json_file: Vocabulary size of `inputs_ids` in `BertModel`. + vocab_size: Vocabulary size of `inputs_ids` in `BertModel`. hidden_size: Size of the encoder layers and the pooler layer. num_hidden_layers: Number of hidden layers in the Transformer encoder. num_attention_heads: Number of attention heads for each attention layer in @@ -81,7 +81,7 @@ class BertConfig(PretrainedConfig): pretrained_config_archive_map = BERT_PRETRAINED_CONFIG_ARCHIVE_MAP def __init__(self, - vocab_size_or_config_json_file=30522, + vocab_size=30522, hidden_size=768, num_hidden_layers=12, num_attention_heads=12, @@ -95,25 +95,15 @@ class BertConfig(PretrainedConfig): layer_norm_eps=1e-12, **kwargs): super(BertConfig, self).__init__(**kwargs) - if isinstance(vocab_size_or_config_json_file, str) or (sys.version_info[0] == 2 - and isinstance(vocab_size_or_config_json_file, unicode)): - with open(vocab_size_or_config_json_file, "r", encoding='utf-8') as reader: - json_config = json.loads(reader.read()) - for key, value in json_config.items(): - self.__dict__[key] = value - elif isinstance(vocab_size_or_config_json_file, int): - self.vocab_size = vocab_size_or_config_json_file - self.hidden_size = hidden_size - self.num_hidden_layers = num_hidden_layers - self.num_attention_heads = num_attention_heads - self.hidden_act = hidden_act - self.intermediate_size = intermediate_size - self.hidden_dropout_prob = hidden_dropout_prob - self.attention_probs_dropout_prob = attention_probs_dropout_prob - self.max_position_embeddings = max_position_embeddings - self.type_vocab_size = type_vocab_size - self.initializer_range = initializer_range - self.layer_norm_eps = layer_norm_eps - else: - raise ValueError("First argument must be either a vocabulary size (int)" - " or the path to a pretrained model config file (str)") + self.vocab_size = vocab_size + self.hidden_size = hidden_size + self.num_hidden_layers = num_hidden_layers + self.num_attention_heads = num_attention_heads + self.hidden_act = hidden_act + self.intermediate_size = intermediate_size + self.hidden_dropout_prob = hidden_dropout_prob + self.attention_probs_dropout_prob = attention_probs_dropout_prob + self.max_position_embeddings = max_position_embeddings + self.type_vocab_size = type_vocab_size + self.initializer_range = initializer_range + self.layer_norm_eps = layer_norm_eps diff --git a/transformers/configuration_ctrl.py b/transformers/configuration_ctrl.py index fcbd848dec..f9b9e409e1 100644 --- a/transformers/configuration_ctrl.py +++ b/transformers/configuration_ctrl.py @@ -31,7 +31,7 @@ class CTRLConfig(PretrainedConfig): """Configuration class to store the configuration of a `CTRLModel`. Args: - vocab_size_or_config_json_file: Vocabulary size of `inputs_ids` in `CTRLModel` or a configuration json file. + vocab_size: Vocabulary size of `inputs_ids` in `CTRLModel` or a configuration json file. n_positions: Number of positional embeddings. n_ctx: Size of the causal mask (usually same as n_positions). dff: Size of the inner dimension of the FFN. @@ -52,7 +52,7 @@ class CTRLConfig(PretrainedConfig): def __init__( self, - vocab_size_or_config_json_file=246534, + vocab_size=246534, n_positions=256, n_ctx=256, n_embd=1280, @@ -64,8 +64,6 @@ class CTRLConfig(PretrainedConfig): attn_pdrop=0.1, layer_norm_epsilon=1e-6, initializer_range=0.02, - - num_labels=1, summary_type='cls_index', summary_use_proj=True, summary_activation=None, @@ -76,7 +74,7 @@ class CTRLConfig(PretrainedConfig): """Constructs CTRLConfig. Args: - vocab_size_or_config_json_file: Vocabulary size of `inputs_ids` in `CTRLModel` or a configuration json file. + vocab_size: Vocabulary size of `inputs_ids` in `CTRLModel` or a configuration json file. n_positions: Number of positional embeddings. n_ctx: Size of the causal mask (usually same as n_positions). dff: Size of the inner dimension of the FFN. @@ -94,8 +92,7 @@ class CTRLConfig(PretrainedConfig): initializing all weight matrices. """ super(CTRLConfig, self).__init__(**kwargs) - - self.vocab_size = vocab_size_or_config_json_file if isinstance(vocab_size_or_config_json_file, int) else -1 + self.vocab_size = vocab_size self.n_ctx = n_ctx self.n_positions = n_positions self.n_embd = n_embd @@ -108,23 +105,11 @@ class CTRLConfig(PretrainedConfig): self.layer_norm_epsilon = layer_norm_epsilon self.initializer_range = initializer_range - self.num_labels = num_labels self.summary_type = summary_type self.summary_use_proj = summary_use_proj self.summary_activation = summary_activation self.summary_first_dropout = summary_first_dropout self.summary_proj_to_labels = summary_proj_to_labels - if isinstance(vocab_size_or_config_json_file, str) or (sys.version_info[0] == 2 - and isinstance(vocab_size_or_config_json_file, unicode)): - with open(vocab_size_or_config_json_file, "r", encoding="utf-8") as reader: - json_config = json.loads(reader.read()) - for key, value in json_config.items(): - self.__dict__[key] = value - elif not isinstance(vocab_size_or_config_json_file, int): - raise ValueError( - "First argument must be either a vocabulary size (int)" - "or the path to a pretrained model config file (str)" - ) @property def max_position_embeddings(self): diff --git a/transformers/configuration_distilbert.py b/transformers/configuration_distilbert.py index d5d575be29..d9f7cc6348 100644 --- a/transformers/configuration_distilbert.py +++ b/transformers/configuration_distilbert.py @@ -37,7 +37,7 @@ class DistilBertConfig(PretrainedConfig): pretrained_config_archive_map = DISTILBERT_PRETRAINED_CONFIG_ARCHIVE_MAP def __init__(self, - vocab_size_or_config_json_file=30522, + vocab_size=30522, max_position_embeddings=512, sinusoidal_pos_embds=False, n_layers=6, @@ -53,31 +53,21 @@ class DistilBertConfig(PretrainedConfig): seq_classif_dropout=0.2, **kwargs): super(DistilBertConfig, self).__init__(**kwargs) + self.vocab_size = vocab_size + self.max_position_embeddings = max_position_embeddings + self.sinusoidal_pos_embds = sinusoidal_pos_embds + self.n_layers = n_layers + self.n_heads = n_heads + self.dim = dim + self.hidden_dim = hidden_dim + self.dropout = dropout + self.attention_dropout = attention_dropout + self.activation = activation + self.initializer_range = initializer_range + self.tie_weights_ = tie_weights_ + self.qa_dropout = qa_dropout + self.seq_classif_dropout = seq_classif_dropout - if isinstance(vocab_size_or_config_json_file, str) or (sys.version_info[0] == 2 - and isinstance(vocab_size_or_config_json_file, unicode)): - with open(vocab_size_or_config_json_file, "r", encoding='utf-8') as reader: - json_config = json.loads(reader.read()) - for key, value in json_config.items(): - self.__dict__[key] = value - elif isinstance(vocab_size_or_config_json_file, int): - self.vocab_size = vocab_size_or_config_json_file - self.max_position_embeddings = max_position_embeddings - self.sinusoidal_pos_embds = sinusoidal_pos_embds - self.n_layers = n_layers - self.n_heads = n_heads - self.dim = dim - self.hidden_dim = hidden_dim - self.dropout = dropout - self.attention_dropout = attention_dropout - self.activation = activation - self.initializer_range = initializer_range - self.tie_weights_ = tie_weights_ - self.qa_dropout = qa_dropout - self.seq_classif_dropout = seq_classif_dropout - else: - raise ValueError("First argument must be either a vocabulary size (int)" - " or the path to a pretrained model config file (str)") @property def hidden_size(self): return self.dim diff --git a/transformers/configuration_gpt2.py b/transformers/configuration_gpt2.py index c2fb4948d3..4c200c0760 100644 --- a/transformers/configuration_gpt2.py +++ b/transformers/configuration_gpt2.py @@ -36,7 +36,7 @@ class GPT2Config(PretrainedConfig): """Configuration class to store the configuration of a `GPT2Model`. Args: - vocab_size_or_config_json_file: Vocabulary size of `inputs_ids` in `GPT2Model` or a configuration json file. + vocab_size: Vocabulary size of `inputs_ids` in `GPT2Model` or a configuration json file. n_positions: Number of positional embeddings. n_ctx: Size of the causal mask (usually same as n_positions). n_embd: Dimensionality of the embeddings and hidden states. @@ -56,7 +56,7 @@ class GPT2Config(PretrainedConfig): def __init__( self, - vocab_size_or_config_json_file=50257, + vocab_size=50257, n_positions=1024, n_ctx=1024, n_embd=768, @@ -67,8 +67,6 @@ class GPT2Config(PretrainedConfig): attn_pdrop=0.1, layer_norm_epsilon=1e-5, initializer_range=0.02, - - num_labels=1, summary_type='cls_index', summary_use_proj=True, summary_activation=None, @@ -79,7 +77,7 @@ class GPT2Config(PretrainedConfig): """Constructs GPT2Config. Args: - vocab_size_or_config_json_file: Vocabulary size of `inputs_ids` in `GPT2Model` or a configuration json file. + vocab_size: Vocabulary size of `inputs_ids` in `GPT2Model` or a configuration json file. n_positions: Number of positional embeddings. n_ctx: Size of the causal mask (usually same as n_positions). n_embd: Dimensionality of the embeddings and hidden states. @@ -96,37 +94,22 @@ class GPT2Config(PretrainedConfig): initializing all weight matrices. """ super(GPT2Config, self).__init__(**kwargs) - - if isinstance(vocab_size_or_config_json_file, str) or (sys.version_info[0] == 2 - and isinstance(vocab_size_or_config_json_file, unicode)): - with open(vocab_size_or_config_json_file, "r", encoding="utf-8") as reader: - json_config = json.loads(reader.read()) - for key, value in json_config.items(): - self.__dict__[key] = value - elif isinstance(vocab_size_or_config_json_file, int): - self.vocab_size = vocab_size_or_config_json_file - self.n_ctx = n_ctx - self.n_positions = n_positions - self.n_embd = n_embd - self.n_layer = n_layer - self.n_head = n_head - self.resid_pdrop = resid_pdrop - self.embd_pdrop = embd_pdrop - self.attn_pdrop = attn_pdrop - self.layer_norm_epsilon = layer_norm_epsilon - self.initializer_range = initializer_range - - self.num_labels = num_labels - self.summary_type = summary_type - self.summary_use_proj = summary_use_proj - self.summary_activation = summary_activation - self.summary_first_dropout = summary_first_dropout - self.summary_proj_to_labels = summary_proj_to_labels - else: - raise ValueError( - "First argument must be either a vocabulary size (int)" - "or the path to a pretrained model config file (str)" - ) + self.vocab_size = vocab_size + self.n_ctx = n_ctx + self.n_positions = n_positions + self.n_embd = n_embd + self.n_layer = n_layer + self.n_head = n_head + self.resid_pdrop = resid_pdrop + self.embd_pdrop = embd_pdrop + self.attn_pdrop = attn_pdrop + self.layer_norm_epsilon = layer_norm_epsilon + self.initializer_range = initializer_range + self.summary_type = summary_type + self.summary_use_proj = summary_use_proj + self.summary_activation = summary_activation + self.summary_first_dropout = summary_first_dropout + self.summary_proj_to_labels = summary_proj_to_labels @property def max_position_embeddings(self): diff --git a/transformers/configuration_openai.py b/transformers/configuration_openai.py index 886b7f5bc5..7776a0bb9f 100644 --- a/transformers/configuration_openai.py +++ b/transformers/configuration_openai.py @@ -35,7 +35,7 @@ class OpenAIGPTConfig(PretrainedConfig): Configuration class to store the configuration of a `OpenAIGPTModel`. Args: - vocab_size_or_config_json_file: Vocabulary size of `inputs_ids` in `OpenAIGPTModel` or a configuration json file. + vocab_size: Vocabulary size of `inputs_ids` in `OpenAIGPTModel` or a configuration json file. n_positions: Number of positional embeddings. n_ctx: Size of the causal mask (usually same as n_positions). n_embd: Dimensionality of the embeddings and hidden states. @@ -58,7 +58,7 @@ class OpenAIGPTConfig(PretrainedConfig): def __init__( self, - vocab_size_or_config_json_file=40478, + vocab_size=40478, n_positions=512, n_ctx=512, n_embd=768, @@ -71,8 +71,6 @@ class OpenAIGPTConfig(PretrainedConfig): layer_norm_epsilon=1e-5, initializer_range=0.02, predict_special_tokens=True, - - num_labels=1, summary_type='cls_index', summary_use_proj=True, summary_activation=None, @@ -83,39 +81,24 @@ class OpenAIGPTConfig(PretrainedConfig): """Constructs OpenAIGPTConfig. """ super(OpenAIGPTConfig, self).__init__(**kwargs) - - if isinstance(vocab_size_or_config_json_file, str) or (sys.version_info[0] == 2 - and isinstance(vocab_size_or_config_json_file, unicode)): - with open(vocab_size_or_config_json_file, "r", encoding="utf-8") as reader: - json_config = json.loads(reader.read()) - for key, value in json_config.items(): - self.__dict__[key] = value - elif isinstance(vocab_size_or_config_json_file, int): - self.vocab_size = vocab_size_or_config_json_file - self.n_ctx = n_ctx - self.n_positions = n_positions - self.n_embd = n_embd - self.n_layer = n_layer - self.n_head = n_head - self.afn = afn - self.resid_pdrop = resid_pdrop - self.embd_pdrop = embd_pdrop - self.attn_pdrop = attn_pdrop - self.layer_norm_epsilon = layer_norm_epsilon - self.initializer_range = initializer_range - self.predict_special_tokens = predict_special_tokens - - self.num_labels = num_labels - self.summary_type = summary_type - self.summary_use_proj = summary_use_proj - self.summary_activation = summary_activation - self.summary_first_dropout = summary_first_dropout - self.summary_proj_to_labels = summary_proj_to_labels - else: - raise ValueError( - "First argument must be either a vocabulary size (int)" - "or the path to a pretrained model config file (str)" - ) + self.vocab_size = vocab_size + self.n_ctx = n_ctx + self.n_positions = n_positions + self.n_embd = n_embd + self.n_layer = n_layer + self.n_head = n_head + self.afn = afn + self.resid_pdrop = resid_pdrop + self.embd_pdrop = embd_pdrop + self.attn_pdrop = attn_pdrop + self.layer_norm_epsilon = layer_norm_epsilon + self.initializer_range = initializer_range + self.predict_special_tokens = predict_special_tokens + self.summary_type = summary_type + self.summary_use_proj = summary_use_proj + self.summary_activation = summary_activation + self.summary_first_dropout = summary_first_dropout + self.summary_proj_to_labels = summary_proj_to_labels @property def max_position_embeddings(self): diff --git a/transformers/configuration_transfo_xl.py b/transformers/configuration_transfo_xl.py index d55a6adbe6..52f0f45a50 100644 --- a/transformers/configuration_transfo_xl.py +++ b/transformers/configuration_transfo_xl.py @@ -34,7 +34,7 @@ class TransfoXLConfig(PretrainedConfig): """Configuration class to store the configuration of a `TransfoXLModel`. Args: - vocab_size_or_config_json_file: Vocabulary size of `inputs_ids` in `TransfoXLModel` or a configuration json file. + vocab_size: Vocabulary size of `inputs_ids` in `TransfoXLModel` or a configuration json file. cutoffs: cutoffs for the adaptive softmax d_model: Dimensionality of the model's hidden states. d_embed: Dimensionality of the embeddings @@ -68,7 +68,7 @@ class TransfoXLConfig(PretrainedConfig): pretrained_config_archive_map = TRANSFO_XL_PRETRAINED_CONFIG_ARCHIVE_MAP def __init__(self, - vocab_size_or_config_json_file=267735, + vocab_size=267735, cutoffs=[20000, 40000, 200000], d_model=1024, d_embed=1024, @@ -100,7 +100,7 @@ class TransfoXLConfig(PretrainedConfig): """Constructs TransfoXLConfig. """ super(TransfoXLConfig, self).__init__(**kwargs) - self.n_token = vocab_size_or_config_json_file if isinstance(vocab_size_or_config_json_file, int) else -1 + self.vocab_size = vocab_size self.cutoffs = [] self.cutoffs.extend(cutoffs) self.tie_weight = tie_weight @@ -133,27 +133,17 @@ class TransfoXLConfig(PretrainedConfig): self.init_std = init_std self.layer_norm_epsilon = layer_norm_epsilon - if isinstance(vocab_size_or_config_json_file, str) or (sys.version_info[0] == 2 - and isinstance(vocab_size_or_config_json_file, unicode)): - with open(vocab_size_or_config_json_file, "r", encoding='utf-8') as reader: - json_config = json.loads(reader.read()) - for key, value in json_config.items(): - self.__dict__[key] = value - elif not isinstance(vocab_size_or_config_json_file, int): - raise ValueError("First argument must be either a vocabulary size (int)" - " or the path to a pretrained model config file (str)") - @property def max_position_embeddings(self): return self.tgt_len + self.ext_len + self.mem_len @property - def vocab_size(self): - return self.n_token + def n_token(self): # Backward compatibility + return self.vocab_size - @vocab_size.setter - def vocab_size(self, value): - self.n_token = value + @n_token.setter + def n_token(self, value): # Backward compatibility + self.vocab_size = value @property def hidden_size(self): diff --git a/transformers/configuration_utils.py b/transformers/configuration_utils.py index 82959adb57..6c9eeea175 100644 --- a/transformers/configuration_utils.py +++ b/transformers/configuration_utils.py @@ -49,8 +49,7 @@ class PretrainedConfig(object): pretrained_config_archive_map = {} def __init__(self, **kwargs): - self.finetuning_task = kwargs.pop('finetuning_task', None) - self.num_labels = kwargs.pop('num_labels', 2) + # Attributes with defaults self.output_attentions = kwargs.pop('output_attentions', False) self.output_hidden_states = kwargs.pop('output_hidden_states', False) self.output_past = kwargs.pop('output_past', True) # Not used by all models @@ -59,6 +58,22 @@ class PretrainedConfig(object): self.pruned_heads = kwargs.pop('pruned_heads', {}) self.is_decoder = kwargs.pop('is_decoder', False) + # Fine-tuning task arguments + self.finetuning_task = kwargs.pop('finetuning_task', None) + self.num_labels = kwargs.pop('num_labels', 2) + self.id2label = kwargs.pop('id2label', {i: 'LABEL_{}'.format(i) for i in range(self.num_labels)}) + self.id2label = dict((int(key), value) for key, value in self.id2label.items()) + self.label2id = kwargs.pop('label2id', dict(zip(self.id2label.values(), self.id2label.keys()))) + self.label2id = dict((key, int(value)) for key, value in self.label2id.items()) + + # Additional attributes without default values + for key, value in kwargs.items(): + try: + setattr(self, key, value) + except AttributeError as err: + logger.error("Can't set {} with value {} for {}".format(key, value, self)) + raise err + def save_pretrained(self, save_directory): """ Save a configuration object to the directory `save_directory`, so that it can be re-loaded using the :func:`~transformers.PretrainedConfig.from_pretrained` class method. @@ -183,17 +198,15 @@ class PretrainedConfig(object): @classmethod def from_dict(cls, json_object): """Constructs a `Config` from a Python dictionary of parameters.""" - config = cls(vocab_size_or_config_json_file=-1) - for key, value in json_object.items(): - setattr(config, key, value) - return config + return cls(**json_object) @classmethod def from_json_file(cls, json_file): """Constructs a `Config` from a json file of parameters.""" with open(json_file, "r", encoding='utf-8') as reader: text = reader.read() - return cls.from_dict(json.loads(text)) + dict_obj = json.loads(text) + return cls(**dict_obj) def __eq__(self, other): return self.__dict__ == other.__dict__ diff --git a/transformers/configuration_xlm.py b/transformers/configuration_xlm.py index fa3a5f40f6..0740cc4026 100644 --- a/transformers/configuration_xlm.py +++ b/transformers/configuration_xlm.py @@ -42,7 +42,7 @@ class XLMConfig(PretrainedConfig): """Configuration class to store the configuration of a `XLMModel`. Args: - vocab_size_or_config_json_file: Vocabulary size of `inputs_ids` in `XLMModel`. + vocab_size: Vocabulary size of `inputs_ids` in `XLMModel`. d_model: Size of the encoder layers and the pooler layer. n_layer: Number of hidden layers in the Transformer encoder. n_head: Number of attention heads for each attention layer in @@ -81,7 +81,7 @@ class XLMConfig(PretrainedConfig): pretrained_config_archive_map = XLM_PRETRAINED_CONFIG_ARCHIVE_MAP def __init__(self, - vocab_size_or_config_json_file=30145, + vocab_size=30145, emb_dim=2048, n_layers=12, n_heads=16, @@ -103,9 +103,6 @@ class XLMConfig(PretrainedConfig): unk_index=3, mask_index=5, is_encoder=True, - - finetuning_task=None, - num_labels=2, summary_type='first', summary_use_proj=True, summary_activation=None, @@ -117,56 +114,43 @@ class XLMConfig(PretrainedConfig): """Constructs XLMConfig. """ super(XLMConfig, self).__init__(**kwargs) - - if isinstance(vocab_size_or_config_json_file, str) or (sys.version_info[0] == 2 - and isinstance(vocab_size_or_config_json_file, unicode)): - with open(vocab_size_or_config_json_file, "r", encoding='utf-8') as reader: - json_config = json.loads(reader.read()) - for key, value in json_config.items(): - self.__dict__[key] = value - elif isinstance(vocab_size_or_config_json_file, int): - self.n_words = vocab_size_or_config_json_file - self.emb_dim = emb_dim - self.n_layers = n_layers - self.n_heads = n_heads - self.dropout = dropout - self.attention_dropout = attention_dropout - self.gelu_activation = gelu_activation - self.sinusoidal_embeddings = sinusoidal_embeddings - self.causal = causal - self.asm = asm - self.n_langs = n_langs - self.use_lang_emb = use_lang_emb - self.layer_norm_eps = layer_norm_eps - self.bos_index = bos_index - self.eos_index = eos_index - self.pad_index = pad_index - self.unk_index = unk_index - self.mask_index = mask_index - self.is_encoder = is_encoder - self.max_position_embeddings = max_position_embeddings - self.embed_init_std = embed_init_std - self.init_std = init_std - self.finetuning_task = finetuning_task - self.num_labels = num_labels - self.summary_type = summary_type - self.summary_use_proj = summary_use_proj - self.summary_activation = summary_activation - self.summary_proj_to_labels = summary_proj_to_labels - self.summary_first_dropout = summary_first_dropout - self.start_n_top = start_n_top - self.end_n_top = end_n_top - else: - raise ValueError("First argument must be either a vocabulary size (int)" - " or the path to a pretrained model config file (str)") + self.vocab_size = vocab_size + self.emb_dim = emb_dim + self.n_layers = n_layers + self.n_heads = n_heads + self.dropout = dropout + self.attention_dropout = attention_dropout + self.gelu_activation = gelu_activation + self.sinusoidal_embeddings = sinusoidal_embeddings + self.causal = causal + self.asm = asm + self.n_langs = n_langs + self.use_lang_emb = use_lang_emb + self.layer_norm_eps = layer_norm_eps + self.bos_index = bos_index + self.eos_index = eos_index + self.pad_index = pad_index + self.unk_index = unk_index + self.mask_index = mask_index + self.is_encoder = is_encoder + self.max_position_embeddings = max_position_embeddings + self.embed_init_std = embed_init_std + self.init_std = init_std + self.summary_type = summary_type + self.summary_use_proj = summary_use_proj + self.summary_activation = summary_activation + self.summary_proj_to_labels = summary_proj_to_labels + self.summary_first_dropout = summary_first_dropout + self.start_n_top = start_n_top + self.end_n_top = end_n_top @property - def vocab_size(self): - return self.n_words + def n_words(self): # For backward compatibility + return self.vocab_size - @vocab_size.setter - def vocab_size(self, value): - self.n_words = value + @n_words.setter + def n_words(self, value): # For backward compatibility + self.vocab_size = value @property def hidden_size(self): diff --git a/transformers/configuration_xlnet.py b/transformers/configuration_xlnet.py index 0dbf518849..017c57cfd5 100644 --- a/transformers/configuration_xlnet.py +++ b/transformers/configuration_xlnet.py @@ -35,7 +35,7 @@ class XLNetConfig(PretrainedConfig): """Configuration class to store the configuration of a ``XLNetModel``. Args: - vocab_size_or_config_json_file: Vocabulary size of ``inputs_ids`` in ``XLNetModel``. + vocab_size: Vocabulary size of ``inputs_ids`` in ``XLNetModel``. d_model: Size of the encoder layers and the pooler layer. n_layer: Number of hidden layers in the Transformer encoder. n_head: Number of attention heads for each attention layer in @@ -72,28 +72,22 @@ class XLNetConfig(PretrainedConfig): pretrained_config_archive_map = XLNET_PRETRAINED_CONFIG_ARCHIVE_MAP def __init__(self, - vocab_size_or_config_json_file=32000, + vocab_size=32000, d_model=1024, n_layer=24, n_head=16, d_inner=4096, - max_position_embeddings=512, ff_activation="gelu", untie_r=True, attn_type="bi", - initializer_range=0.02, layer_norm_eps=1e-12, - dropout=0.1, mem_len=None, reuse_len=None, bi_data=False, clamp_len=-1, same_length=False, - - finetuning_task=None, - num_labels=2, summary_type='last', summary_use_proj=True, summary_activation='tanh', @@ -104,58 +98,45 @@ class XLNetConfig(PretrainedConfig): """Constructs XLNetConfig. """ super(XLNetConfig, self).__init__(**kwargs) + self.vocab_size = vocab_size + self.d_model = d_model + self.n_layer = n_layer + self.n_head = n_head + assert d_model % n_head == 0 + self.d_head = d_model // n_head + self.ff_activation = ff_activation + self.d_inner = d_inner + self.untie_r = untie_r + self.attn_type = attn_type - if isinstance(vocab_size_or_config_json_file, str) or (sys.version_info[0] == 2 - and isinstance(vocab_size_or_config_json_file, unicode)): - with open(vocab_size_or_config_json_file, "r", encoding='utf-8') as reader: - json_config = json.loads(reader.read()) - for key, value in json_config.items(): - setattr(config, key, value) - elif isinstance(vocab_size_or_config_json_file, int): - self.n_token = vocab_size_or_config_json_file - self.d_model = d_model - self.n_layer = n_layer - self.n_head = n_head - assert d_model % n_head == 0 - self.d_head = d_model // n_head - self.ff_activation = ff_activation - self.d_inner = d_inner - self.untie_r = untie_r - self.attn_type = attn_type + self.initializer_range = initializer_range + self.layer_norm_eps = layer_norm_eps - self.initializer_range = initializer_range - self.layer_norm_eps = layer_norm_eps + self.dropout = dropout + self.mem_len = mem_len + self.reuse_len = reuse_len + self.bi_data = bi_data + self.clamp_len = clamp_len + self.same_length = same_length - self.dropout = dropout - self.mem_len = mem_len - self.reuse_len = reuse_len - self.bi_data = bi_data - self.clamp_len = clamp_len - self.same_length = same_length - - self.finetuning_task = finetuning_task - self.num_labels = num_labels - self.summary_type = summary_type - self.summary_use_proj = summary_use_proj - self.summary_activation = summary_activation - self.summary_last_dropout = summary_last_dropout - self.start_n_top = start_n_top - self.end_n_top = end_n_top - else: - raise ValueError("First argument must be either a vocabulary size (int)" - " or the path to a pretrained model config file (str)") + self.summary_type = summary_type + self.summary_use_proj = summary_use_proj + self.summary_activation = summary_activation + self.summary_last_dropout = summary_last_dropout + self.start_n_top = start_n_top + self.end_n_top = end_n_top @property def max_position_embeddings(self): return -1 @property - def vocab_size(self): - return self.n_token + def n_token(self): # Backward compatibility + return self.vocab_size - @vocab_size.setter - def vocab_size(self, value): - self.n_token = value + @n_token.setter + def n_token(self, value): # Backward compatibility + self.vocab_size = value @property def hidden_size(self): diff --git a/transformers/convert_roberta_original_pytorch_checkpoint_to_pytorch.py b/transformers/convert_roberta_original_pytorch_checkpoint_to_pytorch.py index 60935add60..b4dc1bb61b 100644 --- a/transformers/convert_roberta_original_pytorch_checkpoint_to_pytorch.py +++ b/transformers/convert_roberta_original_pytorch_checkpoint_to_pytorch.py @@ -46,7 +46,7 @@ def convert_roberta_checkpoint_to_pytorch(roberta_checkpoint_path, pytorch_dump_ roberta = FairseqRobertaModel.from_pretrained(roberta_checkpoint_path) roberta.eval() # disable dropout config = BertConfig( - vocab_size_or_config_json_file=50265, + vocab_size=50265, hidden_size=roberta.args.encoder_embed_dim, num_hidden_layers=roberta.args.encoder_layers, num_attention_heads=roberta.args.encoder_attention_heads, diff --git a/transformers/modeling_gpt2.py b/transformers/modeling_gpt2.py index 96fd1c0607..ea660262d7 100644 --- a/transformers/modeling_gpt2.py +++ b/transformers/modeling_gpt2.py @@ -634,6 +634,7 @@ class GPT2DoubleHeadsModel(GPT2PreTrainedModel): """ def __init__(self, config): super(GPT2DoubleHeadsModel, self).__init__(config) + config.num_labels = 1 self.transformer = GPT2Model(config) self.lm_head = nn.Linear(config.n_embd, config.vocab_size, bias=False) self.multiple_choice_head = SequenceSummary(config) diff --git a/transformers/modeling_tf_gpt2.py b/transformers/modeling_tf_gpt2.py index c738e5e8e3..973473179f 100644 --- a/transformers/modeling_tf_gpt2.py +++ b/transformers/modeling_tf_gpt2.py @@ -574,6 +574,7 @@ class TFGPT2DoubleHeadsModel(TFGPT2PreTrainedModel): """ def __init__(self, config, *inputs, **kwargs): super(TFGPT2DoubleHeadsModel, self).__init__(config, *inputs, **kwargs) + config.num_labels = 1 self.transformer = TFGPT2MainLayer(config, name='transformer') self.multiple_choice_head = TFSequenceSummary(config, initializer_range=config.initializer_range, name='multiple_choice_head') diff --git a/transformers/modeling_tf_transfo_xl.py b/transformers/modeling_tf_transfo_xl.py index fd325e218e..848edfa37a 100644 --- a/transformers/modeling_tf_transfo_xl.py +++ b/transformers/modeling_tf_transfo_xl.py @@ -353,7 +353,7 @@ class TFTransfoXLMainLayer(tf.keras.layers.Layer): self.output_attentions = config.output_attentions self.output_hidden_states = config.output_hidden_states - self.n_token = config.n_token + self.n_token = config.vocab_size self.d_embed = config.d_embed self.d_model = config.d_model @@ -361,7 +361,7 @@ class TFTransfoXLMainLayer(tf.keras.layers.Layer): self.d_head = config.d_head self.untie_r = config.untie_r - self.word_emb = TFAdaptiveEmbedding(config.n_token, config.d_embed, config.d_model, config.cutoffs, + self.word_emb = TFAdaptiveEmbedding(config.vocab_size, config.d_embed, config.d_model, config.cutoffs, div_val=config.div_val, init_std=config.init_std, name='word_emb') self.drop = tf.keras.layers.Dropout(config.dropout) @@ -729,7 +729,7 @@ class TFTransfoXLLMHeadModel(TFTransfoXLPreTrainedModel): raise NotImplementedError # use adaptive softmax (including standard softmax) else: - self.crit = TFAdaptiveSoftmaxMask(config.n_token, config.d_embed, config.d_model, + self.crit = TFAdaptiveSoftmaxMask(config.vocab_size, config.d_embed, config.d_model, config.cutoffs, div_val=config.div_val, name='crit') def reset_length(self, tgt_len, ext_len, mem_len): diff --git a/transformers/modeling_tf_transfo_xl_utilities.py b/transformers/modeling_tf_transfo_xl_utilities.py index e6a6dfe686..f730af851f 100644 --- a/transformers/modeling_tf_transfo_xl_utilities.py +++ b/transformers/modeling_tf_transfo_xl_utilities.py @@ -25,15 +25,15 @@ import tensorflow as tf from .modeling_tf_utils import shape_list class TFAdaptiveSoftmaxMask(tf.keras.layers.Layer): - def __init__(self, n_token, d_embed, d_proj, cutoffs, div_val=1, + def __init__(self, vocab_size, d_embed, d_proj, cutoffs, div_val=1, keep_order=False, **kwargs): super(TFAdaptiveSoftmaxMask, self).__init__(**kwargs) - self.n_token = n_token + self.vocab_size = vocab_size self.d_embed = d_embed self.d_proj = d_proj - self.cutoffs = cutoffs + [n_token] + self.cutoffs = cutoffs + [vocab_size] self.cutoff_ends = [0] + self.cutoffs self.div_val = div_val @@ -66,11 +66,11 @@ class TFAdaptiveSoftmaxMask(tf.keras.layers.Layer): self.out_projs.append(weight) else: self.out_projs.append(None) - weight = self.add_weight(shape=(self.n_token, self.d_embed,), + weight = self.add_weight(shape=(self.vocab_size, self.d_embed,), initializer='zeros', trainable=True, name='out_layers_._{}_._weight'.format(i)) - bias = self.add_weight(shape=(self.n_token,), + bias = self.add_weight(shape=(self.vocab_size,), initializer='zeros', trainable=True, name='out_layers_._{}_._bias'.format(i)) @@ -114,7 +114,7 @@ class TFAdaptiveSoftmaxMask(tf.keras.layers.Layer): hidden, target = inputs head_logprob = 0 if self.n_clusters == 0: - softmax_b = tf.get_variable('bias', [n_token], initializer=tf.zeros_initializer()) + softmax_b = tf.get_variable('bias', [self.config.vocab_size], initializer=tf.zeros_initializer()) output = self._logit(hidden, self.out_layers[0][0], self.out_layers[0][1], self.out_projs[0]) if target is not None: loss = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=target, logits=output) diff --git a/transformers/modeling_tf_xlnet.py b/transformers/modeling_tf_xlnet.py index 759b57d835..dde2b6a8df 100644 --- a/transformers/modeling_tf_xlnet.py +++ b/transformers/modeling_tf_xlnet.py @@ -366,7 +366,7 @@ class TFXLNetMainLayer(tf.keras.layers.Layer): self.use_bfloat16 = config.use_bfloat16 self.initializer_range = config.initializer_range - self.word_embedding = TFSharedEmbeddings(config.n_token, config.d_model, initializer_range=config.initializer_range, name='word_embedding') + self.word_embedding = TFSharedEmbeddings(config.vocab_size, config.d_model, initializer_range=config.initializer_range, name='word_embedding') self.layer = [TFXLNetLayer(config, name='layer_._{}'.format(i)) for i in range(config.n_layer)] self.dropout = tf.keras.layers.Dropout(config.dropout) diff --git a/transformers/modeling_transfo_xl.py b/transformers/modeling_transfo_xl.py index a6a82f0dfe..f87d857a7f 100644 --- a/transformers/modeling_transfo_xl.py +++ b/transformers/modeling_transfo_xl.py @@ -592,14 +592,14 @@ class TransfoXLModel(TransfoXLPreTrainedModel): self.output_attentions = config.output_attentions self.output_hidden_states = config.output_hidden_states - self.n_token = config.n_token + self.n_token = config.vocab_size self.d_embed = config.d_embed self.d_model = config.d_model self.n_head = config.n_head self.d_head = config.d_head - self.word_emb = AdaptiveEmbedding(config.n_token, config.d_embed, config.d_model, config.cutoffs, + self.word_emb = AdaptiveEmbedding(config.vocab_size, config.d_embed, config.d_model, config.cutoffs, div_val=config.div_val) self.drop = nn.Dropout(config.dropout) @@ -836,11 +836,11 @@ class TransfoXLLMHeadModel(TransfoXLPreTrainedModel): self.sample_softmax = config.sample_softmax # use sampled softmax if config.sample_softmax > 0: - self.out_layer = nn.Linear(config.d_model, config.n_token) - self.sampler = LogUniformSampler(config.n_token, config.sample_softmax) + self.out_layer = nn.Linear(config.d_model, config.vocab_size) + self.sampler = LogUniformSampler(config.vocab_size, config.sample_softmax) # use adaptive softmax (including standard softmax) else: - self.crit = ProjectedAdaptiveLogSoftmax(config.n_token, config.d_embed, config.d_model, + self.crit = ProjectedAdaptiveLogSoftmax(config.vocab_size, config.d_embed, config.d_model, config.cutoffs, div_val=config.div_val) self.init_weights() diff --git a/transformers/modeling_xlnet.py b/transformers/modeling_xlnet.py index 225e5b059b..daed5f2857 100644 --- a/transformers/modeling_xlnet.py +++ b/transformers/modeling_xlnet.py @@ -609,7 +609,7 @@ class XLNetModel(XLNetPreTrainedModel): self.clamp_len = config.clamp_len self.n_layer = config.n_layer - self.word_embedding = nn.Embedding(config.n_token, config.d_model) + self.word_embedding = nn.Embedding(config.vocab_size, config.d_model) self.mask_emb = nn.Parameter(torch.FloatTensor(1, 1, config.d_model)) self.layer = nn.ModuleList([XLNetLayer(config) for _ in range(config.n_layer)]) self.dropout = nn.Dropout(config.dropout) @@ -940,7 +940,7 @@ class XLNetLMHeadModel(XLNetPreTrainedModel): self.same_length = config.same_length self.transformer = XLNetModel(config) - self.lm_loss = nn.Linear(config.d_model, config.n_token, bias=True) + self.lm_loss = nn.Linear(config.d_model, config.vocab_size, bias=True) self.init_weights() diff --git a/transformers/tests/modeling_albert_test.py b/transformers/tests/modeling_albert_test.py index a14d66ae8f..1911d244e7 100644 --- a/transformers/tests/modeling_albert_test.py +++ b/transformers/tests/modeling_albert_test.py @@ -110,7 +110,7 @@ class AlbertModelTest(CommonTestCases.CommonModelTester): choice_labels = ids_tensor([self.batch_size], self.num_choices) config = AlbertConfig( - vocab_size_or_config_json_file=self.vocab_size, + vocab_size=self.vocab_size, hidden_size=self.hidden_size, num_hidden_layers=self.num_hidden_layers, num_attention_heads=self.num_attention_heads, diff --git a/transformers/tests/modeling_bert_test.py b/transformers/tests/modeling_bert_test.py index 539f66cd3f..0eb7bc9a14 100644 --- a/transformers/tests/modeling_bert_test.py +++ b/transformers/tests/modeling_bert_test.py @@ -109,7 +109,7 @@ class BertModelTest(CommonTestCases.CommonModelTester): choice_labels = ids_tensor([self.batch_size], self.num_choices) config = BertConfig( - vocab_size_or_config_json_file=self.vocab_size, + vocab_size=self.vocab_size, hidden_size=self.hidden_size, num_hidden_layers=self.num_hidden_layers, num_attention_heads=self.num_attention_heads, diff --git a/transformers/tests/modeling_common_test.py b/transformers/tests/modeling_common_test.py index 80d5d95455..f86eb7a3d0 100644 --- a/transformers/tests/modeling_common_test.py +++ b/transformers/tests/modeling_common_test.py @@ -633,7 +633,7 @@ class CommonTestCases: mc_token_ids = ids_tensor([self.batch_size, self.n_choices], self.seq_length) config = self.config_class( - vocab_size_or_config_json_file=self.vocab_size, + vocab_size=self.vocab_size, n_positions=self.n_positions, n_embd=self.hidden_size, n_layer=self.num_hidden_layers, diff --git a/transformers/tests/modeling_ctrl_test.py b/transformers/tests/modeling_ctrl_test.py index 8c14578a5c..c7de49b2ab 100644 --- a/transformers/tests/modeling_ctrl_test.py +++ b/transformers/tests/modeling_ctrl_test.py @@ -114,7 +114,7 @@ class CTRLModelTest(CommonTestCases.CommonModelTester): choice_labels = ids_tensor([self.batch_size], self.num_choices) config = CTRLConfig( - vocab_size_or_config_json_file=self.vocab_size, + vocab_size=self.vocab_size, n_embd=self.hidden_size, n_layer=self.num_hidden_layers, n_head=self.num_attention_heads, diff --git a/transformers/tests/modeling_distilbert_test.py b/transformers/tests/modeling_distilbert_test.py index 4b8f64327d..82f71c40da 100644 --- a/transformers/tests/modeling_distilbert_test.py +++ b/transformers/tests/modeling_distilbert_test.py @@ -105,7 +105,7 @@ class DistilBertModelTest(CommonTestCases.CommonModelTester): choice_labels = ids_tensor([self.batch_size], self.num_choices) config = DistilBertConfig( - vocab_size_or_config_json_file=self.vocab_size, + vocab_size=self.vocab_size, dim=self.hidden_size, n_layers=self.num_hidden_layers, n_heads=self.num_attention_heads, diff --git a/transformers/tests/modeling_gpt2_test.py b/transformers/tests/modeling_gpt2_test.py index ecaa2a4bd0..a82e39c261 100644 --- a/transformers/tests/modeling_gpt2_test.py +++ b/transformers/tests/modeling_gpt2_test.py @@ -110,7 +110,7 @@ class GPT2ModelTest(CommonTestCases.CommonModelTester): choice_labels = ids_tensor([self.batch_size], self.num_choices) config = GPT2Config( - vocab_size_or_config_json_file=self.vocab_size, + vocab_size=self.vocab_size, n_embd=self.hidden_size, n_layer=self.num_hidden_layers, n_head=self.num_attention_heads, diff --git a/transformers/tests/modeling_openai_test.py b/transformers/tests/modeling_openai_test.py index 8e4d13438d..7655e432e8 100644 --- a/transformers/tests/modeling_openai_test.py +++ b/transformers/tests/modeling_openai_test.py @@ -98,7 +98,7 @@ class OpenAIGPTModelTest(CommonTestCases.CommonModelTester): choice_labels = ids_tensor([self.batch_size], self.num_choices) config = OpenAIGPTConfig( - vocab_size_or_config_json_file=self.vocab_size, + vocab_size=self.vocab_size, n_embd=self.hidden_size, n_layer=self.num_hidden_layers, n_head=self.num_attention_heads, diff --git a/transformers/tests/modeling_roberta_test.py b/transformers/tests/modeling_roberta_test.py index 7a3553b164..4d34a50528 100644 --- a/transformers/tests/modeling_roberta_test.py +++ b/transformers/tests/modeling_roberta_test.py @@ -106,7 +106,7 @@ class RobertaModelTest(CommonTestCases.CommonModelTester): choice_labels = ids_tensor([self.batch_size], self.num_choices) config = RobertaConfig( - vocab_size_or_config_json_file=self.vocab_size, + vocab_size=self.vocab_size, hidden_size=self.hidden_size, num_hidden_layers=self.num_hidden_layers, num_attention_heads=self.num_attention_heads, diff --git a/transformers/tests/modeling_tf_albert_test.py b/transformers/tests/modeling_tf_albert_test.py index 7d3325b70b..93aeab66c2 100644 --- a/transformers/tests/modeling_tf_albert_test.py +++ b/transformers/tests/modeling_tf_albert_test.py @@ -118,7 +118,7 @@ class TFAlbertModelTest(TFCommonTestCases.TFCommonModelTester): choice_labels = ids_tensor([self.batch_size], self.num_choices) config = AlbertConfig( - vocab_size_or_config_json_file=self.vocab_size, + vocab_size=self.vocab_size, hidden_size=self.hidden_size, num_hidden_layers=self.num_hidden_layers, num_attention_heads=self.num_attention_heads, diff --git a/transformers/tests/modeling_tf_bert_test.py b/transformers/tests/modeling_tf_bert_test.py index d7a86fecb9..20073e1ab8 100644 --- a/transformers/tests/modeling_tf_bert_test.py +++ b/transformers/tests/modeling_tf_bert_test.py @@ -114,7 +114,7 @@ class TFBertModelTest(TFCommonTestCases.TFCommonModelTester): choice_labels = ids_tensor([self.batch_size], self.num_choices) config = BertConfig( - vocab_size_or_config_json_file=self.vocab_size, + vocab_size=self.vocab_size, hidden_size=self.hidden_size, num_hidden_layers=self.num_hidden_layers, num_attention_heads=self.num_attention_heads, diff --git a/transformers/tests/modeling_tf_ctrl_test.py b/transformers/tests/modeling_tf_ctrl_test.py index 0b421c20c9..0876582e57 100644 --- a/transformers/tests/modeling_tf_ctrl_test.py +++ b/transformers/tests/modeling_tf_ctrl_test.py @@ -112,7 +112,7 @@ class TFCTRLModelTest(TFCommonTestCases.TFCommonModelTester): choice_labels = ids_tensor([self.batch_size], self.num_choices) config = CTRLConfig( - vocab_size_or_config_json_file=self.vocab_size, + vocab_size=self.vocab_size, n_embd=self.hidden_size, n_layer=self.num_hidden_layers, n_head=self.num_attention_heads, diff --git a/transformers/tests/modeling_tf_distilbert_test.py b/transformers/tests/modeling_tf_distilbert_test.py index 0ec45150ca..d9e971c2a5 100644 --- a/transformers/tests/modeling_tf_distilbert_test.py +++ b/transformers/tests/modeling_tf_distilbert_test.py @@ -107,7 +107,7 @@ class TFDistilBertModelTest(TFCommonTestCases.TFCommonModelTester): choice_labels = ids_tensor([self.batch_size], self.num_choices) config = DistilBertConfig( - vocab_size_or_config_json_file=self.vocab_size, + vocab_size=self.vocab_size, dim=self.hidden_size, n_layers=self.num_hidden_layers, n_heads=self.num_attention_heads, diff --git a/transformers/tests/modeling_tf_gpt2_test.py b/transformers/tests/modeling_tf_gpt2_test.py index e070b72e65..3f30b32787 100644 --- a/transformers/tests/modeling_tf_gpt2_test.py +++ b/transformers/tests/modeling_tf_gpt2_test.py @@ -115,7 +115,7 @@ class TFGPT2ModelTest(TFCommonTestCases.TFCommonModelTester): choice_labels = ids_tensor([self.batch_size], self.num_choices) config = GPT2Config( - vocab_size_or_config_json_file=self.vocab_size, + vocab_size=self.vocab_size, n_embd=self.hidden_size, n_layer=self.num_hidden_layers, n_head=self.num_attention_heads, diff --git a/transformers/tests/modeling_tf_openai_gpt_test.py b/transformers/tests/modeling_tf_openai_gpt_test.py index 675e806c12..863dbf1bc0 100644 --- a/transformers/tests/modeling_tf_openai_gpt_test.py +++ b/transformers/tests/modeling_tf_openai_gpt_test.py @@ -114,7 +114,7 @@ class TFOpenAIGPTModelTest(TFCommonTestCases.TFCommonModelTester): choice_labels = ids_tensor([self.batch_size], self.num_choices) config = OpenAIGPTConfig( - vocab_size_or_config_json_file=self.vocab_size, + vocab_size=self.vocab_size, n_embd=self.hidden_size, n_layer=self.num_hidden_layers, n_head=self.num_attention_heads, diff --git a/transformers/tests/modeling_tf_roberta_test.py b/transformers/tests/modeling_tf_roberta_test.py index 42440bf1b7..f4ed97c44b 100644 --- a/transformers/tests/modeling_tf_roberta_test.py +++ b/transformers/tests/modeling_tf_roberta_test.py @@ -109,7 +109,7 @@ class TFRobertaModelTest(TFCommonTestCases.TFCommonModelTester): choice_labels = ids_tensor([self.batch_size], self.num_choices) config = RobertaConfig( - vocab_size_or_config_json_file=self.vocab_size, + vocab_size=self.vocab_size, hidden_size=self.hidden_size, num_hidden_layers=self.num_hidden_layers, num_attention_heads=self.num_attention_heads, diff --git a/transformers/tests/modeling_tf_transfo_xl_test.py b/transformers/tests/modeling_tf_transfo_xl_test.py index 03e332bdc1..553263250a 100644 --- a/transformers/tests/modeling_tf_transfo_xl_test.py +++ b/transformers/tests/modeling_tf_transfo_xl_test.py @@ -92,7 +92,7 @@ class TFTransfoXLModelTest(TFCommonTestCases.TFCommonModelTester): lm_labels = ids_tensor([self.batch_size, self.seq_length], self.vocab_size) config = TransfoXLConfig( - vocab_size_or_config_json_file=self.vocab_size, + vocab_size=self.vocab_size, mem_len=self.mem_len, clamp_len=self.clamp_len, cutoffs=self.cutoffs, diff --git a/transformers/tests/modeling_tf_xlm_test.py b/transformers/tests/modeling_tf_xlm_test.py index a680b70367..228e436149 100644 --- a/transformers/tests/modeling_tf_xlm_test.py +++ b/transformers/tests/modeling_tf_xlm_test.py @@ -125,7 +125,7 @@ class TFXLMModelTest(TFCommonTestCases.TFCommonModelTester): is_impossible_labels = ids_tensor([self.batch_size], 2, dtype=tf.float32) config = XLMConfig( - vocab_size_or_config_json_file=self.vocab_size, + vocab_size=self.vocab_size, n_special=self.n_special, emb_dim=self.hidden_size, n_layers=self.num_hidden_layers, diff --git a/transformers/tests/modeling_tf_xlnet_test.py b/transformers/tests/modeling_tf_xlnet_test.py index 94864b86f2..eb66d92793 100644 --- a/transformers/tests/modeling_tf_xlnet_test.py +++ b/transformers/tests/modeling_tf_xlnet_test.py @@ -64,7 +64,6 @@ class TFXLNetModelTest(TFCommonTestCases.TFCommonModelTester): num_attention_heads=4, d_inner=128, num_hidden_layers=5, - max_position_embeddings=10, type_sequence_label_size=2, untie_r=True, bi_data=False, @@ -88,7 +87,6 @@ class TFXLNetModelTest(TFCommonTestCases.TFCommonModelTester): self.num_attention_heads = num_attention_heads self.d_inner = d_inner self.num_hidden_layers = num_hidden_layers - self.max_position_embeddings = max_position_embeddings self.bi_data = bi_data self.untie_r = untie_r self.same_length = same_length @@ -122,13 +120,12 @@ class TFXLNetModelTest(TFCommonTestCases.TFCommonModelTester): is_impossible_labels = ids_tensor([self.batch_size], 2, dtype=tf.float32) config = XLNetConfig( - vocab_size_or_config_json_file=self.vocab_size, + vocab_size=self.vocab_size, d_model=self.hidden_size, n_head=self.num_attention_heads, d_inner=self.d_inner, n_layer=self.num_hidden_layers, untie_r=self.untie_r, - max_position_embeddings=self.max_position_embeddings, mem_len=self.mem_len, clamp_len=self.clamp_len, same_length=self.same_length, diff --git a/transformers/tests/modeling_transfo_xl_test.py b/transformers/tests/modeling_transfo_xl_test.py index 647dd3724d..dca46444ba 100644 --- a/transformers/tests/modeling_transfo_xl_test.py +++ b/transformers/tests/modeling_transfo_xl_test.py @@ -91,7 +91,7 @@ class TransfoXLModelTest(CommonTestCases.CommonModelTester): lm_labels = ids_tensor([self.batch_size, self.seq_length], self.vocab_size) config = TransfoXLConfig( - vocab_size_or_config_json_file=self.vocab_size, + vocab_size=self.vocab_size, mem_len=self.mem_len, clamp_len=self.clamp_len, cutoffs=self.cutoffs, diff --git a/transformers/tests/modeling_xlm_test.py b/transformers/tests/modeling_xlm_test.py index f6b980767c..7cae6c848e 100644 --- a/transformers/tests/modeling_xlm_test.py +++ b/transformers/tests/modeling_xlm_test.py @@ -121,7 +121,7 @@ class XLMModelTest(CommonTestCases.CommonModelTester): is_impossible_labels = ids_tensor([self.batch_size], 2).float() config = XLMConfig( - vocab_size_or_config_json_file=self.vocab_size, + vocab_size=self.vocab_size, n_special=self.n_special, emb_dim=self.hidden_size, n_layers=self.num_hidden_layers, diff --git a/transformers/tests/modeling_xlnet_test.py b/transformers/tests/modeling_xlnet_test.py index 56b6bb3f4d..6d901ee699 100644 --- a/transformers/tests/modeling_xlnet_test.py +++ b/transformers/tests/modeling_xlnet_test.py @@ -60,7 +60,6 @@ class XLNetModelTest(CommonTestCases.CommonModelTester): num_attention_heads=4, d_inner=128, num_hidden_layers=5, - max_position_embeddings=10, type_sequence_label_size=2, untie_r=True, bi_data=False, @@ -84,7 +83,6 @@ class XLNetModelTest(CommonTestCases.CommonModelTester): self.num_attention_heads = num_attention_heads self.d_inner = d_inner self.num_hidden_layers = num_hidden_layers - self.max_position_embeddings = max_position_embeddings self.bi_data = bi_data self.untie_r = untie_r self.same_length = same_length @@ -116,13 +114,12 @@ class XLNetModelTest(CommonTestCases.CommonModelTester): token_labels = ids_tensor([self.batch_size, self.seq_length], self.type_vocab_size) config = XLNetConfig( - vocab_size_or_config_json_file=self.vocab_size, + vocab_size=self.vocab_size, d_model=self.hidden_size, n_head=self.num_attention_heads, d_inner=self.d_inner, n_layer=self.num_hidden_layers, untie_r=self.untie_r, - max_position_embeddings=self.max_position_embeddings, mem_len=self.mem_len, clamp_len=self.clamp_len, same_length=self.same_length,