diff --git a/.coveragerc b/.coveragerc index e0d5674aa0..fa6c165a8a 100644 --- a/.coveragerc +++ b/.coveragerc @@ -3,6 +3,7 @@ source=pytorch_transformers omit = # skip convertion scripts from testing for now */convert_* + */__main__.py [report] exclude_lines = pragma: no cover diff --git a/examples/test_examples.py b/examples/test_examples.py index 8284858a12..56c30efae4 100644 --- a/examples/test_examples.py +++ b/examples/test_examples.py @@ -48,7 +48,7 @@ class ExamplesTests(unittest.TestCase): testargs = ["run_glue.py", "--data_dir=./examples/tests_samples/MRPC/", "--task_name=mrpc", "--do_train", "--do_eval", "--output_dir=./examples/tests_samples/temp_dir", "--train_batch_size=4", "--eval_batch_size=2", "--num_train_epochs=2.0", "--overwrite_output_dir"] - model_name = "--model_name=xlnet-large-cased" + model_name = "--model_name=bert-base-uncased" with patch.object(sys, 'argv', testargs + [model_name]): result = run_glue.main() for value in result.values(): diff --git a/pytorch_transformers/modeling_gpt2.py b/pytorch_transformers/modeling_gpt2.py index 7fefbefeae..840016098a 100644 --- a/pytorch_transformers/modeling_gpt2.py +++ b/pytorch_transformers/modeling_gpt2.py @@ -119,9 +119,12 @@ class GPT2Config(PretrainedConfig): layer_norm_epsilon=1e-5, initializer_range=0.02, predict_special_tokens=True, + + num_labels=1, summary_type='token_ids', summary_use_proj=True, summary_activation=None, + summary_proj_to_labels=True, summary_first_dropout=0.1, **kwargs ): @@ -168,10 +171,13 @@ class GPT2Config(PretrainedConfig): self.layer_norm_epsilon = layer_norm_epsilon self.initializer_range = initializer_range self.predict_special_tokens = predict_special_tokens + + self.num_labels = num_labels self.summary_type = summary_type self.summary_use_proj = summary_use_proj self.summary_activation = summary_activation self.summary_first_dropout = summary_first_dropout + self.summary_proj_to_labels = summary_proj_to_labels else: raise ValueError( "First argument must be either a vocabulary size (int)" diff --git a/pytorch_transformers/modeling_openai.py b/pytorch_transformers/modeling_openai.py index c99df42035..024ff8eb41 100644 --- a/pytorch_transformers/modeling_openai.py +++ b/pytorch_transformers/modeling_openai.py @@ -147,9 +147,12 @@ class OpenAIGPTConfig(PretrainedConfig): layer_norm_epsilon=1e-5, initializer_range=0.02, predict_special_tokens=True, + + num_labels=1, summary_type='token_ids', summary_use_proj=True, summary_activation=None, + summary_proj_to_labels=True, summary_first_dropout=0.1, **kwargs ): @@ -199,10 +202,13 @@ class OpenAIGPTConfig(PretrainedConfig): self.layer_norm_epsilon = layer_norm_epsilon self.initializer_range = initializer_range self.predict_special_tokens = predict_special_tokens + + self.num_labels = num_labels self.summary_type = summary_type self.summary_use_proj = summary_use_proj self.summary_activation = summary_activation self.summary_first_dropout = summary_first_dropout + self.summary_proj_to_labels = summary_proj_to_labels else: raise ValueError( "First argument must be either a vocabulary size (int)" diff --git a/pytorch_transformers/tests/modeling_tests_commons.py b/pytorch_transformers/tests/modeling_tests_commons.py index db79b017c1..5535177aaa 100644 --- a/pytorch_transformers/tests/modeling_tests_commons.py +++ b/pytorch_transformers/tests/modeling_tests_commons.py @@ -396,7 +396,7 @@ class GPTModelTester(object): model = self.double_head_model_class(config) model.eval() outputs = model(input_ids, mc_token_ids, lm_labels=lm_labels, mc_labels=mc_labels, - token_type_ids=token_type_ids, position_ids=position_ids) + token_type_ids=token_type_ids, position_ids=position_ids) lm_loss, mc_loss, lm_logits, mc_logits = outputs[:4] loss = [lm_loss, mc_loss]