From 7fba47b7d98d918c6dac5dcc50123ebb8eec9685 Mon Sep 17 00:00:00 2001
From: thomwolf <thomwolf@gmail.com>
Date: Wed, 4 Sep 2019 22:29:17 +0200
Subject: [PATCH] WIP reordering

---
 pytorch_transformers/modeling_gpt2.py               | 4 ++--
 pytorch_transformers/modeling_openai.py             | 6 +++---
 pytorch_transformers/tests/modeling_roberta_test.py | 6 +++---
 3 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/pytorch_transformers/modeling_gpt2.py b/pytorch_transformers/modeling_gpt2.py
index 38f67338fc..d16448beaa 100644
--- a/pytorch_transformers/modeling_gpt2.py
+++ b/pytorch_transformers/modeling_gpt2.py
@@ -549,7 +549,7 @@ class GPT2Model(GPT2PreTrainedModel):
                 all_hidden_states = all_hidden_states + (hidden_states.view(*output_shape),)
 
             outputs = block(hidden_states,
-                            past=layer_past,
+                            layer_past=layer_past,
                             attention_mask=attention_mask,
                             head_mask=head_mask[i])
 
@@ -666,7 +666,7 @@ the classification head takes as input the input of a specified classification t
 """, GPT2_START_DOCSTRING, GPT2_INPUTS_DOCSTRING)
 class GPT2DoubleHeadsModel(GPT2PreTrainedModel):
     r"""
-        **mc_token_ids**: ``torch.LongTensor`` of shape ``(batch_size, num_choices)``:
+        **mc_token_ids**: (`optional`, default to index of the last token of the input) ``torch.LongTensor`` of shape ``(batch_size, num_choices)``:
             Index of the classification token in each input sequence.
             Selected in the range ``[0, input_ids.size(-1) - 1[``.
         **lm_labels**: (`optional`) ``torch.LongTensor`` of shape ``(batch_size, sequence_length)``:
diff --git a/pytorch_transformers/modeling_openai.py b/pytorch_transformers/modeling_openai.py
index 55f0a4e8d2..4fbec7a768 100644
--- a/pytorch_transformers/modeling_openai.py
+++ b/pytorch_transformers/modeling_openai.py
@@ -636,7 +636,7 @@ the classification head takes as input the input of a specified classification t
 """, OPENAI_GPT_START_DOCSTRING, OPENAI_GPT_INPUTS_DOCSTRING)
 class OpenAIGPTDoubleHeadsModel(OpenAIGPTPreTrainedModel):
     r"""
-        **mc_token_ids**: ``torch.LongTensor`` of shape ``(batch_size, num_choices)``:
+        **mc_token_ids**: (`optional`, default to index of the last token of the input) ``torch.LongTensor`` of shape ``(batch_size, num_choices)``:
             Index of the classification token in each input sequence.
             Selected in the range ``[0, input_ids.size(-1) - 1[``.
         **lm_labels**: (`optional`) ``torch.LongTensor`` of shape ``(batch_size, sequence_length)``:
@@ -678,7 +678,7 @@ class OpenAIGPTDoubleHeadsModel(OpenAIGPTPreTrainedModel):
         choices = ["Hello, my dog is cute [CLS]", "Hello, my cat is cute [CLS]"]
         input_ids = torch.tensor([tokenizer.encode(s) for s in choices]).unsqueeze(0)  # Batch size 1, 2 choices
         mc_token_ids = torch.tensor([input_ids.size(-1), input_ids.size(-1)]).unsqueeze(0)  # Batch size 1
-        outputs = model(input_ids, mc_token_ids)
+        outputs = model(input_ids, mc_token_ids=mc_token_ids)
         lm_prediction_scores, mc_prediction_scores = outputs[:2]
 
     """
@@ -700,7 +700,7 @@ class OpenAIGPTDoubleHeadsModel(OpenAIGPTPreTrainedModel):
                                    self.transformer.tokens_embed)
 
     def forward(self, input_ids, attention_mask=None, token_type_ids=None, position_ids=None, head_mask=None,
-                lm_labels=None, mc_labels=None):
+                mc_token_ids=None, lm_labels=None, mc_labels=None):
         transformer_outputs = self.transformer(input_ids,
                                                attention_mask=attention_mask,
                                                token_type_ids=token_type_ids,
diff --git a/pytorch_transformers/tests/modeling_roberta_test.py b/pytorch_transformers/tests/modeling_roberta_test.py
index 94035e9667..0279f3756b 100644
--- a/pytorch_transformers/tests/modeling_roberta_test.py
+++ b/pytorch_transformers/tests/modeling_roberta_test.py
@@ -123,8 +123,8 @@ class RobertaModelTest(CommonTestCases.CommonModelTester):
                                            token_labels, choice_labels):
             model = RobertaModel(config=config)
             model.eval()
-            sequence_output, pooled_output = model(input_ids, token_type_ids, input_mask)
-            sequence_output, pooled_output = model(input_ids, token_type_ids)
+            sequence_output, pooled_output = model(input_ids, attention_mask=input_mask, token_type_ids=token_type_ids)
+            sequence_output, pooled_output = model(input_ids, token_type_ids=token_type_ids)
             sequence_output, pooled_output = model(input_ids)
 
             result = {
@@ -140,7 +140,7 @@ class RobertaModelTest(CommonTestCases.CommonModelTester):
                                                    token_labels, choice_labels):
             model = RobertaForMaskedLM(config=config)
             model.eval()
-            loss, prediction_scores = model(input_ids, token_type_ids, input_mask, token_labels)
+            loss, prediction_scores = model(input_ids, attention_mask=input_mask, token_type_ids=token_type_ids, masked_lm_labels=token_labels)
             result = {
                 "loss": loss,
                 "prediction_scores": prediction_scores,