From 7fba47b7d98d918c6dac5dcc50123ebb8eec9685 Mon Sep 17 00:00:00 2001 From: thomwolf Date: Wed, 4 Sep 2019 22:29:17 +0200 Subject: [PATCH] WIP reordering --- pytorch_transformers/modeling_gpt2.py | 4 ++-- pytorch_transformers/modeling_openai.py | 6 +++--- pytorch_transformers/tests/modeling_roberta_test.py | 6 +++--- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/pytorch_transformers/modeling_gpt2.py b/pytorch_transformers/modeling_gpt2.py index 38f67338fc..d16448beaa 100644 --- a/pytorch_transformers/modeling_gpt2.py +++ b/pytorch_transformers/modeling_gpt2.py @@ -549,7 +549,7 @@ class GPT2Model(GPT2PreTrainedModel): all_hidden_states = all_hidden_states + (hidden_states.view(*output_shape),) outputs = block(hidden_states, - past=layer_past, + layer_past=layer_past, attention_mask=attention_mask, head_mask=head_mask[i]) @@ -666,7 +666,7 @@ the classification head takes as input the input of a specified classification t """, GPT2_START_DOCSTRING, GPT2_INPUTS_DOCSTRING) class GPT2DoubleHeadsModel(GPT2PreTrainedModel): r""" - **mc_token_ids**: ``torch.LongTensor`` of shape ``(batch_size, num_choices)``: + **mc_token_ids**: (`optional`, default to index of the last token of the input) ``torch.LongTensor`` of shape ``(batch_size, num_choices)``: Index of the classification token in each input sequence. Selected in the range ``[0, input_ids.size(-1) - 1[``. **lm_labels**: (`optional`) ``torch.LongTensor`` of shape ``(batch_size, sequence_length)``: diff --git a/pytorch_transformers/modeling_openai.py b/pytorch_transformers/modeling_openai.py index 55f0a4e8d2..4fbec7a768 100644 --- a/pytorch_transformers/modeling_openai.py +++ b/pytorch_transformers/modeling_openai.py @@ -636,7 +636,7 @@ the classification head takes as input the input of a specified classification t """, OPENAI_GPT_START_DOCSTRING, OPENAI_GPT_INPUTS_DOCSTRING) class OpenAIGPTDoubleHeadsModel(OpenAIGPTPreTrainedModel): r""" - **mc_token_ids**: ``torch.LongTensor`` of shape ``(batch_size, num_choices)``: + **mc_token_ids**: (`optional`, default to index of the last token of the input) ``torch.LongTensor`` of shape ``(batch_size, num_choices)``: Index of the classification token in each input sequence. Selected in the range ``[0, input_ids.size(-1) - 1[``. **lm_labels**: (`optional`) ``torch.LongTensor`` of shape ``(batch_size, sequence_length)``: @@ -678,7 +678,7 @@ class OpenAIGPTDoubleHeadsModel(OpenAIGPTPreTrainedModel): choices = ["Hello, my dog is cute [CLS]", "Hello, my cat is cute [CLS]"] input_ids = torch.tensor([tokenizer.encode(s) for s in choices]).unsqueeze(0) # Batch size 1, 2 choices mc_token_ids = torch.tensor([input_ids.size(-1), input_ids.size(-1)]).unsqueeze(0) # Batch size 1 - outputs = model(input_ids, mc_token_ids) + outputs = model(input_ids, mc_token_ids=mc_token_ids) lm_prediction_scores, mc_prediction_scores = outputs[:2] """ @@ -700,7 +700,7 @@ class OpenAIGPTDoubleHeadsModel(OpenAIGPTPreTrainedModel): self.transformer.tokens_embed) def forward(self, input_ids, attention_mask=None, token_type_ids=None, position_ids=None, head_mask=None, - lm_labels=None, mc_labels=None): + mc_token_ids=None, lm_labels=None, mc_labels=None): transformer_outputs = self.transformer(input_ids, attention_mask=attention_mask, token_type_ids=token_type_ids, diff --git a/pytorch_transformers/tests/modeling_roberta_test.py b/pytorch_transformers/tests/modeling_roberta_test.py index 94035e9667..0279f3756b 100644 --- a/pytorch_transformers/tests/modeling_roberta_test.py +++ b/pytorch_transformers/tests/modeling_roberta_test.py @@ -123,8 +123,8 @@ class RobertaModelTest(CommonTestCases.CommonModelTester): token_labels, choice_labels): model = RobertaModel(config=config) model.eval() - sequence_output, pooled_output = model(input_ids, token_type_ids, input_mask) - sequence_output, pooled_output = model(input_ids, token_type_ids) + sequence_output, pooled_output = model(input_ids, attention_mask=input_mask, token_type_ids=token_type_ids) + sequence_output, pooled_output = model(input_ids, token_type_ids=token_type_ids) sequence_output, pooled_output = model(input_ids) result = { @@ -140,7 +140,7 @@ class RobertaModelTest(CommonTestCases.CommonModelTester): token_labels, choice_labels): model = RobertaForMaskedLM(config=config) model.eval() - loss, prediction_scores = model(input_ids, token_type_ids, input_mask, token_labels) + loss, prediction_scores = model(input_ids, attention_mask=input_mask, token_type_ids=token_type_ids, masked_lm_labels=token_labels) result = { "loss": loss, "prediction_scores": prediction_scores,