WIP reordering
This commit is contained in:
@@ -549,7 +549,7 @@ class GPT2Model(GPT2PreTrainedModel):
|
||||
all_hidden_states = all_hidden_states + (hidden_states.view(*output_shape),)
|
||||
|
||||
outputs = block(hidden_states,
|
||||
past=layer_past,
|
||||
layer_past=layer_past,
|
||||
attention_mask=attention_mask,
|
||||
head_mask=head_mask[i])
|
||||
|
||||
@@ -666,7 +666,7 @@ the classification head takes as input the input of a specified classification t
|
||||
""", GPT2_START_DOCSTRING, GPT2_INPUTS_DOCSTRING)
|
||||
class GPT2DoubleHeadsModel(GPT2PreTrainedModel):
|
||||
r"""
|
||||
**mc_token_ids**: ``torch.LongTensor`` of shape ``(batch_size, num_choices)``:
|
||||
**mc_token_ids**: (`optional`, default to index of the last token of the input) ``torch.LongTensor`` of shape ``(batch_size, num_choices)``:
|
||||
Index of the classification token in each input sequence.
|
||||
Selected in the range ``[0, input_ids.size(-1) - 1[``.
|
||||
**lm_labels**: (`optional`) ``torch.LongTensor`` of shape ``(batch_size, sequence_length)``:
|
||||
|
||||
@@ -636,7 +636,7 @@ the classification head takes as input the input of a specified classification t
|
||||
""", OPENAI_GPT_START_DOCSTRING, OPENAI_GPT_INPUTS_DOCSTRING)
|
||||
class OpenAIGPTDoubleHeadsModel(OpenAIGPTPreTrainedModel):
|
||||
r"""
|
||||
**mc_token_ids**: ``torch.LongTensor`` of shape ``(batch_size, num_choices)``:
|
||||
**mc_token_ids**: (`optional`, default to index of the last token of the input) ``torch.LongTensor`` of shape ``(batch_size, num_choices)``:
|
||||
Index of the classification token in each input sequence.
|
||||
Selected in the range ``[0, input_ids.size(-1) - 1[``.
|
||||
**lm_labels**: (`optional`) ``torch.LongTensor`` of shape ``(batch_size, sequence_length)``:
|
||||
@@ -678,7 +678,7 @@ class OpenAIGPTDoubleHeadsModel(OpenAIGPTPreTrainedModel):
|
||||
choices = ["Hello, my dog is cute [CLS]", "Hello, my cat is cute [CLS]"]
|
||||
input_ids = torch.tensor([tokenizer.encode(s) for s in choices]).unsqueeze(0) # Batch size 1, 2 choices
|
||||
mc_token_ids = torch.tensor([input_ids.size(-1), input_ids.size(-1)]).unsqueeze(0) # Batch size 1
|
||||
outputs = model(input_ids, mc_token_ids)
|
||||
outputs = model(input_ids, mc_token_ids=mc_token_ids)
|
||||
lm_prediction_scores, mc_prediction_scores = outputs[:2]
|
||||
|
||||
"""
|
||||
@@ -700,7 +700,7 @@ class OpenAIGPTDoubleHeadsModel(OpenAIGPTPreTrainedModel):
|
||||
self.transformer.tokens_embed)
|
||||
|
||||
def forward(self, input_ids, attention_mask=None, token_type_ids=None, position_ids=None, head_mask=None,
|
||||
lm_labels=None, mc_labels=None):
|
||||
mc_token_ids=None, lm_labels=None, mc_labels=None):
|
||||
transformer_outputs = self.transformer(input_ids,
|
||||
attention_mask=attention_mask,
|
||||
token_type_ids=token_type_ids,
|
||||
|
||||
@@ -123,8 +123,8 @@ class RobertaModelTest(CommonTestCases.CommonModelTester):
|
||||
token_labels, choice_labels):
|
||||
model = RobertaModel(config=config)
|
||||
model.eval()
|
||||
sequence_output, pooled_output = model(input_ids, token_type_ids, input_mask)
|
||||
sequence_output, pooled_output = model(input_ids, token_type_ids)
|
||||
sequence_output, pooled_output = model(input_ids, attention_mask=input_mask, token_type_ids=token_type_ids)
|
||||
sequence_output, pooled_output = model(input_ids, token_type_ids=token_type_ids)
|
||||
sequence_output, pooled_output = model(input_ids)
|
||||
|
||||
result = {
|
||||
@@ -140,7 +140,7 @@ class RobertaModelTest(CommonTestCases.CommonModelTester):
|
||||
token_labels, choice_labels):
|
||||
model = RobertaForMaskedLM(config=config)
|
||||
model.eval()
|
||||
loss, prediction_scores = model(input_ids, token_type_ids, input_mask, token_labels)
|
||||
loss, prediction_scores = model(input_ids, attention_mask=input_mask, token_type_ids=token_type_ids, masked_lm_labels=token_labels)
|
||||
result = {
|
||||
"loss": loss,
|
||||
"prediction_scores": prediction_scores,
|
||||
|
||||
Reference in New Issue
Block a user