WIP reordering
This commit is contained in:
@@ -549,7 +549,7 @@ class GPT2Model(GPT2PreTrainedModel):
|
|||||||
all_hidden_states = all_hidden_states + (hidden_states.view(*output_shape),)
|
all_hidden_states = all_hidden_states + (hidden_states.view(*output_shape),)
|
||||||
|
|
||||||
outputs = block(hidden_states,
|
outputs = block(hidden_states,
|
||||||
past=layer_past,
|
layer_past=layer_past,
|
||||||
attention_mask=attention_mask,
|
attention_mask=attention_mask,
|
||||||
head_mask=head_mask[i])
|
head_mask=head_mask[i])
|
||||||
|
|
||||||
@@ -666,7 +666,7 @@ the classification head takes as input the input of a specified classification t
|
|||||||
""", GPT2_START_DOCSTRING, GPT2_INPUTS_DOCSTRING)
|
""", GPT2_START_DOCSTRING, GPT2_INPUTS_DOCSTRING)
|
||||||
class GPT2DoubleHeadsModel(GPT2PreTrainedModel):
|
class GPT2DoubleHeadsModel(GPT2PreTrainedModel):
|
||||||
r"""
|
r"""
|
||||||
**mc_token_ids**: ``torch.LongTensor`` of shape ``(batch_size, num_choices)``:
|
**mc_token_ids**: (`optional`, default to index of the last token of the input) ``torch.LongTensor`` of shape ``(batch_size, num_choices)``:
|
||||||
Index of the classification token in each input sequence.
|
Index of the classification token in each input sequence.
|
||||||
Selected in the range ``[0, input_ids.size(-1) - 1[``.
|
Selected in the range ``[0, input_ids.size(-1) - 1[``.
|
||||||
**lm_labels**: (`optional`) ``torch.LongTensor`` of shape ``(batch_size, sequence_length)``:
|
**lm_labels**: (`optional`) ``torch.LongTensor`` of shape ``(batch_size, sequence_length)``:
|
||||||
|
|||||||
@@ -636,7 +636,7 @@ the classification head takes as input the input of a specified classification t
|
|||||||
""", OPENAI_GPT_START_DOCSTRING, OPENAI_GPT_INPUTS_DOCSTRING)
|
""", OPENAI_GPT_START_DOCSTRING, OPENAI_GPT_INPUTS_DOCSTRING)
|
||||||
class OpenAIGPTDoubleHeadsModel(OpenAIGPTPreTrainedModel):
|
class OpenAIGPTDoubleHeadsModel(OpenAIGPTPreTrainedModel):
|
||||||
r"""
|
r"""
|
||||||
**mc_token_ids**: ``torch.LongTensor`` of shape ``(batch_size, num_choices)``:
|
**mc_token_ids**: (`optional`, default to index of the last token of the input) ``torch.LongTensor`` of shape ``(batch_size, num_choices)``:
|
||||||
Index of the classification token in each input sequence.
|
Index of the classification token in each input sequence.
|
||||||
Selected in the range ``[0, input_ids.size(-1) - 1[``.
|
Selected in the range ``[0, input_ids.size(-1) - 1[``.
|
||||||
**lm_labels**: (`optional`) ``torch.LongTensor`` of shape ``(batch_size, sequence_length)``:
|
**lm_labels**: (`optional`) ``torch.LongTensor`` of shape ``(batch_size, sequence_length)``:
|
||||||
@@ -678,7 +678,7 @@ class OpenAIGPTDoubleHeadsModel(OpenAIGPTPreTrainedModel):
|
|||||||
choices = ["Hello, my dog is cute [CLS]", "Hello, my cat is cute [CLS]"]
|
choices = ["Hello, my dog is cute [CLS]", "Hello, my cat is cute [CLS]"]
|
||||||
input_ids = torch.tensor([tokenizer.encode(s) for s in choices]).unsqueeze(0) # Batch size 1, 2 choices
|
input_ids = torch.tensor([tokenizer.encode(s) for s in choices]).unsqueeze(0) # Batch size 1, 2 choices
|
||||||
mc_token_ids = torch.tensor([input_ids.size(-1), input_ids.size(-1)]).unsqueeze(0) # Batch size 1
|
mc_token_ids = torch.tensor([input_ids.size(-1), input_ids.size(-1)]).unsqueeze(0) # Batch size 1
|
||||||
outputs = model(input_ids, mc_token_ids)
|
outputs = model(input_ids, mc_token_ids=mc_token_ids)
|
||||||
lm_prediction_scores, mc_prediction_scores = outputs[:2]
|
lm_prediction_scores, mc_prediction_scores = outputs[:2]
|
||||||
|
|
||||||
"""
|
"""
|
||||||
@@ -700,7 +700,7 @@ class OpenAIGPTDoubleHeadsModel(OpenAIGPTPreTrainedModel):
|
|||||||
self.transformer.tokens_embed)
|
self.transformer.tokens_embed)
|
||||||
|
|
||||||
def forward(self, input_ids, attention_mask=None, token_type_ids=None, position_ids=None, head_mask=None,
|
def forward(self, input_ids, attention_mask=None, token_type_ids=None, position_ids=None, head_mask=None,
|
||||||
lm_labels=None, mc_labels=None):
|
mc_token_ids=None, lm_labels=None, mc_labels=None):
|
||||||
transformer_outputs = self.transformer(input_ids,
|
transformer_outputs = self.transformer(input_ids,
|
||||||
attention_mask=attention_mask,
|
attention_mask=attention_mask,
|
||||||
token_type_ids=token_type_ids,
|
token_type_ids=token_type_ids,
|
||||||
|
|||||||
@@ -123,8 +123,8 @@ class RobertaModelTest(CommonTestCases.CommonModelTester):
|
|||||||
token_labels, choice_labels):
|
token_labels, choice_labels):
|
||||||
model = RobertaModel(config=config)
|
model = RobertaModel(config=config)
|
||||||
model.eval()
|
model.eval()
|
||||||
sequence_output, pooled_output = model(input_ids, token_type_ids, input_mask)
|
sequence_output, pooled_output = model(input_ids, attention_mask=input_mask, token_type_ids=token_type_ids)
|
||||||
sequence_output, pooled_output = model(input_ids, token_type_ids)
|
sequence_output, pooled_output = model(input_ids, token_type_ids=token_type_ids)
|
||||||
sequence_output, pooled_output = model(input_ids)
|
sequence_output, pooled_output = model(input_ids)
|
||||||
|
|
||||||
result = {
|
result = {
|
||||||
@@ -140,7 +140,7 @@ class RobertaModelTest(CommonTestCases.CommonModelTester):
|
|||||||
token_labels, choice_labels):
|
token_labels, choice_labels):
|
||||||
model = RobertaForMaskedLM(config=config)
|
model = RobertaForMaskedLM(config=config)
|
||||||
model.eval()
|
model.eval()
|
||||||
loss, prediction_scores = model(input_ids, token_type_ids, input_mask, token_labels)
|
loss, prediction_scores = model(input_ids, attention_mask=input_mask, token_type_ids=token_type_ids, masked_lm_labels=token_labels)
|
||||||
result = {
|
result = {
|
||||||
"loss": loss,
|
"loss": loss,
|
||||||
"prediction_scores": prediction_scores,
|
"prediction_scores": prediction_scores,
|
||||||
|
|||||||
Reference in New Issue
Block a user