Update doc to new model outputs (#5946)

* Update doc to new model outputs

* Fix outputs in quicktour
This commit is contained in:
Sylvain Gugger
2020-07-21 18:13:55 -04:00
committed by GitHub
parent ddd40b3211
commit e714412fe6
16 changed files with 73 additions and 47 deletions

View File

@@ -230,13 +230,18 @@ final activations of the model.
>>> ## PYTORCH CODE >>> ## PYTORCH CODE
>>> print(pt_outputs) >>> print(pt_outputs)
(tensor([[-4.0833, 4.3364], SequenceClassifierOutput(loss=None, logits=tensor([[-4.0833, 4.3364],
[ 0.0818, -0.0418]], grad_fn=<AddmmBackward>),) [ 0.0818, -0.0418]], grad_fn=<AddmmBackward>), hidden_states=None, attentions=None)
>>> ## TENSORFLOW CODE >>> ## TENSORFLOW CODE
>>> print(tf_outputs) >>> print(tf_outputs)
(<tf.Tensor: shape=(2, 2), dtype=float32, numpy= (<tf.Tensor: shape=(2, 2), dtype=float32, numpy=
array([[-4.0832963 , 4.3364134 ], array([[-4.0832963 , 4.336414 ],
[ 0.08181238, -0.04178794]], dtype=float32)>,) [ 0.08181786, -0.04179301]], dtype=float32)>,)
The model can return more than just the final activations, which is why the PyTorch output is a special class and the
TensorFlow output is a tuple. Here we only asked for the final activations, so we get a tuple with one element on the
TensorFlow side and a :class:`~transformers.modeling_outputs.SequenceClassifierOutput` with just the ``logits`` field
filled on the PyTorch side.
.. note:: .. note::
@@ -249,7 +254,7 @@ Let's apply the SoftMax activation to get predictions.
>>> ## PYTORCH CODE >>> ## PYTORCH CODE
>>> import torch.nn.functional as F >>> import torch.nn.functional as F
>>> pt_predictions = F.softmax(pt_outputs[0], dim=-1) >>> pt_predictions = F.softmax(pt_outputs.logits, dim=-1)
>>> ## TENSORFLOW CODE >>> ## TENSORFLOW CODE
>>> import tensorflow as tf >>> import tensorflow as tf
>>> tf_predictions = tf.nn.softmax(tf_outputs[0], axis=-1) >>> tf_predictions = tf.nn.softmax(tf_outputs[0], axis=-1)
@@ -262,7 +267,7 @@ We can see we get the numbers from before:
>>> print(tf_predictions) >>> print(tf_predictions)
tf.Tensor( tf.Tensor(
[[2.2042994e-04 9.9977952e-01] [[2.2042994e-04 9.9977952e-01]
[5.3086078e-01 4.6913919e-01]], shape=(2, 2), dtype=float32) [5.3086340e-01 4.6913657e-01]], shape=(2, 2), dtype=float32)
>>> ## PYTORCH CODE >>> ## PYTORCH CODE
>>> print(pt_predictions) >>> print(pt_predictions)
tensor([[2.2043e-04, 9.9978e-01], tensor([[2.2043e-04, 9.9978e-01],
@@ -285,6 +290,12 @@ training loop. 🤗 Transformers also provides a :class:`~transformers.Trainer`
you are using TensorFlow) class to help with your training (taking care of things such as distributed training, mixed you are using TensorFlow) class to help with your training (taking care of things such as distributed training, mixed
precision, etc.). See the :doc:`training tutorial <training>` for more details. precision, etc.). See the :doc:`training tutorial <training>` for more details.
.. note::
PyTorch model outputs are special dataclasses so that you can get autocompletion for their attributes in an IDE.
They also behave like a tuple or a dictionary (e.g., you can index with an integer, a slice or a string) in which
case the attributes not set (that have :obj:`None` values) are ignored.
Once your model is fine-tuned, you can save it with its tokenizer in the following way: Once your model is fine-tuned, you can save it with its tokenizer in the following way:
:: ::

View File

@@ -98,8 +98,8 @@ of each other. The process is the following:
>>> paraphrase = tokenizer(sequence_0, sequence_2, return_tensors="pt") >>> paraphrase = tokenizer(sequence_0, sequence_2, return_tensors="pt")
>>> not_paraphrase = tokenizer(sequence_0, sequence_1, return_tensors="pt") >>> not_paraphrase = tokenizer(sequence_0, sequence_1, return_tensors="pt")
>>> paraphrase_classification_logits = model(**paraphrase)[0] >>> paraphrase_classification_logits = model(**paraphrase).logits
>>> not_paraphrase_classification_logits = model(**not_paraphrase)[0] >>> not_paraphrase_classification_logits = model(**not_paraphrase).logits
>>> paraphrase_results = torch.softmax(paraphrase_classification_logits, dim=1).tolist()[0] >>> paraphrase_results = torch.softmax(paraphrase_classification_logits, dim=1).tolist()[0]
>>> not_paraphrase_results = torch.softmax(not_paraphrase_classification_logits, dim=1).tolist()[0] >>> not_paraphrase_results = torch.softmax(not_paraphrase_classification_logits, dim=1).tolist()[0]
@@ -375,7 +375,7 @@ Here is an example doing masked language modeling using a model and a tokenizer.
>>> input = tokenizer.encode(sequence, return_tensors="pt") >>> input = tokenizer.encode(sequence, return_tensors="pt")
>>> mask_token_index = torch.where(input == tokenizer.mask_token_id)[1] >>> mask_token_index = torch.where(input == tokenizer.mask_token_id)[1]
>>> token_logits = model(input)[0] >>> token_logits = model(input).logits
>>> mask_token_logits = token_logits[0, mask_token_index, :] >>> mask_token_logits = token_logits[0, mask_token_index, :]
>>> top_5_tokens = torch.topk(mask_token_logits, 5, dim=1).indices[0].tolist() >>> top_5_tokens = torch.topk(mask_token_logits, 5, dim=1).indices[0].tolist()
@@ -436,7 +436,7 @@ Here is an example using the tokenizer and model and leveraging the :func:`~tran
>>> input_ids = tokenizer.encode(sequence, return_tensors="pt") >>> input_ids = tokenizer.encode(sequence, return_tensors="pt")
>>> # get logits of last hidden state >>> # get logits of last hidden state
>>> next_token_logits = model(input_ids)[0][:, -1, :] >>> next_token_logits = model(input_ids).logits[:, -1, :]
>>> # filter >>> # filter
>>> filtered_next_token_logits = top_k_top_p_filtering(next_token_logits, top_k=50, top_p=1.0) >>> filtered_next_token_logits = top_k_top_p_filtering(next_token_logits, top_k=50, top_p=1.0)
@@ -666,7 +666,7 @@ Here is an example doing named entity recognition using a model and a tokenizer.
>>> tokens = tokenizer.tokenize(tokenizer.decode(tokenizer.encode(sequence))) >>> tokens = tokenizer.tokenize(tokenizer.decode(tokenizer.encode(sequence)))
>>> inputs = tokenizer.encode(sequence, return_tensors="pt") >>> inputs = tokenizer.encode(sequence, return_tensors="pt")
>>> outputs = model(inputs)[0] >>> outputs = model(inputs).logits
>>> predictions = torch.argmax(outputs, dim=2) >>> predictions = torch.argmax(outputs, dim=2)
>>> ## TENSORFLOW CODE >>> ## TENSORFLOW CODE
>>> from transformers import TFAutoModelForTokenClassification, AutoTokenizer >>> from transformers import TFAutoModelForTokenClassification, AutoTokenizer

View File

@@ -99,7 +99,7 @@ backwards pass and update the weights:
labels = torch.tensor([1,0]).unsqueeze(0) labels = torch.tensor([1,0]).unsqueeze(0)
outputs = model(input_ids, attention_mask=attention_mask, labels=labels) outputs = model(input_ids, attention_mask=attention_mask, labels=labels)
loss = outputs[0] loss = outputs.loss
loss.backward() loss.backward()
optimizer.step() optimizer.step()
@@ -111,7 +111,7 @@ The following is equivalent to the previous example:
from torch.nn import functional as F from torch.nn import functional as F
labels = torch.tensor([1,0]).unsqueeze(0) labels = torch.tensor([1,0]).unsqueeze(0)
outputs = model(input_ids, attention_mask=attention_mask) outputs = model(input_ids, attention_mask=attention_mask)
loss = F.cross_entropy(labels, outputs[0]) loss = F.cross_entropy(labels, outputs.logits)
loss.backward() loss.backward()
optimizer.step() optimizer.step()

View File

@@ -226,7 +226,8 @@ PT_TOKEN_CLASSIFICATION_SAMPLE = r"""
>>> labels = torch.tensor([1] * inputs["input_ids"].size(1)).unsqueeze(0) # Batch size 1 >>> labels = torch.tensor([1] * inputs["input_ids"].size(1)).unsqueeze(0) # Batch size 1
>>> outputs = model(**inputs, labels=labels) >>> outputs = model(**inputs, labels=labels)
>>> loss, scores = outputs[:2] >>> loss = outputs.loss
>>> logits = outputs.logits
""" """
PT_QUESTION_ANSWERING_SAMPLE = r""" PT_QUESTION_ANSWERING_SAMPLE = r"""
@@ -243,7 +244,9 @@ PT_QUESTION_ANSWERING_SAMPLE = r"""
>>> end_positions = torch.tensor([3]) >>> end_positions = torch.tensor([3])
>>> outputs = model(**inputs, start_positions=start_positions, end_positions=end_positions) >>> outputs = model(**inputs, start_positions=start_positions, end_positions=end_positions)
>>> loss, start_scores, end_scores = outputs[:3] >>> loss = outputs.loss
>>> start_scores = outputs.start_logits
>>> end_scores = outputs.end_logits
""" """
PT_SEQUENCE_CLASSIFICATION_SAMPLE = r""" PT_SEQUENCE_CLASSIFICATION_SAMPLE = r"""
@@ -258,7 +261,8 @@ PT_SEQUENCE_CLASSIFICATION_SAMPLE = r"""
>>> inputs = tokenizer("Hello, my dog is cute", return_tensors="pt") >>> inputs = tokenizer("Hello, my dog is cute", return_tensors="pt")
>>> labels = torch.tensor([1]).unsqueeze(0) # Batch size 1 >>> labels = torch.tensor([1]).unsqueeze(0) # Batch size 1
>>> outputs = model(**inputs, labels=labels) >>> outputs = model(**inputs, labels=labels)
>>> loss, logits = outputs[:2] >>> loss = outputs.loss
>>> logits = outputs.logits
""" """
PT_MASKED_LM_SAMPLE = r""" PT_MASKED_LM_SAMPLE = r"""
@@ -273,7 +277,8 @@ PT_MASKED_LM_SAMPLE = r"""
>>> input_ids = tokenizer("Hello, my dog is cute", return_tensors="pt")["input_ids"] >>> input_ids = tokenizer("Hello, my dog is cute", return_tensors="pt")["input_ids"]
>>> outputs = model(input_ids, labels=input_ids) >>> outputs = model(input_ids, labels=input_ids)
>>> loss, prediction_scores = outputs[:2] >>> loss = outputs.loss
>>> prediction_logits = outputs.logits
""" """
PT_BASE_MODEL_SAMPLE = r""" PT_BASE_MODEL_SAMPLE = r"""
@@ -288,7 +293,7 @@ PT_BASE_MODEL_SAMPLE = r"""
>>> inputs = tokenizer("Hello, my dog is cute", return_tensors="pt") >>> inputs = tokenizer("Hello, my dog is cute", return_tensors="pt")
>>> outputs = model(**inputs) >>> outputs = model(**inputs)
>>> last_hidden_states = outputs[0] # The last hidden-state is the first element of the output tuple >>> last_hidden_states = outputs.last_hidden_state
""" """
PT_MULTIPLE_CHOICE_SAMPLE = r""" PT_MULTIPLE_CHOICE_SAMPLE = r"""
@@ -309,7 +314,8 @@ PT_MULTIPLE_CHOICE_SAMPLE = r"""
>>> outputs = model(**{{k: v.unsqueeze(0) for k,v in encoding.items()}}, labels=labels) # batch size is 1 >>> outputs = model(**{{k: v.unsqueeze(0) for k,v in encoding.items()}}, labels=labels) # batch size is 1
>>> # the linear classifier still needs to be trained >>> # the linear classifier still needs to be trained
>>> loss, logits = outputs[:2] >>> loss = outputs.loss
>>> logits = outputs.logits
""" """
PT_CAUSAL_LM_SAMPLE = r""" PT_CAUSAL_LM_SAMPLE = r"""
@@ -323,7 +329,8 @@ PT_CAUSAL_LM_SAMPLE = r"""
>>> inputs = tokenizer("Hello, my dog is cute", return_tensors="pt") >>> inputs = tokenizer("Hello, my dog is cute", return_tensors="pt")
>>> outputs = model(**inputs, labels=inputs["input_ids"]) >>> outputs = model(**inputs, labels=inputs["input_ids"])
>>> loss, logits = outputs[:2] >>> loss = outputs.loss
>>> logits = outputs.logits
""" """
TF_TOKEN_CLASSIFICATION_SAMPLE = r""" TF_TOKEN_CLASSIFICATION_SAMPLE = r"""

View File

@@ -683,7 +683,8 @@ class AlbertForPreTraining(AlbertPreTrainedModel):
>>> input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze(0) # Batch size 1 >>> input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze(0) # Batch size 1
>>> outputs = model(input_ids) >>> outputs = model(input_ids)
>>> prediction_scores, sop_scores = outputs[:2] >>> prediction_logits = outputs.prediction_logits
>>> sop_logits = outputs.sop_logits
""" """

View File

@@ -996,7 +996,7 @@ class BartForConditionalGeneration(PretrainedBartModel):
model = BartForConditionalGeneration.from_pretrained('facebook/bart-large') model = BartForConditionalGeneration.from_pretrained('facebook/bart-large')
input_ids = tokenizer([TXT], return_tensors='pt')['input_ids'] input_ids = tokenizer([TXT], return_tensors='pt')['input_ids']
logits = model(input_ids)[0] logits = model(input_ids).logits
masked_index = (input_ids[0] == tokenizer.mask_token_id).nonzero().item() masked_index = (input_ids[0] == tokenizer.mask_token_id).nonzero().item()
probs = logits[0, masked_index].softmax(dim=0) probs = logits[0, masked_index].softmax(dim=0)

View File

@@ -873,8 +873,8 @@ class BertForPreTraining(BertPreTrainedModel):
>>> inputs = tokenizer("Hello, my dog is cute", return_tensors="pt") >>> inputs = tokenizer("Hello, my dog is cute", return_tensors="pt")
>>> outputs = model(**inputs) >>> outputs = model(**inputs)
>>> prediction_scores, seq_relationship_scores = outputs[:2] >>> prediction_logits = outputs.prediction_logits
>>> seq_relationship_logits = outputs.seq_relationship_logits
""" """
if "masked_lm_labels" in kwargs: if "masked_lm_labels" in kwargs:
warnings.warn( warnings.warn(
@@ -978,7 +978,7 @@ class BertLMHeadModel(BertPreTrainedModel):
>>> inputs = tokenizer("Hello, my dog is cute", return_tensors="pt") >>> inputs = tokenizer("Hello, my dog is cute", return_tensors="pt")
>>> outputs = model(**inputs) >>> outputs = model(**inputs)
>>> prediction_scores = outputs.prediction_scores >>> prediction_logits = outputs.logits
""" """
return_tuple = return_tuple if return_tuple is not None else self.config.use_return_tuple return_tuple = return_tuple if return_tuple is not None else self.config.use_return_tuple
@@ -1181,7 +1181,7 @@ class BertForNextSentencePrediction(BertPreTrainedModel):
>>> encoding = tokenizer(prompt, next_sentence, return_tensors='pt') >>> encoding = tokenizer(prompt, next_sentence, return_tensors='pt')
>>> outputs = model(**encoding, next_sentence_label=torch.LongTensor([1])) >>> outputs = model(**encoding, next_sentence_label=torch.LongTensor([1]))
>>> logits = outputs.seq_relationship_scores >>> logits = outputs.logits
>>> assert logits[0, 0] < logits[0, 1] # next sentence was random >>> assert logits[0, 0] < logits[0, 1] # next sentence was random
""" """
return_tuple = return_tuple if return_tuple is not None else self.config.use_return_tuple return_tuple = return_tuple if return_tuple is not None else self.config.use_return_tuple

View File

@@ -876,8 +876,8 @@ class DistilBertForMultipleChoice(DistilBertPreTrainedModel):
>>> outputs = model(**{k: v.unsqueeze(0) for k,v in encoding.items()}, labels=labels) # batch size is 1 >>> outputs = model(**{k: v.unsqueeze(0) for k,v in encoding.items()}, labels=labels) # batch size is 1
>>> # the linear classifier still needs to be trained >>> # the linear classifier still needs to be trained
>>> loss, logits = outputs[:2] >>> loss = outputs.loss
>>> logits = outputs.logits
""" """
return_tuple = return_tuple if return_tuple is not None else self.config.use_return_tuple return_tuple = return_tuple if return_tuple is not None else self.config.use_return_tuple
num_choices = input_ids.shape[1] if input_ids is not None else inputs_embeds.shape[1] num_choices = input_ids.shape[1] if input_ids is not None else inputs_embeds.shape[1]

View File

@@ -423,8 +423,7 @@ class DPRContextEncoder(DPRPretrainedContextEncoder):
tokenizer = DPRContextEncoderTokenizer.from_pretrained('facebook/dpr-ctx_encoder-single-nq-base') tokenizer = DPRContextEncoderTokenizer.from_pretrained('facebook/dpr-ctx_encoder-single-nq-base')
model = DPRContextEncoder.from_pretrained('facebook/dpr-ctx_encoder-single-nq-base') model = DPRContextEncoder.from_pretrained('facebook/dpr-ctx_encoder-single-nq-base')
input_ids = tokenizer("Hello, is my dog cute ?", return_tensors='pt')["input_ids"] input_ids = tokenizer("Hello, is my dog cute ?", return_tensors='pt')["input_ids"]
embeddings = model(input_ids)[0] # the embeddings of the given context. embeddings = model(input_ids).pooler_output
""" """
output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
@@ -502,7 +501,7 @@ class DPRQuestionEncoder(DPRPretrainedQuestionEncoder):
tokenizer = DPRQuestionEncoderTokenizer.from_pretrained('facebook/dpr-question_encoder-single-nq-base') tokenizer = DPRQuestionEncoderTokenizer.from_pretrained('facebook/dpr-question_encoder-single-nq-base')
model = DPRQuestionEncoder.from_pretrained('facebook/dpr-question_encoder-single-nq-base') model = DPRQuestionEncoder.from_pretrained('facebook/dpr-question_encoder-single-nq-base')
input_ids = tokenizer("Hello, is my dog cute ?", return_tensors='pt')["input_ids"] input_ids = tokenizer("Hello, is my dog cute ?", return_tensors='pt')["input_ids"]
embeddings = model(input_ids)[0] # the embeddings of the given question. embeddings = model(input_ids).pooler_output
""" """
output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
output_hidden_states = ( output_hidden_states = (
@@ -583,9 +582,9 @@ class DPRReader(DPRPretrainedReader):
return_tensors='pt' return_tensors='pt'
) )
outputs = model(**encoded_inputs) outputs = model(**encoded_inputs)
start_logits = outputs[0] # The logits of the start of the spans start_logits = outputs.start_logits
end_logits = outputs[1] # The logits of the end of the spans end_logits = outputs.end_logits
relevance_logits = outputs[2] # The relevance scores of the passages relevance_logits = outputs.relevance_logits
""" """
output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions

View File

@@ -525,8 +525,7 @@ class ElectraForPreTraining(ElectraPreTrainedModel):
>>> model = ElectraForPreTraining.from_pretrained('google/electra-small-discriminator') >>> model = ElectraForPreTraining.from_pretrained('google/electra-small-discriminator')
>>> input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze(0) # Batch size 1 >>> input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze(0) # Batch size 1
>>> scores = model(input_ids)[0] >>> logits = model(input_ids).logits
""" """
return_tuple = return_tuple if return_tuple is not None else self.config.use_return_tuple return_tuple = return_tuple if return_tuple is not None else self.config.use_return_tuple

View File

@@ -754,7 +754,8 @@ class GPT2DoubleHeadsModel(GPT2PreTrainedModel):
>>> mc_token_ids = torch.tensor([cls_token_location]) # Batch size: 1 >>> mc_token_ids = torch.tensor([cls_token_location]) # Batch size: 1
>>> outputs = model(input_ids, mc_token_ids=mc_token_ids) >>> outputs = model(input_ids, mc_token_ids=mc_token_ids)
>>> lm_prediction_scores, mc_prediction_scores = outputs[:2] >>> lm_logits = outputs.lm_logits
>>> mc_logits = outputs.mc_logits
""" """
if "lm_labels" in kwargs: if "lm_labels" in kwargs:

View File

@@ -1090,7 +1090,9 @@ class LongformerForMaskedLM(LongformerPreTrainedModel):
>>> attention_mask = None # default is local attention everywhere, which is a good choice for MaskedLM >>> attention_mask = None # default is local attention everywhere, which is a good choice for MaskedLM
... # check ``LongformerModel.forward`` for more details how to set `attention_mask` ... # check ``LongformerModel.forward`` for more details how to set `attention_mask`
>>> loss, prediction_scores = model(input_ids, attention_mask=attention_mask, labels=input_ids) >>> outputs = model(input_ids, attention_mask=attention_mask, labels=input_ids)
>>> loss = outputs.loss
>>> prediction_logits = outputs.logits
""" """
if "masked_lm_labels" in kwargs: if "masked_lm_labels" in kwargs:
@@ -1299,10 +1301,12 @@ class LongformerForQuestionAnswering(BertPreTrainedModel):
>>> # the forward method will automatically set global attention on question tokens >>> # the forward method will automatically set global attention on question tokens
>>> attention_mask = encoding["attention_mask"] >>> attention_mask = encoding["attention_mask"]
>>> start_scores, end_scores = model(input_ids, attention_mask=attention_mask) >>> outputs = model(input_ids, attention_mask=attention_mask)
>>> start_logits = outputs.start_logits
>>> end_logits = outputs.end_logits
>>> all_tokens = tokenizer.convert_ids_to_tokens(input_ids[0].tolist()) >>> all_tokens = tokenizer.convert_ids_to_tokens(input_ids[0].tolist())
>>> answer_tokens = all_tokens[torch.argmax(start_scores) :torch.argmax(end_scores)+1] >>> answer_tokens = all_tokens[torch.argmax(start_logits) :torch.argmax(end_logits)+1]
>>> answer = tokenizer.decode(tokenizer.convert_tokens_to_ids(answer_tokens)) # remove space prepending space token >>> answer = tokenizer.decode(tokenizer.convert_tokens_to_ids(answer_tokens)) # remove space prepending space token
""" """

View File

@@ -979,7 +979,8 @@ class MobileBertForPreTraining(MobileBertPreTrainedModel):
>>> input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze(0) # Batch size 1 >>> input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze(0) # Batch size 1
>>> outputs = model(input_ids) >>> outputs = model(input_ids)
>>> prediction_scores, seq_relationship_scores = outputs[:2] >>> prediction_logits = outputs.prediction_logits
>>> seq_relationship_logits = outputs.seq_relationship_logits
""" """
return_tuple = return_tuple if return_tuple is not None else self.config.use_return_tuple return_tuple = return_tuple if return_tuple is not None else self.config.use_return_tuple
@@ -1186,7 +1187,9 @@ class MobileBertForNextSentencePrediction(MobileBertPreTrainedModel):
>>> next_sentence = "The sky is blue due to the shorter wavelength of blue light." >>> next_sentence = "The sky is blue due to the shorter wavelength of blue light."
>>> encoding = tokenizer(prompt, next_sentence, return_tensors='pt') >>> encoding = tokenizer(prompt, next_sentence, return_tensors='pt')
>>> loss, logits = model(**encoding, next_sentence_label=torch.LongTensor([1])) >>> outputs = model(**encoding, next_sentence_label=torch.LongTensor([1]))
>>> loss = outputs.loss
>>> logits = outputs.logits
""" """
return_tuple = return_tuple if return_tuple is not None else self.config.use_return_tuple return_tuple = return_tuple if return_tuple is not None else self.config.use_return_tuple

View File

@@ -659,7 +659,8 @@ class OpenAIGPTDoubleHeadsModel(OpenAIGPTPreTrainedModel):
mc_token_ids = torch.tensor([input_ids.size(-1)-1, input_ids.size(-1)-1]).unsqueeze(0) # Batch size 1 mc_token_ids = torch.tensor([input_ids.size(-1)-1, input_ids.size(-1)-1]).unsqueeze(0) # Batch size 1
outputs = model(input_ids, mc_token_ids=mc_token_ids) outputs = model(input_ids, mc_token_ids=mc_token_ids)
lm_prediction_scores, mc_prediction_scores = outputs[:2] lm_logits = outputs.lm_logits
mc_logits = outputs.mc_logits
""" """
return_tuple = return_tuple if return_tuple is not None else self.config.use_return_tuple return_tuple = return_tuple if return_tuple is not None else self.config.use_return_tuple
if "lm_labels" in kwargs: if "lm_labels" in kwargs:

View File

@@ -989,7 +989,7 @@ class XLMForQuestionAnswering(XLMPreTrainedModel):
>>> end_positions = torch.tensor([3]) >>> end_positions = torch.tensor([3])
>>> outputs = model(input_ids, start_positions=start_positions, end_positions=end_positions) >>> outputs = model(input_ids, start_positions=start_positions, end_positions=end_positions)
>>> loss = outputs[0] >>> loss = outputs.loss
""" """
return_tuple = return_tuple if return_tuple is not None else self.config.use_return_tuple return_tuple = return_tuple if return_tuple is not None else self.config.use_return_tuple

View File

@@ -1366,8 +1366,8 @@ class XLNetLMHeadModel(XLNetPreTrainedModel):
target_mapping[0, 0, -1] = 1.0 # Our first (and only) prediction will be the last token of the sequence (the masked token) target_mapping[0, 0, -1] = 1.0 # Our first (and only) prediction will be the last token of the sequence (the masked token)
outputs = model(input_ids, perm_mask=perm_mask, target_mapping=target_mapping, labels=labels) outputs = model(input_ids, perm_mask=perm_mask, target_mapping=target_mapping, labels=labels)
loss, next_token_logits = outputs[:2] # Output has shape [target_mapping.size(0), target_mapping.size(1), config.vocab_size] loss = outputs.loss
next_token_logits = outputs.logits # Logits have shape [target_mapping.size(0), target_mapping.size(1), config.vocab_size]
""" """
return_tuple = return_tuple if return_tuple is not None else self.config.use_return_tuple return_tuple = return_tuple if return_tuple is not None else self.config.use_return_tuple
use_cache = self.training or (use_cache if use_cache is not None else self.config.use_cache) use_cache = self.training or (use_cache if use_cache is not None else self.config.use_cache)
@@ -1876,7 +1876,7 @@ class XLNetForQuestionAnswering(XLNetPreTrainedModel):
>>> end_positions = torch.tensor([3]) >>> end_positions = torch.tensor([3])
>>> outputs = model(input_ids, start_positions=start_positions, end_positions=end_positions) >>> outputs = model(input_ids, start_positions=start_positions, end_positions=end_positions)
>>> loss = outputs[0] >>> loss = outputs.loss
""" """
return_tuple = return_tuple if return_tuple is not None else self.config.use_return_tuple return_tuple = return_tuple if return_tuple is not None else self.config.use_return_tuple
use_cache = self.training or (use_cache if use_cache is not None else self.config.use_cache) use_cache = self.training or (use_cache if use_cache is not None else self.config.use_cache)