Update doc to new model outputs (#5946)
* Update doc to new model outputs * Fix outputs in quicktour
This commit is contained in:
@@ -230,13 +230,18 @@ final activations of the model.
|
|||||||
|
|
||||||
>>> ## PYTORCH CODE
|
>>> ## PYTORCH CODE
|
||||||
>>> print(pt_outputs)
|
>>> print(pt_outputs)
|
||||||
(tensor([[-4.0833, 4.3364],
|
SequenceClassifierOutput(loss=None, logits=tensor([[-4.0833, 4.3364],
|
||||||
[ 0.0818, -0.0418]], grad_fn=<AddmmBackward>),)
|
[ 0.0818, -0.0418]], grad_fn=<AddmmBackward>), hidden_states=None, attentions=None)
|
||||||
>>> ## TENSORFLOW CODE
|
>>> ## TENSORFLOW CODE
|
||||||
>>> print(tf_outputs)
|
>>> print(tf_outputs)
|
||||||
(<tf.Tensor: shape=(2, 2), dtype=float32, numpy=
|
(<tf.Tensor: shape=(2, 2), dtype=float32, numpy=
|
||||||
array([[-4.0832963 , 4.3364134 ],
|
array([[-4.0832963 , 4.336414 ],
|
||||||
[ 0.08181238, -0.04178794]], dtype=float32)>,)
|
[ 0.08181786, -0.04179301]], dtype=float32)>,)
|
||||||
|
|
||||||
|
The model can return more than just the final activations, which is why the PyTorch output is a special class and the
|
||||||
|
TensorFlow output is a tuple. Here we only asked for the final activations, so we get a tuple with one element on the
|
||||||
|
TensorFlow side and a :class:`~transformers.modeling_outputs.SequenceClassifierOutput` with just the ``logits`` field
|
||||||
|
filled on the PyTorch side.
|
||||||
|
|
||||||
.. note::
|
.. note::
|
||||||
|
|
||||||
@@ -249,7 +254,7 @@ Let's apply the SoftMax activation to get predictions.
|
|||||||
|
|
||||||
>>> ## PYTORCH CODE
|
>>> ## PYTORCH CODE
|
||||||
>>> import torch.nn.functional as F
|
>>> import torch.nn.functional as F
|
||||||
>>> pt_predictions = F.softmax(pt_outputs[0], dim=-1)
|
>>> pt_predictions = F.softmax(pt_outputs.logits, dim=-1)
|
||||||
>>> ## TENSORFLOW CODE
|
>>> ## TENSORFLOW CODE
|
||||||
>>> import tensorflow as tf
|
>>> import tensorflow as tf
|
||||||
>>> tf_predictions = tf.nn.softmax(tf_outputs[0], axis=-1)
|
>>> tf_predictions = tf.nn.softmax(tf_outputs[0], axis=-1)
|
||||||
@@ -262,7 +267,7 @@ We can see we get the numbers from before:
|
|||||||
>>> print(tf_predictions)
|
>>> print(tf_predictions)
|
||||||
tf.Tensor(
|
tf.Tensor(
|
||||||
[[2.2042994e-04 9.9977952e-01]
|
[[2.2042994e-04 9.9977952e-01]
|
||||||
[5.3086078e-01 4.6913919e-01]], shape=(2, 2), dtype=float32)
|
[5.3086340e-01 4.6913657e-01]], shape=(2, 2), dtype=float32)
|
||||||
>>> ## PYTORCH CODE
|
>>> ## PYTORCH CODE
|
||||||
>>> print(pt_predictions)
|
>>> print(pt_predictions)
|
||||||
tensor([[2.2043e-04, 9.9978e-01],
|
tensor([[2.2043e-04, 9.9978e-01],
|
||||||
@@ -285,6 +290,12 @@ training loop. 🤗 Transformers also provides a :class:`~transformers.Trainer`
|
|||||||
you are using TensorFlow) class to help with your training (taking care of things such as distributed training, mixed
|
you are using TensorFlow) class to help with your training (taking care of things such as distributed training, mixed
|
||||||
precision, etc.). See the :doc:`training tutorial <training>` for more details.
|
precision, etc.). See the :doc:`training tutorial <training>` for more details.
|
||||||
|
|
||||||
|
.. note::
|
||||||
|
|
||||||
|
PyTorch model outputs are special dataclasses so that you can get autocompletion for their attributes in an IDE.
|
||||||
|
They also behave like a tuple or a dictionary (e.g., you can index with an integer, a slice or a string) in which
|
||||||
|
case the attributes not set (that have :obj:`None` values) are ignored.
|
||||||
|
|
||||||
Once your model is fine-tuned, you can save it with its tokenizer in the following way:
|
Once your model is fine-tuned, you can save it with its tokenizer in the following way:
|
||||||
|
|
||||||
::
|
::
|
||||||
|
|||||||
@@ -98,8 +98,8 @@ of each other. The process is the following:
|
|||||||
>>> paraphrase = tokenizer(sequence_0, sequence_2, return_tensors="pt")
|
>>> paraphrase = tokenizer(sequence_0, sequence_2, return_tensors="pt")
|
||||||
>>> not_paraphrase = tokenizer(sequence_0, sequence_1, return_tensors="pt")
|
>>> not_paraphrase = tokenizer(sequence_0, sequence_1, return_tensors="pt")
|
||||||
|
|
||||||
>>> paraphrase_classification_logits = model(**paraphrase)[0]
|
>>> paraphrase_classification_logits = model(**paraphrase).logits
|
||||||
>>> not_paraphrase_classification_logits = model(**not_paraphrase)[0]
|
>>> not_paraphrase_classification_logits = model(**not_paraphrase).logits
|
||||||
|
|
||||||
>>> paraphrase_results = torch.softmax(paraphrase_classification_logits, dim=1).tolist()[0]
|
>>> paraphrase_results = torch.softmax(paraphrase_classification_logits, dim=1).tolist()[0]
|
||||||
>>> not_paraphrase_results = torch.softmax(not_paraphrase_classification_logits, dim=1).tolist()[0]
|
>>> not_paraphrase_results = torch.softmax(not_paraphrase_classification_logits, dim=1).tolist()[0]
|
||||||
@@ -375,7 +375,7 @@ Here is an example doing masked language modeling using a model and a tokenizer.
|
|||||||
>>> input = tokenizer.encode(sequence, return_tensors="pt")
|
>>> input = tokenizer.encode(sequence, return_tensors="pt")
|
||||||
>>> mask_token_index = torch.where(input == tokenizer.mask_token_id)[1]
|
>>> mask_token_index = torch.where(input == tokenizer.mask_token_id)[1]
|
||||||
|
|
||||||
>>> token_logits = model(input)[0]
|
>>> token_logits = model(input).logits
|
||||||
>>> mask_token_logits = token_logits[0, mask_token_index, :]
|
>>> mask_token_logits = token_logits[0, mask_token_index, :]
|
||||||
|
|
||||||
>>> top_5_tokens = torch.topk(mask_token_logits, 5, dim=1).indices[0].tolist()
|
>>> top_5_tokens = torch.topk(mask_token_logits, 5, dim=1).indices[0].tolist()
|
||||||
@@ -436,7 +436,7 @@ Here is an example using the tokenizer and model and leveraging the :func:`~tran
|
|||||||
>>> input_ids = tokenizer.encode(sequence, return_tensors="pt")
|
>>> input_ids = tokenizer.encode(sequence, return_tensors="pt")
|
||||||
|
|
||||||
>>> # get logits of last hidden state
|
>>> # get logits of last hidden state
|
||||||
>>> next_token_logits = model(input_ids)[0][:, -1, :]
|
>>> next_token_logits = model(input_ids).logits[:, -1, :]
|
||||||
|
|
||||||
>>> # filter
|
>>> # filter
|
||||||
>>> filtered_next_token_logits = top_k_top_p_filtering(next_token_logits, top_k=50, top_p=1.0)
|
>>> filtered_next_token_logits = top_k_top_p_filtering(next_token_logits, top_k=50, top_p=1.0)
|
||||||
@@ -666,7 +666,7 @@ Here is an example doing named entity recognition using a model and a tokenizer.
|
|||||||
>>> tokens = tokenizer.tokenize(tokenizer.decode(tokenizer.encode(sequence)))
|
>>> tokens = tokenizer.tokenize(tokenizer.decode(tokenizer.encode(sequence)))
|
||||||
>>> inputs = tokenizer.encode(sequence, return_tensors="pt")
|
>>> inputs = tokenizer.encode(sequence, return_tensors="pt")
|
||||||
|
|
||||||
>>> outputs = model(inputs)[0]
|
>>> outputs = model(inputs).logits
|
||||||
>>> predictions = torch.argmax(outputs, dim=2)
|
>>> predictions = torch.argmax(outputs, dim=2)
|
||||||
>>> ## TENSORFLOW CODE
|
>>> ## TENSORFLOW CODE
|
||||||
>>> from transformers import TFAutoModelForTokenClassification, AutoTokenizer
|
>>> from transformers import TFAutoModelForTokenClassification, AutoTokenizer
|
||||||
|
|||||||
@@ -99,7 +99,7 @@ backwards pass and update the weights:
|
|||||||
|
|
||||||
labels = torch.tensor([1,0]).unsqueeze(0)
|
labels = torch.tensor([1,0]).unsqueeze(0)
|
||||||
outputs = model(input_ids, attention_mask=attention_mask, labels=labels)
|
outputs = model(input_ids, attention_mask=attention_mask, labels=labels)
|
||||||
loss = outputs[0]
|
loss = outputs.loss
|
||||||
loss.backward()
|
loss.backward()
|
||||||
optimizer.step()
|
optimizer.step()
|
||||||
|
|
||||||
@@ -111,7 +111,7 @@ The following is equivalent to the previous example:
|
|||||||
from torch.nn import functional as F
|
from torch.nn import functional as F
|
||||||
labels = torch.tensor([1,0]).unsqueeze(0)
|
labels = torch.tensor([1,0]).unsqueeze(0)
|
||||||
outputs = model(input_ids, attention_mask=attention_mask)
|
outputs = model(input_ids, attention_mask=attention_mask)
|
||||||
loss = F.cross_entropy(labels, outputs[0])
|
loss = F.cross_entropy(outputs.logits, labels)
|
||||||
loss.backward()
|
loss.backward()
|
||||||
optimizer.step()
|
optimizer.step()
|
||||||
|
|
||||||
|
|||||||
@@ -226,7 +226,8 @@ PT_TOKEN_CLASSIFICATION_SAMPLE = r"""
|
|||||||
>>> labels = torch.tensor([1] * inputs["input_ids"].size(1)).unsqueeze(0) # Batch size 1
|
>>> labels = torch.tensor([1] * inputs["input_ids"].size(1)).unsqueeze(0) # Batch size 1
|
||||||
|
|
||||||
>>> outputs = model(**inputs, labels=labels)
|
>>> outputs = model(**inputs, labels=labels)
|
||||||
>>> loss, scores = outputs[:2]
|
>>> loss = outputs.loss
|
||||||
|
>>> logits = outputs.logits
|
||||||
"""
|
"""
|
||||||
|
|
||||||
PT_QUESTION_ANSWERING_SAMPLE = r"""
|
PT_QUESTION_ANSWERING_SAMPLE = r"""
|
||||||
@@ -243,7 +244,9 @@ PT_QUESTION_ANSWERING_SAMPLE = r"""
|
|||||||
>>> end_positions = torch.tensor([3])
|
>>> end_positions = torch.tensor([3])
|
||||||
|
|
||||||
>>> outputs = model(**inputs, start_positions=start_positions, end_positions=end_positions)
|
>>> outputs = model(**inputs, start_positions=start_positions, end_positions=end_positions)
|
||||||
>>> loss, start_scores, end_scores = outputs[:3]
|
>>> loss = outputs.loss
|
||||||
|
>>> start_scores = outputs.start_logits
|
||||||
|
>>> end_scores = outputs.end_logits
|
||||||
"""
|
"""
|
||||||
|
|
||||||
PT_SEQUENCE_CLASSIFICATION_SAMPLE = r"""
|
PT_SEQUENCE_CLASSIFICATION_SAMPLE = r"""
|
||||||
@@ -258,7 +261,8 @@ PT_SEQUENCE_CLASSIFICATION_SAMPLE = r"""
|
|||||||
>>> inputs = tokenizer("Hello, my dog is cute", return_tensors="pt")
|
>>> inputs = tokenizer("Hello, my dog is cute", return_tensors="pt")
|
||||||
>>> labels = torch.tensor([1]).unsqueeze(0) # Batch size 1
|
>>> labels = torch.tensor([1]).unsqueeze(0) # Batch size 1
|
||||||
>>> outputs = model(**inputs, labels=labels)
|
>>> outputs = model(**inputs, labels=labels)
|
||||||
>>> loss, logits = outputs[:2]
|
>>> loss = outputs.loss
|
||||||
|
>>> logits = outputs.logits
|
||||||
"""
|
"""
|
||||||
|
|
||||||
PT_MASKED_LM_SAMPLE = r"""
|
PT_MASKED_LM_SAMPLE = r"""
|
||||||
@@ -273,7 +277,8 @@ PT_MASKED_LM_SAMPLE = r"""
|
|||||||
>>> input_ids = tokenizer("Hello, my dog is cute", return_tensors="pt")["input_ids"]
|
>>> input_ids = tokenizer("Hello, my dog is cute", return_tensors="pt")["input_ids"]
|
||||||
|
|
||||||
>>> outputs = model(input_ids, labels=input_ids)
|
>>> outputs = model(input_ids, labels=input_ids)
|
||||||
>>> loss, prediction_scores = outputs[:2]
|
>>> loss = outputs.loss
|
||||||
|
>>> prediction_logits = outputs.logits
|
||||||
"""
|
"""
|
||||||
|
|
||||||
PT_BASE_MODEL_SAMPLE = r"""
|
PT_BASE_MODEL_SAMPLE = r"""
|
||||||
@@ -288,7 +293,7 @@ PT_BASE_MODEL_SAMPLE = r"""
|
|||||||
>>> inputs = tokenizer("Hello, my dog is cute", return_tensors="pt")
|
>>> inputs = tokenizer("Hello, my dog is cute", return_tensors="pt")
|
||||||
>>> outputs = model(**inputs)
|
>>> outputs = model(**inputs)
|
||||||
|
|
||||||
>>> last_hidden_states = outputs[0] # The last hidden-state is the first element of the output tuple
|
>>> last_hidden_states = outputs.last_hidden_state
|
||||||
"""
|
"""
|
||||||
|
|
||||||
PT_MULTIPLE_CHOICE_SAMPLE = r"""
|
PT_MULTIPLE_CHOICE_SAMPLE = r"""
|
||||||
@@ -309,7 +314,8 @@ PT_MULTIPLE_CHOICE_SAMPLE = r"""
|
|||||||
>>> outputs = model(**{{k: v.unsqueeze(0) for k,v in encoding.items()}}, labels=labels) # batch size is 1
|
>>> outputs = model(**{{k: v.unsqueeze(0) for k,v in encoding.items()}}, labels=labels) # batch size is 1
|
||||||
|
|
||||||
>>> # the linear classifier still needs to be trained
|
>>> # the linear classifier still needs to be trained
|
||||||
>>> loss, logits = outputs[:2]
|
>>> loss = outputs.loss
|
||||||
|
>>> logits = outputs.logits
|
||||||
"""
|
"""
|
||||||
|
|
||||||
PT_CAUSAL_LM_SAMPLE = r"""
|
PT_CAUSAL_LM_SAMPLE = r"""
|
||||||
@@ -323,7 +329,8 @@ PT_CAUSAL_LM_SAMPLE = r"""
|
|||||||
|
|
||||||
>>> inputs = tokenizer("Hello, my dog is cute", return_tensors="pt")
|
>>> inputs = tokenizer("Hello, my dog is cute", return_tensors="pt")
|
||||||
>>> outputs = model(**inputs, labels=inputs["input_ids"])
|
>>> outputs = model(**inputs, labels=inputs["input_ids"])
|
||||||
>>> loss, logits = outputs[:2]
|
>>> loss = outputs.loss
|
||||||
|
>>> logits = outputs.logits
|
||||||
"""
|
"""
|
||||||
|
|
||||||
TF_TOKEN_CLASSIFICATION_SAMPLE = r"""
|
TF_TOKEN_CLASSIFICATION_SAMPLE = r"""
|
||||||
|
|||||||
@@ -683,7 +683,8 @@ class AlbertForPreTraining(AlbertPreTrainedModel):
|
|||||||
>>> input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze(0) # Batch size 1
|
>>> input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze(0) # Batch size 1
|
||||||
>>> outputs = model(input_ids)
|
>>> outputs = model(input_ids)
|
||||||
|
|
||||||
>>> prediction_scores, sop_scores = outputs[:2]
|
>>> prediction_logits = outputs.prediction_logits
|
||||||
|
>>> sop_logits = outputs.sop_logits
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
|||||||
@@ -996,7 +996,7 @@ class BartForConditionalGeneration(PretrainedBartModel):
|
|||||||
|
|
||||||
model = BartForConditionalGeneration.from_pretrained('facebook/bart-large')
|
model = BartForConditionalGeneration.from_pretrained('facebook/bart-large')
|
||||||
input_ids = tokenizer([TXT], return_tensors='pt')['input_ids']
|
input_ids = tokenizer([TXT], return_tensors='pt')['input_ids']
|
||||||
logits = model(input_ids)[0]
|
logits = model(input_ids).logits
|
||||||
|
|
||||||
masked_index = (input_ids[0] == tokenizer.mask_token_id).nonzero().item()
|
masked_index = (input_ids[0] == tokenizer.mask_token_id).nonzero().item()
|
||||||
probs = logits[0, masked_index].softmax(dim=0)
|
probs = logits[0, masked_index].softmax(dim=0)
|
||||||
|
|||||||
@@ -873,8 +873,8 @@ class BertForPreTraining(BertPreTrainedModel):
|
|||||||
>>> inputs = tokenizer("Hello, my dog is cute", return_tensors="pt")
|
>>> inputs = tokenizer("Hello, my dog is cute", return_tensors="pt")
|
||||||
>>> outputs = model(**inputs)
|
>>> outputs = model(**inputs)
|
||||||
|
|
||||||
>>> prediction_scores, seq_relationship_scores = outputs[:2]
|
>>> prediction_logits = outputs.prediction_logits
|
||||||
|
>>> seq_relationship_logits = outputs.seq_relationship_logits
|
||||||
"""
|
"""
|
||||||
if "masked_lm_labels" in kwargs:
|
if "masked_lm_labels" in kwargs:
|
||||||
warnings.warn(
|
warnings.warn(
|
||||||
@@ -978,7 +978,7 @@ class BertLMHeadModel(BertPreTrainedModel):
|
|||||||
>>> inputs = tokenizer("Hello, my dog is cute", return_tensors="pt")
|
>>> inputs = tokenizer("Hello, my dog is cute", return_tensors="pt")
|
||||||
>>> outputs = model(**inputs)
|
>>> outputs = model(**inputs)
|
||||||
|
|
||||||
>>> prediction_scores = outputs.prediction_scores
|
>>> prediction_logits = outputs.logits
|
||||||
"""
|
"""
|
||||||
return_tuple = return_tuple if return_tuple is not None else self.config.use_return_tuple
|
return_tuple = return_tuple if return_tuple is not None else self.config.use_return_tuple
|
||||||
|
|
||||||
@@ -1181,7 +1181,7 @@ class BertForNextSentencePrediction(BertPreTrainedModel):
|
|||||||
>>> encoding = tokenizer(prompt, next_sentence, return_tensors='pt')
|
>>> encoding = tokenizer(prompt, next_sentence, return_tensors='pt')
|
||||||
|
|
||||||
>>> outputs = model(**encoding, next_sentence_label=torch.LongTensor([1]))
|
>>> outputs = model(**encoding, next_sentence_label=torch.LongTensor([1]))
|
||||||
>>> logits = outputs.seq_relationship_scores
|
>>> logits = outputs.logits
|
||||||
>>> assert logits[0, 0] < logits[0, 1] # next sentence was random
|
>>> assert logits[0, 0] < logits[0, 1] # next sentence was random
|
||||||
"""
|
"""
|
||||||
return_tuple = return_tuple if return_tuple is not None else self.config.use_return_tuple
|
return_tuple = return_tuple if return_tuple is not None else self.config.use_return_tuple
|
||||||
|
|||||||
@@ -876,8 +876,8 @@ class DistilBertForMultipleChoice(DistilBertPreTrainedModel):
|
|||||||
>>> outputs = model(**{k: v.unsqueeze(0) for k,v in encoding.items()}, labels=labels) # batch size is 1
|
>>> outputs = model(**{k: v.unsqueeze(0) for k,v in encoding.items()}, labels=labels) # batch size is 1
|
||||||
|
|
||||||
>>> # the linear classifier still needs to be trained
|
>>> # the linear classifier still needs to be trained
|
||||||
>>> loss, logits = outputs[:2]
|
>>> loss = outputs.loss
|
||||||
|
>>> logits = outputs.logits
|
||||||
"""
|
"""
|
||||||
return_tuple = return_tuple if return_tuple is not None else self.config.use_return_tuple
|
return_tuple = return_tuple if return_tuple is not None else self.config.use_return_tuple
|
||||||
num_choices = input_ids.shape[1] if input_ids is not None else inputs_embeds.shape[1]
|
num_choices = input_ids.shape[1] if input_ids is not None else inputs_embeds.shape[1]
|
||||||
|
|||||||
@@ -423,8 +423,7 @@ class DPRContextEncoder(DPRPretrainedContextEncoder):
|
|||||||
tokenizer = DPRContextEncoderTokenizer.from_pretrained('facebook/dpr-ctx_encoder-single-nq-base')
|
tokenizer = DPRContextEncoderTokenizer.from_pretrained('facebook/dpr-ctx_encoder-single-nq-base')
|
||||||
model = DPRContextEncoder.from_pretrained('facebook/dpr-ctx_encoder-single-nq-base')
|
model = DPRContextEncoder.from_pretrained('facebook/dpr-ctx_encoder-single-nq-base')
|
||||||
input_ids = tokenizer("Hello, is my dog cute ?", return_tensors='pt')["input_ids"]
|
input_ids = tokenizer("Hello, is my dog cute ?", return_tensors='pt')["input_ids"]
|
||||||
embeddings = model(input_ids)[0] # the embeddings of the given context.
|
embeddings = model(input_ids).pooler_output
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
|
output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
|
||||||
@@ -502,7 +501,7 @@ class DPRQuestionEncoder(DPRPretrainedQuestionEncoder):
|
|||||||
tokenizer = DPRQuestionEncoderTokenizer.from_pretrained('facebook/dpr-question_encoder-single-nq-base')
|
tokenizer = DPRQuestionEncoderTokenizer.from_pretrained('facebook/dpr-question_encoder-single-nq-base')
|
||||||
model = DPRQuestionEncoder.from_pretrained('facebook/dpr-question_encoder-single-nq-base')
|
model = DPRQuestionEncoder.from_pretrained('facebook/dpr-question_encoder-single-nq-base')
|
||||||
input_ids = tokenizer("Hello, is my dog cute ?", return_tensors='pt')["input_ids"]
|
input_ids = tokenizer("Hello, is my dog cute ?", return_tensors='pt')["input_ids"]
|
||||||
embeddings = model(input_ids)[0] # the embeddings of the given question.
|
embeddings = model(input_ids).pooler_output
|
||||||
"""
|
"""
|
||||||
output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
|
output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
|
||||||
output_hidden_states = (
|
output_hidden_states = (
|
||||||
@@ -583,9 +582,9 @@ class DPRReader(DPRPretrainedReader):
|
|||||||
return_tensors='pt'
|
return_tensors='pt'
|
||||||
)
|
)
|
||||||
outputs = model(**encoded_inputs)
|
outputs = model(**encoded_inputs)
|
||||||
start_logits = outputs[0] # The logits of the start of the spans
|
start_logits = outputs.start_logits
|
||||||
end_logits = outputs[1] # The logits of the end of the spans
|
end_logits = outputs.end_logits
|
||||||
relevance_logits = outputs[2] # The relevance scores of the passages
|
relevance_logits = outputs.relevance_logits
|
||||||
|
|
||||||
"""
|
"""
|
||||||
output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
|
output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
|
||||||
|
|||||||
@@ -525,8 +525,7 @@ class ElectraForPreTraining(ElectraPreTrainedModel):
|
|||||||
>>> model = ElectraForPreTraining.from_pretrained('google/electra-small-discriminator')
|
>>> model = ElectraForPreTraining.from_pretrained('google/electra-small-discriminator')
|
||||||
|
|
||||||
>>> input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze(0) # Batch size 1
|
>>> input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze(0) # Batch size 1
|
||||||
>>> scores = model(input_ids)[0]
|
>>> logits = model(input_ids).logits
|
||||||
|
|
||||||
"""
|
"""
|
||||||
return_tuple = return_tuple if return_tuple is not None else self.config.use_return_tuple
|
return_tuple = return_tuple if return_tuple is not None else self.config.use_return_tuple
|
||||||
|
|
||||||
|
|||||||
@@ -754,7 +754,8 @@ class GPT2DoubleHeadsModel(GPT2PreTrainedModel):
|
|||||||
>>> mc_token_ids = torch.tensor([cls_token_location]) # Batch size: 1
|
>>> mc_token_ids = torch.tensor([cls_token_location]) # Batch size: 1
|
||||||
|
|
||||||
>>> outputs = model(input_ids, mc_token_ids=mc_token_ids)
|
>>> outputs = model(input_ids, mc_token_ids=mc_token_ids)
|
||||||
>>> lm_prediction_scores, mc_prediction_scores = outputs[:2]
|
>>> lm_logits = outputs.lm_logits
|
||||||
|
>>> mc_logits = outputs.mc_logits
|
||||||
|
|
||||||
"""
|
"""
|
||||||
if "lm_labels" in kwargs:
|
if "lm_labels" in kwargs:
|
||||||
|
|||||||
@@ -1090,7 +1090,9 @@ class LongformerForMaskedLM(LongformerPreTrainedModel):
|
|||||||
|
|
||||||
>>> attention_mask = None # default is local attention everywhere, which is a good choice for MaskedLM
|
>>> attention_mask = None # default is local attention everywhere, which is a good choice for MaskedLM
|
||||||
... # check ``LongformerModel.forward`` for more details how to set `attention_mask`
|
... # check ``LongformerModel.forward`` for more details how to set `attention_mask`
|
||||||
>>> loss, prediction_scores = model(input_ids, attention_mask=attention_mask, labels=input_ids)
|
>>> outputs = model(input_ids, attention_mask=attention_mask, labels=input_ids)
|
||||||
|
>>> loss = outputs.loss
|
||||||
|
>>> prediction_logits = outputs.logits
|
||||||
"""
|
"""
|
||||||
|
|
||||||
if "masked_lm_labels" in kwargs:
|
if "masked_lm_labels" in kwargs:
|
||||||
@@ -1299,10 +1301,12 @@ class LongformerForQuestionAnswering(BertPreTrainedModel):
|
|||||||
>>> # the forward method will automatically set global attention on question tokens
|
>>> # the forward method will automatically set global attention on question tokens
|
||||||
>>> attention_mask = encoding["attention_mask"]
|
>>> attention_mask = encoding["attention_mask"]
|
||||||
|
|
||||||
>>> start_scores, end_scores = model(input_ids, attention_mask=attention_mask)
|
>>> outputs = model(input_ids, attention_mask=attention_mask)
|
||||||
|
>>> start_logits = outputs.start_logits
|
||||||
|
>>> end_logits = outputs.end_logits
|
||||||
>>> all_tokens = tokenizer.convert_ids_to_tokens(input_ids[0].tolist())
|
>>> all_tokens = tokenizer.convert_ids_to_tokens(input_ids[0].tolist())
|
||||||
|
|
||||||
>>> answer_tokens = all_tokens[torch.argmax(start_scores) :torch.argmax(end_scores)+1]
|
>>> answer_tokens = all_tokens[torch.argmax(start_logits) :torch.argmax(end_logits)+1]
|
||||||
>>> answer = tokenizer.decode(tokenizer.convert_tokens_to_ids(answer_tokens)) # remove space prepending space token
|
>>> answer = tokenizer.decode(tokenizer.convert_tokens_to_ids(answer_tokens)) # remove space prepending space token
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
|||||||
@@ -979,7 +979,8 @@ class MobileBertForPreTraining(MobileBertPreTrainedModel):
|
|||||||
>>> input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze(0) # Batch size 1
|
>>> input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze(0) # Batch size 1
|
||||||
>>> outputs = model(input_ids)
|
>>> outputs = model(input_ids)
|
||||||
|
|
||||||
>>> prediction_scores, seq_relationship_scores = outputs[:2]
|
>>> prediction_logits = outputs.prediction_logits
|
||||||
|
>>> seq_relationship_logits = outputs.seq_relationship_logits
|
||||||
|
|
||||||
"""
|
"""
|
||||||
return_tuple = return_tuple if return_tuple is not None else self.config.use_return_tuple
|
return_tuple = return_tuple if return_tuple is not None else self.config.use_return_tuple
|
||||||
@@ -1186,7 +1187,9 @@ class MobileBertForNextSentencePrediction(MobileBertPreTrainedModel):
|
|||||||
>>> next_sentence = "The sky is blue due to the shorter wavelength of blue light."
|
>>> next_sentence = "The sky is blue due to the shorter wavelength of blue light."
|
||||||
>>> encoding = tokenizer(prompt, next_sentence, return_tensors='pt')
|
>>> encoding = tokenizer(prompt, next_sentence, return_tensors='pt')
|
||||||
|
|
||||||
>>> loss, logits = model(**encoding, next_sentence_label=torch.LongTensor([1]))
|
>>> outputs = model(**encoding, next_sentence_label=torch.LongTensor([1]))
|
||||||
|
>>> loss = outputs.loss
|
||||||
|
>>> logits = outputs.logits
|
||||||
"""
|
"""
|
||||||
return_tuple = return_tuple if return_tuple is not None else self.config.use_return_tuple
|
return_tuple = return_tuple if return_tuple is not None else self.config.use_return_tuple
|
||||||
|
|
||||||
|
|||||||
@@ -659,7 +659,8 @@ class OpenAIGPTDoubleHeadsModel(OpenAIGPTPreTrainedModel):
|
|||||||
mc_token_ids = torch.tensor([input_ids.size(-1)-1, input_ids.size(-1)-1]).unsqueeze(0) # Batch size 1
|
mc_token_ids = torch.tensor([input_ids.size(-1)-1, input_ids.size(-1)-1]).unsqueeze(0) # Batch size 1
|
||||||
|
|
||||||
outputs = model(input_ids, mc_token_ids=mc_token_ids)
|
outputs = model(input_ids, mc_token_ids=mc_token_ids)
|
||||||
lm_prediction_scores, mc_prediction_scores = outputs[:2]
|
lm_logits = outputs.lm_logits
|
||||||
|
mc_logits = outputs.mc_logits
|
||||||
"""
|
"""
|
||||||
return_tuple = return_tuple if return_tuple is not None else self.config.use_return_tuple
|
return_tuple = return_tuple if return_tuple is not None else self.config.use_return_tuple
|
||||||
if "lm_labels" in kwargs:
|
if "lm_labels" in kwargs:
|
||||||
|
|||||||
@@ -989,7 +989,7 @@ class XLMForQuestionAnswering(XLMPreTrainedModel):
|
|||||||
>>> end_positions = torch.tensor([3])
|
>>> end_positions = torch.tensor([3])
|
||||||
|
|
||||||
>>> outputs = model(input_ids, start_positions=start_positions, end_positions=end_positions)
|
>>> outputs = model(input_ids, start_positions=start_positions, end_positions=end_positions)
|
||||||
>>> loss = outputs[0]
|
>>> loss = outputs.loss
|
||||||
"""
|
"""
|
||||||
return_tuple = return_tuple if return_tuple is not None else self.config.use_return_tuple
|
return_tuple = return_tuple if return_tuple is not None else self.config.use_return_tuple
|
||||||
|
|
||||||
|
|||||||
@@ -1366,8 +1366,8 @@ class XLNetLMHeadModel(XLNetPreTrainedModel):
|
|||||||
target_mapping[0, 0, -1] = 1.0 # Our first (and only) prediction will be the last token of the sequence (the masked token)
|
target_mapping[0, 0, -1] = 1.0 # Our first (and only) prediction will be the last token of the sequence (the masked token)
|
||||||
|
|
||||||
outputs = model(input_ids, perm_mask=perm_mask, target_mapping=target_mapping, labels=labels)
|
outputs = model(input_ids, perm_mask=perm_mask, target_mapping=target_mapping, labels=labels)
|
||||||
loss, next_token_logits = outputs[:2] # Output has shape [target_mapping.size(0), target_mapping.size(1), config.vocab_size]
|
loss = outputs.loss
|
||||||
|
next_token_logits = outputs.logits # Logits have shape [target_mapping.size(0), target_mapping.size(1), config.vocab_size]
|
||||||
"""
|
"""
|
||||||
return_tuple = return_tuple if return_tuple is not None else self.config.use_return_tuple
|
return_tuple = return_tuple if return_tuple is not None else self.config.use_return_tuple
|
||||||
use_cache = self.training or (use_cache if use_cache is not None else self.config.use_cache)
|
use_cache = self.training or (use_cache if use_cache is not None else self.config.use_cache)
|
||||||
@@ -1876,7 +1876,7 @@ class XLNetForQuestionAnswering(XLNetPreTrainedModel):
|
|||||||
>>> end_positions = torch.tensor([3])
|
>>> end_positions = torch.tensor([3])
|
||||||
>>> outputs = model(input_ids, start_positions=start_positions, end_positions=end_positions)
|
>>> outputs = model(input_ids, start_positions=start_positions, end_positions=end_positions)
|
||||||
|
|
||||||
>>> loss = outputs[0]
|
>>> loss = outputs.loss
|
||||||
"""
|
"""
|
||||||
return_tuple = return_tuple if return_tuple is not None else self.config.use_return_tuple
|
return_tuple = return_tuple if return_tuple is not None else self.config.use_return_tuple
|
||||||
use_cache = self.training or (use_cache if use_cache is not None else self.config.use_cache)
|
use_cache = self.training or (use_cache if use_cache is not None else self.config.use_cache)
|
||||||
|
|||||||
Reference in New Issue
Block a user