Update doc to new model outputs (#5946)
* Update doc to new model outputs * Fix outputs in quicktour
This commit is contained in:
@@ -230,13 +230,18 @@ final activations of the model.
|
||||
|
||||
>>> ## PYTORCH CODE
|
||||
>>> print(pt_outputs)
|
||||
(tensor([[-4.0833, 4.3364],
|
||||
[ 0.0818, -0.0418]], grad_fn=<AddmmBackward>),)
|
||||
SequenceClassifierOutput(loss=None, logits=tensor([[-4.0833, 4.3364],
|
||||
[ 0.0818, -0.0418]], grad_fn=<AddmmBackward>), hidden_states=None, attentions=None)
|
||||
>>> ## TENSORFLOW CODE
|
||||
>>> print(tf_outputs)
|
||||
(<tf.Tensor: shape=(2, 2), dtype=float32, numpy=
|
||||
array([[-4.0832963 , 4.3364134 ],
|
||||
[ 0.08181238, -0.04178794]], dtype=float32)>,)
|
||||
array([[-4.0832963 , 4.336414 ],
|
||||
[ 0.08181786, -0.04179301]], dtype=float32)>,)
|
||||
|
||||
The model can return more than just the final activations, which is why the PyTorch output is a special class and the
|
||||
TensorFlow output is a tuple. Here we only asked for the final activations, so we get a tuple with one element on the
|
||||
TensorFlow side and a :class:`~transformers.modeling_outputs.SequenceClassifierOutput` with just the ``logits`` field
|
||||
filled on the PyTorch side.
|
||||
|
||||
.. note::
|
||||
|
||||
@@ -249,7 +254,7 @@ Let's apply the SoftMax activation to get predictions.
|
||||
|
||||
>>> ## PYTORCH CODE
|
||||
>>> import torch.nn.functional as F
|
||||
>>> pt_predictions = F.softmax(pt_outputs[0], dim=-1)
|
||||
>>> pt_predictions = F.softmax(pt_outputs.logits, dim=-1)
|
||||
>>> ## TENSORFLOW CODE
|
||||
>>> import tensorflow as tf
|
||||
>>> tf_predictions = tf.nn.softmax(tf_outputs[0], axis=-1)
|
||||
@@ -262,7 +267,7 @@ We can see we get the numbers from before:
|
||||
>>> print(tf_predictions)
|
||||
tf.Tensor(
|
||||
[[2.2042994e-04 9.9977952e-01]
|
||||
[5.3086078e-01 4.6913919e-01]], shape=(2, 2), dtype=float32)
|
||||
[5.3086340e-01 4.6913657e-01]], shape=(2, 2), dtype=float32)
|
||||
>>> ## PYTORCH CODE
|
||||
>>> print(pt_predictions)
|
||||
tensor([[2.2043e-04, 9.9978e-01],
|
||||
@@ -285,6 +290,12 @@ training loop. 🤗 Transformers also provides a :class:`~transformers.Trainer`
|
||||
you are using TensorFlow) class to help with your training (taking care of things such as distributed training, mixed
|
||||
precision, etc.). See the :doc:`training tutorial <training>` for more details.
|
||||
|
||||
.. note::
|
||||
|
||||
PyTorch model outputs are special dataclasses so that you can get autocompletion for their attributes in an IDE.
|
||||
They also behave like a tuple or a dictionary (e.g., you can index with an integer, a slice or a string) in which
|
||||
case the attributes not set (that have :obj:`None` values) are ignored.
|
||||
|
||||
Once your model is fine-tuned, you can save it with its tokenizer in the following way:
|
||||
|
||||
::
|
||||
|
||||
@@ -98,8 +98,8 @@ of each other. The process is the following:
|
||||
>>> paraphrase = tokenizer(sequence_0, sequence_2, return_tensors="pt")
|
||||
>>> not_paraphrase = tokenizer(sequence_0, sequence_1, return_tensors="pt")
|
||||
|
||||
>>> paraphrase_classification_logits = model(**paraphrase)[0]
|
||||
>>> not_paraphrase_classification_logits = model(**not_paraphrase)[0]
|
||||
>>> paraphrase_classification_logits = model(**paraphrase).logits
|
||||
>>> not_paraphrase_classification_logits = model(**not_paraphrase).logits
|
||||
|
||||
>>> paraphrase_results = torch.softmax(paraphrase_classification_logits, dim=1).tolist()[0]
|
||||
>>> not_paraphrase_results = torch.softmax(not_paraphrase_classification_logits, dim=1).tolist()[0]
|
||||
@@ -375,7 +375,7 @@ Here is an example doing masked language modeling using a model and a tokenizer.
|
||||
>>> input = tokenizer.encode(sequence, return_tensors="pt")
|
||||
>>> mask_token_index = torch.where(input == tokenizer.mask_token_id)[1]
|
||||
|
||||
>>> token_logits = model(input)[0]
|
||||
>>> token_logits = model(input).logits
|
||||
>>> mask_token_logits = token_logits[0, mask_token_index, :]
|
||||
|
||||
>>> top_5_tokens = torch.topk(mask_token_logits, 5, dim=1).indices[0].tolist()
|
||||
@@ -436,7 +436,7 @@ Here is an example using the tokenizer and model and leveraging the :func:`~tran
|
||||
>>> input_ids = tokenizer.encode(sequence, return_tensors="pt")
|
||||
|
||||
>>> # get logits of last hidden state
|
||||
>>> next_token_logits = model(input_ids)[0][:, -1, :]
|
||||
>>> next_token_logits = model(input_ids).logits[:, -1, :]
|
||||
|
||||
>>> # filter
|
||||
>>> filtered_next_token_logits = top_k_top_p_filtering(next_token_logits, top_k=50, top_p=1.0)
|
||||
@@ -666,7 +666,7 @@ Here is an example doing named entity recognition using a model and a tokenizer.
|
||||
>>> tokens = tokenizer.tokenize(tokenizer.decode(tokenizer.encode(sequence)))
|
||||
>>> inputs = tokenizer.encode(sequence, return_tensors="pt")
|
||||
|
||||
>>> outputs = model(inputs)[0]
|
||||
>>> outputs = model(inputs).logits
|
||||
>>> predictions = torch.argmax(outputs, dim=2)
|
||||
>>> ## TENSORFLOW CODE
|
||||
>>> from transformers import TFAutoModelForTokenClassification, AutoTokenizer
|
||||
|
||||
@@ -99,7 +99,7 @@ backwards pass and update the weights:
|
||||
|
||||
labels = torch.tensor([1,0]).unsqueeze(0)
|
||||
outputs = model(input_ids, attention_mask=attention_mask, labels=labels)
|
||||
loss = outputs[0]
|
||||
loss = outputs.loss
|
||||
loss.backward()
|
||||
optimizer.step()
|
||||
|
||||
@@ -111,7 +111,7 @@ The following is equivalent to the previous example:
|
||||
from torch.nn import functional as F
|
||||
labels = torch.tensor([1,0]).unsqueeze(0)
|
||||
outputs = model(input_ids, attention_mask=attention_mask)
|
||||
loss = F.cross_entropy(labels, outputs[0])
|
||||
loss = F.cross_entropy(labels, outputs.logits)
|
||||
loss.backward()
|
||||
optimizer.step()
|
||||
|
||||
|
||||
Reference in New Issue
Block a user