[Docs] past_key_values return a tuple of tuple as a default (#9381)
* push * make style
This commit is contained in:
committed by
GitHub
parent
5f7a07c0c8
commit
b01f451ca3
@@ -13,7 +13,7 @@
|
|||||||
# limitations under the License.
|
# limitations under the License.
|
||||||
|
|
||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
from typing import List, Optional, Tuple
|
from typing import Optional, Tuple
|
||||||
|
|
||||||
import torch
|
import torch
|
||||||
|
|
||||||
@@ -88,11 +88,14 @@ class BaseModelOutputWithPast(ModelOutput):
|
|||||||
|
|
||||||
If :obj:`past_key_values` is used only the last hidden-state of the sequences of shape :obj:`(batch_size,
|
If :obj:`past_key_values` is used only the last hidden-state of the sequences of shape :obj:`(batch_size,
|
||||||
1, hidden_size)` is output.
|
1, hidden_size)` is output.
|
||||||
past_key_values (:obj:`List[torch.FloatTensor]`, `optional`, returned when ``use_cache=True`` is passed or when ``config.use_cache=True``):
|
past_key_values (:obj:`tuple(tuple(torch.FloatTensor))`, `optional`, returned when ``use_cache=True`` is passed or when ``config.use_cache=True``):
|
||||||
List of :obj:`torch.FloatTensor` of length :obj:`config.n_layers`, with each tensor of shape :obj:`(2,
|
Tuple of :obj:`tuple(torch.FloatTensor)` of length :obj:`config.n_layers`, with each tuple having 2 tensors
|
||||||
batch_size, num_heads, sequence_length, embed_size_per_head)`).
|
of shape :obj:`(batch_size, num_heads, sequence_length, embed_size_per_head)` and optionally if
|
||||||
|
``config.is_encoder_decoder=True`` 2 additional tensors of shape :obj:`(batch_size, num_heads,
|
||||||
|
encoder_sequence_length, embed_size_per_head)`.
|
||||||
|
|
||||||
Contains pre-computed hidden-states (key and values in the attention blocks) that can be used (see
|
Contains pre-computed hidden-states (key and values in the self-attention blocks and optionally if
|
||||||
|
``config.is_encoder_decoder=True`` in the cross-attention blocks) that can be used (see
|
||||||
:obj:`past_key_values` input) to speed up sequential decoding.
|
:obj:`past_key_values` input) to speed up sequential decoding.
|
||||||
hidden_states (:obj:`tuple(torch.FloatTensor)`, `optional`, returned when ``output_hidden_states=True`` is passed or when ``config.output_hidden_states=True``):
|
hidden_states (:obj:`tuple(torch.FloatTensor)`, `optional`, returned when ``output_hidden_states=True`` is passed or when ``config.output_hidden_states=True``):
|
||||||
Tuple of :obj:`torch.FloatTensor` (one for the output of the embeddings + one for the output of each layer)
|
Tuple of :obj:`torch.FloatTensor` (one for the output of the embeddings + one for the output of each layer)
|
||||||
@@ -108,7 +111,7 @@ class BaseModelOutputWithPast(ModelOutput):
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
last_hidden_state: torch.FloatTensor = None
|
last_hidden_state: torch.FloatTensor = None
|
||||||
past_key_values: Optional[List[torch.FloatTensor]] = None
|
past_key_values: Optional[Tuple[Tuple[torch.FloatTensor]]] = None
|
||||||
hidden_states: Optional[Tuple[torch.FloatTensor]] = None
|
hidden_states: Optional[Tuple[torch.FloatTensor]] = None
|
||||||
attentions: Optional[Tuple[torch.FloatTensor]] = None
|
attentions: Optional[Tuple[torch.FloatTensor]] = None
|
||||||
|
|
||||||
@@ -176,11 +179,13 @@ class BaseModelOutputWithPoolingAndCrossAttentions(ModelOutput):
|
|||||||
Attentions weights of the decoder's cross-attention layer, after the attention softmax, used to compute the
|
Attentions weights of the decoder's cross-attention layer, after the attention softmax, used to compute the
|
||||||
weighted average in the cross-attention heads.
|
weighted average in the cross-attention heads.
|
||||||
past_key_values (:obj:`tuple(tuple(torch.FloatTensor))`, `optional`, returned when ``use_cache=True`` is passed or when ``config.use_cache=True``):
|
past_key_values (:obj:`tuple(tuple(torch.FloatTensor))`, `optional`, returned when ``use_cache=True`` is passed or when ``config.use_cache=True``):
|
||||||
Tuple of :obj:`torch.FloatTensor` tuples of length :obj:`config.n_layers`, with each tuple containing the
|
Tuple of :obj:`tuple(torch.FloatTensor)` of length :obj:`config.n_layers`, with each tuple having 2 tensors
|
||||||
cached key, value states of the self-attention and the cross-attention layers if model is used in
|
of shape :obj:`(batch_size, num_heads, sequence_length, embed_size_per_head)` and optionally if
|
||||||
encoder-decoder setting. Only relevant if ``config.is_decoder = True``.
|
``config.is_encoder_decoder=True`` 2 additional tensors of shape :obj:`(batch_size, num_heads,
|
||||||
|
encoder_sequence_length, embed_size_per_head)`.
|
||||||
|
|
||||||
Contains pre-computed hidden-states (key and values in the attention blocks) that can be used (see
|
Contains pre-computed hidden-states (key and values in the self-attention blocks and optionally if
|
||||||
|
``config.is_encoder_decoder=True`` in the cross-attention blocks) that can be used (see
|
||||||
:obj:`past_key_values` input) to speed up sequential decoding.
|
:obj:`past_key_values` input) to speed up sequential decoding.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
@@ -203,11 +208,14 @@ class BaseModelOutputWithPastAndCrossAttentions(ModelOutput):
|
|||||||
|
|
||||||
If :obj:`past_key_values` is used only the last hidden-state of the sequences of shape :obj:`(batch_size,
|
If :obj:`past_key_values` is used only the last hidden-state of the sequences of shape :obj:`(batch_size,
|
||||||
1, hidden_size)` is output.
|
1, hidden_size)` is output.
|
||||||
past_key_values (:obj:`List[torch.FloatTensor]`, `optional`, returned when ``use_cache=True`` is passed or when ``config.use_cache=True``):
|
past_key_values (:obj:`tuple(tuple(torch.FloatTensor))`, `optional`, returned when ``use_cache=True`` is passed or when ``config.use_cache=True``):
|
||||||
List of :obj:`torch.FloatTensor` of length :obj:`config.n_layers`, with each tensor of shape :obj:`(2,
|
Tuple of :obj:`tuple(torch.FloatTensor)` of length :obj:`config.n_layers`, with each tuple having 2 tensors
|
||||||
batch_size, num_heads, sequence_length, embed_size_per_head)`).
|
of shape :obj:`(batch_size, num_heads, sequence_length, embed_size_per_head)` and optionally if
|
||||||
|
``config.is_encoder_decoder=True`` 2 additional tensors of shape :obj:`(batch_size, num_heads,
|
||||||
|
encoder_sequence_length, embed_size_per_head)`.
|
||||||
|
|
||||||
Contains pre-computed hidden-states (key and values in the attention blocks) that can be used (see
|
Contains pre-computed hidden-states (key and values in the self-attention blocks and optionally if
|
||||||
|
``config.is_encoder_decoder=True`` in the cross-attention blocks) that can be used (see
|
||||||
:obj:`past_key_values` input) to speed up sequential decoding.
|
:obj:`past_key_values` input) to speed up sequential decoding.
|
||||||
hidden_states (:obj:`tuple(torch.FloatTensor)`, `optional`, returned when ``output_hidden_states=True`` is passed or when ``config.output_hidden_states=True``):
|
hidden_states (:obj:`tuple(torch.FloatTensor)`, `optional`, returned when ``output_hidden_states=True`` is passed or when ``config.output_hidden_states=True``):
|
||||||
Tuple of :obj:`torch.FloatTensor` (one for the output of the embeddings + one for the output of each layer)
|
Tuple of :obj:`torch.FloatTensor` (one for the output of the embeddings + one for the output of each layer)
|
||||||
@@ -229,7 +237,7 @@ class BaseModelOutputWithPastAndCrossAttentions(ModelOutput):
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
last_hidden_state: torch.FloatTensor = None
|
last_hidden_state: torch.FloatTensor = None
|
||||||
past_key_values: Optional[List[torch.FloatTensor]] = None
|
past_key_values: Optional[Tuple[Tuple[torch.FloatTensor]]] = None
|
||||||
hidden_states: Optional[Tuple[torch.FloatTensor]] = None
|
hidden_states: Optional[Tuple[torch.FloatTensor]] = None
|
||||||
attentions: Optional[Tuple[torch.FloatTensor]] = None
|
attentions: Optional[Tuple[torch.FloatTensor]] = None
|
||||||
cross_attentions: Optional[Tuple[torch.FloatTensor]] = None
|
cross_attentions: Optional[Tuple[torch.FloatTensor]] = None
|
||||||
@@ -247,12 +255,13 @@ class Seq2SeqModelOutput(ModelOutput):
|
|||||||
|
|
||||||
If :obj:`past_key_values` is used only the last hidden-state of the sequences of shape :obj:`(batch_size,
|
If :obj:`past_key_values` is used only the last hidden-state of the sequences of shape :obj:`(batch_size,
|
||||||
1, hidden_size)` is output.
|
1, hidden_size)` is output.
|
||||||
past_key_values (:obj:`List[torch.FloatTensor]`, `optional`, returned when ``use_cache=True`` is passed or when ``config.use_cache=True``):
|
past_key_values (:obj:`tuple(tuple(torch.FloatTensor))`, `optional`, returned when ``use_cache=True`` is passed or when ``config.use_cache=True``):
|
||||||
List of :obj:`torch.FloatTensor` of length :obj:`config.n_layers`, with each tensor of shape :obj:`(2,
|
Tuple of :obj:`tuple(torch.FloatTensor)` of length :obj:`config.n_layers`, with each tuple having 2 tensors
|
||||||
batch_size, num_heads, sequence_length, embed_size_per_head)`).
|
of shape :obj:`(batch_size, num_heads, sequence_length, embed_size_per_head)` and 2 additional tensors of
|
||||||
|
shape :obj:`(batch_size, num_heads, encoder_sequence_length, embed_size_per_head)`.
|
||||||
|
|
||||||
Contains pre-computed hidden-states (key and values in the attention blocks) of the decoder that can be
|
Contains pre-computed hidden-states (key and values in the self-attention blocks and in the cross-attention
|
||||||
used (see :obj:`past_key_values` input) to speed up sequential decoding.
|
blocks) that can be used (see :obj:`past_key_values` input) to speed up sequential decoding.
|
||||||
decoder_hidden_states (:obj:`tuple(torch.FloatTensor)`, `optional`, returned when ``output_hidden_states=True`` is passed or when ``config.output_hidden_states=True``):
|
decoder_hidden_states (:obj:`tuple(torch.FloatTensor)`, `optional`, returned when ``output_hidden_states=True`` is passed or when ``config.output_hidden_states=True``):
|
||||||
Tuple of :obj:`torch.FloatTensor` (one for the output of the embeddings + one for the output of each layer)
|
Tuple of :obj:`torch.FloatTensor` (one for the output of the embeddings + one for the output of each layer)
|
||||||
of shape :obj:`(batch_size, sequence_length, hidden_size)`.
|
of shape :obj:`(batch_size, sequence_length, hidden_size)`.
|
||||||
@@ -286,7 +295,7 @@ class Seq2SeqModelOutput(ModelOutput):
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
last_hidden_state: torch.FloatTensor = None
|
last_hidden_state: torch.FloatTensor = None
|
||||||
past_key_values: Optional[List[torch.FloatTensor]] = None
|
past_key_values: Optional[Tuple[Tuple[torch.FloatTensor]]] = None
|
||||||
decoder_hidden_states: Optional[Tuple[torch.FloatTensor]] = None
|
decoder_hidden_states: Optional[Tuple[torch.FloatTensor]] = None
|
||||||
decoder_attentions: Optional[Tuple[torch.FloatTensor]] = None
|
decoder_attentions: Optional[Tuple[torch.FloatTensor]] = None
|
||||||
cross_attentions: Optional[Tuple[torch.FloatTensor]] = None
|
cross_attentions: Optional[Tuple[torch.FloatTensor]] = None
|
||||||
@@ -334,11 +343,11 @@ class CausalLMOutputWithPast(ModelOutput):
|
|||||||
Language modeling loss (for next-token prediction).
|
Language modeling loss (for next-token prediction).
|
||||||
logits (:obj:`torch.FloatTensor` of shape :obj:`(batch_size, sequence_length, config.vocab_size)`):
|
logits (:obj:`torch.FloatTensor` of shape :obj:`(batch_size, sequence_length, config.vocab_size)`):
|
||||||
Prediction scores of the language modeling head (scores for each vocabulary token before SoftMax).
|
Prediction scores of the language modeling head (scores for each vocabulary token before SoftMax).
|
||||||
past_key_values (:obj:`List[torch.FloatTensor]`, `optional`, returned when ``use_cache=True`` is passed or when ``config.use_cache=True``):
|
past_key_values (:obj:`tuple(tuple(torch.FloatTensor))`, `optional`, returned when ``use_cache=True`` is passed or when ``config.use_cache=True``):
|
||||||
List of :obj:`torch.FloatTensor` of length :obj:`config.n_layers`, with each tensor of shape :obj:`(2,
|
Tuple of :obj:`tuple(torch.FloatTensor)` of length :obj:`config.n_layers`, with each tuple having 2 tensors
|
||||||
batch_size, num_heads, sequence_length, embed_size_per_head)`).
|
of shape :obj:`(batch_size, num_heads, sequence_length, embed_size_per_head)`.
|
||||||
|
|
||||||
Contains pre-computed hidden-states (key and values in the attention blocks) that can be used (see
|
Contains pre-computed hidden-states (key and values in the self-attention blocks) that can be used (see
|
||||||
:obj:`past_key_values` input) to speed up sequential decoding.
|
:obj:`past_key_values` input) to speed up sequential decoding.
|
||||||
hidden_states (:obj:`tuple(torch.FloatTensor)`, `optional`, returned when ``output_hidden_states=True`` is passed or when ``config.output_hidden_states=True``):
|
hidden_states (:obj:`tuple(torch.FloatTensor)`, `optional`, returned when ``output_hidden_states=True`` is passed or when ``config.output_hidden_states=True``):
|
||||||
Tuple of :obj:`torch.FloatTensor` (one for the output of the embeddings + one for the output of each layer)
|
Tuple of :obj:`torch.FloatTensor` (one for the output of the embeddings + one for the output of each layer)
|
||||||
@@ -355,7 +364,7 @@ class CausalLMOutputWithPast(ModelOutput):
|
|||||||
|
|
||||||
loss: Optional[torch.FloatTensor] = None
|
loss: Optional[torch.FloatTensor] = None
|
||||||
logits: torch.FloatTensor = None
|
logits: torch.FloatTensor = None
|
||||||
past_key_values: Optional[List[torch.FloatTensor]] = None
|
past_key_values: Optional[Tuple[Tuple[torch.FloatTensor]]] = None
|
||||||
hidden_states: Optional[Tuple[torch.FloatTensor]] = None
|
hidden_states: Optional[Tuple[torch.FloatTensor]] = None
|
||||||
attentions: Optional[Tuple[torch.FloatTensor]] = None
|
attentions: Optional[Tuple[torch.FloatTensor]] = None
|
||||||
|
|
||||||
@@ -414,12 +423,12 @@ class SequenceClassifierOutputWithPast(ModelOutput):
|
|||||||
Classification (or regression if config.num_labels==1) loss.
|
Classification (or regression if config.num_labels==1) loss.
|
||||||
logits (:obj:`torch.FloatTensor` of shape :obj:`(batch_size, config.num_labels)`):
|
logits (:obj:`torch.FloatTensor` of shape :obj:`(batch_size, config.num_labels)`):
|
||||||
Classification (or regression if config.num_labels==1) scores (before SoftMax).
|
Classification (or regression if config.num_labels==1) scores (before SoftMax).
|
||||||
past_key_values (:obj:`List[torch.FloatTensor]`, `optional`, returned when ``use_cache=True`` is passed or when ``config.use_cache=True``):
|
past_key_values (:obj:`tuple(tuple(torch.FloatTensor))`, `optional`, returned when ``use_cache=True`` is passed or when ``config.use_cache=True``):
|
||||||
List of :obj:`torch.FloatTensor` of length :obj:`config.n_layers`, with each tensor of shape :obj:`(2,
|
Tuple of :obj:`tuple(torch.FloatTensor)` of length :obj:`config.n_layers`, with each tuple having 2 tensors
|
||||||
batch_size, num_heads, sequence_length, embed_size_per_head)`).
|
of shape :obj:`(batch_size, num_heads, sequence_length, embed_size_per_head)`.
|
||||||
|
|
||||||
Contains pre-computed hidden-states (key and values in the attention blocks) that can be used (see
|
Contains pre-computed hidden-states (key and values in the self-attention blocks) that can be used (see
|
||||||
``past_key_values`` input) to speed up sequential decoding.
|
:obj:`past_key_values` input) to speed up sequential decoding.
|
||||||
hidden_states (:obj:`tuple(torch.FloatTensor)`, `optional`, returned when ``output_hidden_states=True`` is passed or when ``config.output_hidden_states=True``):
|
hidden_states (:obj:`tuple(torch.FloatTensor)`, `optional`, returned when ``output_hidden_states=True`` is passed or when ``config.output_hidden_states=True``):
|
||||||
Tuple of :obj:`torch.FloatTensor` (one for the output of the embeddings + one for the output of each layer)
|
Tuple of :obj:`torch.FloatTensor` (one for the output of the embeddings + one for the output of each layer)
|
||||||
of shape :obj:`(batch_size, sequence_length, hidden_size)`.
|
of shape :obj:`(batch_size, sequence_length, hidden_size)`.
|
||||||
@@ -435,7 +444,7 @@ class SequenceClassifierOutputWithPast(ModelOutput):
|
|||||||
|
|
||||||
loss: Optional[torch.FloatTensor] = None
|
loss: Optional[torch.FloatTensor] = None
|
||||||
logits: torch.FloatTensor = None
|
logits: torch.FloatTensor = None
|
||||||
past_key_values: Optional[List[torch.FloatTensor]] = None
|
past_key_values: Optional[Tuple[Tuple[torch.FloatTensor]]] = None
|
||||||
hidden_states: Optional[Tuple[torch.FloatTensor]] = None
|
hidden_states: Optional[Tuple[torch.FloatTensor]] = None
|
||||||
attentions: Optional[Tuple[torch.FloatTensor]] = None
|
attentions: Optional[Tuple[torch.FloatTensor]] = None
|
||||||
|
|
||||||
@@ -479,12 +488,13 @@ class Seq2SeqLMOutput(ModelOutput):
|
|||||||
Language modeling loss.
|
Language modeling loss.
|
||||||
logits (:obj:`torch.FloatTensor` of shape :obj:`(batch_size, sequence_length, config.vocab_size)`):
|
logits (:obj:`torch.FloatTensor` of shape :obj:`(batch_size, sequence_length, config.vocab_size)`):
|
||||||
Prediction scores of the language modeling head (scores for each vocabulary token before SoftMax).
|
Prediction scores of the language modeling head (scores for each vocabulary token before SoftMax).
|
||||||
past_key_values (:obj:`List[torch.FloatTensor]`, `optional`, returned when ``use_cache=True`` is passed or when ``config.use_cache=True``):
|
past_key_values (:obj:`tuple(tuple(torch.FloatTensor))`, `optional`, returned when ``use_cache=True`` is passed or when ``config.use_cache=True``):
|
||||||
List of :obj:`torch.FloatTensor` of length :obj:`config.n_layers`, with each tensor of shape :obj:`(2,
|
Tuple of :obj:`tuple(torch.FloatTensor)` of length :obj:`config.n_layers`, with each tuple having 2 tensors
|
||||||
batch_size, num_heads, sequence_length, embed_size_per_head)`).
|
of shape :obj:`(batch_size, num_heads, sequence_length, embed_size_per_head)` and 2 additional tensors of
|
||||||
|
shape :obj:`(batch_size, num_heads, encoder_sequence_length, embed_size_per_head)`.
|
||||||
|
|
||||||
Contains pre-computed hidden-states (key and values in the attention blocks) of the decoder that can be
|
Contains pre-computed hidden-states (key and values in the self-attention blocks and in the cross-attention
|
||||||
used (see :obj:`past_key_values` input) to speed up sequential decoding.
|
blocks) that can be used (see :obj:`past_key_values` input) to speed up sequential decoding.
|
||||||
decoder_hidden_states (:obj:`tuple(torch.FloatTensor)`, `optional`, returned when ``output_hidden_states=True`` is passed or when ``config.output_hidden_states=True``):
|
decoder_hidden_states (:obj:`tuple(torch.FloatTensor)`, `optional`, returned when ``output_hidden_states=True`` is passed or when ``config.output_hidden_states=True``):
|
||||||
Tuple of :obj:`torch.FloatTensor` (one for the output of the embeddings + one for the output of each layer)
|
Tuple of :obj:`torch.FloatTensor` (one for the output of the embeddings + one for the output of each layer)
|
||||||
of shape :obj:`(batch_size, sequence_length, hidden_size)`.
|
of shape :obj:`(batch_size, sequence_length, hidden_size)`.
|
||||||
@@ -519,7 +529,7 @@ class Seq2SeqLMOutput(ModelOutput):
|
|||||||
|
|
||||||
loss: Optional[torch.FloatTensor] = None
|
loss: Optional[torch.FloatTensor] = None
|
||||||
logits: torch.FloatTensor = None
|
logits: torch.FloatTensor = None
|
||||||
past_key_values: Optional[List[torch.FloatTensor]] = None
|
past_key_values: Optional[Tuple[Tuple[torch.FloatTensor]]] = None
|
||||||
decoder_hidden_states: Optional[Tuple[torch.FloatTensor]] = None
|
decoder_hidden_states: Optional[Tuple[torch.FloatTensor]] = None
|
||||||
decoder_attentions: Optional[Tuple[torch.FloatTensor]] = None
|
decoder_attentions: Optional[Tuple[torch.FloatTensor]] = None
|
||||||
cross_attentions: Optional[Tuple[torch.FloatTensor]] = None
|
cross_attentions: Optional[Tuple[torch.FloatTensor]] = None
|
||||||
@@ -597,12 +607,13 @@ class Seq2SeqSequenceClassifierOutput(ModelOutput):
|
|||||||
Classification (or regression if config.num_labels==1) loss.
|
Classification (or regression if config.num_labels==1) loss.
|
||||||
logits (:obj:`torch.FloatTensor` of shape :obj:`(batch_size, config.num_labels)`):
|
logits (:obj:`torch.FloatTensor` of shape :obj:`(batch_size, config.num_labels)`):
|
||||||
Classification (or regression if config.num_labels==1) scores (before SoftMax).
|
Classification (or regression if config.num_labels==1) scores (before SoftMax).
|
||||||
past_key_values (:obj:`List[torch.FloatTensor]`, `optional`, returned when ``use_cache=True`` is passed or when ``config.use_cache=True``):
|
past_key_values (:obj:`tuple(tuple(torch.FloatTensor))`, `optional`, returned when ``use_cache=True`` is passed or when ``config.use_cache=True``):
|
||||||
List of :obj:`torch.FloatTensor` of length :obj:`config.n_layers`, with each tensor of shape :obj:`(2,
|
Tuple of :obj:`tuple(torch.FloatTensor)` of length :obj:`config.n_layers`, with each tuple having 2 tensors
|
||||||
batch_size, num_heads, sequence_length, embed_size_per_head)`).
|
of shape :obj:`(batch_size, num_heads, sequence_length, embed_size_per_head)` and 2 additional tensors of
|
||||||
|
shape :obj:`(batch_size, num_heads, encoder_sequence_length, embed_size_per_head)`.
|
||||||
|
|
||||||
Contains pre-computed hidden-states (key and values in the attention blocks) of the decoder that can be
|
Contains pre-computed hidden-states (key and values in the self-attention blocks and in the cross-attention
|
||||||
used (see :obj:`past_key_values` input) to speed up sequential decoding.
|
blocks) that can be used (see :obj:`past_key_values` input) to speed up sequential decoding.
|
||||||
decoder_hidden_states (:obj:`tuple(torch.FloatTensor)`, `optional`, returned when ``output_hidden_states=True`` is passed or when ``config.output_hidden_states=True``):
|
decoder_hidden_states (:obj:`tuple(torch.FloatTensor)`, `optional`, returned when ``output_hidden_states=True`` is passed or when ``config.output_hidden_states=True``):
|
||||||
Tuple of :obj:`torch.FloatTensor` (one for the output of the embeddings + one for the output of each layer)
|
Tuple of :obj:`torch.FloatTensor` (one for the output of the embeddings + one for the output of each layer)
|
||||||
of shape :obj:`(batch_size, sequence_length, hidden_size)`.
|
of shape :obj:`(batch_size, sequence_length, hidden_size)`.
|
||||||
@@ -637,7 +648,7 @@ class Seq2SeqSequenceClassifierOutput(ModelOutput):
|
|||||||
|
|
||||||
loss: Optional[torch.FloatTensor] = None
|
loss: Optional[torch.FloatTensor] = None
|
||||||
logits: torch.FloatTensor = None
|
logits: torch.FloatTensor = None
|
||||||
past_key_values: Optional[List[torch.FloatTensor]] = None
|
past_key_values: Optional[Tuple[Tuple[torch.FloatTensor]]] = None
|
||||||
decoder_hidden_states: Optional[Tuple[torch.FloatTensor]] = None
|
decoder_hidden_states: Optional[Tuple[torch.FloatTensor]] = None
|
||||||
decoder_attentions: Optional[Tuple[torch.FloatTensor]] = None
|
decoder_attentions: Optional[Tuple[torch.FloatTensor]] = None
|
||||||
cross_attentions: Optional[Tuple[torch.FloatTensor]] = None
|
cross_attentions: Optional[Tuple[torch.FloatTensor]] = None
|
||||||
@@ -750,12 +761,13 @@ class Seq2SeqQuestionAnsweringModelOutput(ModelOutput):
|
|||||||
Span-start scores (before SoftMax).
|
Span-start scores (before SoftMax).
|
||||||
end_logits (:obj:`torch.FloatTensor` of shape :obj:`(batch_size, sequence_length)`):
|
end_logits (:obj:`torch.FloatTensor` of shape :obj:`(batch_size, sequence_length)`):
|
||||||
Span-end scores (before SoftMax).
|
Span-end scores (before SoftMax).
|
||||||
past_key_values (:obj:`List[torch.FloatTensor]`, `optional`, returned when ``use_cache=True`` is passed or when ``config.use_cache=True``):
|
past_key_values (:obj:`tuple(tuple(torch.FloatTensor))`, `optional`, returned when ``use_cache=True`` is passed or when ``config.use_cache=True``):
|
||||||
List of :obj:`torch.FloatTensor` of length :obj:`config.n_layers`, with each tensor of shape :obj:`(2,
|
Tuple of :obj:`tuple(torch.FloatTensor)` of length :obj:`config.n_layers`, with each tuple having 2 tensors
|
||||||
batch_size, num_heads, sequence_length, embed_size_per_head)`).
|
of shape :obj:`(batch_size, num_heads, sequence_length, embed_size_per_head)` and 2 additional tensors of
|
||||||
|
shape :obj:`(batch_size, num_heads, encoder_sequence_length, embed_size_per_head)`.
|
||||||
|
|
||||||
Contains pre-computed hidden-states (key and values in the attention blocks) of the decoder that can be
|
Contains pre-computed hidden-states (key and values in the self-attention blocks and in the cross-attention
|
||||||
used (see :obj:`past_key_values` input) to speed up sequential decoding.
|
blocks) that can be used (see :obj:`past_key_values` input) to speed up sequential decoding.
|
||||||
decoder_hidden_states (:obj:`tuple(torch.FloatTensor)`, `optional`, returned when ``output_hidden_states=True`` is passed or when ``config.output_hidden_states=True``):
|
decoder_hidden_states (:obj:`tuple(torch.FloatTensor)`, `optional`, returned when ``output_hidden_states=True`` is passed or when ``config.output_hidden_states=True``):
|
||||||
Tuple of :obj:`torch.FloatTensor` (one for the output of the embeddings + one for the output of each layer)
|
Tuple of :obj:`torch.FloatTensor` (one for the output of the embeddings + one for the output of each layer)
|
||||||
of shape :obj:`(batch_size, sequence_length, hidden_size)`.
|
of shape :obj:`(batch_size, sequence_length, hidden_size)`.
|
||||||
@@ -791,7 +803,7 @@ class Seq2SeqQuestionAnsweringModelOutput(ModelOutput):
|
|||||||
loss: Optional[torch.FloatTensor] = None
|
loss: Optional[torch.FloatTensor] = None
|
||||||
start_logits: torch.FloatTensor = None
|
start_logits: torch.FloatTensor = None
|
||||||
end_logits: torch.FloatTensor = None
|
end_logits: torch.FloatTensor = None
|
||||||
past_key_values: Optional[List[torch.FloatTensor]] = None
|
past_key_values: Optional[Tuple[Tuple[torch.FloatTensor]]] = None
|
||||||
decoder_hidden_states: Optional[Tuple[torch.FloatTensor]] = None
|
decoder_hidden_states: Optional[Tuple[torch.FloatTensor]] = None
|
||||||
decoder_attentions: Optional[Tuple[torch.FloatTensor]] = None
|
decoder_attentions: Optional[Tuple[torch.FloatTensor]] = None
|
||||||
cross_attentions: Optional[Tuple[torch.FloatTensor]] = None
|
cross_attentions: Optional[Tuple[torch.FloatTensor]] = None
|
||||||
|
|||||||
Reference in New Issue
Block a user