Generate: consolidate output classes (#28494)
This commit is contained in:
@@ -45,7 +45,7 @@ inputs = tokenizer("Hello, my dog is cute and ", return_tensors="pt")
|
|||||||
generation_output = model.generate(**inputs, return_dict_in_generate=True, output_scores=True)
|
generation_output = model.generate(**inputs, return_dict_in_generate=True, output_scores=True)
|
||||||
```
|
```
|
||||||
|
|
||||||
The `generation_output` object is a [`~generation.GreedySearchDecoderOnlyOutput`], as we can
|
The `generation_output` object is a [`~generation.GenerateDecoderOnlyOutput`], as we can
|
||||||
see in the documentation of that class below, it means it has the following attributes:
|
see in the documentation of that class below, it means it has the following attributes:
|
||||||
|
|
||||||
- `sequences`: the generated sequences of tokens
|
- `sequences`: the generated sequences of tokens
|
||||||
@@ -77,25 +77,13 @@ We document here all output types.
|
|||||||
|
|
||||||
### PyTorch
|
### PyTorch
|
||||||
|
|
||||||
[[autodoc]] generation.GreedySearchEncoderDecoderOutput
|
[[autodoc]] generation.GenerateDecoderOnlyOutput
|
||||||
|
|
||||||
[[autodoc]] generation.GreedySearchDecoderOnlyOutput
|
[[autodoc]] generation.GenerateEncoderDecoderOutput
|
||||||
|
|
||||||
[[autodoc]] generation.SampleEncoderDecoderOutput
|
[[autodoc]] generation.GenerateBeamDecoderOnlyOutput
|
||||||
|
|
||||||
[[autodoc]] generation.SampleDecoderOnlyOutput
|
[[autodoc]] generation.GenerateBeamEncoderDecoderOutput
|
||||||
|
|
||||||
[[autodoc]] generation.BeamSearchEncoderDecoderOutput
|
|
||||||
|
|
||||||
[[autodoc]] generation.BeamSearchDecoderOnlyOutput
|
|
||||||
|
|
||||||
[[autodoc]] generation.BeamSampleEncoderDecoderOutput
|
|
||||||
|
|
||||||
[[autodoc]] generation.BeamSampleDecoderOnlyOutput
|
|
||||||
|
|
||||||
[[autodoc]] generation.ContrastiveSearchEncoderDecoderOutput
|
|
||||||
|
|
||||||
[[autodoc]] generation.ContrastiveSearchDecoderOnlyOutput
|
|
||||||
|
|
||||||
### TensorFlow
|
### TensorFlow
|
||||||
|
|
||||||
|
|||||||
@@ -45,7 +45,7 @@ inputs = tokenizer("Hello, my dog is cute and ", return_tensors="pt")
|
|||||||
generation_output = model.generate(**inputs, return_dict_in_generate=True, output_scores=True)
|
generation_output = model.generate(**inputs, return_dict_in_generate=True, output_scores=True)
|
||||||
```
|
```
|
||||||
|
|
||||||
`generation_output` オブジェクトは、できる限り [`~generation.GreedySearchDecoderOnlyOutput`] です。
|
`generation_output` オブジェクトは、できる限り [`~generation.GenerateDecoderOnlyOutput`] です。
|
||||||
以下のそのクラスのドキュメントを参照してください。これは、次の属性があることを意味します。
|
以下のそのクラスのドキュメントを参照してください。これは、次の属性があることを意味します。
|
||||||
|
|
||||||
- `sequences`: 生成されたトークンのシーケンス
|
- `sequences`: 生成されたトークンのシーケンス
|
||||||
@@ -76,25 +76,13 @@ generation_output[:2]
|
|||||||
|
|
||||||
### PyTorch
|
### PyTorch
|
||||||
|
|
||||||
[[autodoc]] generation.GreedySearchEncoderDecoderOutput
|
[[autodoc]] generation.GenerateDecoderOnlyOutput
|
||||||
|
|
||||||
[[autodoc]] generation.GreedySearchDecoderOnlyOutput
|
[[autodoc]] generation.GenerateEncoderDecoderOutput
|
||||||
|
|
||||||
[[autodoc]] generation.SampleEncoderDecoderOutput
|
[[autodoc]] generation.GenerateBeamDecoderOnlyOutput
|
||||||
|
|
||||||
[[autodoc]] generation.SampleDecoderOnlyOutput
|
[[autodoc]] generation.GenerateBeamEncoderDecoderOutput
|
||||||
|
|
||||||
[[autodoc]] generation.BeamSearchEncoderDecoderOutput
|
|
||||||
|
|
||||||
[[autodoc]] generation.BeamSearchDecoderOnlyOutput
|
|
||||||
|
|
||||||
[[autodoc]] generation.BeamSampleEncoderDecoderOutput
|
|
||||||
|
|
||||||
[[autodoc]] generation.BeamSampleDecoderOnlyOutput
|
|
||||||
|
|
||||||
[[autodoc]] generation.ContrastiveSearchEncoderDecoderOutput
|
|
||||||
|
|
||||||
[[autodoc]] generation.ContrastiveSearchDecoderOnlyOutput
|
|
||||||
|
|
||||||
### TensorFlow
|
### TensorFlow
|
||||||
|
|
||||||
|
|||||||
@@ -43,7 +43,7 @@ inputs = tokenizer("Hello, my dog is cute and ", return_tensors="pt")
|
|||||||
generation_output = model.generate(**inputs, return_dict_in_generate=True, output_scores=True)
|
generation_output = model.generate(**inputs, return_dict_in_generate=True, output_scores=True)
|
||||||
```
|
```
|
||||||
|
|
||||||
`generation_output` 的对象是 [`~generation.GreedySearchDecoderOnlyOutput`] 的一个实例,从该类的文档中我们可以看到,这意味着它具有以下属性:
|
`generation_output` 的对象是 [`~generation.GenerateDecoderOnlyOutput`] 的一个实例,从该类的文档中我们可以看到,这意味着它具有以下属性:
|
||||||
|
|
||||||
- `sequences`: 生成的tokens序列
|
- `sequences`: 生成的tokens序列
|
||||||
- `scores`(可选): 每个生成步骤的语言建模头的预测分数
|
- `scores`(可选): 每个生成步骤的语言建模头的预测分数
|
||||||
@@ -70,25 +70,13 @@ generation_output[:2]
|
|||||||
|
|
||||||
### PyTorch
|
### PyTorch
|
||||||
|
|
||||||
[[autodoc]] generation.GreedySearchEncoderDecoderOutput
|
[[autodoc]] generation.GenerateDecoderOnlyOutput
|
||||||
|
|
||||||
[[autodoc]] generation.GreedySearchDecoderOnlyOutput
|
[[autodoc]] generation.GenerateEncoderDecoderOutput
|
||||||
|
|
||||||
[[autodoc]] generation.SampleEncoderDecoderOutput
|
[[autodoc]] generation.GenerateBeamDecoderOnlyOutput
|
||||||
|
|
||||||
[[autodoc]] generation.SampleDecoderOnlyOutput
|
[[autodoc]] generation.GenerateBeamEncoderDecoderOutput
|
||||||
|
|
||||||
[[autodoc]] generation.BeamSearchEncoderDecoderOutput
|
|
||||||
|
|
||||||
[[autodoc]] generation.BeamSearchDecoderOnlyOutput
|
|
||||||
|
|
||||||
[[autodoc]] generation.BeamSampleEncoderDecoderOutput
|
|
||||||
|
|
||||||
[[autodoc]] generation.BeamSampleDecoderOnlyOutput
|
|
||||||
|
|
||||||
[[autodoc]] generation.ContrastiveSearchEncoderDecoderOutput
|
|
||||||
|
|
||||||
[[autodoc]] generation.ContrastiveSearchDecoderOnlyOutput
|
|
||||||
|
|
||||||
### TensorFlow
|
### TensorFlow
|
||||||
|
|
||||||
|
|||||||
@@ -94,6 +94,10 @@ else:
|
|||||||
"BeamSampleDecoderOnlyOutput",
|
"BeamSampleDecoderOnlyOutput",
|
||||||
"ContrastiveSearchEncoderDecoderOutput",
|
"ContrastiveSearchEncoderDecoderOutput",
|
||||||
"ContrastiveSearchDecoderOnlyOutput",
|
"ContrastiveSearchDecoderOnlyOutput",
|
||||||
|
"GenerateBeamDecoderOnlyOutput",
|
||||||
|
"GenerateBeamEncoderDecoderOutput",
|
||||||
|
"GenerateDecoderOnlyOutput",
|
||||||
|
"GenerateEncoderDecoderOutput",
|
||||||
]
|
]
|
||||||
|
|
||||||
try:
|
try:
|
||||||
@@ -222,6 +226,10 @@ if TYPE_CHECKING:
|
|||||||
BeamSearchEncoderDecoderOutput,
|
BeamSearchEncoderDecoderOutput,
|
||||||
ContrastiveSearchDecoderOnlyOutput,
|
ContrastiveSearchDecoderOnlyOutput,
|
||||||
ContrastiveSearchEncoderDecoderOutput,
|
ContrastiveSearchEncoderDecoderOutput,
|
||||||
|
GenerateBeamDecoderOnlyOutput,
|
||||||
|
GenerateBeamEncoderDecoderOutput,
|
||||||
|
GenerateDecoderOnlyOutput,
|
||||||
|
GenerateEncoderDecoderOutput,
|
||||||
GenerationMixin,
|
GenerationMixin,
|
||||||
GreedySearchDecoderOnlyOutput,
|
GreedySearchDecoderOnlyOutput,
|
||||||
GreedySearchEncoderDecoderOutput,
|
GreedySearchEncoderDecoderOutput,
|
||||||
|
|||||||
@@ -94,10 +94,9 @@ if is_accelerate_available():
|
|||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
class GreedySearchDecoderOnlyOutput(ModelOutput):
|
class GenerateDecoderOnlyOutput(ModelOutput):
|
||||||
"""
|
"""
|
||||||
Base class for outputs of decoder-only generation models using greedy search.
|
Outputs of decoder-only generation models, when using non-beam methods.
|
||||||
|
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
sequences (`torch.LongTensor` of shape `(batch_size, sequence_length)`):
|
sequences (`torch.LongTensor` of shape `(batch_size, sequence_length)`):
|
||||||
@@ -130,9 +129,9 @@ class GreedySearchDecoderOnlyOutput(ModelOutput):
|
|||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
class ContrastiveSearchEncoderDecoderOutput(ModelOutput):
|
class GenerateEncoderDecoderOutput(ModelOutput):
|
||||||
"""
|
"""
|
||||||
Base class for outputs of encoder-decoder generation models using contrastive search.
|
Outputs of encoder-decoder generation models, when using non-beam methods.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
sequences (`torch.LongTensor` of shape `(batch_size, sequence_length)`):
|
sequences (`torch.LongTensor` of shape `(batch_size, sequence_length)`):
|
||||||
@@ -177,184 +176,9 @@ class ContrastiveSearchEncoderDecoderOutput(ModelOutput):
|
|||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
class ContrastiveSearchDecoderOnlyOutput(ModelOutput):
|
class GenerateBeamDecoderOnlyOutput(ModelOutput):
|
||||||
"""
|
"""
|
||||||
Base class for outputs of decoder-only generation models using contrastive search.
|
Outputs of decoder-only generation models, when using beam methods.
|
||||||
|
|
||||||
Args:
|
|
||||||
sequences (`torch.LongTensor` of shape `(batch_size, sequence_length)`):
|
|
||||||
The generated sequences. The second dimension (sequence_length) is either equal to `max_length` or shorter
|
|
||||||
if all batches finished early due to the `eos_token_id`.
|
|
||||||
scores (`tuple(torch.FloatTensor)` *optional*, returned when `output_scores=True` is passed or when
|
|
||||||
`config.output_scores=True`):
|
|
||||||
Processed prediction scores of the language modeling head (scores for each vocabulary token before SoftMax)
|
|
||||||
at each generation step. Tuple of `torch.FloatTensor` with up to `max_new_tokens` elements (one element for
|
|
||||||
each generated token), with each tensor of shape `(batch_size, config.vocab_size)`.
|
|
||||||
attentions (`tuple(tuple(torch.FloatTensor))`, *optional*, returned when `output_attentions=True` is passed or `config.output_attentions=True`):
|
|
||||||
Tuple (one element for each generated token) of tuples (one element for each layer of the decoder) of
|
|
||||||
`torch.FloatTensor` of shape `(batch_size, num_heads, generated_length, sequence_length)`.
|
|
||||||
hidden_states (`tuple(tuple(torch.FloatTensor))`, *optional*, returned when `output_hidden_states=True` is
|
|
||||||
passed or when `config.output_hidden_states=True`): Tuple (one element for each generated token) of tuples
|
|
||||||
(one element for each layer of the decoder) of `torch.FloatTensor` of shape `(batch_size, generated_length,
|
|
||||||
hidden_size)`.
|
|
||||||
past_key_values (`tuple(tuple(torch.FloatTensor))`, *optional*, returned when `use_cache=True` is passed or when `config.use_cache=True`):
|
|
||||||
NOTE: some models have a different `past_key_values` format, confirm with the model's documentation.
|
|
||||||
Usually a Tuple (one element for each layer of the decoder) of tuples (two elements, key tensor and value
|
|
||||||
tensor). The first Tuple is of length `config.n_layers`, with each tuple having 2 tensors of shape
|
|
||||||
`(batch_size, num_heads, sequence_length, embed_size_per_head)`) and optionally if
|
|
||||||
`config.is_encoder_decoder=True` 2 additional tensors of shape `(batch_size, num_heads,
|
|
||||||
encoder_sequence_length, embed_size_per_head)`.
|
|
||||||
"""
|
|
||||||
|
|
||||||
sequences: torch.LongTensor = None
|
|
||||||
scores: Optional[Tuple[torch.FloatTensor]] = None
|
|
||||||
attentions: Optional[Tuple[Tuple[torch.FloatTensor]]] = None
|
|
||||||
hidden_states: Optional[Tuple[Tuple[torch.FloatTensor]]] = None
|
|
||||||
past_key_values: Optional[Tuple[Tuple[Tuple[torch.FloatTensor]]]] = None
|
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
|
||||||
class GreedySearchEncoderDecoderOutput(ModelOutput):
|
|
||||||
"""
|
|
||||||
Base class for outputs of encoder-decoder generation models using greedy search. Hidden states and attention
|
|
||||||
weights of the decoder (respectively the encoder) can be accessed via the encoder_attentions and the
|
|
||||||
encoder_hidden_states attributes (respectively the decoder_attentions and the decoder_hidden_states attributes)
|
|
||||||
|
|
||||||
|
|
||||||
Args:
|
|
||||||
sequences (`torch.LongTensor` of shape `(batch_size, sequence_length)`):
|
|
||||||
The generated sequences. The second dimension (sequence_length) is either equal to `max_length` or shorter
|
|
||||||
if all batches finished early due to the `eos_token_id`.
|
|
||||||
scores (`tuple(torch.FloatTensor)` *optional*, returned when `output_scores=True` is passed or when `config.output_scores=True`):
|
|
||||||
Processed prediction scores of the language modeling head (scores for each vocabulary token before SoftMax)
|
|
||||||
at each generation step. Tuple of `torch.FloatTensor` with up to `max_new_tokens` elements (one element for
|
|
||||||
each generated token), with each tensor of shape `(batch_size, config.vocab_size)`.
|
|
||||||
encoder_attentions (`tuple(torch.FloatTensor)`, *optional*, returned when `output_attentions=True` is passed or `config.output_attentions=True`):
|
|
||||||
Tuple of `torch.FloatTensor` (one for each layer of the decoder) of shape `(batch_size, num_heads,
|
|
||||||
sequence_length, sequence_length)`.
|
|
||||||
encoder_hidden_states (`tuple(torch.FloatTensor)`, *optional*, returned when `output_hidden_states=True` is passed or when `config.output_hidden_states=True`):
|
|
||||||
Tuple of `torch.FloatTensor` (one for the output of the embeddings + one for the output of each layer) of
|
|
||||||
shape `(batch_size, sequence_length, hidden_size)`.
|
|
||||||
decoder_attentions (`tuple(tuple(torch.FloatTensor))`, *optional*, returned when `output_attentions=True` is passed or `config.output_attentions=True`):
|
|
||||||
Tuple (one element for each generated token) of tuples (one element for each layer of the decoder) of
|
|
||||||
`torch.FloatTensor` of shape `(batch_size, num_heads, generated_length, sequence_length)`.
|
|
||||||
cross_attentions (`tuple(tuple(torch.FloatTensor))`, *optional*, returned when `output_attentions=True` is passed or `config.output_attentions=True`):
|
|
||||||
Tuple (one element for each generated token) of tuples (one element for each layer of the decoder) of
|
|
||||||
`torch.FloatTensor` of shape `(batch_size, num_heads, generated_length, sequence_length)`.
|
|
||||||
decoder_hidden_states (`tuple(tuple(torch.FloatTensor))`, *optional*, returned when `output_hidden_states=True` is passed or when `config.output_hidden_states=True`):
|
|
||||||
Tuple (one element for each generated token) of tuples (one element for each layer of the decoder) of
|
|
||||||
`torch.FloatTensor` of shape `(batch_size, generated_length, hidden_size)`.
|
|
||||||
past_key_values (`tuple(tuple(torch.FloatTensor))`, *optional*, returned when `use_cache=True` is passed or when `config.use_cache=True`):
|
|
||||||
NOTE: some models have a different `past_key_values` format, confirm with the model's documentation.
|
|
||||||
Usually a Tuple (one element for each layer of the decoder) of tuples (two elements, key tensor and value
|
|
||||||
tensor). The first Tuple is of length `config.n_layers`, with each tuple having 2 tensors of shape
|
|
||||||
`(batch_size, num_heads, sequence_length, embed_size_per_head)`) and optionally if
|
|
||||||
`config.is_encoder_decoder=True` 2 additional tensors of shape `(batch_size, num_heads,
|
|
||||||
encoder_sequence_length, embed_size_per_head)`.
|
|
||||||
"""
|
|
||||||
|
|
||||||
sequences: torch.LongTensor = None
|
|
||||||
scores: Optional[Tuple[torch.FloatTensor]] = None
|
|
||||||
encoder_attentions: Optional[Tuple[torch.FloatTensor]] = None
|
|
||||||
encoder_hidden_states: Optional[Tuple[torch.FloatTensor]] = None
|
|
||||||
decoder_attentions: Optional[Tuple[Tuple[torch.FloatTensor]]] = None
|
|
||||||
cross_attentions: Optional[Tuple[Tuple[torch.FloatTensor]]] = None
|
|
||||||
decoder_hidden_states: Optional[Tuple[Tuple[torch.FloatTensor]]] = None
|
|
||||||
past_key_values: Optional[Tuple[Tuple[Tuple[torch.FloatTensor]]]] = None
|
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
|
||||||
class SampleDecoderOnlyOutput(ModelOutput):
|
|
||||||
"""
|
|
||||||
Base class for outputs of decoder-only generation models using sampling.
|
|
||||||
|
|
||||||
|
|
||||||
Args:
|
|
||||||
sequences (`torch.LongTensor` of shape `(batch_size*num_return_sequences, sequence_length)`):
|
|
||||||
The generated sequences. The second dimension (sequence_length) is either equal to `max_length` or shorter
|
|
||||||
if all batches finished early due to the `eos_token_id`.
|
|
||||||
scores (`tuple(torch.FloatTensor)` *optional*, returned when `output_scores=True` is passed or when `config.output_scores=True`):
|
|
||||||
Processed prediction scores of the language modeling head (scores for each vocabulary token before SoftMax)
|
|
||||||
at each generation step. Tuple of `torch.FloatTensor` with up to `max_new_tokens` elements (one element for
|
|
||||||
each generated token), with each tensor of shape `(batch_size*num_return_sequences, config.vocab_size)`.
|
|
||||||
attentions (`tuple(tuple(torch.FloatTensor))`, *optional*, returned when `output_attentions=True` is passed or `config.output_attentions=True`):
|
|
||||||
Tuple (one element for each generated token) of tuples (one element for each layer of the decoder) of
|
|
||||||
`torch.FloatTensor` of shape `(num_return_sequences*batch_size, num_heads, generated_length,
|
|
||||||
sequence_length)`.
|
|
||||||
hidden_states (`tuple(tuple(torch.FloatTensor))`, *optional*, returned when `output_hidden_states=True` is passed or when `config.output_hidden_states=True`):
|
|
||||||
Tuple (one element for each generated token) of tuples (one element for each layer of the decoder) of
|
|
||||||
`torch.FloatTensor` of shape `(num_return_sequences*batch_size, generated_length, hidden_size)`.
|
|
||||||
past_key_values (`tuple(tuple(torch.FloatTensor))`, *optional*, returned when `use_cache=True` is passed or when `config.use_cache=True`):
|
|
||||||
NOTE: some models have a different `past_key_values` format, confirm with the model's documentation.
|
|
||||||
Usually a Tuple (one element for each layer of the decoder) of tuples (two elements, key tensor and value
|
|
||||||
tensor). The first Tuple is of length `config.n_layers`, with each tuple having 2 tensors of shape
|
|
||||||
`(batch_size, num_heads, sequence_length, embed_size_per_head)`) and optionally if
|
|
||||||
`config.is_encoder_decoder=True` 2 additional tensors of shape `(batch_size, num_heads,
|
|
||||||
encoder_sequence_length, embed_size_per_head)`.
|
|
||||||
"""
|
|
||||||
|
|
||||||
sequences: torch.LongTensor = None
|
|
||||||
scores: Optional[Tuple[torch.FloatTensor]] = None
|
|
||||||
attentions: Optional[Tuple[Tuple[torch.FloatTensor]]] = None
|
|
||||||
hidden_states: Optional[Tuple[Tuple[torch.FloatTensor]]] = None
|
|
||||||
past_key_values: Optional[Tuple[Tuple[Tuple[torch.FloatTensor]]]] = None
|
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
|
||||||
class SampleEncoderDecoderOutput(ModelOutput):
|
|
||||||
"""
|
|
||||||
Base class for outputs of encoder-decoder generation models using sampling. Hidden states and attention weights of
|
|
||||||
the decoder (respectively the encoder) can be accessed via the encoder_attentions and the encoder_hidden_states
|
|
||||||
attributes (respectively the decoder_attentions and the decoder_hidden_states attributes)
|
|
||||||
|
|
||||||
|
|
||||||
Args:
|
|
||||||
sequences (`torch.LongTensor` of shape `(batch_size*num_return_sequences, sequence_length)`):
|
|
||||||
The generated sequences. The second dimension (sequence_length) is either equal to `max_length` or shorter
|
|
||||||
if all batches finished early due to the `eos_token_id`.
|
|
||||||
scores (`tuple(torch.FloatTensor)` *optional*, returned when `output_scores=True` is passed or when `config.output_scores=True`):
|
|
||||||
Processed prediction scores of the language modeling head (scores for each vocabulary token before SoftMax)
|
|
||||||
at each generation step. Tuple of `torch.FloatTensor` with up to `max_new_tokens` elements (one element for
|
|
||||||
each generated token), with each tensor of shape `(batch_size*num_return_sequences, config.vocab_size)`.
|
|
||||||
encoder_attentions (`tuple(torch.FloatTensor)`, *optional*, returned when `output_attentions=True` is passed or `config.output_attentions=True`):
|
|
||||||
Tuple of `torch.FloatTensor` (one for each layer of the decoder) of shape
|
|
||||||
`(batch_size*num_return_sequences, num_heads, sequence_length, sequence_length)`.
|
|
||||||
encoder_hidden_states (`tuple(torch.FloatTensor)`, *optional*, returned when `output_hidden_states=True` is passed or when `config.output_hidden_states=True`):
|
|
||||||
Tuple of `torch.FloatTensor` (one for the output of the embeddings + one for the output of each layer) of
|
|
||||||
shape `(batch_size*num_return_sequences, sequence_length, hidden_size)`.
|
|
||||||
decoder_attentions (`tuple(tuple(torch.FloatTensor))`, *optional*, returned when `output_attentions=True` is passed or `config.output_attentions=True`):
|
|
||||||
Tuple (one element for each generated token) of tuples (one element for each layer of the decoder) of
|
|
||||||
`torch.FloatTensor` of shape `(batch_size*num_return_sequences, num_heads, generated_length,
|
|
||||||
sequence_length)`.
|
|
||||||
cross_attentions (`tuple(tuple(torch.FloatTensor))`, *optional*, returned when `output_attentions=True` is passed or `config.output_attentions=True`):
|
|
||||||
Tuple (one element for each generated token) of tuples (one element for each layer of the decoder) of
|
|
||||||
`torch.FloatTensor` of shape `(batch_size, num_heads, generated_length, sequence_length)`.
|
|
||||||
decoder_hidden_states (`tuple(tuple(torch.FloatTensor))`, *optional*, returned when `output_hidden_states=True` is passed or when `config.output_hidden_states=True`):
|
|
||||||
Tuple (one element for each generated token) of tuples (one element for each layer of the decoder) of
|
|
||||||
`torch.FloatTensor` of shape `(batch_size*num_return_sequences, generated_length, hidden_size)`.
|
|
||||||
past_key_values (`tuple(tuple(torch.FloatTensor))`, *optional*, returned when `use_cache=True` is passed or when `config.use_cache=True`):
|
|
||||||
NOTE: some models have a different `past_key_values` format, confirm with the model's documentation.
|
|
||||||
Usually a Tuple (one element for each layer of the decoder) of tuples (two elements, key tensor and value
|
|
||||||
tensor). The first Tuple is of length `config.n_layers`, with each tuple having 2 tensors of shape
|
|
||||||
`(batch_size, num_heads, sequence_length, embed_size_per_head)`) and optionally if
|
|
||||||
`config.is_encoder_decoder=True` 2 additional tensors of shape `(batch_size, num_heads,
|
|
||||||
encoder_sequence_length, embed_size_per_head)`.
|
|
||||||
"""
|
|
||||||
|
|
||||||
sequences: torch.LongTensor = None
|
|
||||||
scores: Optional[Tuple[torch.FloatTensor]] = None
|
|
||||||
encoder_attentions: Optional[Tuple[torch.FloatTensor]] = None
|
|
||||||
encoder_hidden_states: Optional[Tuple[torch.FloatTensor]] = None
|
|
||||||
decoder_attentions: Optional[Tuple[Tuple[torch.FloatTensor]]] = None
|
|
||||||
cross_attentions: Optional[Tuple[Tuple[torch.FloatTensor]]] = None
|
|
||||||
decoder_hidden_states: Optional[Tuple[Tuple[torch.FloatTensor]]] = None
|
|
||||||
past_key_values: Optional[Tuple[Tuple[Tuple[torch.FloatTensor]]]] = None
|
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
|
||||||
class BeamSearchDecoderOnlyOutput(ModelOutput):
|
|
||||||
"""
|
|
||||||
Base class for outputs of decoder-only generation models using beam search.
|
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
sequences (`torch.LongTensor` of shape `(batch_size*num_return_sequences, sequence_length)`):
|
sequences (`torch.LongTensor` of shape `(batch_size*num_return_sequences, sequence_length)`):
|
||||||
@@ -395,11 +219,9 @@ class BeamSearchDecoderOnlyOutput(ModelOutput):
|
|||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
class BeamSearchEncoderDecoderOutput(ModelOutput):
|
class GenerateBeamEncoderDecoderOutput(ModelOutput):
|
||||||
"""
|
"""
|
||||||
Base class for outputs of encoder-decoder generation models using beam search. Hidden states and attention weights
|
Outputs of encoder-decoder generation models, when using beam methods.
|
||||||
of the decoder (respectively the encoder) can be accessed via the encoder_attentions and the encoder_hidden_states
|
|
||||||
attributes (respectively the decoder_attentions and the decoder_hidden_states attributes)
|
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
sequences (`torch.LongTensor` of shape `(batch_size*num_return_sequences, sequence_length)`):
|
sequences (`torch.LongTensor` of shape `(batch_size*num_return_sequences, sequence_length)`):
|
||||||
@@ -452,112 +274,26 @@ class BeamSearchEncoderDecoderOutput(ModelOutput):
|
|||||||
past_key_values: Optional[Tuple[Tuple[Tuple[torch.FloatTensor]]]] = None
|
past_key_values: Optional[Tuple[Tuple[Tuple[torch.FloatTensor]]]] = None
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
# Equivalent classes (kept for retrocompatibility purposes)
|
||||||
class BeamSampleDecoderOnlyOutput(ModelOutput):
|
GreedySearchDecoderOnlyOutput = GenerateDecoderOnlyOutput
|
||||||
"""
|
ContrastiveSearchDecoderOnlyOutput = GenerateDecoderOnlyOutput
|
||||||
Base class for outputs of decoder-only generation models using beam sample.
|
SampleDecoderOnlyOutput = GenerateDecoderOnlyOutput
|
||||||
|
|
||||||
Args:
|
ContrastiveSearchEncoderDecoderOutput = GenerateEncoderDecoderOutput
|
||||||
sequences (`torch.LongTensor` of shape `(batch_size*num_return_sequences, sequence_length)`):
|
GreedySearchEncoderDecoderOutput = GenerateEncoderDecoderOutput
|
||||||
The generated sequences. The second dimension (sequence_length) is either equal to `max_length` or shorter
|
SampleEncoderDecoderOutput = GenerateEncoderDecoderOutput
|
||||||
if all batches finished early due to the `eos_token_id`.
|
|
||||||
sequences_scores (`torch.FloatTensor` of shape `(batch_size * num_return_sequences)`, *optional*, returned when `output_scores=True` is passed or when `config.output_scores=True`):
|
|
||||||
Final beam scores of the generated `sequences`.
|
|
||||||
scores (`tuple(torch.FloatTensor)` *optional*, returned when `output_scores=True` is passed or when `config.output_scores=True`):
|
|
||||||
Beam transition scores for each vocabulary token at each generation step. Beam transition scores consisting
|
|
||||||
of log probabilities of tokens conditioned on log softmax of previously generated tokens in this beam.
|
|
||||||
Tuple of `torch.FloatTensor` with up to `max_new_tokens` elements (one element for each generated token),
|
|
||||||
with each tensor of shape `(batch_size*num_beams*num_return_sequences, config.vocab_size)`.
|
|
||||||
beam_indices (`torch.LongTensor`, *optional*, returned when `output_scores=True` is passed or when `config.output_scores=True`):
|
|
||||||
Beam indices of generated token id at each generation step. `torch.LongTensor` of shape
|
|
||||||
`(batch_size*num_return_sequences, sequence_length)`.
|
|
||||||
attentions (`tuple(tuple(torch.FloatTensor))`, *optional*, returned when `output_attentions=True` is passed or `config.output_attentions=True`):
|
|
||||||
Tuple (one element for each generated token) of tuples (one element for each layer of the decoder) of
|
|
||||||
`torch.FloatTensor` of shape `(batch_size*num_beams, num_heads, generated_length, sequence_length)`.
|
|
||||||
hidden_states (`tuple(tuple(torch.FloatTensor))`, *optional*, returned when `output_hidden_states=True` is passed or when `config.output_hidden_states=True`):
|
|
||||||
Tuple (one element for each generated token) of tuples (one element for each layer of the decoder) of
|
|
||||||
`torch.FloatTensor` of shape `(batch_size*num_beams, generated_length, hidden_size)`.
|
|
||||||
past_key_values (`tuple(tuple(torch.FloatTensor))`, *optional*, returned when `use_cache=True` is passed or when `config.use_cache=True`):
|
|
||||||
NOTE: some models have a different `past_key_values` format, confirm with the model's documentation.
|
|
||||||
Usually a Tuple (one element for each layer of the decoder) of tuples (two elements, key tensor and value
|
|
||||||
tensor). The first Tuple is of length `config.n_layers`, with each tuple having 2 tensors of shape
|
|
||||||
`(batch_size, num_heads, sequence_length, embed_size_per_head)`) and optionally if
|
|
||||||
`config.is_encoder_decoder=True` 2 additional tensors of shape `(batch_size, num_heads,
|
|
||||||
encoder_sequence_length, embed_size_per_head)`.
|
|
||||||
"""
|
|
||||||
|
|
||||||
sequences: torch.LongTensor = None
|
BeamSearchDecoderOnlyOutput = GenerateBeamDecoderOnlyOutput
|
||||||
sequences_scores: Optional[torch.FloatTensor] = None
|
BeamSampleDecoderOnlyOutput = GenerateBeamDecoderOnlyOutput
|
||||||
scores: Optional[Tuple[torch.FloatTensor]] = None
|
|
||||||
beam_indices: Optional[torch.LongTensor] = None
|
BeamSearchEncoderDecoderOutput = GenerateBeamEncoderDecoderOutput
|
||||||
attentions: Optional[Tuple[Tuple[torch.FloatTensor]]] = None
|
BeamSampleEncoderDecoderOutput = GenerateBeamEncoderDecoderOutput
|
||||||
hidden_states: Optional[Tuple[Tuple[torch.FloatTensor]]] = None
|
|
||||||
past_key_values: Optional[Tuple[Tuple[Tuple[torch.FloatTensor]]]] = None
|
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
# Typing shortcuts
|
||||||
class BeamSampleEncoderDecoderOutput(ModelOutput):
|
GenerateNonBeamOutput = Union[GenerateDecoderOnlyOutput, GenerateEncoderDecoderOutput]
|
||||||
"""
|
GenerateBeamOutput = Union[GenerateBeamDecoderOnlyOutput, GenerateBeamEncoderDecoderOutput]
|
||||||
Base class for outputs of encoder-decoder generation models using beam sampling. Hidden states and attention
|
GenerateOutput = Union[GenerateNonBeamOutput, GenerateBeamOutput]
|
||||||
weights of the decoder (respectively the encoder) can be accessed via the encoder_attentions and the
|
|
||||||
encoder_hidden_states attributes (respectively the decoder_attentions and the decoder_hidden_states attributes)
|
|
||||||
|
|
||||||
Args:
|
|
||||||
sequences (`torch.LongTensor` of shape `(batch_size*num_beams, sequence_length)`):
|
|
||||||
The generated sequences. The second dimension (sequence_length) is either equal to `max_length` or shorter
|
|
||||||
if all batches finished early due to the `eos_token_id`.
|
|
||||||
sequences_scores (`torch.FloatTensor` of shape `(batch_size * num_return_sequence)`, *optional*, returned when `output_scores=True` is passed or when `config.output_scores=True`):
|
|
||||||
Final beam scores of the generated `sequences`.
|
|
||||||
scores (`tuple(torch.FloatTensor)` *optional*, returned when `output_scores=True` is passed or when `config.output_scores=True`):
|
|
||||||
Beam transition scores for each vocabulary token at each generation step. Beam transition scores consisting
|
|
||||||
of log probabilities of tokens conditioned on log softmax of previously generated tokens in this beam.
|
|
||||||
Tuple of `torch.FloatTensor` with up to `max_new_tokens` elements (one element for each generated token),
|
|
||||||
with each tensor of shape `(batch_size*num_beams, config.vocab_size)`).
|
|
||||||
beam_indices (`torch.LongTensor`, *optional*, returned when `output_scores=True` is passed or when `config.output_scores=True`):
|
|
||||||
Beam indices of generated token id at each generation step. `torch.LongTensor` of shape
|
|
||||||
`(batch_size*num_return_sequences, sequence_length)`.
|
|
||||||
encoder_attentions (`tuple(torch.FloatTensor)`, *optional*, returned when `output_attentions=True` is passed or `config.output_attentions=True`):
|
|
||||||
Tuple of `torch.FloatTensor` (one for each layer of the decoder) of shape `(batch_size, num_heads,
|
|
||||||
sequence_length, sequence_length)`.
|
|
||||||
encoder_hidden_states (`tuple(torch.FloatTensor)`, *optional*, returned when `output_hidden_states=True` is passed or when `config.output_hidden_states=True`):
|
|
||||||
Tuple of `torch.FloatTensor` (one for the output of the embeddings + one for the output of each layer) of
|
|
||||||
shape `(batch_size*num_beams, sequence_length, hidden_size)`.
|
|
||||||
decoder_attentions (`tuple(tuple(torch.FloatTensor))`, *optional*, returned when `output_attentions=True` is passed or `config.output_attentions=True`):
|
|
||||||
Tuple (one element for each generated token) of tuples (one element for each layer of the decoder) of
|
|
||||||
`torch.FloatTensor` of shape `(batch_size*num_beams, num_heads, generated_length, sequence_length)`.
|
|
||||||
cross_attentions (`tuple(tuple(torch.FloatTensor))`, *optional*, returned when `output_attentions=True` is passed or `config.output_attentions=True`):
|
|
||||||
Tuple (one element for each generated token) of tuples (one element for each layer of the decoder) of
|
|
||||||
`torch.FloatTensor` of shape `(batch_size, num_heads, generated_length, sequence_length)`.
|
|
||||||
decoder_hidden_states (`tuple(tuple(torch.FloatTensor))`, *optional*, returned when `output_hidden_states=True` is passed or when `config.output_hidden_states=True`):
|
|
||||||
Tuple (one element for each generated token) of tuples (one element for each layer of the decoder) of
|
|
||||||
`torch.FloatTensor` of shape `(batch_size*num_beams, generated_length, hidden_size)`.
|
|
||||||
past_key_values (`tuple(tuple(torch.FloatTensor)))`, *optional*, returned when `use_cache=True` is passed or when `config.use_cache=True`):
|
|
||||||
NOTE: some models have a different `past_key_values` format, confirm with the model's documentation.
|
|
||||||
Usually a Tuple (one element for each layer of the decoder) of tuples (two elements, key tensor and value
|
|
||||||
tensor). The first Tuple is of length `config.n_layers`, with each tuple having 2 tensors of shape
|
|
||||||
`(batch_size, num_heads, sequence_length, embed_size_per_head)`) and optionally if
|
|
||||||
`config.is_encoder_decoder=True` 2 additional tensors of shape `(batch_size, num_heads,
|
|
||||||
encoder_sequence_length, embed_size_per_head)`.
|
|
||||||
"""
|
|
||||||
|
|
||||||
sequences: torch.LongTensor = None
|
|
||||||
sequences_scores: Optional[torch.FloatTensor] = None
|
|
||||||
scores: Optional[Tuple[torch.FloatTensor]] = None
|
|
||||||
beam_indices: Optional[torch.LongTensor] = None
|
|
||||||
encoder_attentions: Optional[Tuple[torch.FloatTensor]] = None
|
|
||||||
encoder_hidden_states: Optional[Tuple[torch.FloatTensor]] = None
|
|
||||||
decoder_attentions: Optional[Tuple[Tuple[torch.FloatTensor]]] = None
|
|
||||||
cross_attentions: Optional[Tuple[Tuple[torch.FloatTensor]]] = None
|
|
||||||
decoder_hidden_states: Optional[Tuple[Tuple[torch.FloatTensor]]] = None
|
|
||||||
past_key_values: Optional[Tuple[Tuple[Tuple[torch.FloatTensor]]]] = None
|
|
||||||
|
|
||||||
|
|
||||||
GreedySearchOutput = Union[GreedySearchEncoderDecoderOutput, GreedySearchDecoderOnlyOutput]
|
|
||||||
SampleOutput = Union[SampleEncoderDecoderOutput, SampleDecoderOnlyOutput]
|
|
||||||
BeamSearchOutput = Union[BeamSearchEncoderDecoderOutput, BeamSearchDecoderOnlyOutput]
|
|
||||||
BeamSampleOutput = Union[BeamSampleEncoderDecoderOutput, BeamSampleDecoderOnlyOutput]
|
|
||||||
ContrastiveSearchOutput = Union[ContrastiveSearchEncoderDecoderOutput, ContrastiveSearchDecoderOnlyOutput]
|
|
||||||
GenerateOutput = Union[GreedySearchOutput, SampleOutput, BeamSearchOutput, BeamSampleOutput, ContrastiveSearchOutput]
|
|
||||||
|
|
||||||
|
|
||||||
class GenerationMode(ExplicitEnum):
|
class GenerationMode(ExplicitEnum):
|
||||||
@@ -1516,18 +1252,14 @@ class GenerationMixin:
|
|||||||
If the model is *not* an encoder-decoder model (`model.config.is_encoder_decoder=False`), the possible
|
If the model is *not* an encoder-decoder model (`model.config.is_encoder_decoder=False`), the possible
|
||||||
[`~utils.ModelOutput`] types are:
|
[`~utils.ModelOutput`] types are:
|
||||||
|
|
||||||
- [`~generation.GreedySearchDecoderOnlyOutput`],
|
- [`~generation.GenerateDecoderOnlyOutput`],
|
||||||
- [`~generation.SampleDecoderOnlyOutput`],
|
- [`~generation.GenerateBeamDecoderOnlyOutput`]
|
||||||
- [`~generation.BeamSearchDecoderOnlyOutput`],
|
|
||||||
- [`~generation.BeamSampleDecoderOnlyOutput`]
|
|
||||||
|
|
||||||
If the model is an encoder-decoder model (`model.config.is_encoder_decoder=True`), the possible
|
If the model is an encoder-decoder model (`model.config.is_encoder_decoder=True`), the possible
|
||||||
[`~utils.ModelOutput`] types are:
|
[`~utils.ModelOutput`] types are:
|
||||||
|
|
||||||
- [`~generation.GreedySearchEncoderDecoderOutput`],
|
- [`~generation.GenerateEncoderDecoderOutput`],
|
||||||
- [`~generation.SampleEncoderDecoderOutput`],
|
- [`~generation.GenerateBeamEncoderDecoderOutput`]
|
||||||
- [`~generation.BeamSearchEncoderDecoderOutput`],
|
|
||||||
- [`~generation.BeamSampleEncoderDecoderOutput`]
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
if synced_gpus is None:
|
if synced_gpus is None:
|
||||||
@@ -1989,7 +1721,7 @@ class GenerationMixin:
|
|||||||
streamer: Optional["BaseStreamer"] = None,
|
streamer: Optional["BaseStreamer"] = None,
|
||||||
sequential: Optional[bool] = None,
|
sequential: Optional[bool] = None,
|
||||||
**model_kwargs,
|
**model_kwargs,
|
||||||
) -> Union[ContrastiveSearchOutput, torch.LongTensor]:
|
) -> Union[GenerateNonBeamOutput, torch.LongTensor]:
|
||||||
r"""
|
r"""
|
||||||
Generates sequences of token ids for models with a language modeling head using **contrastive search** and can
|
Generates sequences of token ids for models with a language modeling head using **contrastive search** and can
|
||||||
be used for text-decoder, text-to-text, speech-to-text, and vision-to-text models.
|
be used for text-decoder, text-to-text, speech-to-text, and vision-to-text models.
|
||||||
@@ -2045,10 +1777,10 @@ class GenerationMixin:
|
|||||||
If model is an encoder-decoder model the kwargs should include `encoder_outputs`.
|
If model is an encoder-decoder model the kwargs should include `encoder_outputs`.
|
||||||
|
|
||||||
Return:
|
Return:
|
||||||
[`~generation.ContrastiveSearchDecoderOnlyOutput`], [`~generation.ContrastiveSearchEncoderDecoderOutput`]
|
[`~generation.GenerateDecoderOnlyOutput`], [`~generation.GenerateEncoderDecoderOutput`]
|
||||||
or `torch.LongTensor`: A `torch.LongTensor` containing the generated tokens (default behaviour) or a
|
or `torch.LongTensor`: A `torch.LongTensor` containing the generated tokens (default behaviour) or a
|
||||||
[`~generation.ContrastiveSearchDecoderOnlyOutput`] if `model.config.is_encoder_decoder=False` and
|
[`~generation.GenerateDecoderOnlyOutput`] if `model.config.is_encoder_decoder=False` and
|
||||||
`return_dict_in_generate=True` or a [`~generation.ContrastiveSearchEncoderDecoderOutput`] if
|
`return_dict_in_generate=True` or a [`~generation.GenerateEncoderDecoderOutput`] if
|
||||||
`model.config.is_encoder_decoder=True`.
|
`model.config.is_encoder_decoder=True`.
|
||||||
|
|
||||||
Examples:
|
Examples:
|
||||||
@@ -2406,7 +2138,7 @@ class GenerationMixin:
|
|||||||
model_kwargs["past_key_values"] = tuple(past_key_values)
|
model_kwargs["past_key_values"] = tuple(past_key_values)
|
||||||
|
|
||||||
if self.config.is_encoder_decoder:
|
if self.config.is_encoder_decoder:
|
||||||
return ContrastiveSearchEncoderDecoderOutput(
|
return GenerateEncoderDecoderOutput(
|
||||||
sequences=input_ids,
|
sequences=input_ids,
|
||||||
scores=scores,
|
scores=scores,
|
||||||
encoder_attentions=encoder_attentions,
|
encoder_attentions=encoder_attentions,
|
||||||
@@ -2417,7 +2149,7 @@ class GenerationMixin:
|
|||||||
past_key_values=model_kwargs.get("past_key_values"),
|
past_key_values=model_kwargs.get("past_key_values"),
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
return ContrastiveSearchDecoderOnlyOutput(
|
return GenerateDecoderOnlyOutput(
|
||||||
sequences=input_ids,
|
sequences=input_ids,
|
||||||
scores=scores,
|
scores=scores,
|
||||||
attentions=decoder_attentions,
|
attentions=decoder_attentions,
|
||||||
@@ -2442,7 +2174,7 @@ class GenerationMixin:
|
|||||||
synced_gpus: bool = False,
|
synced_gpus: bool = False,
|
||||||
streamer: Optional["BaseStreamer"] = None,
|
streamer: Optional["BaseStreamer"] = None,
|
||||||
**model_kwargs,
|
**model_kwargs,
|
||||||
) -> Union[GreedySearchOutput, torch.LongTensor]:
|
) -> Union[GenerateNonBeamOutput, torch.LongTensor]:
|
||||||
r"""
|
r"""
|
||||||
Generates sequences of token ids for models with a language modeling head using **greedy decoding** and can be
|
Generates sequences of token ids for models with a language modeling head using **greedy decoding** and can be
|
||||||
used for text-decoder, text-to-text, speech-to-text, and vision-to-text models.
|
used for text-decoder, text-to-text, speech-to-text, and vision-to-text models.
|
||||||
@@ -2493,10 +2225,10 @@ class GenerationMixin:
|
|||||||
If model is an encoder-decoder model the kwargs should include `encoder_outputs`.
|
If model is an encoder-decoder model the kwargs should include `encoder_outputs`.
|
||||||
|
|
||||||
Return:
|
Return:
|
||||||
[`~generation.GreedySearchDecoderOnlyOutput`], [`~generation.GreedySearchEncoderDecoderOutput`] or
|
[`~generation.GenerateDecoderOnlyOutput`], [`~generation.GenerateEncoderDecoderOutput`] or
|
||||||
`torch.LongTensor`: A `torch.LongTensor` containing the generated tokens (default behaviour) or a
|
`torch.LongTensor`: A `torch.LongTensor` containing the generated tokens (default behaviour) or a
|
||||||
[`~generation.GreedySearchDecoderOnlyOutput`] if `model.config.is_encoder_decoder=False` and
|
[`~generation.GenerateDecoderOnlyOutput`] if `model.config.is_encoder_decoder=False` and
|
||||||
`return_dict_in_generate=True` or a [`~generation.GreedySearchEncoderDecoderOutput`] if
|
`return_dict_in_generate=True` or a [`~generation.GenerateEncoderDecoderOutput`] if
|
||||||
`model.config.is_encoder_decoder=True`.
|
`model.config.is_encoder_decoder=True`.
|
||||||
|
|
||||||
Examples:
|
Examples:
|
||||||
@@ -2667,7 +2399,7 @@ class GenerationMixin:
|
|||||||
|
|
||||||
if return_dict_in_generate:
|
if return_dict_in_generate:
|
||||||
if self.config.is_encoder_decoder:
|
if self.config.is_encoder_decoder:
|
||||||
return GreedySearchEncoderDecoderOutput(
|
return GenerateEncoderDecoderOutput(
|
||||||
sequences=input_ids,
|
sequences=input_ids,
|
||||||
scores=scores,
|
scores=scores,
|
||||||
encoder_attentions=encoder_attentions,
|
encoder_attentions=encoder_attentions,
|
||||||
@@ -2678,7 +2410,7 @@ class GenerationMixin:
|
|||||||
past_key_values=model_kwargs.get("past_key_values"),
|
past_key_values=model_kwargs.get("past_key_values"),
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
return GreedySearchDecoderOnlyOutput(
|
return GenerateDecoderOnlyOutput(
|
||||||
sequences=input_ids,
|
sequences=input_ids,
|
||||||
scores=scores,
|
scores=scores,
|
||||||
attentions=decoder_attentions,
|
attentions=decoder_attentions,
|
||||||
@@ -2704,7 +2436,7 @@ class GenerationMixin:
|
|||||||
synced_gpus: bool = False,
|
synced_gpus: bool = False,
|
||||||
streamer: Optional["BaseStreamer"] = None,
|
streamer: Optional["BaseStreamer"] = None,
|
||||||
**model_kwargs,
|
**model_kwargs,
|
||||||
) -> Union[SampleOutput, torch.LongTensor]:
|
) -> Union[GenerateNonBeamOutput, torch.LongTensor]:
|
||||||
r"""
|
r"""
|
||||||
Generates sequences of token ids for models with a language modeling head using **multinomial sampling** and
|
Generates sequences of token ids for models with a language modeling head using **multinomial sampling** and
|
||||||
can be used for text-decoder, text-to-text, speech-to-text, and vision-to-text models.
|
can be used for text-decoder, text-to-text, speech-to-text, and vision-to-text models.
|
||||||
@@ -2757,10 +2489,10 @@ class GenerationMixin:
|
|||||||
an encoder-decoder model the kwargs should include `encoder_outputs`.
|
an encoder-decoder model the kwargs should include `encoder_outputs`.
|
||||||
|
|
||||||
Return:
|
Return:
|
||||||
[`~generation.SampleDecoderOnlyOutput`], [`~generation.SampleEncoderDecoderOutput`] or `torch.LongTensor`:
|
[`~generation.GenerateDecoderOnlyOutput`], [`~generation.GenerateEncoderDecoderOutput`] or `torch.LongTensor`:
|
||||||
A `torch.LongTensor` containing the generated tokens (default behaviour) or a
|
A `torch.LongTensor` containing the generated tokens (default behaviour) or a
|
||||||
[`~generation.SampleDecoderOnlyOutput`] if `model.config.is_encoder_decoder=False` and
|
[`~generation.GenerateDecoderOnlyOutput`] if `model.config.is_encoder_decoder=False` and
|
||||||
`return_dict_in_generate=True` or a [`~generation.SampleEncoderDecoderOutput`] if
|
`return_dict_in_generate=True` or a [`~generation.GenerateEncoderDecoderOutput`] if
|
||||||
`model.config.is_encoder_decoder=True`.
|
`model.config.is_encoder_decoder=True`.
|
||||||
|
|
||||||
Examples:
|
Examples:
|
||||||
@@ -2951,7 +2683,7 @@ class GenerationMixin:
|
|||||||
|
|
||||||
if return_dict_in_generate:
|
if return_dict_in_generate:
|
||||||
if self.config.is_encoder_decoder:
|
if self.config.is_encoder_decoder:
|
||||||
return SampleEncoderDecoderOutput(
|
return GenerateEncoderDecoderOutput(
|
||||||
sequences=input_ids,
|
sequences=input_ids,
|
||||||
scores=scores,
|
scores=scores,
|
||||||
encoder_attentions=encoder_attentions,
|
encoder_attentions=encoder_attentions,
|
||||||
@@ -2962,7 +2694,7 @@ class GenerationMixin:
|
|||||||
past_key_values=model_kwargs.get("past_key_values"),
|
past_key_values=model_kwargs.get("past_key_values"),
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
return SampleDecoderOnlyOutput(
|
return GenerateDecoderOnlyOutput(
|
||||||
sequences=input_ids,
|
sequences=input_ids,
|
||||||
scores=scores,
|
scores=scores,
|
||||||
attentions=decoder_attentions,
|
attentions=decoder_attentions,
|
||||||
@@ -3013,7 +2745,7 @@ class GenerationMixin:
|
|||||||
return_dict_in_generate: Optional[bool] = None,
|
return_dict_in_generate: Optional[bool] = None,
|
||||||
synced_gpus: bool = False,
|
synced_gpus: bool = False,
|
||||||
**model_kwargs,
|
**model_kwargs,
|
||||||
) -> Union[BeamSearchOutput, torch.LongTensor]:
|
) -> Union[GenerateBeamOutput, torch.LongTensor]:
|
||||||
r"""
|
r"""
|
||||||
Generates sequences of token ids for models with a language modeling head using **beam search decoding** and
|
Generates sequences of token ids for models with a language modeling head using **beam search decoding** and
|
||||||
can be used for text-decoder, text-to-text, speech-to-text, and vision-to-text models.
|
can be used for text-decoder, text-to-text, speech-to-text, and vision-to-text models.
|
||||||
@@ -3062,10 +2794,10 @@ class GenerationMixin:
|
|||||||
an encoder-decoder model the kwargs should include `encoder_outputs`.
|
an encoder-decoder model the kwargs should include `encoder_outputs`.
|
||||||
|
|
||||||
Return:
|
Return:
|
||||||
[`generation.BeamSearchDecoderOnlyOutput`], [`~generation.BeamSearchEncoderDecoderOutput`] or
|
[`~generation.GenerateBeamDecoderOnlyOutput`], [`~generation.GenerateBeamEncoderDecoderOutput`] or
|
||||||
`torch.LongTensor`: A `torch.LongTensor` containing the generated tokens (default behaviour) or a
|
`torch.LongTensor`: A `torch.LongTensor` containing the generated tokens (default behaviour) or a
|
||||||
[`~generation.BeamSearchDecoderOnlyOutput`] if `model.config.is_encoder_decoder=False` and
|
[`~generation.GenerateBeamDecoderOnlyOutput`] if `model.config.is_encoder_decoder=False` and
|
||||||
`return_dict_in_generate=True` or a [`~generation.BeamSearchEncoderDecoderOutput`] if
|
`return_dict_in_generate=True` or a [`~generation.GenerateBeamEncoderDecoderOutput`] if
|
||||||
`model.config.is_encoder_decoder=True`.
|
`model.config.is_encoder_decoder=True`.
|
||||||
|
|
||||||
|
|
||||||
@@ -3304,7 +3036,7 @@ class GenerationMixin:
|
|||||||
sequence_outputs["sequence_scores"] = None
|
sequence_outputs["sequence_scores"] = None
|
||||||
|
|
||||||
if self.config.is_encoder_decoder:
|
if self.config.is_encoder_decoder:
|
||||||
return BeamSearchEncoderDecoderOutput(
|
return GenerateBeamEncoderDecoderOutput(
|
||||||
sequences=sequence_outputs["sequences"],
|
sequences=sequence_outputs["sequences"],
|
||||||
sequences_scores=sequence_outputs["sequence_scores"],
|
sequences_scores=sequence_outputs["sequence_scores"],
|
||||||
scores=scores,
|
scores=scores,
|
||||||
@@ -3317,7 +3049,7 @@ class GenerationMixin:
|
|||||||
past_key_values=model_kwargs.get("past_key_values"),
|
past_key_values=model_kwargs.get("past_key_values"),
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
return BeamSearchDecoderOnlyOutput(
|
return GenerateBeamDecoderOnlyOutput(
|
||||||
sequences=sequence_outputs["sequences"],
|
sequences=sequence_outputs["sequences"],
|
||||||
sequences_scores=sequence_outputs["sequence_scores"],
|
sequences_scores=sequence_outputs["sequence_scores"],
|
||||||
scores=scores,
|
scores=scores,
|
||||||
@@ -3345,7 +3077,7 @@ class GenerationMixin:
|
|||||||
return_dict_in_generate: Optional[bool] = None,
|
return_dict_in_generate: Optional[bool] = None,
|
||||||
synced_gpus: bool = False,
|
synced_gpus: bool = False,
|
||||||
**model_kwargs,
|
**model_kwargs,
|
||||||
) -> Union[BeamSampleOutput, torch.LongTensor]:
|
) -> Union[GenerateBeamOutput, torch.LongTensor]:
|
||||||
r"""
|
r"""
|
||||||
Generates sequences of token ids for models with a language modeling head using **beam search multinomial
|
Generates sequences of token ids for models with a language modeling head using **beam search multinomial
|
||||||
sampling** and can be used for text-decoder, text-to-text, speech-to-text, and vision-to-text models.
|
sampling** and can be used for text-decoder, text-to-text, speech-to-text, and vision-to-text models.
|
||||||
@@ -3398,10 +3130,10 @@ class GenerationMixin:
|
|||||||
an encoder-decoder model the kwargs should include `encoder_outputs`.
|
an encoder-decoder model the kwargs should include `encoder_outputs`.
|
||||||
|
|
||||||
Return:
|
Return:
|
||||||
[`~generation.BeamSampleDecoderOnlyOutput`], [`~generation.BeamSampleEncoderDecoderOutput`] or
|
[`~generation.GenerateBeamDecoderOnlyOutput`], [`~generation.GenerateBeamEncoderDecoderOutput`] or
|
||||||
`torch.LongTensor`: A `torch.LongTensor` containing the generated tokens (default behaviour) or a
|
`torch.LongTensor`: A `torch.LongTensor` containing the generated tokens (default behaviour) or a
|
||||||
[`~generation.BeamSampleDecoderOnlyOutput`] if `model.config.is_encoder_decoder=False` and
|
[`~generation.GenerateBeamDecoderOnlyOutput`] if `model.config.is_encoder_decoder=False` and
|
||||||
`return_dict_in_generate=True` or a [`~generation.BeamSampleEncoderDecoderOutput`] if
|
`return_dict_in_generate=True` or a [`~generation.GenerateBeamEncoderDecoderOutput`] if
|
||||||
`model.config.is_encoder_decoder=True`.
|
`model.config.is_encoder_decoder=True`.
|
||||||
|
|
||||||
Examples:
|
Examples:
|
||||||
@@ -3641,7 +3373,7 @@ class GenerationMixin:
|
|||||||
sequence_outputs["sequence_scores"] = None
|
sequence_outputs["sequence_scores"] = None
|
||||||
|
|
||||||
if self.config.is_encoder_decoder:
|
if self.config.is_encoder_decoder:
|
||||||
return BeamSampleEncoderDecoderOutput(
|
return GenerateBeamEncoderDecoderOutput(
|
||||||
sequences=sequence_outputs["sequences"],
|
sequences=sequence_outputs["sequences"],
|
||||||
sequences_scores=sequence_outputs["sequence_scores"],
|
sequences_scores=sequence_outputs["sequence_scores"],
|
||||||
scores=scores,
|
scores=scores,
|
||||||
@@ -3654,7 +3386,7 @@ class GenerationMixin:
|
|||||||
past_key_values=model_kwargs.get("past_key_values"),
|
past_key_values=model_kwargs.get("past_key_values"),
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
return BeamSampleDecoderOnlyOutput(
|
return GenerateBeamDecoderOnlyOutput(
|
||||||
sequences=sequence_outputs["sequences"],
|
sequences=sequence_outputs["sequences"],
|
||||||
sequences_scores=sequence_outputs["sequence_scores"],
|
sequences_scores=sequence_outputs["sequence_scores"],
|
||||||
scores=scores,
|
scores=scores,
|
||||||
@@ -3731,11 +3463,11 @@ class GenerationMixin:
|
|||||||
model is an encoder-decoder model the kwargs should include `encoder_outputs`.
|
model is an encoder-decoder model the kwargs should include `encoder_outputs`.
|
||||||
|
|
||||||
Return:
|
Return:
|
||||||
[`~generation.BeamSearchDecoderOnlyOutput`], [`~generation.BeamSearchEncoderDecoderOutput`] or
|
[`~generation.GenerateBeamDecoderOnlyOutput`], [`~generation.GenerateBeamEncoderDecoderOutput`] or
|
||||||
`torch.LongTensor`: A `torch.LongTensor` containing the generated tokens (default behaviour) or a
|
`torch.LongTensor`: A `torch.LongTensor` containing the generated tokens (default behaviour) or a
|
||||||
[`~generation.BeamSearchDecoderOnlyOutput`] if [`~generation.BeamSearchDecoderOnlyOutput`] if
|
[`~generation.GenerateBeamDecoderOnlyOutput`] if `model.config.is_encoder_decoder=False` and
|
||||||
`model.config.is_encoder_decoder=False` and `return_dict_in_generate=True` or a
|
`return_dict_in_generate=True` or a [`~generation.GenerateBeamEncoderDecoderOutput`] if
|
||||||
[`~generation.BeamSearchEncoderDecoderOutput`] if `model.config.is_encoder_decoder=True`.
|
`model.config.is_encoder_decoder=True`.
|
||||||
|
|
||||||
Examples:
|
Examples:
|
||||||
|
|
||||||
@@ -4026,7 +3758,7 @@ class GenerationMixin:
|
|||||||
sequence_outputs["sequence_scores"] = None
|
sequence_outputs["sequence_scores"] = None
|
||||||
|
|
||||||
if self.config.is_encoder_decoder:
|
if self.config.is_encoder_decoder:
|
||||||
return BeamSearchEncoderDecoderOutput(
|
return GenerateBeamEncoderDecoderOutput(
|
||||||
sequences=sequence_outputs["sequences"],
|
sequences=sequence_outputs["sequences"],
|
||||||
sequences_scores=sequence_outputs["sequence_scores"],
|
sequences_scores=sequence_outputs["sequence_scores"],
|
||||||
scores=scores,
|
scores=scores,
|
||||||
@@ -4039,7 +3771,7 @@ class GenerationMixin:
|
|||||||
past_key_values=model_kwargs.get("past_key_values"),
|
past_key_values=model_kwargs.get("past_key_values"),
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
return BeamSearchDecoderOnlyOutput(
|
return GenerateBeamDecoderOnlyOutput(
|
||||||
sequences=sequence_outputs["sequences"],
|
sequences=sequence_outputs["sequences"],
|
||||||
sequences_scores=sequence_outputs["sequence_scores"],
|
sequences_scores=sequence_outputs["sequence_scores"],
|
||||||
scores=scores,
|
scores=scores,
|
||||||
@@ -4066,7 +3798,7 @@ class GenerationMixin:
|
|||||||
return_dict_in_generate: Optional[bool] = None,
|
return_dict_in_generate: Optional[bool] = None,
|
||||||
synced_gpus: Optional[bool] = None,
|
synced_gpus: Optional[bool] = None,
|
||||||
**model_kwargs,
|
**model_kwargs,
|
||||||
) -> Union[BeamSearchOutput, torch.LongTensor]:
|
) -> Union[GenerateBeamOutput, torch.LongTensor]:
|
||||||
r"""
|
r"""
|
||||||
Generates sequences of token ids for models with a language modeling head using **constrained beam search
|
Generates sequences of token ids for models with a language modeling head using **constrained beam search
|
||||||
decoding** and can be used for text-decoder, text-to-text, speech-to-text, and vision-to-text models.
|
decoding** and can be used for text-decoder, text-to-text, speech-to-text, and vision-to-text models.
|
||||||
@@ -4120,10 +3852,10 @@ class GenerationMixin:
|
|||||||
an encoder-decoder model the kwargs should include `encoder_outputs`.
|
an encoder-decoder model the kwargs should include `encoder_outputs`.
|
||||||
|
|
||||||
Return:
|
Return:
|
||||||
[`generation.BeamSearchDecoderOnlyOutput`], [`~generation.BeamSearchEncoderDecoderOutput`] or
|
[`~generation.GenerateBeamDecoderOnlyOutput`], [`~generation.GenerateBeamEncoderDecoderOutput`] or
|
||||||
`torch.LongTensor`: A `torch.LongTensor` containing the generated tokens (default behaviour) or a
|
`torch.LongTensor`: A `torch.LongTensor` containing the generated tokens (default behaviour) or a
|
||||||
[`~generation.BeamSearchDecoderOnlyOutput`] if `model.config.is_encoder_decoder=False` and
|
[`~generation.GenerateBeamDecoderOnlyOutput`] if `model.config.is_encoder_decoder=False` and
|
||||||
`return_dict_in_generate=True` or a [`~generation.BeamSearchEncoderDecoderOutput`] if
|
`return_dict_in_generate=True` or a [`~generation.GenerateBeamEncoderDecoderOutput`] if
|
||||||
`model.config.is_encoder_decoder=True`.
|
`model.config.is_encoder_decoder=True`.
|
||||||
|
|
||||||
|
|
||||||
@@ -4369,7 +4101,7 @@ class GenerationMixin:
|
|||||||
if not output_scores:
|
if not output_scores:
|
||||||
sequence_outputs["sequence_scores"] = None
|
sequence_outputs["sequence_scores"] = None
|
||||||
if self.config.is_encoder_decoder:
|
if self.config.is_encoder_decoder:
|
||||||
return BeamSearchEncoderDecoderOutput(
|
return GenerateBeamEncoderDecoderOutput(
|
||||||
sequences=sequence_outputs["sequences"],
|
sequences=sequence_outputs["sequences"],
|
||||||
sequences_scores=sequence_outputs["sequence_scores"],
|
sequences_scores=sequence_outputs["sequence_scores"],
|
||||||
scores=scores,
|
scores=scores,
|
||||||
@@ -4382,7 +4114,7 @@ class GenerationMixin:
|
|||||||
past_key_values=model_kwargs.get("past_key_values"),
|
past_key_values=model_kwargs.get("past_key_values"),
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
return BeamSearchDecoderOnlyOutput(
|
return GenerateBeamDecoderOnlyOutput(
|
||||||
sequences=sequence_outputs["sequences"],
|
sequences=sequence_outputs["sequences"],
|
||||||
sequences_scores=sequence_outputs["sequence_scores"],
|
sequences_scores=sequence_outputs["sequence_scores"],
|
||||||
scores=scores,
|
scores=scores,
|
||||||
@@ -4412,7 +4144,7 @@ class GenerationMixin:
|
|||||||
synced_gpus: bool = False,
|
synced_gpus: bool = False,
|
||||||
streamer: Optional["BaseStreamer"] = None,
|
streamer: Optional["BaseStreamer"] = None,
|
||||||
**model_kwargs,
|
**model_kwargs,
|
||||||
):
|
) -> Union[GenerateNonBeamOutput, torch.LongTensor]:
|
||||||
r"""
|
r"""
|
||||||
Generates sequences of token ids for models with a language modeling head using **greedy decoding** or
|
Generates sequences of token ids for models with a language modeling head using **greedy decoding** or
|
||||||
**sample** (depending on `do_sample`), assisted by candidate sequences. Assisted generation is an example of a
|
**sample** (depending on `do_sample`), assisted by candidate sequences. Assisted generation is an example of a
|
||||||
@@ -4474,10 +4206,10 @@ class GenerationMixin:
|
|||||||
If model is an encoder-decoder model the kwargs should include `encoder_outputs`.
|
If model is an encoder-decoder model the kwargs should include `encoder_outputs`.
|
||||||
|
|
||||||
Return:
|
Return:
|
||||||
[`~generation.GreedySearchDecoderOnlyOutput`], [`~generation.GreedySearchEncoderDecoderOutput`] or
|
[`~generation.GenerateDecoderOnlyOutput`], [`~generation.GenerateEncoderDecoderOutput`] or
|
||||||
`torch.LongTensor`: A `torch.LongTensor` containing the generated tokens (default behaviour) or a
|
`torch.LongTensor`: A `torch.LongTensor` containing the generated tokens (default behaviour) or a
|
||||||
[`~generation.GreedySearchDecoderOnlyOutput`] if `model.config.is_encoder_decoder=False` and
|
[`~generation.GenerateDecoderOnlyOutput`] if `model.config.is_encoder_decoder=False` and
|
||||||
`return_dict_in_generate=True` or a [`~generation.GreedySearchEncoderDecoderOutput`] if
|
`return_dict_in_generate=True` or a [`~generation.GenerateEncoderDecoderOutput`] if
|
||||||
`model.config.is_encoder_decoder=True`.
|
`model.config.is_encoder_decoder=True`.
|
||||||
|
|
||||||
Examples:
|
Examples:
|
||||||
@@ -4758,7 +4490,7 @@ class GenerationMixin:
|
|||||||
|
|
||||||
if return_dict_in_generate:
|
if return_dict_in_generate:
|
||||||
if self.config.is_encoder_decoder:
|
if self.config.is_encoder_decoder:
|
||||||
return GreedySearchEncoderDecoderOutput(
|
return GenerateEncoderDecoderOutput(
|
||||||
sequences=input_ids,
|
sequences=input_ids,
|
||||||
scores=scores,
|
scores=scores,
|
||||||
encoder_attentions=encoder_attentions,
|
encoder_attentions=encoder_attentions,
|
||||||
@@ -4769,7 +4501,7 @@ class GenerationMixin:
|
|||||||
past_key_values=model_kwargs.get("past_key_values"),
|
past_key_values=model_kwargs.get("past_key_values"),
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
return GreedySearchDecoderOnlyOutput(
|
return GenerateDecoderOnlyOutput(
|
||||||
sequences=input_ids,
|
sequences=input_ids,
|
||||||
scores=scores,
|
scores=scores,
|
||||||
attentions=decoder_attentions,
|
attentions=decoder_attentions,
|
||||||
|
|||||||
@@ -1197,18 +1197,14 @@ class MusicgenForCausalLM(MusicgenPreTrainedModel):
|
|||||||
If the model is *not* an encoder-decoder model (`model.config.is_encoder_decoder=False`), the possible
|
If the model is *not* an encoder-decoder model (`model.config.is_encoder_decoder=False`), the possible
|
||||||
[`~utils.ModelOutput`] types are:
|
[`~utils.ModelOutput`] types are:
|
||||||
|
|
||||||
- [`~generation.GreedySearchDecoderOnlyOutput`],
|
- [`~generation.GenerateDecoderOnlyOutput`],
|
||||||
- [`~generation.SampleDecoderOnlyOutput`],
|
- [`~generation.GenerateBeamDecoderOnlyOutput`]
|
||||||
- [`~generation.BeamSearchDecoderOnlyOutput`],
|
|
||||||
- [`~generation.BeamSampleDecoderOnlyOutput`]
|
|
||||||
|
|
||||||
If the model is an encoder-decoder model (`model.config.is_encoder_decoder=True`), the possible
|
If the model is an encoder-decoder model (`model.config.is_encoder_decoder=True`), the possible
|
||||||
[`~utils.ModelOutput`] types are:
|
[`~utils.ModelOutput`] types are:
|
||||||
|
|
||||||
- [`~generation.GreedySearchEncoderDecoderOutput`],
|
- [`~generation.GenerateEncoderDecoderOutput`],
|
||||||
- [`~generation.SampleEncoderDecoderOutput`],
|
- [`~generation.GenerateBeamEncoderDecoderOutput`]
|
||||||
- [`~generation.BeamSearchEncoderDecoderOutput`],
|
|
||||||
- [`~generation.BeamSampleEncoderDecoderOutput`]
|
|
||||||
"""
|
"""
|
||||||
# 1. Handle `generation_config` and kwargs that might update it, and validate the resulting objects
|
# 1. Handle `generation_config` and kwargs that might update it, and validate the resulting objects
|
||||||
if generation_config is None:
|
if generation_config is None:
|
||||||
@@ -2244,18 +2240,14 @@ class MusicgenForConditionalGeneration(PreTrainedModel):
|
|||||||
If the model is *not* an encoder-decoder model (`model.config.is_encoder_decoder=False`), the possible
|
If the model is *not* an encoder-decoder model (`model.config.is_encoder_decoder=False`), the possible
|
||||||
[`~utils.ModelOutput`] types are:
|
[`~utils.ModelOutput`] types are:
|
||||||
|
|
||||||
- [`~generation.GreedySearchDecoderOnlyOutput`],
|
- [`~generation.GenerateDecoderOnlyOutput`],
|
||||||
- [`~generation.SampleDecoderOnlyOutput`],
|
- [`~generation.GenerateBeamDecoderOnlyOutput`]
|
||||||
- [`~generation.BeamSearchDecoderOnlyOutput`],
|
|
||||||
- [`~generation.BeamSampleDecoderOnlyOutput`]
|
|
||||||
|
|
||||||
If the model is an encoder-decoder model (`model.config.is_encoder_decoder=True`), the possible
|
If the model is an encoder-decoder model (`model.config.is_encoder_decoder=True`), the possible
|
||||||
[`~utils.ModelOutput`] types are:
|
[`~utils.ModelOutput`] types are:
|
||||||
|
|
||||||
- [`~generation.GreedySearchEncoderDecoderOutput`],
|
- [`~generation.GenerateEncoderDecoderOutput`],
|
||||||
- [`~generation.SampleEncoderDecoderOutput`],
|
- [`~generation.GenerateBeamEncoderDecoderOutput`]
|
||||||
- [`~generation.BeamSearchEncoderDecoderOutput`],
|
|
||||||
- [`~generation.BeamSampleEncoderDecoderOutput`]
|
|
||||||
"""
|
"""
|
||||||
# 1. Handle `generation_config` and kwargs that might update it, and validate the resulting objects
|
# 1. Handle `generation_config` and kwargs that might update it, and validate the resulting objects
|
||||||
if generation_config is None:
|
if generation_config is None:
|
||||||
|
|||||||
@@ -1264,10 +1264,8 @@ class Pop2PianoForConditionalGeneration(Pop2PianoPreTrainedModel):
|
|||||||
or when `config.return_dict_in_generate=True`) or a `torch.FloatTensor`.
|
or when `config.return_dict_in_generate=True`) or a `torch.FloatTensor`.
|
||||||
Since Pop2Piano is an encoder-decoder model (`model.config.is_encoder_decoder=True`), the possible
|
Since Pop2Piano is an encoder-decoder model (`model.config.is_encoder_decoder=True`), the possible
|
||||||
[`~utils.ModelOutput`] types are:
|
[`~utils.ModelOutput`] types are:
|
||||||
- [`~generation.GreedySearchEncoderDecoderOutput`],
|
- [`~generation.GenerateEncoderDecoderOutput`],
|
||||||
- [`~generation.SampleEncoderDecoderOutput`],
|
- [`~generation.GenerateBeamEncoderDecoderOutput`]
|
||||||
- [`~generation.BeamSearchEncoderDecoderOutput`],
|
|
||||||
- [`~generation.BeamSampleEncoderDecoderOutput`]
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
if generation_config is None:
|
if generation_config is None:
|
||||||
|
|||||||
@@ -2845,11 +2845,8 @@ class SeamlessM4TForTextToText(SeamlessM4TPreTrainedModel):
|
|||||||
[`~utils.ModelOutput`] or `torch.LongTensor`: A [`~utils.ModelOutput`] (if `return_dict_in_generate=True`
|
[`~utils.ModelOutput`] or `torch.LongTensor`: A [`~utils.ModelOutput`] (if `return_dict_in_generate=True`
|
||||||
or when `config.return_dict_in_generate=True`) or a `torch.LongTensor`. The possible
|
or when `config.return_dict_in_generate=True`) or a `torch.LongTensor`. The possible
|
||||||
[`~utils.ModelOutput`] types are:
|
[`~utils.ModelOutput`] types are:
|
||||||
|
- [`~generation.GenerateEncoderDecoderOutput`],
|
||||||
- [`~generation.GreedySearchEncoderDecoderOutput`],
|
- [`~generation.GenerateBeamEncoderDecoderOutput`]
|
||||||
- [`~generation.SampleEncoderDecoderOutput`],
|
|
||||||
- [`~generation.BeamSearchEncoderDecoderOutput`],
|
|
||||||
- [`~generation.BeamSampleEncoderDecoderOutput`]
|
|
||||||
"""
|
"""
|
||||||
# prepare text_decoder_input_ids
|
# prepare text_decoder_input_ids
|
||||||
text_decoder_input_ids = kwargs.pop("decoder_input_ids", None)
|
text_decoder_input_ids = kwargs.pop("decoder_input_ids", None)
|
||||||
@@ -3134,11 +3131,8 @@ class SeamlessM4TForSpeechToText(SeamlessM4TPreTrainedModel):
|
|||||||
[`~utils.ModelOutput`] or `torch.LongTensor`: A [`~utils.ModelOutput`] (if `return_dict_in_generate=True`
|
[`~utils.ModelOutput`] or `torch.LongTensor`: A [`~utils.ModelOutput`] (if `return_dict_in_generate=True`
|
||||||
or when `config.return_dict_in_generate=True`) or a `torch.LongTensor`. The possible
|
or when `config.return_dict_in_generate=True`) or a `torch.LongTensor`. The possible
|
||||||
[`~utils.ModelOutput`] types are:
|
[`~utils.ModelOutput`] types are:
|
||||||
|
- [`~generation.GenerateEncoderDecoderOutput`],
|
||||||
- [`~generation.GreedySearchEncoderDecoderOutput`],
|
- [`~generation.GenerateBeamEncoderDecoderOutput`]
|
||||||
- [`~generation.SampleEncoderDecoderOutput`],
|
|
||||||
- [`~generation.BeamSearchEncoderDecoderOutput`],
|
|
||||||
- [`~generation.BeamSampleEncoderDecoderOutput`]
|
|
||||||
"""
|
"""
|
||||||
text_decoder_input_ids = kwargs.pop("decoder_input_ids", None)
|
text_decoder_input_ids = kwargs.pop("decoder_input_ids", None)
|
||||||
# overwrite text_decoder_input_ids if tgt_lang is passed. The latter gets priority over decoder_input_ids.
|
# overwrite text_decoder_input_ids if tgt_lang is passed. The latter gets priority over decoder_input_ids.
|
||||||
|
|||||||
@@ -3110,11 +3110,8 @@ class SeamlessM4Tv2ForTextToText(SeamlessM4Tv2PreTrainedModel):
|
|||||||
[`~utils.ModelOutput`] or `torch.LongTensor`: A [`~utils.ModelOutput`] (if `return_dict_in_generate=True`
|
[`~utils.ModelOutput`] or `torch.LongTensor`: A [`~utils.ModelOutput`] (if `return_dict_in_generate=True`
|
||||||
or when `config.return_dict_in_generate=True`) or a `torch.LongTensor`. The possible
|
or when `config.return_dict_in_generate=True`) or a `torch.LongTensor`. The possible
|
||||||
[`~utils.ModelOutput`] types are:
|
[`~utils.ModelOutput`] types are:
|
||||||
|
- [`~generation.GenerateEncoderDecoderOutput`],
|
||||||
- [`~generation.GreedySearchEncoderDecoderOutput`],
|
- [`~generation.GenerateBeamEncoderDecoderOutput`]
|
||||||
- [`~generation.SampleEncoderDecoderOutput`],
|
|
||||||
- [`~generation.BeamSearchEncoderDecoderOutput`],
|
|
||||||
- [`~generation.BeamSampleEncoderDecoderOutput`]
|
|
||||||
"""
|
"""
|
||||||
# prepare text_decoder_input_ids
|
# prepare text_decoder_input_ids
|
||||||
text_decoder_input_ids = kwargs.pop("decoder_input_ids", None)
|
text_decoder_input_ids = kwargs.pop("decoder_input_ids", None)
|
||||||
@@ -3409,11 +3406,8 @@ class SeamlessM4Tv2ForSpeechToText(SeamlessM4Tv2PreTrainedModel):
|
|||||||
[`~utils.ModelOutput`] or `torch.LongTensor`: A [`~utils.ModelOutput`] (if `return_dict_in_generate=True`
|
[`~utils.ModelOutput`] or `torch.LongTensor`: A [`~utils.ModelOutput`] (if `return_dict_in_generate=True`
|
||||||
or when `config.return_dict_in_generate=True`) or a `torch.LongTensor`. The possible
|
or when `config.return_dict_in_generate=True`) or a `torch.LongTensor`. The possible
|
||||||
[`~utils.ModelOutput`] types are:
|
[`~utils.ModelOutput`] types are:
|
||||||
|
- [`~generation.GenerateEncoderDecoderOutput`],
|
||||||
- [`~generation.GreedySearchEncoderDecoderOutput`],
|
- [`~generation.GenerateBeamEncoderDecoderOutput`]
|
||||||
- [`~generation.SampleEncoderDecoderOutput`],
|
|
||||||
- [`~generation.BeamSearchEncoderDecoderOutput`],
|
|
||||||
- [`~generation.BeamSampleEncoderDecoderOutput`]
|
|
||||||
"""
|
"""
|
||||||
text_decoder_input_ids = kwargs.pop("decoder_input_ids", None)
|
text_decoder_input_ids = kwargs.pop("decoder_input_ids", None)
|
||||||
# overwrite text_decoder_input_ids if tgt_lang is passed. The latter gets priority over decoder_input_ids.
|
# overwrite text_decoder_input_ids if tgt_lang is passed. The latter gets priority over decoder_input_ids.
|
||||||
|
|||||||
@@ -1968,10 +1968,8 @@ class WhisperForConditionalGeneration(WhisperPreTrainedModel):
|
|||||||
|
|
||||||
else if the passed input is <= 30 seconds / <= 3000 mel input features, the possible [`~utils.ModelOutput`] types are:
|
else if the passed input is <= 30 seconds / <= 3000 mel input features, the possible [`~utils.ModelOutput`] types are:
|
||||||
|
|
||||||
- [`~generation.GreedySearchEncoderDecoderOutput`],
|
- [`~generation.GenerateEncoderDecoderOutput`],
|
||||||
- [`~generation.SampleEncoderDecoderOutput`],
|
- [`~generation.GenerateBeamEncoderDecoderOutput`]
|
||||||
- [`~generation.BeamSearchEncoderDecoderOutput`],
|
|
||||||
- [`~generation.BeamSampleEncoderDecoderOutput`]
|
|
||||||
|
|
||||||
else only the generated output sequence ids are returned.
|
else only the generated output sequence ids are returned.
|
||||||
|
|
||||||
|
|||||||
@@ -65,6 +65,10 @@ if is_torch_available():
|
|||||||
DisjunctiveConstraint,
|
DisjunctiveConstraint,
|
||||||
ForcedBOSTokenLogitsProcessor,
|
ForcedBOSTokenLogitsProcessor,
|
||||||
ForcedEOSTokenLogitsProcessor,
|
ForcedEOSTokenLogitsProcessor,
|
||||||
|
GenerateBeamDecoderOnlyOutput,
|
||||||
|
GenerateBeamEncoderDecoderOutput,
|
||||||
|
GenerateDecoderOnlyOutput,
|
||||||
|
GenerateEncoderDecoderOutput,
|
||||||
GreedySearchDecoderOnlyOutput,
|
GreedySearchDecoderOnlyOutput,
|
||||||
GreedySearchEncoderDecoderOutput,
|
GreedySearchEncoderDecoderOutput,
|
||||||
HammingDiversityLogitsProcessor,
|
HammingDiversityLogitsProcessor,
|
||||||
@@ -730,9 +734,15 @@ class GenerationTesterMixin:
|
|||||||
)
|
)
|
||||||
|
|
||||||
if model.config.is_encoder_decoder:
|
if model.config.is_encoder_decoder:
|
||||||
|
self.assertIsInstance(output_greedy, GenerateEncoderDecoderOutput)
|
||||||
|
self.assertIsInstance(output_generate, GenerateEncoderDecoderOutput)
|
||||||
|
# Retrocompatibility check
|
||||||
self.assertIsInstance(output_greedy, GreedySearchEncoderDecoderOutput)
|
self.assertIsInstance(output_greedy, GreedySearchEncoderDecoderOutput)
|
||||||
self.assertIsInstance(output_generate, GreedySearchEncoderDecoderOutput)
|
self.assertIsInstance(output_generate, GreedySearchEncoderDecoderOutput)
|
||||||
else:
|
else:
|
||||||
|
self.assertIsInstance(output_greedy, GenerateDecoderOnlyOutput)
|
||||||
|
self.assertIsInstance(output_generate, GenerateDecoderOnlyOutput)
|
||||||
|
# Retrocompatibility check
|
||||||
self.assertIsInstance(output_greedy, GreedySearchDecoderOnlyOutput)
|
self.assertIsInstance(output_greedy, GreedySearchDecoderOnlyOutput)
|
||||||
self.assertIsInstance(output_generate, GreedySearchDecoderOnlyOutput)
|
self.assertIsInstance(output_generate, GreedySearchDecoderOnlyOutput)
|
||||||
|
|
||||||
@@ -848,9 +858,15 @@ class GenerationTesterMixin:
|
|||||||
)
|
)
|
||||||
|
|
||||||
if model.config.is_encoder_decoder:
|
if model.config.is_encoder_decoder:
|
||||||
|
self.assertIsInstance(output_sample, GenerateEncoderDecoderOutput)
|
||||||
|
self.assertIsInstance(output_generate, GenerateEncoderDecoderOutput)
|
||||||
|
# Retrocompatibility check
|
||||||
self.assertIsInstance(output_sample, SampleEncoderDecoderOutput)
|
self.assertIsInstance(output_sample, SampleEncoderDecoderOutput)
|
||||||
self.assertIsInstance(output_generate, SampleEncoderDecoderOutput)
|
self.assertIsInstance(output_generate, SampleEncoderDecoderOutput)
|
||||||
else:
|
else:
|
||||||
|
self.assertIsInstance(output_sample, GenerateDecoderOnlyOutput)
|
||||||
|
self.assertIsInstance(output_generate, GenerateDecoderOnlyOutput)
|
||||||
|
# Retrocompatibility check
|
||||||
self.assertIsInstance(output_sample, SampleDecoderOnlyOutput)
|
self.assertIsInstance(output_sample, SampleDecoderOnlyOutput)
|
||||||
self.assertIsInstance(output_generate, SampleDecoderOnlyOutput)
|
self.assertIsInstance(output_generate, SampleDecoderOnlyOutput)
|
||||||
|
|
||||||
@@ -952,9 +968,15 @@ class GenerationTesterMixin:
|
|||||||
return_dict_in_generate=True,
|
return_dict_in_generate=True,
|
||||||
)
|
)
|
||||||
if model.config.is_encoder_decoder:
|
if model.config.is_encoder_decoder:
|
||||||
|
self.assertIsInstance(output_beam_search, GenerateBeamEncoderDecoderOutput)
|
||||||
|
self.assertIsInstance(output_generate, GenerateBeamEncoderDecoderOutput)
|
||||||
|
# Retrocompatibility check
|
||||||
self.assertIsInstance(output_beam_search, BeamSearchEncoderDecoderOutput)
|
self.assertIsInstance(output_beam_search, BeamSearchEncoderDecoderOutput)
|
||||||
self.assertIsInstance(output_generate, BeamSearchEncoderDecoderOutput)
|
self.assertIsInstance(output_generate, BeamSearchEncoderDecoderOutput)
|
||||||
else:
|
else:
|
||||||
|
self.assertIsInstance(output_beam_search, GenerateBeamDecoderOnlyOutput)
|
||||||
|
self.assertIsInstance(output_generate, GenerateBeamDecoderOnlyOutput)
|
||||||
|
# Retrocompatibility check
|
||||||
self.assertIsInstance(output_beam_search, BeamSearchDecoderOnlyOutput)
|
self.assertIsInstance(output_beam_search, BeamSearchDecoderOnlyOutput)
|
||||||
self.assertIsInstance(output_generate, BeamSearchDecoderOnlyOutput)
|
self.assertIsInstance(output_generate, BeamSearchDecoderOnlyOutput)
|
||||||
|
|
||||||
@@ -1109,9 +1131,15 @@ class GenerationTesterMixin:
|
|||||||
)
|
)
|
||||||
|
|
||||||
if model.config.is_encoder_decoder:
|
if model.config.is_encoder_decoder:
|
||||||
|
self.assertIsInstance(output_beam_sample, GenerateBeamEncoderDecoderOutput)
|
||||||
|
self.assertIsInstance(output_generate, GenerateBeamEncoderDecoderOutput)
|
||||||
|
# Retrocompatibility check
|
||||||
self.assertIsInstance(output_beam_sample, BeamSampleEncoderDecoderOutput)
|
self.assertIsInstance(output_beam_sample, BeamSampleEncoderDecoderOutput)
|
||||||
self.assertIsInstance(output_generate, BeamSampleEncoderDecoderOutput)
|
self.assertIsInstance(output_generate, BeamSampleEncoderDecoderOutput)
|
||||||
else:
|
else:
|
||||||
|
self.assertIsInstance(output_beam_sample, GenerateBeamDecoderOnlyOutput)
|
||||||
|
self.assertIsInstance(output_generate, GenerateBeamDecoderOnlyOutput)
|
||||||
|
# Retrocompatibility check
|
||||||
self.assertIsInstance(output_beam_sample, BeamSampleDecoderOnlyOutput)
|
self.assertIsInstance(output_beam_sample, BeamSampleDecoderOnlyOutput)
|
||||||
self.assertIsInstance(output_generate, BeamSampleDecoderOnlyOutput)
|
self.assertIsInstance(output_generate, BeamSampleDecoderOnlyOutput)
|
||||||
|
|
||||||
@@ -1238,9 +1266,15 @@ class GenerationTesterMixin:
|
|||||||
return_dict_in_generate=True,
|
return_dict_in_generate=True,
|
||||||
)
|
)
|
||||||
if model.config.is_encoder_decoder:
|
if model.config.is_encoder_decoder:
|
||||||
|
self.assertIsInstance(output_group_beam_search, GenerateBeamEncoderDecoderOutput)
|
||||||
|
self.assertIsInstance(output_generate, GenerateBeamEncoderDecoderOutput)
|
||||||
|
# Retrocompatibility check
|
||||||
self.assertIsInstance(output_group_beam_search, BeamSearchEncoderDecoderOutput)
|
self.assertIsInstance(output_group_beam_search, BeamSearchEncoderDecoderOutput)
|
||||||
self.assertIsInstance(output_generate, BeamSearchEncoderDecoderOutput)
|
self.assertIsInstance(output_generate, BeamSearchEncoderDecoderOutput)
|
||||||
else:
|
else:
|
||||||
|
self.assertIsInstance(output_group_beam_search, GenerateBeamDecoderOnlyOutput)
|
||||||
|
self.assertIsInstance(output_generate, GenerateBeamDecoderOnlyOutput)
|
||||||
|
# Retrocompatibility check
|
||||||
self.assertIsInstance(output_group_beam_search, BeamSearchDecoderOnlyOutput)
|
self.assertIsInstance(output_group_beam_search, BeamSearchDecoderOnlyOutput)
|
||||||
self.assertIsInstance(output_generate, BeamSearchDecoderOnlyOutput)
|
self.assertIsInstance(output_generate, BeamSearchDecoderOnlyOutput)
|
||||||
|
|
||||||
@@ -1390,9 +1424,15 @@ class GenerationTesterMixin:
|
|||||||
)
|
)
|
||||||
|
|
||||||
if model.config.is_encoder_decoder:
|
if model.config.is_encoder_decoder:
|
||||||
|
self.assertIsInstance(output_beam_search, GenerateBeamEncoderDecoderOutput)
|
||||||
|
self.assertIsInstance(output_generate, GenerateBeamEncoderDecoderOutput)
|
||||||
|
# Retrocompatibility check
|
||||||
self.assertIsInstance(output_beam_search, BeamSearchEncoderDecoderOutput)
|
self.assertIsInstance(output_beam_search, BeamSearchEncoderDecoderOutput)
|
||||||
self.assertIsInstance(output_generate, BeamSearchEncoderDecoderOutput)
|
self.assertIsInstance(output_generate, BeamSearchEncoderDecoderOutput)
|
||||||
else:
|
else:
|
||||||
|
self.assertIsInstance(output_beam_search, GenerateBeamDecoderOnlyOutput)
|
||||||
|
self.assertIsInstance(output_generate, GenerateBeamDecoderOnlyOutput)
|
||||||
|
# Retrocompatibility check
|
||||||
self.assertIsInstance(output_beam_search, BeamSearchDecoderOnlyOutput)
|
self.assertIsInstance(output_beam_search, BeamSearchDecoderOnlyOutput)
|
||||||
self.assertIsInstance(output_generate, BeamSearchDecoderOnlyOutput)
|
self.assertIsInstance(output_generate, BeamSearchDecoderOnlyOutput)
|
||||||
|
|
||||||
|
|||||||
@@ -53,12 +53,10 @@ if is_torch_available():
|
|||||||
set_seed,
|
set_seed,
|
||||||
)
|
)
|
||||||
from transformers.generation import (
|
from transformers.generation import (
|
||||||
GreedySearchDecoderOnlyOutput,
|
GenerateDecoderOnlyOutput,
|
||||||
GreedySearchEncoderDecoderOutput,
|
GenerateEncoderDecoderOutput,
|
||||||
InfNanRemoveLogitsProcessor,
|
InfNanRemoveLogitsProcessor,
|
||||||
LogitsProcessorList,
|
LogitsProcessorList,
|
||||||
SampleDecoderOnlyOutput,
|
|
||||||
SampleEncoderDecoderOutput,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@@ -282,8 +280,8 @@ class MusicgenDecoderTest(ModelTesterMixin, GenerationTesterMixin, PipelineTeste
|
|||||||
return_dict_in_generate=True,
|
return_dict_in_generate=True,
|
||||||
)
|
)
|
||||||
|
|
||||||
self.assertIsInstance(output_greedy, GreedySearchDecoderOnlyOutput)
|
self.assertIsInstance(output_greedy, GenerateDecoderOnlyOutput)
|
||||||
self.assertIsInstance(output_generate, GreedySearchDecoderOnlyOutput)
|
self.assertIsInstance(output_generate, GenerateDecoderOnlyOutput)
|
||||||
|
|
||||||
self.assertNotIn(config.pad_token_id, output_generate)
|
self.assertNotIn(config.pad_token_id, output_generate)
|
||||||
|
|
||||||
@@ -308,8 +306,8 @@ class MusicgenDecoderTest(ModelTesterMixin, GenerationTesterMixin, PipelineTeste
|
|||||||
return_dict_in_generate=True,
|
return_dict_in_generate=True,
|
||||||
)
|
)
|
||||||
|
|
||||||
self.assertIsInstance(output_greedy, GreedySearchDecoderOnlyOutput)
|
self.assertIsInstance(output_greedy, GenerateDecoderOnlyOutput)
|
||||||
self.assertIsInstance(output_generate, GreedySearchDecoderOnlyOutput)
|
self.assertIsInstance(output_generate, GenerateDecoderOnlyOutput)
|
||||||
|
|
||||||
# override since we don't expect the outputs of `.generate` and `.sample` to be the same, since we perform
|
# override since we don't expect the outputs of `.generate` and `.sample` to be the same, since we perform
|
||||||
# additional post-processing in the former
|
# additional post-processing in the former
|
||||||
@@ -376,8 +374,8 @@ class MusicgenDecoderTest(ModelTesterMixin, GenerationTesterMixin, PipelineTeste
|
|||||||
return_dict_in_generate=True,
|
return_dict_in_generate=True,
|
||||||
)
|
)
|
||||||
|
|
||||||
self.assertIsInstance(output_sample, SampleDecoderOnlyOutput)
|
self.assertIsInstance(output_sample, GenerateDecoderOnlyOutput)
|
||||||
self.assertIsInstance(output_generate, SampleDecoderOnlyOutput)
|
self.assertIsInstance(output_generate, GenerateDecoderOnlyOutput)
|
||||||
|
|
||||||
def test_greedy_generate_stereo_outputs(self):
|
def test_greedy_generate_stereo_outputs(self):
|
||||||
for model_class in self.greedy_sample_model_classes:
|
for model_class in self.greedy_sample_model_classes:
|
||||||
@@ -395,8 +393,8 @@ class MusicgenDecoderTest(ModelTesterMixin, GenerationTesterMixin, PipelineTeste
|
|||||||
return_dict_in_generate=True,
|
return_dict_in_generate=True,
|
||||||
)
|
)
|
||||||
|
|
||||||
self.assertIsInstance(output_greedy, GreedySearchDecoderOnlyOutput)
|
self.assertIsInstance(output_greedy, GenerateDecoderOnlyOutput)
|
||||||
self.assertIsInstance(output_generate, GreedySearchDecoderOnlyOutput)
|
self.assertIsInstance(output_generate, GenerateDecoderOnlyOutput)
|
||||||
|
|
||||||
self.assertNotIn(config.pad_token_id, output_generate)
|
self.assertNotIn(config.pad_token_id, output_generate)
|
||||||
|
|
||||||
@@ -1001,8 +999,8 @@ class MusicgenTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMixin,
|
|||||||
return_dict_in_generate=True,
|
return_dict_in_generate=True,
|
||||||
)
|
)
|
||||||
|
|
||||||
self.assertIsInstance(output_greedy, GreedySearchEncoderDecoderOutput)
|
self.assertIsInstance(output_greedy, GenerateEncoderDecoderOutput)
|
||||||
self.assertIsInstance(output_generate, GreedySearchEncoderDecoderOutput)
|
self.assertIsInstance(output_generate, GenerateEncoderDecoderOutput)
|
||||||
|
|
||||||
self.assertNotIn(config.pad_token_id, output_generate)
|
self.assertNotIn(config.pad_token_id, output_generate)
|
||||||
|
|
||||||
@@ -1026,8 +1024,8 @@ class MusicgenTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMixin,
|
|||||||
return_dict_in_generate=True,
|
return_dict_in_generate=True,
|
||||||
)
|
)
|
||||||
|
|
||||||
self.assertIsInstance(output_greedy, GreedySearchEncoderDecoderOutput)
|
self.assertIsInstance(output_greedy, GenerateEncoderDecoderOutput)
|
||||||
self.assertIsInstance(output_generate, GreedySearchEncoderDecoderOutput)
|
self.assertIsInstance(output_generate, GenerateEncoderDecoderOutput)
|
||||||
|
|
||||||
def test_sample_generate(self):
|
def test_sample_generate(self):
|
||||||
for model_class in self.greedy_sample_model_classes:
|
for model_class in self.greedy_sample_model_classes:
|
||||||
@@ -1092,8 +1090,8 @@ class MusicgenTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMixin,
|
|||||||
return_dict_in_generate=True,
|
return_dict_in_generate=True,
|
||||||
)
|
)
|
||||||
|
|
||||||
self.assertIsInstance(output_sample, SampleEncoderDecoderOutput)
|
self.assertIsInstance(output_sample, GenerateEncoderDecoderOutput)
|
||||||
self.assertIsInstance(output_generate, SampleEncoderDecoderOutput)
|
self.assertIsInstance(output_generate, GenerateEncoderDecoderOutput)
|
||||||
|
|
||||||
def test_generate_without_input_ids(self):
|
def test_generate_without_input_ids(self):
|
||||||
config, _, _, _, max_length = self._get_input_ids_and_config()
|
config, _, _, _, max_length = self._get_input_ids_and_config()
|
||||||
@@ -1141,8 +1139,8 @@ class MusicgenTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMixin,
|
|||||||
return_dict_in_generate=True,
|
return_dict_in_generate=True,
|
||||||
)
|
)
|
||||||
|
|
||||||
self.assertIsInstance(output_greedy, GreedySearchEncoderDecoderOutput)
|
self.assertIsInstance(output_greedy, GenerateEncoderDecoderOutput)
|
||||||
self.assertIsInstance(output_generate, GreedySearchEncoderDecoderOutput)
|
self.assertIsInstance(output_generate, GenerateEncoderDecoderOutput)
|
||||||
|
|
||||||
self.assertNotIn(config.pad_token_id, output_generate)
|
self.assertNotIn(config.pad_token_id, output_generate)
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user