Generate: TF supports multiple eos tokens (#21571)
This commit is contained in:
@@ -1230,7 +1230,7 @@ class TFGenerationMixin:
|
|||||||
) -> tf.Tensor:
|
) -> tf.Tensor:
|
||||||
if self.config.is_encoder_decoder and encoder_outputs is not None:
|
if self.config.is_encoder_decoder and encoder_outputs is not None:
|
||||||
# make dummy input_ids with value -100, as a sanity check ensuring that they won't be used for encoding
|
# make dummy input_ids with value -100, as a sanity check ensuring that they won't be used for encoding
|
||||||
shape = encoder_outputs.last_hidden_state.size()[:-1]
|
shape = encoder_outputs.last_hidden_state.shape[:-1]
|
||||||
return tf.ones(shape, dtype=tf.int32) * -100
|
return tf.ones(shape, dtype=tf.int32) * -100
|
||||||
|
|
||||||
if bos_token_id is None:
|
if bos_token_id is None:
|
||||||
@@ -1515,8 +1515,8 @@ class TFGenerationMixin:
|
|||||||
The maximum length of the sequence to be generated.
|
The maximum length of the sequence to be generated.
|
||||||
pad_token_id (`int`, *optional*):
|
pad_token_id (`int`, *optional*):
|
||||||
The id of the *padding* token.
|
The id of the *padding* token.
|
||||||
eos_token_id (`int`, *optional*):
|
eos_token_id (`Union[int, List[int]]`, *optional*):
|
||||||
The id of the *end-of-sequence* token.
|
The id of the *end-of-sequence* token. Optionally, use a list to set multiple *end-of-sequence* tokens.
|
||||||
output_attentions (`bool`, *optional*, defaults to `False`):
|
output_attentions (`bool`, *optional*, defaults to `False`):
|
||||||
Whether or not to return the attentions tensors of all attention layers. See `attentions` under
|
Whether or not to return the attentions tensors of all attention layers. See `attentions` under
|
||||||
returned tensors for more details.
|
returned tensors for more details.
|
||||||
@@ -1575,6 +1575,8 @@ class TFGenerationMixin:
|
|||||||
max_length = max_length if max_length is not None else self.generation_config.max_length
|
max_length = max_length if max_length is not None else self.generation_config.max_length
|
||||||
pad_token_id = pad_token_id if pad_token_id is not None else self.generation_config.pad_token_id
|
pad_token_id = pad_token_id if pad_token_id is not None else self.generation_config.pad_token_id
|
||||||
eos_token_id = eos_token_id if eos_token_id is not None else self.generation_config.eos_token_id
|
eos_token_id = eos_token_id if eos_token_id is not None else self.generation_config.eos_token_id
|
||||||
|
if isinstance(eos_token_id, int):
|
||||||
|
eos_token_id = [eos_token_id]
|
||||||
output_scores = output_scores if output_scores is not None else self.generation_config.output_scores
|
output_scores = output_scores if output_scores is not None else self.generation_config.output_scores
|
||||||
output_attentions = (
|
output_attentions = (
|
||||||
output_attentions if output_attentions is not None else self.generation_config.output_attentions
|
output_attentions if output_attentions is not None else self.generation_config.output_attentions
|
||||||
@@ -1660,7 +1662,13 @@ class TFGenerationMixin:
|
|||||||
raise ValueError("If `eos_token_id` is defined, make sure that `pad_token_id` is defined.")
|
raise ValueError("If `eos_token_id` is defined, make sure that `pad_token_id` is defined.")
|
||||||
unfinished_seq = 1 - tf.cast(finished_sequences, tf.int32)
|
unfinished_seq = 1 - tf.cast(finished_sequences, tf.int32)
|
||||||
next_tokens = next_tokens * unfinished_seq + pad_token_id * (1 - unfinished_seq)
|
next_tokens = next_tokens * unfinished_seq + pad_token_id * (1 - unfinished_seq)
|
||||||
finished_sequences = finished_sequences | (next_tokens == eos_token_id)
|
next_token_is_eos = tf.math.reduce_any(
|
||||||
|
tf.equal(
|
||||||
|
tf.broadcast_to(next_tokens, (len(eos_token_id), batch_size)), tf.expand_dims(eos_token_id, -1)
|
||||||
|
),
|
||||||
|
axis=0,
|
||||||
|
)
|
||||||
|
finished_sequences = finished_sequences | next_token_is_eos
|
||||||
|
|
||||||
# update `generated` and `cur_len`
|
# update `generated` and `cur_len`
|
||||||
update_indices = tf.stack([tf.range(batch_size), tf.broadcast_to(cur_len, [batch_size])], axis=-1)
|
update_indices = tf.stack([tf.range(batch_size), tf.broadcast_to(cur_len, [batch_size])], axis=-1)
|
||||||
@@ -1776,8 +1784,8 @@ class TFGenerationMixin:
|
|||||||
The maximum length of the sequence to be generated.
|
The maximum length of the sequence to be generated.
|
||||||
pad_token_id (`int`, *optional*):
|
pad_token_id (`int`, *optional*):
|
||||||
The id of the *padding* token.
|
The id of the *padding* token.
|
||||||
eos_token_id (`int`, *optional*):
|
eos_token_id (`Union[int, List[int]]`, *optional*):
|
||||||
The id of the *end-of-sequence* token.
|
The id of the *end-of-sequence* token. Optionally, use a list to set multiple *end-of-sequence* tokens.
|
||||||
seed (`List[int]`, *optional*):
|
seed (`List[int]`, *optional*):
|
||||||
Random seed to control sampling, containing two integers, used when `do_sample` is `True`. See the
|
Random seed to control sampling, containing two integers, used when `do_sample` is `True`. See the
|
||||||
`seed` argument from stateless functions in `tf.random`.
|
`seed` argument from stateless functions in `tf.random`.
|
||||||
@@ -1852,6 +1860,8 @@ class TFGenerationMixin:
|
|||||||
max_length = max_length if max_length is not None else self.generation_config.max_length
|
max_length = max_length if max_length is not None else self.generation_config.max_length
|
||||||
pad_token_id = pad_token_id if pad_token_id is not None else self.generation_config.pad_token_id
|
pad_token_id = pad_token_id if pad_token_id is not None else self.generation_config.pad_token_id
|
||||||
eos_token_id = eos_token_id if eos_token_id is not None else self.generation_config.eos_token_id
|
eos_token_id = eos_token_id if eos_token_id is not None else self.generation_config.eos_token_id
|
||||||
|
if isinstance(eos_token_id, int):
|
||||||
|
eos_token_id = [eos_token_id]
|
||||||
output_scores = output_scores if output_scores is not None else self.generation_config.output_scores
|
output_scores = output_scores if output_scores is not None else self.generation_config.output_scores
|
||||||
output_attentions = (
|
output_attentions = (
|
||||||
output_attentions if output_attentions is not None else self.generation_config.output_attentions
|
output_attentions if output_attentions is not None else self.generation_config.output_attentions
|
||||||
@@ -1943,7 +1953,13 @@ class TFGenerationMixin:
|
|||||||
raise ValueError("If `eos_token_id` is defined, make sure that `pad_token_id` is defined.")
|
raise ValueError("If `eos_token_id` is defined, make sure that `pad_token_id` is defined.")
|
||||||
unfinished_seq = 1 - tf.cast(finished_sequences, tf.int32)
|
unfinished_seq = 1 - tf.cast(finished_sequences, tf.int32)
|
||||||
next_tokens = next_tokens * unfinished_seq + pad_token_id * (1 - unfinished_seq)
|
next_tokens = next_tokens * unfinished_seq + pad_token_id * (1 - unfinished_seq)
|
||||||
finished_sequences = finished_sequences | (next_tokens == eos_token_id)
|
next_token_is_eos = tf.math.reduce_any(
|
||||||
|
tf.equal(
|
||||||
|
tf.broadcast_to(next_tokens, (len(eos_token_id), batch_size)), tf.expand_dims(eos_token_id, -1)
|
||||||
|
),
|
||||||
|
axis=0,
|
||||||
|
)
|
||||||
|
finished_sequences = finished_sequences | next_token_is_eos
|
||||||
|
|
||||||
# update `generated` and `cur_len`
|
# update `generated` and `cur_len`
|
||||||
update_indices = tf.stack([tf.range(batch_size), tf.broadcast_to(cur_len, [batch_size])], axis=-1)
|
update_indices = tf.stack([tf.range(batch_size), tf.broadcast_to(cur_len, [batch_size])], axis=-1)
|
||||||
@@ -2079,8 +2095,8 @@ class TFGenerationMixin:
|
|||||||
The maximum length of the sequence to be generated.
|
The maximum length of the sequence to be generated.
|
||||||
pad_token_id (`int`, *optional*):
|
pad_token_id (`int`, *optional*):
|
||||||
The id of the *padding* token.
|
The id of the *padding* token.
|
||||||
eos_token_id (`int`, *optional*):
|
eos_token_id (`Union[int, List[int]]`, *optional*):
|
||||||
The id of the *end-of-sequence* token.
|
The id of the *end-of-sequence* token. Optionally, use a list to set multiple *end-of-sequence* tokens.
|
||||||
length_penalty (`float`, *optional*, defaults to 1.0):
|
length_penalty (`float`, *optional*, defaults to 1.0):
|
||||||
Exponential penalty to the length that is used with beam-based generation. It is applied as an exponent
|
Exponential penalty to the length that is used with beam-based generation. It is applied as an exponent
|
||||||
to the sequence length, which in turn is used to divide the score of the sequence. Since the score is
|
to the sequence length, which in turn is used to divide the score of the sequence. Since the score is
|
||||||
@@ -2180,6 +2196,8 @@ class TFGenerationMixin:
|
|||||||
max_length = max_length if max_length is not None else self.generation_config.max_length
|
max_length = max_length if max_length is not None else self.generation_config.max_length
|
||||||
pad_token_id = pad_token_id if pad_token_id is not None else self.generation_config.pad_token_id
|
pad_token_id = pad_token_id if pad_token_id is not None else self.generation_config.pad_token_id
|
||||||
eos_token_id = eos_token_id if eos_token_id is not None else self.generation_config.eos_token_id
|
eos_token_id = eos_token_id if eos_token_id is not None else self.generation_config.eos_token_id
|
||||||
|
if isinstance(eos_token_id, int):
|
||||||
|
eos_token_id = [eos_token_id]
|
||||||
num_return_sequences = (
|
num_return_sequences = (
|
||||||
num_return_sequences if num_return_sequences is not None else self.generation_config.num_return_sequences
|
num_return_sequences if num_return_sequences is not None else self.generation_config.num_return_sequences
|
||||||
)
|
)
|
||||||
@@ -2401,9 +2419,18 @@ class TFGenerationMixin:
|
|||||||
# Update current sequences: Did the top `num_beams` sequences reach an end marker?
|
# Update current sequences: Did the top `num_beams` sequences reach an end marker?
|
||||||
# To prevent these just finished sequences from being added to the current sequences
|
# To prevent these just finished sequences from being added to the current sequences
|
||||||
# set of active beam search sequences, set their log probs to a very large negative value.
|
# set of active beam search sequences, set their log probs to a very large negative value.
|
||||||
eos_in_next_token = topk_sequences[:, :, cur_len] == eos_token_id
|
|
||||||
if eos_token_id is None:
|
if eos_token_id is None:
|
||||||
eos_in_next_token = tf.broadcast_to(eos_in_next_token, topk_sequences[:, :, cur_len].shape)
|
eos_in_next_token = tf.zeros(topk_sequences[:, :, cur_len].shape, dtype=tf.bool)
|
||||||
|
else:
|
||||||
|
eos_in_next_token = tf.math.reduce_any(
|
||||||
|
tf.equal(
|
||||||
|
tf.broadcast_to(
|
||||||
|
topk_sequences[:, :, cur_len], [len(eos_token_id)] + topk_sequences[:, :, cur_len].shape
|
||||||
|
),
|
||||||
|
tf.expand_dims(tf.expand_dims(eos_token_id, -1), -1),
|
||||||
|
),
|
||||||
|
axis=0,
|
||||||
|
)
|
||||||
did_topk_just_finished = eos_in_next_token & tf.broadcast_to(
|
did_topk_just_finished = eos_in_next_token & tf.broadcast_to(
|
||||||
tf.concat((tf.ones((num_beams), dtype=tf.bool), tf.zeros((num_beams), dtype=tf.bool)), axis=0),
|
tf.concat((tf.ones((num_beams), dtype=tf.bool), tf.zeros((num_beams), dtype=tf.bool)), axis=0),
|
||||||
shape_list(eos_in_next_token),
|
shape_list(eos_in_next_token),
|
||||||
@@ -2649,8 +2676,8 @@ class TFGenerationMixin:
|
|||||||
The maximum length of the sequence to be generated.
|
The maximum length of the sequence to be generated.
|
||||||
pad_token_id (`int`, *optional*):
|
pad_token_id (`int`, *optional*):
|
||||||
The id of the *padding* token.
|
The id of the *padding* token.
|
||||||
eos_token_id (`int`, *optional*):
|
eos_token_id (`Union[int, List[int]]`, *optional*):
|
||||||
The id of the *end-of-sequence* token.
|
The id of the *end-of-sequence* token. Optionally, use a list to set multiple *end-of-sequence* tokens.
|
||||||
output_attentions (`bool`, *optional*, defaults to `False`):
|
output_attentions (`bool`, *optional*, defaults to `False`):
|
||||||
Whether or not to return the attentions tensors of all attention layers. See `attentions` under
|
Whether or not to return the attentions tensors of all attention layers. See `attentions` under
|
||||||
returned tensors for more details.
|
returned tensors for more details.
|
||||||
@@ -2700,6 +2727,8 @@ class TFGenerationMixin:
|
|||||||
max_length = max_length if max_length is not None else self.generation_config.max_length
|
max_length = max_length if max_length is not None else self.generation_config.max_length
|
||||||
pad_token_id = pad_token_id if pad_token_id is not None else self.generation_config.pad_token_id
|
pad_token_id = pad_token_id if pad_token_id is not None else self.generation_config.pad_token_id
|
||||||
eos_token_id = eos_token_id if eos_token_id is not None else self.generation_config.eos_token_id
|
eos_token_id = eos_token_id if eos_token_id is not None else self.generation_config.eos_token_id
|
||||||
|
if isinstance(eos_token_id, int):
|
||||||
|
eos_token_id = [eos_token_id]
|
||||||
output_scores = output_scores if output_scores is not None else self.generation_config.output_scores
|
output_scores = output_scores if output_scores is not None else self.generation_config.output_scores
|
||||||
output_attentions = (
|
output_attentions = (
|
||||||
output_attentions if output_attentions is not None else self.generation_config.output_attentions
|
output_attentions if output_attentions is not None else self.generation_config.output_attentions
|
||||||
@@ -2924,7 +2953,13 @@ class TFGenerationMixin:
|
|||||||
raise ValueError("If `eos_token_id` is defined, make sure that `pad_token_id` is defined.")
|
raise ValueError("If `eos_token_id` is defined, make sure that `pad_token_id` is defined.")
|
||||||
unfinished_seq = 1 - tf.cast(finished_sequences, tf.int32)
|
unfinished_seq = 1 - tf.cast(finished_sequences, tf.int32)
|
||||||
next_tokens = next_tokens * unfinished_seq + pad_token_id * (1 - unfinished_seq)
|
next_tokens = next_tokens * unfinished_seq + pad_token_id * (1 - unfinished_seq)
|
||||||
finished_sequences = finished_sequences | (next_tokens == eos_token_id)
|
next_token_is_eos = tf.math.reduce_any(
|
||||||
|
tf.equal(
|
||||||
|
tf.broadcast_to(next_tokens, (len(eos_token_id), batch_size)), tf.expand_dims(eos_token_id, -1)
|
||||||
|
),
|
||||||
|
axis=0,
|
||||||
|
)
|
||||||
|
finished_sequences = finished_sequences | next_token_is_eos
|
||||||
|
|
||||||
# update `generated` and `cur_len`
|
# update `generated` and `cur_len`
|
||||||
update_indices = tf.stack([tf.range(batch_size), tf.broadcast_to(cur_len, [batch_size])], axis=-1)
|
update_indices = tf.stack([tf.range(batch_size), tf.broadcast_to(cur_len, [batch_size])], axis=-1)
|
||||||
|
|||||||
@@ -1702,8 +1702,8 @@ class GenerationMixin:
|
|||||||
used to tell if the generation loop should stop.
|
used to tell if the generation loop should stop.
|
||||||
pad_token_id (`int`, *optional*):
|
pad_token_id (`int`, *optional*):
|
||||||
The id of the *padding* token.
|
The id of the *padding* token.
|
||||||
eos_token_id (`int`, *optional*):
|
eos_token_id (`Union[int, List[int]]`, *optional*):
|
||||||
The id of the *end-of-sequence* token.
|
The id of the *end-of-sequence* token. Optionally, use a list to set multiple *end-of-sequence* tokens.
|
||||||
output_attentions (`bool`, *optional*, defaults to `False`):
|
output_attentions (`bool`, *optional*, defaults to `False`):
|
||||||
Whether or not to return the attentions tensors of all attention layers. See `attentions` under
|
Whether or not to return the attentions tensors of all attention layers. See `attentions` under
|
||||||
returned tensors for more details.
|
returned tensors for more details.
|
||||||
@@ -2057,8 +2057,8 @@ class GenerationMixin:
|
|||||||
tokens. The maximum length of the sequence to be generated.
|
tokens. The maximum length of the sequence to be generated.
|
||||||
pad_token_id (`int`, *optional*):
|
pad_token_id (`int`, *optional*):
|
||||||
The id of the *padding* token.
|
The id of the *padding* token.
|
||||||
eos_token_id (`int`, *optional*):
|
eos_token_id (`Union[int, List[int]]`, *optional*):
|
||||||
The id of the *end-of-sequence* token.
|
The id of the *end-of-sequence* token. Optionally, use a list to set multiple *end-of-sequence* tokens.
|
||||||
output_attentions (`bool`, *optional*, defaults to `False`):
|
output_attentions (`bool`, *optional*, defaults to `False`):
|
||||||
Whether or not to return the attentions tensors of all attention layers. See `attentions` under
|
Whether or not to return the attentions tensors of all attention layers. See `attentions` under
|
||||||
returned tensors for more details.
|
returned tensors for more details.
|
||||||
@@ -2306,8 +2306,8 @@ class GenerationMixin:
|
|||||||
tokens. The maximum length of the sequence to be generated.
|
tokens. The maximum length of the sequence to be generated.
|
||||||
pad_token_id (`int`, *optional*):
|
pad_token_id (`int`, *optional*):
|
||||||
The id of the *padding* token.
|
The id of the *padding* token.
|
||||||
eos_token_id (`int`, *optional*):
|
eos_token_id (`Union[int, List[int]]`, *optional*):
|
||||||
The id of the *end-of-sequence* token.
|
The id of the *end-of-sequence* token. Optionally, use a list to set multiple *end-of-sequence* tokens.
|
||||||
output_attentions (`bool`, *optional*, defaults to `False`):
|
output_attentions (`bool`, *optional*, defaults to `False`):
|
||||||
Whether or not to return the attentions tensors of all attention layers. See `attentions` under
|
Whether or not to return the attentions tensors of all attention layers. See `attentions` under
|
||||||
returned tensors for more details.
|
returned tensors for more details.
|
||||||
@@ -2574,8 +2574,8 @@ class GenerationMixin:
|
|||||||
tokens. The maximum length of the sequence to be generated.
|
tokens. The maximum length of the sequence to be generated.
|
||||||
pad_token_id (`int`, *optional*):
|
pad_token_id (`int`, *optional*):
|
||||||
The id of the *padding* token.
|
The id of the *padding* token.
|
||||||
eos_token_id (`int`, *optional*):
|
eos_token_id (`Union[int, List[int]]`, *optional*):
|
||||||
The id of the *end-of-sequence* token.
|
The id of the *end-of-sequence* token. Optionally, use a list to set multiple *end-of-sequence* tokens.
|
||||||
output_attentions (`bool`, *optional*, defaults to `False`):
|
output_attentions (`bool`, *optional*, defaults to `False`):
|
||||||
Whether or not to return the attentions tensors of all attention layers. See `attentions` under
|
Whether or not to return the attentions tensors of all attention layers. See `attentions` under
|
||||||
returned tensors for more details.
|
returned tensors for more details.
|
||||||
@@ -2902,8 +2902,8 @@ class GenerationMixin:
|
|||||||
tokens. The maximum length of the sequence to be generated.
|
tokens. The maximum length of the sequence to be generated.
|
||||||
pad_token_id (`int`, *optional*):
|
pad_token_id (`int`, *optional*):
|
||||||
The id of the *padding* token.
|
The id of the *padding* token.
|
||||||
eos_token_id (`int`, *optional*):
|
eos_token_id (`Union[int, List[int]]`, *optional*):
|
||||||
The id of the *end-of-sequence* token.
|
The id of the *end-of-sequence* token. Optionally, use a list to set multiple *end-of-sequence* tokens.
|
||||||
output_attentions (`bool`, *optional*, defaults to `False`):
|
output_attentions (`bool`, *optional*, defaults to `False`):
|
||||||
Whether or not to return the attentions tensors of all attention layers. See `attentions` under
|
Whether or not to return the attentions tensors of all attention layers. See `attentions` under
|
||||||
returned tensors for more details.
|
returned tensors for more details.
|
||||||
@@ -3230,8 +3230,8 @@ class GenerationMixin:
|
|||||||
tokens. The maximum length of the sequence to be generated.
|
tokens. The maximum length of the sequence to be generated.
|
||||||
pad_token_id (`int`, *optional*):
|
pad_token_id (`int`, *optional*):
|
||||||
The id of the *padding* token.
|
The id of the *padding* token.
|
||||||
eos_token_id (`int`, *optional*):
|
eos_token_id (`Union[int, List[int]]`, *optional*):
|
||||||
The id of the *end-of-sequence* token.
|
The id of the *end-of-sequence* token. Optionally, use a list to set multiple *end-of-sequence* tokens.
|
||||||
output_attentions (`bool`, *optional*, defaults to `False`):
|
output_attentions (`bool`, *optional*, defaults to `False`):
|
||||||
Whether or not to return the attentions tensors of all attention layers. See `attentions` under
|
Whether or not to return the attentions tensors of all attention layers. See `attentions` under
|
||||||
returned tensors for more details.
|
returned tensors for more details.
|
||||||
@@ -3613,8 +3613,8 @@ class GenerationMixin:
|
|||||||
tokens. The maximum length of the sequence to be generated.
|
tokens. The maximum length of the sequence to be generated.
|
||||||
pad_token_id (`int`, *optional*):
|
pad_token_id (`int`, *optional*):
|
||||||
The id of the *padding* token.
|
The id of the *padding* token.
|
||||||
eos_token_id (`int`, *optional*):
|
eos_token_id (`Union[int, List[int]]`, *optional*):
|
||||||
The id of the *end-of-sequence* token.
|
The id of the *end-of-sequence* token. Optionally, use a list to set multiple *end-of-sequence* tokens.
|
||||||
output_attentions (`bool`, *optional*, defaults to `False`):
|
output_attentions (`bool`, *optional*, defaults to `False`):
|
||||||
Whether or not to return the attentions tensors of all attention layers. See `attentions` under
|
Whether or not to return the attentions tensors of all attention layers. See `attentions` under
|
||||||
returned tensors for more details.
|
returned tensors for more details.
|
||||||
|
|||||||
@@ -12,11 +12,15 @@ class GenerationIntegrationTestsMixin:
|
|||||||
# To be populated by the child classes
|
# To be populated by the child classes
|
||||||
framework_dependent_parameters = {
|
framework_dependent_parameters = {
|
||||||
"AutoModelForCausalLM": None,
|
"AutoModelForCausalLM": None,
|
||||||
|
"AutoModelForSpeechSeq2Seq": None,
|
||||||
"AutoModelForSeq2SeqLM": None,
|
"AutoModelForSeq2SeqLM": None,
|
||||||
|
"AutoModelForVision2Seq": None,
|
||||||
"LogitsProcessorList": None,
|
"LogitsProcessorList": None,
|
||||||
"MinLengthLogitsProcessor": None,
|
"MinLengthLogitsProcessor": None,
|
||||||
"create_tensor_fn": None,
|
"create_tensor_fn": None,
|
||||||
|
"floats_tensor": None,
|
||||||
"return_tensors": None,
|
"return_tensors": None,
|
||||||
|
"set_seed": None,
|
||||||
}
|
}
|
||||||
|
|
||||||
def test_validate_generation_inputs(self):
|
def test_validate_generation_inputs(self):
|
||||||
@@ -486,3 +490,171 @@ class GenerationIntegrationTestsMixin:
|
|||||||
input_ids = tokenizer(article, return_tensors=return_tensors).input_ids
|
input_ids = tokenizer(article, return_tensors=return_tensors).input_ids
|
||||||
with self.assertRaises(ValueError):
|
with self.assertRaises(ValueError):
|
||||||
model.generate(input_ids, input_ids=input_ids)
|
model.generate(input_ids, input_ids=input_ids)
|
||||||
|
|
||||||
|
def test_generate_too_many_encoder_kwargs(self):
    """`generate` must raise a `ValueError` when given both `input_ids` and `inputs_embeds`."""
    params = self.framework_dependent_parameters
    seq2seq_cls = params["AutoModelForSeq2SeqLM"]
    tensor_format = params["return_tensors"]

    prompt = "I need input_ids to generate"
    tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-bart")
    model = seq2seq_cls.from_pretrained("hf-internal-testing/tiny-random-bart", max_length=10)
    encoded_ids = tokenizer(prompt, return_tensors=tensor_format).input_ids

    # The same tensor is deliberately passed under two competing encoder input names.
    with self.assertRaises(ValueError):
        model.generate(input_ids=encoded_ids, inputs_embeds=encoded_ids)
|
||||||
|
|
||||||
|
def test_generate_input_features_as_encoder_kwarg(self):
    """Check that `generate` returns the same sequences for positional and `input_features=` inputs.

    The same random features are fed to the model twice, once as the first positional argument and
    once through the `input_features` keyword. Both outputs must be equal and have shape `(3, 5)`.
    """
    model_cls = self.framework_dependent_parameters["AutoModelForSpeechSeq2Seq"]
    floats_tensor = self.framework_dependent_parameters["floats_tensor"]
    is_pt = not model_cls.__name__.startswith("TF")

    input_features = floats_tensor((3, 80, 60))
    model = model_cls.from_pretrained("hf-internal-testing/tiny-random-WhisperForConditionalGeneration")
    if is_pt:
        # `Tensor.to` returns the moved tensor instead of moving it in place, so the result has to be
        # kept; otherwise the inputs stay on their original device while the model is moved.
        input_features = input_features.to(torch_device)
        model = model.to(torch_device)

    output_sequences_kwargs = model.generate(input_features=input_features, max_length=5)
    output_sequences = model.generate(input_features, max_length=5)
    if is_pt:
        # Bring the PyTorch outputs back to host memory so numpy can compare them.
        output_sequences_kwargs = output_sequences_kwargs.cpu().numpy()
        output_sequences = output_sequences.cpu().numpy()

    self.assertTrue(np.array_equal(output_sequences, output_sequences_kwargs))
    self.assertEqual(output_sequences.shape, (3, 5))
|
||||||
|
|
||||||
|
def test_generate_pixel_values_as_encoder_kwarg(self):
    """Check that `generate` returns the same sequences for positional and `pixel_values=` inputs."""
    params = self.framework_dependent_parameters
    vision_cls = params["AutoModelForVision2Seq"]
    make_floats = params["floats_tensor"]
    running_pytorch = not vision_cls.__name__.startswith("TF")

    pixel_values = make_floats((2, 3, 30, 30))
    model = vision_cls.from_pretrained("hf-internal-testing/tiny-random-VisionEncoderDecoderModel-vit-gpt2")
    # NOTE(review): presumably disabled so generation always runs to `max_length` -- confirm.
    model.config.decoder.eos_token_id = None
    if running_pytorch:
        pixel_values = pixel_values.to(torch_device)
        model = model.to(torch_device)

    from_keyword = model.generate(pixel_values=pixel_values, max_length=5)
    from_positional = model.generate(pixel_values, max_length=5)
    if running_pytorch:
        # numpy comparison needs host-side arrays
        from_keyword = from_keyword.cpu().numpy()
        from_positional = from_positional.cpu().numpy()

    self.assertTrue(np.array_equal(from_positional, from_keyword))
    self.assertEqual(from_positional.shape, (2, 5))
|
||||||
|
|
||||||
|
def test_generate_encoder_outputs_attention_mask(self):
    """Generating from precomputed `encoder_outputs` must give the same result with or without an all-ones mask."""
    params = self.framework_dependent_parameters
    speech_cls = params["AutoModelForSpeechSeq2Seq"]
    make_floats = params["floats_tensor"]
    to_tensor = params["create_tensor_fn"]
    running_pytorch = not speech_cls.__name__.startswith("TF")

    input_features = make_floats((3, 80, 60))
    # The mask has the same shape as the features and is filled with ones.
    attention_mask = to_tensor(np.ones(input_features.shape))
    model = speech_cls.from_pretrained("hf-internal-testing/tiny-random-WhisperForConditionalGeneration")
    if running_pytorch:
        input_features = input_features.to(torch_device)
        attention_mask = attention_mask.to(torch_device)
        model = model.to(torch_device)

    # Run the encoder once and reuse its outputs for both generate calls.
    encoder_outputs = model.get_encoder()(input_features)

    without_mask = model.generate(encoder_outputs=encoder_outputs)
    with_mask = model.generate(encoder_outputs=encoder_outputs, attention_mask=attention_mask)
    if running_pytorch:
        without_mask = without_mask.cpu().numpy()
        with_mask = with_mask.cpu().numpy()

    self.assertTrue(np.array_equal(without_mask, with_mask))
|
||||||
|
|
||||||
|
def test_eos_token_id_int_and_list_greedy_search(self):
    """Greedy search must stop at the same length whether `eos_token_id` is an int or a list of ints."""
    model_cls = self.framework_dependent_parameters["AutoModelForCausalLM"]
    return_tensors = self.framework_dependent_parameters["return_tensors"]
    is_pt = not model_cls.__name__.startswith("TF")

    generation_kwargs = {
        "do_sample": False,
        "num_beams": 1,
    }
    expectation = 13

    tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-gpt2")
    text = """Hello, my dog is cute and"""
    tokens = tokenizer(text, return_tensors=return_tensors)
    model = model_cls.from_pretrained("hf-internal-testing/tiny-random-gpt2")
    if is_pt:
        model = model.to(torch_device)
        tokens = tokens.to(torch_device)

    # First the single-id form, then the list form; both must end the first sequence at `expectation`.
    for eos_token_id in (873, [873, 198]):
        generated_tokens = model.generate(**tokens, eos_token_id=eos_token_id, **generation_kwargs)
        # `assertEqual` reports both lengths on failure, unlike `assertTrue(a == b)`.
        self.assertEqual(expectation, len(generated_tokens[0]), msg=f"eos_token_id={eos_token_id}")
|
||||||
|
|
||||||
|
def test_eos_token_id_int_and_list_contrastive_search(self):
    """Contrastive search must stop at the same length whether `eos_token_id` is an int or a list of ints."""
    model_cls = self.framework_dependent_parameters["AutoModelForCausalLM"]
    return_tensors = self.framework_dependent_parameters["return_tensors"]
    is_pt = not model_cls.__name__.startswith("TF")

    generation_kwargs = {
        "do_sample": False,
        "num_beams": 1,
        "penalty_alpha": 0.6,
        "top_k": 4,
    }
    expectation = 17

    tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-gpt2")
    text = """Hello, my dog is cute and"""
    tokens = tokenizer(text, return_tensors=return_tensors)
    model = model_cls.from_pretrained("hf-internal-testing/tiny-random-gpt2")
    if is_pt:
        model = model.to(torch_device)
        tokens = tokens.to(torch_device)

    # First the single-id form, then the list form; both must end the first sequence at `expectation`.
    for eos_token_id in (225, [225, 198]):
        generated_tokens = model.generate(**tokens, eos_token_id=eos_token_id, **generation_kwargs)
        # `assertEqual` reports both lengths on failure, unlike `assertTrue(a == b)`.
        self.assertEqual(expectation, len(generated_tokens[0]), msg=f"eos_token_id={eos_token_id}")
|
||||||
|
|
||||||
|
def test_eos_token_id_int_and_list_beam_search(self):
    """Beam search must end the first sequence at the same length for int and list `eos_token_id`.

    The returned row may be exactly `expected_length` long, or longer as long as every token past
    `expected_length` equals `model.config.pad_token_id`.
    """
    params = self.framework_dependent_parameters
    causal_lm_cls = params["AutoModelForCausalLM"]
    tensor_format = params["return_tensors"]
    running_pytorch = not causal_lm_cls.__name__.startswith("TF")

    beam_kwargs = {"do_sample": False, "num_beams": 3}
    expected_length = 13

    tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-gpt2")
    prompt = "Hello, my dog is cute and"
    tokens = tokenizer(prompt, return_tensors=tensor_format)
    model = causal_lm_cls.from_pretrained("hf-internal-testing/tiny-random-gpt2")
    if running_pytorch:
        model = model.to(torch_device)
        tokens = tokens.to(torch_device)

    # Single-id form first, then the list form, each checked with the same two accepted outcomes.
    for eos_token_id in (873, [873, 198]):
        generated_tokens = model.generate(**tokens, eos_token_id=eos_token_id, **beam_kwargs)
        first_sequence = generated_tokens[0]
        exact_length = expected_length == len(first_sequence)
        padded_tail = expected_length < len(first_sequence) and all(
            [token == model.config.pad_token_id for token in first_sequence[expected_length:]]
        )
        self.assertTrue(exact_length or padded_tail)
|
||||||
|
|||||||
@@ -19,6 +19,7 @@ import unittest
|
|||||||
from transformers import is_tf_available
|
from transformers import is_tf_available
|
||||||
from transformers.testing_utils import require_tf, slow
|
from transformers.testing_utils import require_tf, slow
|
||||||
|
|
||||||
|
from ..test_modeling_tf_common import floats_tensor
|
||||||
from .test_framework_agnostic import GenerationIntegrationTestsMixin
|
from .test_framework_agnostic import GenerationIntegrationTestsMixin
|
||||||
|
|
||||||
|
|
||||||
@@ -26,8 +27,11 @@ if is_tf_available():
|
|||||||
import tensorflow as tf
|
import tensorflow as tf
|
||||||
|
|
||||||
from transformers import (
|
from transformers import (
|
||||||
|
AutoTokenizer,
|
||||||
TFAutoModelForCausalLM,
|
TFAutoModelForCausalLM,
|
||||||
TFAutoModelForSeq2SeqLM,
|
TFAutoModelForSeq2SeqLM,
|
||||||
|
TFAutoModelForSpeechSeq2Seq,
|
||||||
|
TFAutoModelForVision2Seq,
|
||||||
TFLogitsProcessorList,
|
TFLogitsProcessorList,
|
||||||
TFMinLengthLogitsProcessor,
|
TFMinLengthLogitsProcessor,
|
||||||
tf_top_k_top_p_filtering,
|
tf_top_k_top_p_filtering,
|
||||||
@@ -136,15 +140,19 @@ class TFGenerationIntegrationTests(unittest.TestCase, GenerationIntegrationTests
|
|||||||
if is_tf_available():
|
if is_tf_available():
|
||||||
framework_dependent_parameters = {
|
framework_dependent_parameters = {
|
||||||
"AutoModelForCausalLM": TFAutoModelForCausalLM,
|
"AutoModelForCausalLM": TFAutoModelForCausalLM,
|
||||||
|
"AutoModelForSpeechSeq2Seq": TFAutoModelForSpeechSeq2Seq,
|
||||||
"AutoModelForSeq2SeqLM": TFAutoModelForSeq2SeqLM,
|
"AutoModelForSeq2SeqLM": TFAutoModelForSeq2SeqLM,
|
||||||
|
"AutoModelForVision2Seq": TFAutoModelForVision2Seq,
|
||||||
"LogitsProcessorList": TFLogitsProcessorList,
|
"LogitsProcessorList": TFLogitsProcessorList,
|
||||||
"MinLengthLogitsProcessor": TFMinLengthLogitsProcessor,
|
"MinLengthLogitsProcessor": TFMinLengthLogitsProcessor,
|
||||||
"create_tensor_fn": tf.convert_to_tensor,
|
"create_tensor_fn": tf.convert_to_tensor,
|
||||||
|
"floats_tensor": floats_tensor,
|
||||||
"return_tensors": "tf",
|
"return_tensors": "tf",
|
||||||
}
|
}
|
||||||
|
|
||||||
@slow
|
@slow
|
||||||
def test_generate_tf_function_export_fixed_input_length(self):
|
def test_generate_tf_function_export_fixed_input_length(self):
|
||||||
|
# TF-only test: tf.saved_model export
|
||||||
test_model = TFAutoModelForCausalLM.from_pretrained("hf-internal-testing/tiny-random-gpt2")
|
test_model = TFAutoModelForCausalLM.from_pretrained("hf-internal-testing/tiny-random-gpt2")
|
||||||
input_length = 2
|
input_length = 2
|
||||||
max_new_tokens = 2
|
max_new_tokens = 2
|
||||||
@@ -187,6 +195,7 @@ class TFGenerationIntegrationTests(unittest.TestCase, GenerationIntegrationTests
|
|||||||
|
|
||||||
@slow
|
@slow
|
||||||
def test_generate_tf_function_export_fixed_batch_size(self):
|
def test_generate_tf_function_export_fixed_batch_size(self):
|
||||||
|
# TF-only test: tf.saved_model export
|
||||||
test_model = TFAutoModelForCausalLM.from_pretrained("hf-internal-testing/tiny-random-gpt2")
|
test_model = TFAutoModelForCausalLM.from_pretrained("hf-internal-testing/tiny-random-gpt2")
|
||||||
batch_size = 1
|
batch_size = 1
|
||||||
max_new_tokens = 2
|
max_new_tokens = 2
|
||||||
@@ -226,3 +235,32 @@ class TFGenerationIntegrationTests(unittest.TestCase, GenerationIntegrationTests
|
|||||||
tf_func_outputs = serving_func(**inputs)["sequences"]
|
tf_func_outputs = serving_func(**inputs)["sequences"]
|
||||||
tf_model_outputs = test_model.generate(**inputs, max_new_tokens=max_new_tokens)
|
tf_model_outputs = test_model.generate(**inputs, max_new_tokens=max_new_tokens)
|
||||||
tf.debugging.assert_equal(tf_func_outputs, tf_model_outputs)
|
tf.debugging.assert_equal(tf_func_outputs, tf_model_outputs)
|
||||||
|
|
||||||
|
def test_eos_token_id_int_and_list_top_k_top_sampling(self):
    # Has PT equivalent: this test relies on random sampling
    # Samples twice from the same seed, once with an int `eos_token_id` and once with a list containing
    # that id, and checks that both runs produce a first sequence of the expected length.
    generation_kwargs = {
        "do_sample": True,
        "num_beams": 1,
        "top_p": 0.7,
        "top_k": 10,
        "temperature": 0.7,
    }
    expectation = 14

    tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-gpt2")
    text = """Hello, my dog is cute and"""
    tokens = tokenizer(text, return_tensors="tf")
    model = TFAutoModelForCausalLM.from_pretrained("hf-internal-testing/tiny-random-gpt2")

    for eos_token_id in (638, [638, 198]):
        # forces the generation to happen on CPU, to avoid GPU-related quirks
        with tf.device(":/CPU:0"):
            # re-seed before every run so both variants draw the same random numbers
            tf.random.set_seed(0)
            generated_tokens = model.generate(**tokens, eos_token_id=eos_token_id, **generation_kwargs)
        # assertEqual (rather than assertTrue on `==`) reports the actual length when the check fails
        self.assertEqual(len(generated_tokens[0]), expectation, msg=f"eos_token_id={eos_token_id}")
|
||||||
|
|||||||
@@ -30,15 +30,15 @@ if is_torch_available():
|
|||||||
from transformers import (
|
from transformers import (
|
||||||
AutoModelForCausalLM,
|
AutoModelForCausalLM,
|
||||||
AutoModelForSeq2SeqLM,
|
AutoModelForSeq2SeqLM,
|
||||||
|
AutoModelForSpeechSeq2Seq,
|
||||||
|
AutoModelForVision2Seq,
|
||||||
AutoTokenizer,
|
AutoTokenizer,
|
||||||
BartForConditionalGeneration,
|
BartForConditionalGeneration,
|
||||||
BartTokenizer,
|
BartTokenizer,
|
||||||
GPT2LMHeadModel,
|
GPT2LMHeadModel,
|
||||||
GPT2Tokenizer,
|
GPT2Tokenizer,
|
||||||
ImageGPTForCausalImageModeling,
|
ImageGPTForCausalImageModeling,
|
||||||
Speech2TextForConditionalGeneration,
|
|
||||||
SpeechEncoderDecoderModel,
|
SpeechEncoderDecoderModel,
|
||||||
VisionEncoderDecoderModel,
|
|
||||||
top_k_top_p_filtering,
|
top_k_top_p_filtering,
|
||||||
)
|
)
|
||||||
from transformers.generation import (
|
from transformers.generation import (
|
||||||
@@ -1790,10 +1790,13 @@ class GenerationIntegrationTests(unittest.TestCase, GenerationIntegrationTestsMi
|
|||||||
if is_torch_available():
|
if is_torch_available():
|
||||||
framework_dependent_parameters = {
|
framework_dependent_parameters = {
|
||||||
"AutoModelForCausalLM": AutoModelForCausalLM,
|
"AutoModelForCausalLM": AutoModelForCausalLM,
|
||||||
|
"AutoModelForSpeechSeq2Seq": AutoModelForSpeechSeq2Seq,
|
||||||
"AutoModelForSeq2SeqLM": AutoModelForSeq2SeqLM,
|
"AutoModelForSeq2SeqLM": AutoModelForSeq2SeqLM,
|
||||||
|
"AutoModelForVision2Seq": AutoModelForVision2Seq,
|
||||||
"LogitsProcessorList": LogitsProcessorList,
|
"LogitsProcessorList": LogitsProcessorList,
|
||||||
"MinLengthLogitsProcessor": MinLengthLogitsProcessor,
|
"MinLengthLogitsProcessor": MinLengthLogitsProcessor,
|
||||||
"create_tensor_fn": torch.tensor,
|
"create_tensor_fn": torch.tensor,
|
||||||
|
"floats_tensor": floats_tensor,
|
||||||
"return_tensors": "pt",
|
"return_tensors": "pt",
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -2093,7 +2096,7 @@ class GenerationIntegrationTests(unittest.TestCase, GenerationIntegrationTestsMi
|
|||||||
self.assertEqual(output, [{"generated_text": "Hello I believe in in in number"}])
|
self.assertEqual(output, [{"generated_text": "Hello I believe in in in number"}])
|
||||||
|
|
||||||
def test_generate_non_nlp_input_ids_as_kwarg(self):
|
def test_generate_non_nlp_input_ids_as_kwarg(self):
|
||||||
# PT-only test: AFAIK there is no non-NLP model architecture in TF that supports `input_ids` as its only input
|
# PT-only test: AFAIK there's no non-NLP model architecture in TF that supports `input_ids` as its only input
|
||||||
model = ImageGPTForCausalImageModeling.from_pretrained(
|
model = ImageGPTForCausalImageModeling.from_pretrained(
|
||||||
"hf-internal-testing/tiny-random-imagegpt", max_length=10
|
"hf-internal-testing/tiny-random-imagegpt", max_length=10
|
||||||
).to(torch_device)
|
).to(torch_device)
|
||||||
@@ -2105,17 +2108,8 @@ class GenerationIntegrationTests(unittest.TestCase, GenerationIntegrationTestsMi
|
|||||||
self.assertListEqual(output_sequences.tolist(), output_sequences_kwargs.tolist())
|
self.assertListEqual(output_sequences.tolist(), output_sequences_kwargs.tolist())
|
||||||
self.assertEqual(output_sequences.shape, (3, 10))
|
self.assertEqual(output_sequences.shape, (3, 10))
|
||||||
|
|
||||||
def test_generate_too_many_encoder_kwargs(self):
    # Passing both `input_ids` and `inputs_embeds` to `generate` is expected to raise a ValueError.
    checkpoint = "hf-internal-testing/tiny-random-bart"
    article = """I need input_ids to generate"""

    bart_tokenizer = BartTokenizer.from_pretrained(checkpoint)
    bart_model = BartForConditionalGeneration.from_pretrained(checkpoint, max_length=10)
    bart_model = bart_model.to(torch_device)

    encoded = bart_tokenizer(article, return_tensors="pt")
    input_ids = encoded.input_ids.to(torch_device)

    with self.assertRaises(ValueError):
        bart_model.generate(input_ids=input_ids, inputs_embeds=input_ids)
|
|
||||||
|
|
||||||
def test_generate_input_values_as_encoder_kwarg(self):
|
def test_generate_input_values_as_encoder_kwarg(self):
|
||||||
|
# PT-only test: AFAIK there's no generate-capable architecture in TF that supports `input_values` as its input
|
||||||
input_values = floats_tensor((2, 250))
|
input_values = floats_tensor((2, 250))
|
||||||
model = SpeechEncoderDecoderModel.from_pretrained("hf-internal-testing/tiny-random-speech-encoder-decoder")
|
model = SpeechEncoderDecoderModel.from_pretrained("hf-internal-testing/tiny-random-speech-encoder-decoder")
|
||||||
model = model.to(torch_device)
|
model = model.to(torch_device)
|
||||||
@@ -2125,43 +2119,8 @@ class GenerationIntegrationTests(unittest.TestCase, GenerationIntegrationTestsMi
|
|||||||
self.assertListEqual(output_sequences.tolist(), output_sequences_kwargs.tolist())
|
self.assertListEqual(output_sequences.tolist(), output_sequences_kwargs.tolist())
|
||||||
self.assertEqual(output_sequences.shape, (2, 5))
|
self.assertEqual(output_sequences.shape, (2, 5))
|
||||||
|
|
||||||
def test_generate_input_features_as_encoder_kwarg(self):
    # `input_features` passed by keyword and passed positionally must produce the same sequences.
    input_features = floats_tensor((3, 20, 24))
    speech_model = Speech2TextForConditionalGeneration.from_pretrained(
        "hf-internal-testing/tiny-random-speech_to_text"
    ).to(torch_device)

    # keyword call first, positional call second (same order as before)
    from_kwarg = speech_model.generate(input_features=input_features, max_length=5)
    output_sequences_kwargs = from_kwarg.cpu()
    from_positional = speech_model.generate(input_features, max_length=5)
    output_sequences = from_positional.cpu()

    self.assertListEqual(output_sequences.tolist(), output_sequences_kwargs.tolist())
    self.assertEqual(output_sequences.shape, (3, 5))
|
|
||||||
|
|
||||||
def test_generate_pixel_values_as_encoder_kwarg(self):
    # `pixel_values` passed by keyword and passed positionally must produce the same sequences.
    pixel_values = floats_tensor((2, 3, 30, 30))
    vision_model = VisionEncoderDecoderModel.from_pretrained(
        "hf-internal-testing/tiny-random-vision-encoder-decoder"
    ).to(torch_device)

    # keyword call first, positional call second (same order as before)
    from_kwarg = vision_model.generate(pixel_values=pixel_values, max_length=5)
    output_sequences_kwargs = from_kwarg.cpu()
    from_positional = vision_model.generate(pixel_values, max_length=5)
    output_sequences = from_positional.cpu()

    self.assertListEqual(output_sequences.tolist(), output_sequences_kwargs.tolist())
    self.assertEqual(output_sequences.shape, (2, 5))
|
|
||||||
|
|
||||||
def test_generate_encoder_outputs_attention_mask(self):
    # Generating from precomputed `encoder_outputs` must give the same sequences with and without
    # an all-ones `attention_mask`.
    input_values = floats_tensor((2, 250)).to(torch_device)
    attention_mask = torch.ones_like(input_values)

    speech_model = SpeechEncoderDecoderModel.from_pretrained(
        "hf-internal-testing/tiny-random-speech-encoder-decoder"
    ).to(torch_device)

    # run the encoder once and reuse its outputs for both generate calls
    encoder_outputs = speech_model.get_encoder()(input_values)

    output_sequences_no_mask = speech_model.generate(encoder_outputs=encoder_outputs).cpu()
    output_sequences_with_mask = speech_model.generate(
        encoder_outputs=encoder_outputs, attention_mask=attention_mask
    ).cpu()

    self.assertListEqual(output_sequences_no_mask.tolist(), output_sequences_with_mask.tolist())
|
|
||||||
|
|
||||||
def test_transition_scores_group_beam_search_encoder_decoder(self):
|
def test_transition_scores_group_beam_search_encoder_decoder(self):
|
||||||
|
# PT-only test: TF doesn't have group beam search
|
||||||
articles = [
|
articles = [
|
||||||
"Justin Timberlake and Jessica Biel, welcome to parenthood.",
|
"Justin Timberlake and Jessica Biel, welcome to parenthood.",
|
||||||
"Michael Phelps is arguably the most decorated Olympian of all time.",
|
"Michael Phelps is arguably the most decorated Olympian of all time.",
|
||||||
@@ -2188,64 +2147,9 @@ class GenerationIntegrationTests(unittest.TestCase, GenerationIntegrationTestsMi
|
|||||||
|
|
||||||
self.assertTrue(torch.allclose(transition_scores_sum, outputs.sequences_scores, atol=1e-3))
|
self.assertTrue(torch.allclose(transition_scores_sum, outputs.sequences_scores, atol=1e-3))
|
||||||
|
|
||||||
def test_log_scores_sample_decoder_only(self):
    # Checks the per-step probabilities of the generated tokens against hard-coded reference values.
    articles = ["I need input_ids to generate", "Short and"]
    checkpoint = "hf-internal-testing/tiny-random-gpt2"

    tokenizer = GPT2Tokenizer.from_pretrained(checkpoint)
    tokenizer.padding_side = "left"
    tokenizer.pad_token = tokenizer.eos_token

    model = GPT2LMHeadModel.from_pretrained(checkpoint).to(torch_device)
    inputs = tokenizer(articles, return_tensors="pt", padding=True).to(torch_device)

    generate_kwargs = {
        "max_length": 15,
        "return_dict_in_generate": True,
        "do_sample": False,
        "output_scores": True,
    }
    result = model.generate(**inputs, **generate_kwargs)

    # decoder-only starts generating from `input_ids`
    begin_generation = inputs.input_ids.shape[-1]
    gen_sequences = result.sequences[:, begin_generation:]

    # stack the per-step scores along dim 1, then normalise over the last dim
    stacked_scores = torch.stack(result.scores, dim=1)
    probs = torch.softmax(stacked_scores, dim=-1)

    # pick, at every step, the probability of the token that was actually generated
    token_index = gen_sequences[:, :, None]
    gen_probs = probs.gather(2, token_index).squeeze(-1)
    expected_probs = torch.tensor([[0.0014, 0.0015], [0.0014, 0.0014]])

    self.assertTrue(torch.allclose(gen_probs.cpu(), expected_probs, atol=1e-3))
|
|
||||||
|
|
||||||
def test_log_scores_sample_encoder_decoder(self):
    # Checks the per-step probabilities of the generated tokens against hard-coded reference values.
    articles = ["I need input_ids to generate", "Short and"]
    checkpoint = "hf-internal-testing/tiny-random-bart"

    tokenizer = BartTokenizer.from_pretrained(checkpoint)
    model = BartForConditionalGeneration.from_pretrained(checkpoint).to(torch_device)
    inputs = tokenizer(articles, return_tensors="pt", padding=True).to(torch_device)

    generate_kwargs = {
        "max_length": 3,
        "return_dict_in_generate": True,
        "do_sample": False,
        "num_beams": 1,
        "output_scores": True,
    }
    result = model.generate(**inputs, **generate_kwargs)

    # encoder-decoder has one decoder_start_token_id by default
    begin_generation = 1
    gen_sequences = result.sequences[:, begin_generation:]

    # stack the per-step scores along dim 1, then normalise over the last dim
    stacked_scores = torch.stack(result.scores, dim=1)
    probs = torch.softmax(stacked_scores, dim=-1)

    # pick, at every step, the probability of the token that was actually generated
    token_index = gen_sequences[:, :, None]
    gen_probs = probs.gather(2, token_index).squeeze(-1)
    expected_probs = torch.tensor([[0.0013, 1.0000], [0.0013, 1.0000]])

    self.assertTrue(torch.allclose(gen_probs.cpu(), expected_probs, atol=1e-3))
|
|
||||||
|
|
||||||
@slow
|
@slow
|
||||||
def test_beam_search_example_integration(self):
|
def test_beam_search_example_integration(self):
|
||||||
|
# PT-only test: TF doesn't have a BeamSearchScorer
|
||||||
# exactly the example provided in the docstrings of beam search, which previously
|
# exactly the example provided in the docstrings of beam search, which previously
|
||||||
# failed after directly copying from it. Refer to PR #15555
|
# failed after directly copying from it. Refer to PR #15555
|
||||||
tokenizer = AutoTokenizer.from_pretrained("t5-base")
|
tokenizer = AutoTokenizer.from_pretrained("t5-base")
|
||||||
@@ -2288,6 +2192,7 @@ class GenerationIntegrationTests(unittest.TestCase, GenerationIntegrationTestsMi
|
|||||||
|
|
||||||
@slow
|
@slow
|
||||||
def test_constrained_beam_search(self):
|
def test_constrained_beam_search(self):
|
||||||
|
# PT-only test: TF doesn't have constrained beam search
|
||||||
model = GPT2LMHeadModel.from_pretrained("gpt2").to(torch_device)
|
model = GPT2LMHeadModel.from_pretrained("gpt2").to(torch_device)
|
||||||
tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
|
tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
|
||||||
|
|
||||||
@@ -2325,6 +2230,7 @@ class GenerationIntegrationTests(unittest.TestCase, GenerationIntegrationTestsMi
|
|||||||
|
|
||||||
@slow
|
@slow
|
||||||
def test_constrained_beam_search_mixed(self):
|
def test_constrained_beam_search_mixed(self):
|
||||||
|
# PT-only test: TF doesn't have constrained beam search
|
||||||
model = GPT2LMHeadModel.from_pretrained("gpt2").to(torch_device)
|
model = GPT2LMHeadModel.from_pretrained("gpt2").to(torch_device)
|
||||||
tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
|
tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
|
||||||
|
|
||||||
@@ -2365,6 +2271,7 @@ class GenerationIntegrationTests(unittest.TestCase, GenerationIntegrationTestsMi
|
|||||||
|
|
||||||
@slow
|
@slow
|
||||||
def test_constrained_beam_search_mixed_mixin(self):
|
def test_constrained_beam_search_mixed_mixin(self):
|
||||||
|
# PT-only test: TF doesn't have constrained beam search
|
||||||
model = GPT2LMHeadModel.from_pretrained("gpt2").to(torch_device)
|
model = GPT2LMHeadModel.from_pretrained("gpt2").to(torch_device)
|
||||||
tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
|
tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
|
||||||
|
|
||||||
@@ -2402,6 +2309,7 @@ class GenerationIntegrationTests(unittest.TestCase, GenerationIntegrationTestsMi
|
|||||||
|
|
||||||
@slow
|
@slow
|
||||||
def test_constrained_beam_search_example_translation_mixin(self):
|
def test_constrained_beam_search_example_translation_mixin(self):
|
||||||
|
# PT-only test: TF doesn't have constrained beam search
|
||||||
tokenizer = AutoTokenizer.from_pretrained("t5-base")
|
tokenizer = AutoTokenizer.from_pretrained("t5-base")
|
||||||
model = AutoModelForSeq2SeqLM.from_pretrained("t5-base")
|
model = AutoModelForSeq2SeqLM.from_pretrained("t5-base")
|
||||||
|
|
||||||
@@ -2426,6 +2334,7 @@ class GenerationIntegrationTests(unittest.TestCase, GenerationIntegrationTestsMi
|
|||||||
|
|
||||||
@slow
|
@slow
|
||||||
def test_constrained_beam_search_example_integration(self):
|
def test_constrained_beam_search_example_integration(self):
|
||||||
|
# PT-only test: TF doesn't have constrained beam search
|
||||||
tokenizer = AutoTokenizer.from_pretrained("t5-base")
|
tokenizer = AutoTokenizer.from_pretrained("t5-base")
|
||||||
model = AutoModelForSeq2SeqLM.from_pretrained("t5-base")
|
model = AutoModelForSeq2SeqLM.from_pretrained("t5-base")
|
||||||
|
|
||||||
@@ -2469,6 +2378,7 @@ class GenerationIntegrationTests(unittest.TestCase, GenerationIntegrationTestsMi
|
|||||||
self.assertListEqual(outputs, ["Wie alt sind Sie?"])
|
self.assertListEqual(outputs, ["Wie alt sind Sie?"])
|
||||||
|
|
||||||
def test_constrained_beam_search_mixin_type_checks(self):
|
def test_constrained_beam_search_mixin_type_checks(self):
|
||||||
|
# PT-only test: TF doesn't have constrained beam search
|
||||||
tokenizer = AutoTokenizer.from_pretrained("patrickvonplaten/t5-tiny-random")
|
tokenizer = AutoTokenizer.from_pretrained("patrickvonplaten/t5-tiny-random")
|
||||||
model = AutoModelForSeq2SeqLM.from_pretrained("patrickvonplaten/t5-tiny-random")
|
model = AutoModelForSeq2SeqLM.from_pretrained("patrickvonplaten/t5-tiny-random")
|
||||||
|
|
||||||
@@ -2509,6 +2419,7 @@ class GenerationIntegrationTests(unittest.TestCase, GenerationIntegrationTestsMi
|
|||||||
model.generate(input_ids, force_words_ids=[[[-1]]])
|
model.generate(input_ids, force_words_ids=[[[-1]]])
|
||||||
|
|
||||||
def test_contrastive_search_batched(self):
|
def test_contrastive_search_batched(self):
|
||||||
|
# PT-only test: TF doesn't have constrained beam search
|
||||||
# Tests that contrastive search works with batched inputs (i.e. has the same output as for non-batched inputs)
|
# Tests that contrastive search works with batched inputs (i.e. has the same output as for non-batched inputs)
|
||||||
articles = ["Foo", "Bar Baz"]
|
articles = ["Foo", "Bar Baz"]
|
||||||
tokenizer = BartTokenizer.from_pretrained("hf-internal-testing/tiny-random-bart")
|
tokenizer = BartTokenizer.from_pretrained("hf-internal-testing/tiny-random-bart")
|
||||||
@@ -2533,104 +2444,8 @@ class GenerationIntegrationTests(unittest.TestCase, GenerationIntegrationTestsMi
|
|||||||
max_score_diff = (output_sequences_batched.scores[0][1] - output_sequences.scores[0][0]).abs().max()
|
max_score_diff = (output_sequences_batched.scores[0][1] - output_sequences.scores[0][0]).abs().max()
|
||||||
self.assertTrue(max_score_diff < 1e-5)
|
self.assertTrue(max_score_diff < 1e-5)
|
||||||
|
|
||||||
def test_eos_token_id_int_and_list_greedy_search(self):
    # Runs greedy search once with an int `eos_token_id` and once with a list containing that id,
    # and checks that both runs produce a first sequence of the expected length.
    generation_kwargs = {
        "do_sample": False,
        "num_beams": 1,
    }
    expectation = 13

    tokenizer = GPT2Tokenizer.from_pretrained("hf-internal-testing/tiny-random-gpt2")
    text = """Hello, my dog is cute and"""
    tokens = tokenizer(text, return_tensors="pt").to(torch_device)

    model = GPT2LMHeadModel.from_pretrained("hf-internal-testing/tiny-random-gpt2").to(torch_device)

    for eos_token_id in (873, [873, 198]):
        # re-seed before every run so both variants start from the same RNG state
        torch.manual_seed(0)
        generated_tokens = model.generate(**tokens, eos_token_id=eos_token_id, **generation_kwargs)
        # assertEqual (rather than assertTrue on `==`) reports the actual length when the check fails
        self.assertEqual(len(generated_tokens[0]), expectation, msg=f"eos_token_id={eos_token_id}")
|
|
||||||
|
|
||||||
def test_eos_token_id_int_and_list_contrastive_search(self):
    # Runs generation with `penalty_alpha`/`top_k` set, once with an int `eos_token_id` and once with a
    # list containing that id, and checks that both runs produce a first sequence of the expected length.
    generation_kwargs = {
        "do_sample": False,
        "num_beams": 1,
        "penalty_alpha": 0.6,
        "top_k": 4,
    }
    expectation = 17

    tokenizer = GPT2Tokenizer.from_pretrained("hf-internal-testing/tiny-random-gpt2")
    text = """Hello, my dog is cute and"""
    tokens = tokenizer(text, return_tensors="pt").to(torch_device)

    model = GPT2LMHeadModel.from_pretrained("hf-internal-testing/tiny-random-gpt2").to(torch_device)

    for eos_token_id in (225, [225, 198]):
        # re-seed before every run so both variants start from the same RNG state
        torch.manual_seed(0)
        generated_tokens = model.generate(**tokens, eos_token_id=eos_token_id, **generation_kwargs)
        # assertEqual (rather than assertTrue on `==`) reports the actual length when the check fails
        self.assertEqual(len(generated_tokens[0]), expectation, msg=f"eos_token_id={eos_token_id}")
|
|
||||||
|
|
||||||
def test_eos_token_id_int_and_list_top_k_top_sampling(self):
    # Samples twice from the same seed, once with an int `eos_token_id` and once with a list containing
    # that id, and checks that both runs produce a first sequence of the expected length.
    generation_kwargs = {
        "do_sample": True,
        "num_beams": 1,
        "top_p": 0.7,
        "top_k": 10,
        "temperature": 0.7,
    }
    expectation = 15

    tokenizer = GPT2Tokenizer.from_pretrained("hf-internal-testing/tiny-random-gpt2")
    text = """Hello, my dog is cute and"""
    tokens = tokenizer(text, return_tensors="pt").to(torch_device)

    model = GPT2LMHeadModel.from_pretrained("hf-internal-testing/tiny-random-gpt2").to(torch_device)

    for eos_token_id in (846, [846, 198]):
        # re-seed before every run so both variants draw the same random numbers
        torch.manual_seed(0)
        generated_tokens = model.generate(**tokens, eos_token_id=eos_token_id, **generation_kwargs)
        # assertEqual (rather than assertTrue on `==`) reports the actual length when the check fails
        self.assertEqual(len(generated_tokens[0]), expectation, msg=f"eos_token_id={eos_token_id}")
|
|
||||||
|
|
||||||
def test_eos_token_id_int_and_list_beam_search(self):
    # Runs beam search once with an int `eos_token_id` and once with a list containing that id,
    # and checks that both runs produce a first sequence of the expected length.
    generation_kwargs = {
        "do_sample": False,
        "num_beams": 3,
    }
    expectation = 13

    tokenizer = GPT2Tokenizer.from_pretrained("hf-internal-testing/tiny-random-gpt2")
    text = """Hello, my dog is cute and"""
    tokens = tokenizer(text, return_tensors="pt").to(torch_device)

    model = GPT2LMHeadModel.from_pretrained("hf-internal-testing/tiny-random-gpt2").to(torch_device)

    for eos_token_id in (873, [873, 198]):
        # re-seed before every run so both variants start from the same RNG state
        torch.manual_seed(0)
        generated_tokens = model.generate(**tokens, eos_token_id=eos_token_id, **generation_kwargs)
        # assertEqual (rather than assertTrue on `==`) reports the actual length when the check fails
        self.assertEqual(len(generated_tokens[0]), expectation, msg=f"eos_token_id={eos_token_id}")
|
|
||||||
|
|
||||||
def test_generate_from_input_embeds_decoder_only(self):
|
def test_generate_from_input_embeds_decoder_only(self):
|
||||||
|
# PT-only test: TF doesn't have a model with support to generate from input embeds (yet ;))
|
||||||
# Note: the model must support generation from input embeddings
|
# Note: the model must support generation from input embeddings
|
||||||
model = AutoModelForCausalLM.from_pretrained("hf-internal-testing/tiny-random-gpt2")
|
model = AutoModelForCausalLM.from_pretrained("hf-internal-testing/tiny-random-gpt2")
|
||||||
tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-gpt2")
|
tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-gpt2")
|
||||||
@@ -2652,3 +2467,29 @@ class GenerationIntegrationTests(unittest.TestCase, GenerationIntegrationTestsMi
|
|||||||
outputs_from_rand_embeds = model.generate(input_ids, inputs_embeds=random_embeds)
|
outputs_from_rand_embeds = model.generate(input_ids, inputs_embeds=random_embeds)
|
||||||
with self.assertRaises(AssertionError):
|
with self.assertRaises(AssertionError):
|
||||||
self.assertListEqual(outputs_from_rand_embeds.tolist(), outputs_from_embeds.tolist())
|
self.assertListEqual(outputs_from_rand_embeds.tolist(), outputs_from_embeds.tolist())
|
||||||
|
|
||||||
|
def test_eos_token_id_int_and_list_top_k_top_sampling(self):
    # Has TF equivalent: this test relies on random sampling
    # Samples twice from the same seed, once with an int `eos_token_id` and once with a list containing
    # that id, and checks that both runs produce a first sequence of the expected length.
    generation_kwargs = {
        "do_sample": True,
        "num_beams": 1,
        "top_p": 0.7,
        "top_k": 10,
        "temperature": 0.7,
    }
    expectation = 15

    tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-gpt2")
    text = """Hello, my dog is cute and"""
    tokens = tokenizer(text, return_tensors="pt").to(torch_device)
    model = AutoModelForCausalLM.from_pretrained("hf-internal-testing/tiny-random-gpt2").to(torch_device)

    for eos_token_id in (846, [846, 198]):
        # re-seed before every run so both variants draw the same random numbers
        torch.manual_seed(0)
        generated_tokens = model.generate(**tokens, eos_token_id=eos_token_id, **generation_kwargs)
        # assertEqual (rather than assertTrue on `==`) reports the actual length when the check fails
        self.assertEqual(len(generated_tokens[0]), expectation, msg=f"eos_token_id={eos_token_id}")
|
||||||
|
|||||||
Reference in New Issue
Block a user