[tests] remove tf/flax tests in /generation (#36235)
This commit is contained in:
@@ -70,16 +70,6 @@ if is_tf_available():
|
||||
TFAutoModelForSequenceClassification,
|
||||
TFSharedEmbeddings,
|
||||
)
|
||||
from transformers.generation import (
|
||||
TFBeamSampleDecoderOnlyOutput,
|
||||
TFBeamSampleEncoderDecoderOutput,
|
||||
TFBeamSearchDecoderOnlyOutput,
|
||||
TFBeamSearchEncoderDecoderOutput,
|
||||
TFGreedySearchDecoderOnlyOutput,
|
||||
TFGreedySearchEncoderDecoderOutput,
|
||||
TFSampleDecoderOnlyOutput,
|
||||
TFSampleEncoderDecoderOutput,
|
||||
)
|
||||
from transformers.modeling_tf_utils import keras
|
||||
|
||||
tf.config.experimental.enable_tensor_float_32_execution(False)
|
||||
@@ -1211,150 +1201,6 @@ class TFModelTesterMixin:
|
||||
with self.assertRaises(tf.errors.InvalidArgumentError):
|
||||
model(**prepared_inputs)
|
||||
|
||||
def test_lm_head_model_random_no_beam_search_generate(self):
    """Smoke-test `generate` without beam search on every generative model class.

    Checks, in order: the error raised when no prompt is given and the config has no BOS token, single-sequence
    sampling, the error raised when several greedy sequences are requested, multi-sequence sampling, and that
    `bad_words_ids` keeps the forbidden token sequences out of the generated continuation.
    """
    config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
    prompt_ids = inputs_dict.get("input_ids", None)

    for generative_class in self.all_generative_model_classes:
        model = generative_class(config)

        if config.bos_token_id is None:
            # Without a BOS token the model needs input ids, so a prompt-free call must fail.
            with self.assertRaises(ValueError):
                model.generate(do_sample=True, max_length=5)
            # Single returned sequence, prompt supplied.
            sampled = model.generate(prompt_ids, do_sample=True)
            self._check_generated_ids(sampled)
        elif generative_class.__name__ != "TFSpeech2TextForConditionalGeneration":
            # Models with non-text inputs won't work here; single returned sequence, no prompt.
            sampled = model.generate(do_sample=True, max_length=5)
            self._check_generated_ids(sampled)

        # Without beam search or sampling, every returned sequence would be identical,
        # so requesting more than one is rejected.
        with self.assertRaises(ValueError):
            model.generate(prompt_ids, do_sample=False, num_return_sequences=2)

        # Several returned sequences through sampling.
        multi_sampled = model.generate(prompt_ids, do_sample=True, num_return_sequences=2)
        self._check_generated_ids(multi_sampled)

        # Forbid one single-token word and one two-token word, then verify neither shows up.
        single_token_word = self._generate_random_bad_tokens(1, model)
        two_token_word = self._generate_random_bad_tokens(2, model)
        bad_words_ids = [single_token_word, two_token_word]
        output_tokens = model.generate(
            prompt_ids, do_sample=True, bad_words_ids=bad_words_ids, num_return_sequences=2
        )
        # Strip the prompt: only freshly generated tokens are inspected.
        prompt_length = prompt_ids.shape[-1]
        new_tokens = output_tokens[:, prompt_length:].numpy().tolist()
        self.assertFalse(self._check_match_tokens(new_tokens, bad_words_ids))
|
||||
|
||||
def test_lm_head_model_no_beam_search_generate_dict_outputs(self):
    """Check the output classes returned by greedy search and sampling when `return_dict_in_generate=True`.

    The expected class depends on `model.config.is_encoder_decoder`.
    """
    config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
    model_inputs = inputs_dict.get("input_ids", None)
    if model_inputs is None:
        # Fall back to `input_features` when the tester provides no `input_ids`.
        model_inputs = inputs_dict.get("input_features", None)

    # Flags shared by both calls: request every optional output, returned as a structured object.
    verbose_output_kwargs = {
        "output_scores": True,
        "output_hidden_states": True,
        "output_attentions": True,
        "return_dict_in_generate": True,
    }

    for generative_class in self.all_generative_model_classes:
        model = generative_class(config)
        output_greedy = model.generate(model_inputs, do_sample=False, **verbose_output_kwargs)
        output_sample = model.generate(model_inputs, do_sample=True, **verbose_output_kwargs)

        if model.config.is_encoder_decoder:
            expected_greedy_cls = TFGreedySearchEncoderDecoderOutput
            expected_sample_cls = TFSampleEncoderDecoderOutput
        else:
            expected_greedy_cls = TFGreedySearchDecoderOnlyOutput
            expected_sample_cls = TFSampleDecoderOnlyOutput

        self.assertIsInstance(output_greedy, expected_greedy_cls)
        self.assertIsInstance(output_sample, expected_sample_cls)
|
||||
|
||||
def test_lm_head_model_random_beam_search_generate(self):
    """Smoke-test `generate` with two beams on every generative model class.

    Checks single-sequence beam sampling, the error raised when more sequences than beams are requested,
    two-sequence beam sampling and beam search, and that `bad_words_ids` keeps the forbidden token sequences
    out of the generated continuation.
    """
    config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
    prompt_ids = inputs_dict.get("input_ids", None)

    for generative_class in self.all_generative_model_classes:
        model = generative_class(config)

        # Single returned sequence; a prompt is only passed when the config defines no BOS token.
        if config.bos_token_id is not None:
            beam_sampled = model.generate(do_sample=True, max_length=5, num_beams=2)
        else:
            beam_sampled = model.generate(prompt_ids, do_sample=True, num_beams=2)
        self._check_generated_ids(beam_sampled)

        # Asking for more sequences than there are beams is not possible.
        with self.assertRaises(ValueError):
            model.generate(prompt_ids, do_sample=False, num_return_sequences=3, num_beams=2)

        two_beams_two_sequences = {"num_beams": 2, "num_return_sequences": 2}

        # Several returned sequences, sampling.
        multi_sampled = model.generate(prompt_ids, do_sample=True, **two_beams_two_sequences)
        self._check_generated_ids(multi_sampled)
        # Several returned sequences, no sampling.
        multi_searched = model.generate(prompt_ids, do_sample=False, **two_beams_two_sequences)
        self._check_generated_ids(multi_searched)

        # Forbid one single-token word and one two-token word, then verify neither shows up.
        single_token_word = self._generate_random_bad_tokens(1, model)
        two_token_word = self._generate_random_bad_tokens(2, model)
        bad_words_ids = [single_token_word, two_token_word]
        output_tokens = model.generate(
            prompt_ids, do_sample=False, bad_words_ids=bad_words_ids, **two_beams_two_sequences
        )
        # Strip the prompt: only freshly generated tokens are inspected.
        prompt_length = prompt_ids.shape[-1]
        new_tokens = output_tokens[:, prompt_length:].numpy().tolist()
        self.assertFalse(self._check_match_tokens(new_tokens, bad_words_ids))
|
||||
|
||||
def test_lm_head_model_beam_search_generate_dict_outputs(self):
    """Check the output classes returned by beam search and beam sampling when `return_dict_in_generate=True`.

    The expected class depends on `model.config.is_encoder_decoder`.
    """
    config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
    model_inputs = inputs_dict.get("input_ids", None)
    if model_inputs is None:
        # Fall back to `input_features` when the tester provides no `input_ids`.
        model_inputs = inputs_dict.get("input_features", None)

    # Flags shared by both calls: two beams, every optional output, returned as a structured object.
    shared_kwargs = {
        "num_beams": 2,
        "output_scores": True,
        "output_hidden_states": True,
        "output_attentions": True,
        "return_dict_in_generate": True,
    }

    for generative_class in self.all_generative_model_classes:
        model = generative_class(config)
        output_beam_search = model.generate(model_inputs, do_sample=False, **shared_kwargs)
        output_beam_sample = model.generate(model_inputs, do_sample=True, **shared_kwargs)

        if model.config.is_encoder_decoder:
            expected_search_cls = TFBeamSearchEncoderDecoderOutput
            expected_sample_cls = TFBeamSampleEncoderDecoderOutput
        else:
            expected_search_cls = TFBeamSearchDecoderOnlyOutput
            expected_sample_cls = TFBeamSampleDecoderOnlyOutput

        self.assertIsInstance(output_beam_search, expected_search_cls)
        self.assertIsInstance(output_beam_sample, expected_sample_cls)
|
||||
|
||||
def test_loss_computation(self):
|
||||
config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
|
||||
for model_class in self.all_model_classes:
|
||||
@@ -1574,40 +1420,6 @@ class TFModelTesterMixin:
|
||||
if tensor_spec.dtype.is_integer:
|
||||
self.assertTrue(tensor_spec.dtype == tf.int32, "Input signatures should use tf.int32 for ints!")
|
||||
|
||||
def test_generate_with_headmasking(self):
    """Check that an all-zero head mask zeroes the matching attention weights returned by `generate`.

    Configs that are not encoder-decoder are skipped. For each of `head_mask`, `decoder_head_mask` and
    `cross_attn_head_mask`, an all-zero mask is passed to `generate`, and the corresponding attention output
    (`encoder_attentions`, `decoder_attentions`, `cross_attentions`) is expected to sum to zero.
    """
    attention_names = ["encoder_attentions", "decoder_attentions", "cross_attentions"]
    config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()

    for model_class in self.all_generative_model_classes:
        model = model_class(config)

        # We want to test only encoder-decoder models
        if not config.is_encoder_decoder:
            continue

        head_masking = {
            "head_mask": tf.zeros((config.encoder_layers, config.encoder_attention_heads)),
            "decoder_head_mask": tf.zeros((config.decoder_layers, config.decoder_attention_heads)),
            "cross_attn_head_mask": tf.zeros((config.decoder_layers, config.decoder_attention_heads)),
        }

        signature = inspect.signature(model.call)
        # NOTE(review): as written, this skips the model when `call` accepts ALL three head-mask arguments,
        # which looks inverted (one would expect to skip models that do not accept them). Left unchanged
        # because flipping it would activate the test for every such model -- confirm intent first.
        if set(head_masking.keys()) < {*signature.parameters.keys()}:
            continue

        input_ids = inputs_dict["input_ids"]
        # `max_length` must be an integer length (prompt length + 5), not the prompt tensor plus 5.
        max_length = input_ids.shape[-1] + 5

        for attn_name, (name, mask) in zip(attention_names, head_masking.items()):
            out = model.generate(
                input_ids,
                num_beams=1,
                max_length=max_length,
                output_attentions=True,
                return_dict_in_generate=True,
                **{name: mask},
            )
            # We check the state of decoder_attentions and cross_attentions just from the last step
            attn_weights = out[attn_name] if attn_name == attention_names[0] else out[attn_name][-1]
            self.assertEqual(sum(tf.reduce_sum(w).numpy() for w in attn_weights), 0.0)
|
||||
|
||||
def test_load_with_mismatched_shapes(self):
|
||||
if not self.test_mismatched_shapes:
|
||||
return
|
||||
@@ -1717,133 +1529,6 @@ class TFModelTesterMixin:
|
||||
model.compile(optimizer="sgd", run_eagerly=True)
|
||||
model.train_on_batch(test_batch, test_batch_labels)
|
||||
|
||||
def _test_xla_generate(self, **generate_kwargs):
    """Compare eager `generate` with its XLA-compiled counterpart for every generative model class.

    Args:
        **generate_kwargs: forwarded unchanged to `model.generate`. `max_new_tokens` is also read here to
            enlarge `max_position_embeddings` / `max_target_positions` on the config when they are too small.

    A model whose `supports_xla_generation` is truthy must produce matching outputs (see the tolerance rule in
    the helper below); any other model must raise `ValueError`.
    """

    def _generate_and_check_results(model, inputs, is_input_ids):
        """Run eager and XLA generation on `inputs` and assert the outputs agree."""
        pad_token_id = model.generation_config.pad_token_id
        # make sure there are no pad tokens in prompt, which may trigger unwanted behavior.
        # Nothing to replace when no pad token is defined (and `tf.where` needs a concrete replacement value).
        if is_input_ids and pad_token_id is not None:
            # pick a neighbouring id; go up rather than down when the pad id is 0 to avoid a negative id
            new_pad_token = pad_token_id + 1 if pad_token_id == 0 else pad_token_id - 1
            inputs = tf.where(inputs != pad_token_id, inputs, new_pad_token)

        generated = model.generate(inputs, **generate_kwargs).numpy()
        generate_xla = tf.function(model.generate, jit_compile=True)
        generated_xla = generate_xla(inputs, **generate_kwargs).numpy()

        # Due to numerical instability, let's fail the test only if more than 10% of the input sequences give
        # different outputs between XLA and non-XLA versions. If there are fewer than 10 examples, let's be
        # strict and not allow any difference.
        mismatched_eager, mismatched_xla = [], []
        for eager_row, xla_row in zip(generated.tolist(), generated_xla.tolist()):
            if eager_row != xla_row:
                mismatched_eager.append(eager_row)
                mismatched_xla.append(xla_row)
        ratio = len(mismatched_eager) / len(generated)
        if ratio > 0.1 or (len(mismatched_eager) > 0 and len(generated) < 10):
            # The lists differ by construction; the assertion is used to report the mismatching rows.
            self.assertListEqual(mismatched_eager, mismatched_xla)

    for model_class in self.all_generative_model_classes:
        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
        config.eos_token_id = None  # Generate until max length
        config.do_sample = False

        # extract the input to the model
        is_input_ids = "input_ids" in inputs_dict
        is_input_features = "input_features" in inputs_dict
        if not (is_input_ids or is_input_features):
            raise ValueError("No valid generate input found in inputs_dict")
        inputs = inputs_dict["input_ids"] if is_input_ids else inputs_dict["input_features"]

        # fix config for models with additional sequence-length limiting settings
        seq_len = inputs.get_shape()[1]
        for var_name in ["max_position_embeddings", "max_target_positions"]:
            attr = getattr(config, var_name, None)
            if attr is None:
                continue
            required_length = seq_len + generate_kwargs["max_new_tokens"]
            if attr < required_length:
                try:
                    setattr(config, var_name, required_length)
                except NotImplementedError:
                    # xlnet will raise an exception when trying to set
                    # max_position_embeddings.
                    pass

        model = model_class(config)

        if model.supports_xla_generation:
            _generate_and_check_results(model, inputs, is_input_ids)
        else:
            with self.assertRaises(ValueError):
                _generate_and_check_results(model, inputs, is_input_ids)
|
||||
|
||||
def test_xla_generate_fast(self):
    """
    Quick XLA-vs-eager comparison for generate-compatible classes: a single beam, a single returned sequence
    and three new tokens.

    Either the model supports XLA generation and passes the inner test, or it raises an appropriate exception
    """
    generate_kwargs = {"num_beams": 1, "num_return_sequences": 1, "max_new_tokens": 3}
    self._test_xla_generate(**generate_kwargs)
|
||||
|
||||
@slow
def test_xla_generate_contrastive(self):
    """
    Slow variant of `test_xla_generate_fast` that exercises contrastive search (`penalty_alpha` + `top_k`).
    Contrastive search directly manipulates the model cache and other outputs, so this test ensures they are
    in a valid format that is also supported by XLA.

    Either the model supports XLA generation and passes the inner test, or it raises an appropriate exception
    """
    generate_kwargs = {
        "num_beams": 1,
        "num_return_sequences": 1,
        "max_new_tokens": 16,
        "penalty_alpha": 0.5,
        "top_k": 4,
    }
    self._test_xla_generate(**generate_kwargs)
|
||||
|
||||
@slow
def test_xla_generate_slow(self):
    """
    Slow variant of `test_xla_generate_fast`: requests several long sequences using beam search (8 beams,
    2 returned sequences, 128 new tokens), with and without XLA. The two outputs should match; a failure here
    indicates that the model may need further analysis if it is to be used for XLA generation.

    Either the model supports XLA generation and passes the inner test, or it raises an appropriate exception
    """
    generate_kwargs = {"num_beams": 8, "num_return_sequences": 2, "max_new_tokens": 128}
    self._test_xla_generate(**generate_kwargs)
|
||||
|
||||
def _generate_random_bad_tokens(self, num_bad_tokens, model):
|
||||
# special tokens cannot be bad tokens
|
||||
special_tokens = []
|
||||
if model.config.bos_token_id is not None:
|
||||
special_tokens.append(model.config.bos_token_id)
|
||||
if model.config.pad_token_id is not None:
|
||||
special_tokens.append(model.config.pad_token_id)
|
||||
if model.config.eos_token_id is not None:
|
||||
special_tokens.append(model.config.eos_token_id)
|
||||
|
||||
# create random bad tokens that are not special tokens
|
||||
bad_tokens = []
|
||||
while len(bad_tokens) < num_bad_tokens:
|
||||
token = tf.squeeze(ids_tensor((1, 1), self.model_tester.vocab_size), 0).numpy()[0]
|
||||
if token not in special_tokens:
|
||||
bad_tokens.append(token)
|
||||
return bad_tokens
|
||||
|
||||
def _check_generated_ids(self, output_ids):
|
||||
for token_id in output_ids[0].numpy().tolist():
|
||||
self.assertGreaterEqual(token_id, 0)
|
||||
self.assertLess(token_id, self.model_tester.vocab_size)
|
||||
|
||||
def _check_match_tokens(self, generated_ids, bad_words_ids):
|
||||
# for all bad word tokens
|
||||
for bad_word_ids in bad_words_ids:
|
||||
# for all slices in batch
|
||||
for generated_ids_slice in generated_ids:
|
||||
# for all word idx
|
||||
for i in range(len(bad_word_ids), len(generated_ids_slice)):
|
||||
# if tokens match
|
||||
if generated_ids_slice[i - len(bad_word_ids) : i] == bad_word_ids:
|
||||
return True
|
||||
return False
|
||||
|
||||
|
||||
def ids_tensor(shape, vocab_size, rng=None, name=None, dtype=None):
|
||||
"""Creates a random int32 tensor of the shape within the vocab size."""
|
||||
|
||||
Reference in New Issue
Block a user