[tests] remove tf/flax tests in /generation (#36235)

This commit is contained in:
Joao Gante
2025-02-17 14:59:22 +00:00
committed by GitHub
parent c877c9fa5b
commit 55493f1390
26 changed files with 428 additions and 2663 deletions

View File

@@ -416,82 +416,6 @@ class TFSpeech2TextModelTest(TFModelTesterMixin, PipelineTesterMixin, unittest.T
def test_generate_without_input_ids(self):
pass
# overwritten from parent due to the inability to work when non-text inputs are not passed AND because the input is
# `input_features`
def test_lm_head_model_random_no_beam_search_generate(self):
config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
input_features = inputs_dict.get("input_features", None)
# iterate over all generative models
for model_class in self.all_generative_model_classes:
model = model_class(config)
if config.bos_token_id is None:
# if bos token id is not defined model needs input_features
with self.assertRaises(AssertionError):
model.generate(do_sample=True, max_length=5)
# num_return_sequences = 1
self._check_generated_ids(model.generate(input_features, do_sample=True))
with self.assertRaises(ValueError):
# generating multiple sequences when no beam search generation
# is not allowed as it would always generate the same sequences
model.generate(input_features, do_sample=False, num_return_sequences=2)
# num_return_sequences > 1, sample
self._check_generated_ids(model.generate(input_features, do_sample=True, num_return_sequences=2))
# check bad words tokens language generation
# create list of 1-seq bad token and list of 2-seq of bad tokens
bad_words_ids = [self._generate_random_bad_tokens(1, model), self._generate_random_bad_tokens(2, model)]
output_tokens = model.generate(
input_features, do_sample=True, bad_words_ids=bad_words_ids, num_return_sequences=2
)
# only count generated tokens
generated_ids = output_tokens[:, input_features.shape[-1] :]
self.assertFalse(self._check_match_tokens(generated_ids.numpy().tolist(), bad_words_ids))
# overwritten from parent due to the inability to work when non-text inputs are not passed AND because the input is
# `input_features`
def test_lm_head_model_random_beam_search_generate(self):
config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
input_features = inputs_dict.get("input_features", None)
for model_class in self.all_generative_model_classes:
model = model_class(config)
if config.bos_token_id is None:
# if bos token id is not defined model needs input_ids, num_return_sequences = 1
self._check_generated_ids(model.generate(input_features, do_sample=True, num_beams=2))
with self.assertRaises(ValueError):
# generating more sequences than having beams leads is not possible
model.generate(input_features, do_sample=False, num_return_sequences=3, num_beams=2)
# num_return_sequences > 1, sample
self._check_generated_ids(
model.generate(
input_features,
do_sample=True,
num_beams=2,
num_return_sequences=2,
)
)
# num_return_sequences > 1, greedy
self._check_generated_ids(
model.generate(input_features, do_sample=False, num_beams=2, num_return_sequences=2)
)
# check bad words tokens language generation
# create list of 1-seq bad token and list of 2-seq of bad tokens
bad_words_ids = [self._generate_random_bad_tokens(1, model), self._generate_random_bad_tokens(2, model)]
output_tokens = model.generate(
input_features, do_sample=False, bad_words_ids=bad_words_ids, num_beams=2, num_return_sequences=2
)
# only count generated tokens
generated_ids = output_tokens[:, input_features.shape[-1] :]
self.assertFalse(self._check_match_tokens(generated_ids.numpy().tolist(), bad_words_ids))
# overwritten from parent -- the input is `input_features`, not `input_ids`
def test_forward_signature(self):
config, _ = self.model_tester.prepare_config_and_inputs_for_common()