[tests] remove tf/flax tests in /generation (#36235)
This commit is contained in:
@@ -49,7 +49,6 @@ from transformers.testing_utils import (
|
||||
from transformers.utils import is_ipex_available
|
||||
|
||||
from ..test_modeling_common import floats_tensor, ids_tensor
|
||||
from .test_framework_agnostic import GenerationIntegrationTestsMixin
|
||||
|
||||
|
||||
if is_torch_available():
|
||||
@@ -2783,24 +2782,9 @@ class UtilsFunctionsTest(unittest.TestCase):
|
||||
|
||||
@pytest.mark.generate
|
||||
@require_torch
|
||||
class GenerationIntegrationTests(unittest.TestCase, GenerationIntegrationTestsMixin):
|
||||
# setting framework_dependent_parameters needs to be gated, just like its contents' imports
|
||||
if is_torch_available():
|
||||
framework_dependent_parameters = {
|
||||
"AutoModelForCausalLM": AutoModelForCausalLM,
|
||||
"AutoModelForSpeechSeq2Seq": AutoModelForSpeechSeq2Seq,
|
||||
"AutoModelForSeq2SeqLM": AutoModelForSeq2SeqLM,
|
||||
"AutoModelForVision2Seq": AutoModelForVision2Seq,
|
||||
"LogitsProcessorList": LogitsProcessorList,
|
||||
"MinLengthLogitsProcessor": MinLengthLogitsProcessor,
|
||||
"create_tensor_fn": torch.tensor,
|
||||
"floats_tensor": floats_tensor,
|
||||
"return_tensors": "pt",
|
||||
}
|
||||
|
||||
class GenerationIntegrationTests(unittest.TestCase):
|
||||
@slow
|
||||
def test_diverse_beam_search(self):
|
||||
# PT-only test: TF doesn't have a diverse beam search implementation
|
||||
article = """Justin Timberlake and Jessica Biel, welcome to parenthood.
|
||||
The celebrity couple announced the arrival of their son, Silas Randall Timberlake, in statements to People.
|
||||
"Silas was the middle name of Timberlake's maternal grandfather Bill Bomar, who died in 2012, while Randall is the musician's own middle name, as well as his father's first," People reports.
|
||||
@@ -2834,7 +2818,6 @@ class GenerationIntegrationTests(unittest.TestCase, GenerationIntegrationTestsMi
|
||||
)
|
||||
|
||||
def test_max_length_if_input_embeds(self):
|
||||
# PT-only test: TF doesn't have StoppingCriteria
|
||||
article = "Today a dragon flew over Paris."
|
||||
model = AutoModelForCausalLM.from_pretrained("hf-internal-testing/tiny-random-gpt2").to(torch_device)
|
||||
tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-gpt2")
|
||||
@@ -2848,7 +2831,6 @@ class GenerationIntegrationTests(unittest.TestCase, GenerationIntegrationTestsMi
|
||||
self.assertEqual(out_gen.shape[-1], input_len + out_gen_embeds.shape[-1])
|
||||
|
||||
def test_min_length_if_input_embeds(self):
|
||||
# PT-only test: TF doesn't have StoppingCriteria
|
||||
article = "Today a dragon flew over Paris."
|
||||
model = AutoModelForCausalLM.from_pretrained("hf-internal-testing/tiny-random-gpt2").to(torch_device)
|
||||
tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-gpt2")
|
||||
@@ -2862,7 +2844,6 @@ class GenerationIntegrationTests(unittest.TestCase, GenerationIntegrationTestsMi
|
||||
self.assertEqual(out_gen.shape[-1], input_len + out_gen_embeds.shape[-1])
|
||||
|
||||
def test_custom_stopping_criteria_overload_error(self):
|
||||
# PT-only test: TF doesn't have StoppingCriteria
|
||||
article = """Justin Timberlake and Jessica Biel, welcome to parenthood."""
|
||||
bart_tokenizer = BartTokenizer.from_pretrained("sshleifer/bart-tiny-random")
|
||||
bart_model = BartForConditionalGeneration.from_pretrained("sshleifer/bart-tiny-random").to(torch_device)
|
||||
@@ -2876,7 +2857,6 @@ class GenerationIntegrationTests(unittest.TestCase, GenerationIntegrationTestsMi
|
||||
bart_model.generate(input_ids, stopping_criteria=stopping_criteria, max_length=32)
|
||||
|
||||
def test_custom_stopping_criteria(self):
|
||||
# PT-only test: TF doesn't have StoppingCriteria
|
||||
article = """Justin Timberlake and Jessica Biel, welcome to parenthood."""
|
||||
bart_tokenizer = BartTokenizer.from_pretrained("sshleifer/bart-tiny-random")
|
||||
bart_model = BartForConditionalGeneration.from_pretrained("sshleifer/bart-tiny-random").to(torch_device)
|
||||
@@ -2900,7 +2880,6 @@ class GenerationIntegrationTests(unittest.TestCase, GenerationIntegrationTestsMi
|
||||
|
||||
# TODO (joao): replace `stop_sequence` in the pipeline by the more recent `generate` functionality
|
||||
def test_stop_sequence_stopping_criteria(self):
|
||||
# PT-only test: TF doesn't have StoppingCriteria
|
||||
prompt = """Hello I believe in"""
|
||||
generator = pipeline("text-generation", model="hf-internal-testing/tiny-random-bart")
|
||||
output = generator(prompt)
|
||||
@@ -2913,7 +2892,6 @@ class GenerationIntegrationTests(unittest.TestCase, GenerationIntegrationTestsMi
|
||||
self.assertEqual(output, [{"generated_text": "Hello I believe in we"}])
|
||||
|
||||
def test_generate_non_nlp_input_ids_as_kwarg(self):
|
||||
# PT-only test: AFAIK there's no non-NLP model architecture in TF that supports `input_ids` as its only input
|
||||
model = ImageGPTForCausalImageModeling.from_pretrained(
|
||||
"hf-internal-testing/tiny-random-imagegpt", max_length=10
|
||||
).to(torch_device)
|
||||
@@ -2926,7 +2904,6 @@ class GenerationIntegrationTests(unittest.TestCase, GenerationIntegrationTestsMi
|
||||
self.assertEqual(output_sequences.shape, (3, 10))
|
||||
|
||||
def test_generate_input_values_as_encoder_kwarg(self):
|
||||
# PT-only test: AFAIK there's no generate-capable architecture in TF that supports `input_values` as its input
|
||||
input_values = floats_tensor((2, 250))
|
||||
model = SpeechEncoderDecoderModel.from_pretrained("hf-internal-testing/tiny-random-speech-encoder-decoder")
|
||||
model = model.to(torch_device)
|
||||
@@ -2937,7 +2914,6 @@ class GenerationIntegrationTests(unittest.TestCase, GenerationIntegrationTestsMi
|
||||
self.assertEqual(output_sequences.shape, (2, 5))
|
||||
|
||||
def test_transition_scores_group_beam_search_encoder_decoder(self):
|
||||
# PT-only test: TF doesn't have group beam search
|
||||
articles = [
|
||||
"Justin Timberlake and Jessica Biel, welcome to parenthood.",
|
||||
"Michael Phelps is arguably the most decorated Olympian of all time.",
|
||||
@@ -3067,7 +3043,6 @@ class GenerationIntegrationTests(unittest.TestCase, GenerationIntegrationTestsMi
|
||||
|
||||
@slow
|
||||
def test_beam_search_example_integration(self):
|
||||
# PT-only test: TF doesn't have a BeamSearchScorer
|
||||
# exactly the example provided in the docstrings of beam search, which previously
|
||||
# failed after directly copying from it. Refer to PR #15555
|
||||
tokenizer = AutoTokenizer.from_pretrained("google-t5/t5-base")
|
||||
@@ -3094,7 +3069,6 @@ class GenerationIntegrationTests(unittest.TestCase, GenerationIntegrationTestsMi
|
||||
|
||||
@slow
|
||||
def test_constrained_beam_search(self):
|
||||
# PT-only test: TF doesn't have constrained beam search
|
||||
model = GPT2LMHeadModel.from_pretrained("openai-community/gpt2").to(torch_device)
|
||||
tokenizer = GPT2Tokenizer.from_pretrained("openai-community/gpt2")
|
||||
|
||||
@@ -3132,7 +3106,6 @@ class GenerationIntegrationTests(unittest.TestCase, GenerationIntegrationTestsMi
|
||||
|
||||
@slow
|
||||
def test_constrained_beam_search_mixed(self):
|
||||
# PT-only test: TF doesn't have constrained beam search
|
||||
model = GPT2LMHeadModel.from_pretrained("openai-community/gpt2").to(torch_device)
|
||||
tokenizer = GPT2Tokenizer.from_pretrained("openai-community/gpt2")
|
||||
|
||||
@@ -3173,7 +3146,6 @@ class GenerationIntegrationTests(unittest.TestCase, GenerationIntegrationTestsMi
|
||||
|
||||
@slow
|
||||
def test_constrained_beam_search_mixed_mixin(self):
|
||||
# PT-only test: TF doesn't have constrained beam search
|
||||
model = GPT2LMHeadModel.from_pretrained("openai-community/gpt2").to(torch_device)
|
||||
tokenizer = GPT2Tokenizer.from_pretrained("openai-community/gpt2")
|
||||
|
||||
@@ -3251,7 +3223,6 @@ class GenerationIntegrationTests(unittest.TestCase, GenerationIntegrationTestsMi
|
||||
|
||||
@slow
|
||||
def test_constrained_beam_search_example_translation_mixin(self):
|
||||
# PT-only test: TF doesn't have constrained beam search
|
||||
tokenizer = AutoTokenizer.from_pretrained("google-t5/t5-base")
|
||||
model = AutoModelForSeq2SeqLM.from_pretrained("google-t5/t5-base")
|
||||
|
||||
@@ -3276,7 +3247,6 @@ class GenerationIntegrationTests(unittest.TestCase, GenerationIntegrationTestsMi
|
||||
|
||||
@slow
|
||||
def test_constrained_beam_search_example_integration(self):
|
||||
# PT-only test: TF doesn't have constrained beam search
|
||||
tokenizer = AutoTokenizer.from_pretrained("google-t5/t5-base")
|
||||
model = AutoModelForSeq2SeqLM.from_pretrained("google-t5/t5-base")
|
||||
|
||||
@@ -3345,7 +3315,6 @@ class GenerationIntegrationTests(unittest.TestCase, GenerationIntegrationTestsMi
|
||||
self.assertListEqual(out_text, expected_out)
|
||||
|
||||
def test_constrained_beam_search_mixin_type_checks(self):
|
||||
# PT-only test: TF doesn't have constrained beam search
|
||||
tokenizer = AutoTokenizer.from_pretrained("patrickvonplaten/t5-tiny-random")
|
||||
model = AutoModelForSeq2SeqLM.from_pretrained("patrickvonplaten/t5-tiny-random")
|
||||
|
||||
@@ -3386,7 +3355,6 @@ class GenerationIntegrationTests(unittest.TestCase, GenerationIntegrationTestsMi
|
||||
model.generate(input_ids, force_words_ids=[[[-1]]])
|
||||
|
||||
def test_batched_decoder_start_id(self):
|
||||
# PT-only test: TF doesn't support batched_decoder_start_id
|
||||
articles = [
|
||||
"Justin Timberlake and Jessica Biel, welcome to parenthood.",
|
||||
"Michael Phelps is arguably the most decorated Olympian of all time.",
|
||||
@@ -3435,7 +3403,6 @@ class GenerationIntegrationTests(unittest.TestCase, GenerationIntegrationTestsMi
|
||||
outputs = bart_model.generate(input_ids, generation_config=GenerationConfig(do_sample=False))
|
||||
|
||||
def test_contrastive_search_batched(self):
|
||||
# PT-only test: TF doesn't have constrained beam search
|
||||
# Tests that contrastive search works with batched inputs (i.e. has the same output as for non-batched inputs)
|
||||
articles = ["Foo", "Bar Baz"]
|
||||
tokenizer = BartTokenizer.from_pretrained("hf-internal-testing/tiny-random-bart")
|
||||
@@ -3461,7 +3428,6 @@ class GenerationIntegrationTests(unittest.TestCase, GenerationIntegrationTestsMi
|
||||
self.assertTrue(max_score_diff < 1e-5)
|
||||
|
||||
def test_logits_processor_not_inplace(self):
|
||||
# PT-only test: TF fixes were not made
|
||||
article = "Today a dragon flew over Paris."
|
||||
model = AutoModelForCausalLM.from_pretrained("hf-internal-testing/tiny-random-gpt2").to(torch_device)
|
||||
tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-gpt2")
|
||||
@@ -3572,7 +3538,6 @@ class GenerationIntegrationTests(unittest.TestCase, GenerationIntegrationTestsMi
|
||||
self.assertEqual(len(warning_list), 0)
|
||||
|
||||
def test_length_warning_assisted_generation(self):
|
||||
# PT-only test: TF doesn't support assisted decoding yet.
|
||||
model = AutoModelForCausalLM.from_pretrained("hf-internal-testing/tiny-random-gpt2").to(torch_device)
|
||||
assistant = AutoModelForCausalLM.from_pretrained("hf-internal-testing/tiny-random-gpt2").to(torch_device)
|
||||
tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-gpt2")
|
||||
@@ -3604,7 +3569,6 @@ class GenerationIntegrationTests(unittest.TestCase, GenerationIntegrationTestsMi
|
||||
self.assertEqual(config.is_assistant, False)
|
||||
|
||||
def test_generated_length_assisted_generation(self):
|
||||
# PT-only test: TF doesn't support assisted decoding yet.
|
||||
model = AutoModelForCausalLM.from_pretrained("hf-internal-testing/tiny-random-gpt2").to(torch_device)
|
||||
assistant = AutoModelForCausalLM.from_pretrained("hf-internal-testing/tiny-random-gpt2").to(torch_device)
|
||||
tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-gpt2")
|
||||
@@ -3639,7 +3603,6 @@ class GenerationIntegrationTests(unittest.TestCase, GenerationIntegrationTestsMi
|
||||
self.assertTrue(out.shape[-1] <= (input_length + 7))
|
||||
|
||||
def test_model_kwarg_assisted_decoding_decoder_only(self):
|
||||
# PT-only test: TF doesn't support assisted decoding yet.
|
||||
model = AutoModelForCausalLM.from_pretrained("hf-internal-testing/tiny-random-gpt2").to(torch_device)
|
||||
tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-gpt2")
|
||||
model.generation_config.pad_token_id = tokenizer.eos_token_id
|
||||
@@ -3839,7 +3802,6 @@ class GenerationIntegrationTests(unittest.TestCase, GenerationIntegrationTestsMi
|
||||
@slow
|
||||
@require_torch_multi_gpu
|
||||
def test_assisted_decoding_in_different_gpu(self):
|
||||
# PT-only test: TF doesn't support assisted decoding yet.
|
||||
model = AutoModelForCausalLM.from_pretrained("hf-internal-testing/tiny-random-MistralForCausalLM").to("cuda:0")
|
||||
assistant = AutoModelForCausalLM.from_pretrained("hf-internal-testing/tiny-random-MistralForCausalLM").to(
|
||||
"cuda:1"
|
||||
@@ -3863,7 +3825,6 @@ class GenerationIntegrationTests(unittest.TestCase, GenerationIntegrationTestsMi
|
||||
@slow
|
||||
@require_torch_accelerator
|
||||
def test_assisted_decoding_model_in_gpu_assistant_in_cpu(self):
|
||||
# PT-only test: TF doesn't support assisted decoding yet.
|
||||
model = AutoModelForCausalLM.from_pretrained("hf-internal-testing/tiny-random-MistralForCausalLM").to(
|
||||
torch_device
|
||||
)
|
||||
@@ -3887,7 +3848,6 @@ class GenerationIntegrationTests(unittest.TestCase, GenerationIntegrationTestsMi
|
||||
self.assertTrue(input_length <= out.shape[-1] <= input_length + 20)
|
||||
|
||||
def test_special_tokens_fall_back_to_model_default(self):
|
||||
# PT-only test: TF doesn't support assisted decoding yet.
|
||||
model = AutoModelForCausalLM.from_pretrained("hf-internal-testing/tiny-random-MistralForCausalLM").to(
|
||||
torch_device
|
||||
)
|
||||
@@ -4367,6 +4327,416 @@ class GenerationIntegrationTests(unittest.TestCase, GenerationIntegrationTestsMi
|
||||
duration = datetime.datetime.now() - start
|
||||
self.assertGreater(duration, datetime.timedelta(seconds=1.5 * MAX_TIME))
|
||||
|
||||
def test_validate_generation_inputs(self):
    """Tests validation of inputs to `generate`.

    Misspelled or unused keyword arguments must raise a `ValueError` whose message mentions the offending
    argument, while a legitimate model kwarg (`attention_mask`) must be accepted.
    """
    tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-t5")
    model = AutoModelForSeq2SeqLM.from_pretrained("hf-internal-testing/tiny-random-t5")

    encoder_input_str = "Hello world"
    input_ids = tokenizer(encoder_input_str, return_tensors="pt").input_ids

    # typos are quickly detected (the correct argument is `do_sample`)
    with self.assertRaisesRegex(ValueError, "do_samples"):
        model.generate(input_ids, do_samples=True)

    # arbitrary arguments that will not be used anywhere are also not accepted
    # (the dict is built outside the `with` so only the `generate` call can satisfy the assertion)
    fake_model_kwargs = {"foo": "bar"}
    with self.assertRaisesRegex(ValueError, "foo"):
        model.generate(input_ids, **fake_model_kwargs)

    # however, valid model_kwargs are accepted; the all-zeros mask is created with torch directly rather than
    # round-tripping the tensor through numpy
    valid_model_kwargs = {"attention_mask": torch.zeros_like(input_ids)}
    model.generate(input_ids, **valid_model_kwargs)
|
||||
|
||||
def test_custom_logits_processor(self):
    """Check that a custom `LogitsProcessorList` can be passed to `generate`, and that redundant arguments raise."""
    checkpoint = "hf-internal-testing/tiny-random-bart"
    bart_tokenizer = AutoTokenizer.from_pretrained(checkpoint)
    text = """Justin Timberlake and Jessica Biel, welcome to parenthood."""
    bart_model = AutoModelForSeq2SeqLM.from_pretrained(checkpoint, min_length=1)
    input_ids = bart_tokenizer(text, return_tensors="pt").input_ids

    custom_processors = LogitsProcessorList()
    custom_processors.append(MinLengthLogitsProcessor(min_length=10, eos_token_id=0))

    # passing `min_length` together with a processor list that already holds a min-length processor is rejected
    with self.assertRaises(ValueError):
        bart_model.generate(input_ids, logits_processor=custom_processors, min_length=10)

    # the processor list on its own must go through without raising
    bart_model.generate(input_ids, logits_processor=custom_processors)
|
||||
|
||||
def test_transition_scores_greedy_search(self):
    """Test that `compute_transition_scores` is working as expected with greedy search"""
    checkpoint = "distilbert/distilgpt2"
    prompts = ["Justin Timberlake", "Michael Phelps"]

    tokenizer = AutoTokenizer.from_pretrained(checkpoint, padding_side="left")
    tokenizer.pad_token = tokenizer.eos_token

    model = AutoModelForCausalLM.from_pretrained(checkpoint)
    # unset the EOS token in the generation config before generating
    model.generation_config.eos_token_id = None
    input_ids = tokenizer(prompts, return_tensors="pt", padding=True).input_ids
    model = model.to(torch_device)
    input_ids = input_ids.to(torch_device)

    generate_kwargs = {
        "max_new_tokens": 5,
        "pad_token_id": tokenizer.eos_token_id,
        "return_dict_in_generate": True,
        "output_scores": True,
    }
    outputs = model.generate(input_ids=input_ids, **generate_kwargs)

    scores = model.compute_transition_scores(outputs.sequences, outputs.scores)
    scores = scores.cpu().numpy()

    # reference values: one row per prompt, one column per generated token
    reference = np.array(
        [
            [-57.8844, -60.45698, -70.16364, -65.50791, -66.35648],
            [-54.417572, -60.216614, -62.661243, -58.621933, -58.298683],
        ]
    )
    scores_match = np.allclose(scores, reference, atol=1e-3)
    self.assertTrue(scores_match)
|
||||
|
||||
def test_transition_scores_greedy_search_normalized(self):
    """
    Test that `compute_transition_scores` is working as expected with greedy search, with `normalize_logits=True`
    """
    checkpoint = "distilbert/distilgpt2"
    prompts = ["Justin Timberlake", "Michael Phelps"]

    tokenizer = AutoTokenizer.from_pretrained(checkpoint, padding_side="left")
    tokenizer.pad_token = tokenizer.eos_token

    model = AutoModelForCausalLM.from_pretrained(checkpoint)
    # unset the EOS token in the generation config before generating
    model.generation_config.eos_token_id = None
    input_ids = tokenizer(prompts, return_tensors="pt", padding=True).input_ids
    model = model.to(torch_device)
    input_ids = input_ids.to(torch_device)

    generate_kwargs = {
        "max_new_tokens": 5,
        "pad_token_id": tokenizer.eos_token_id,
        "return_dict_in_generate": True,
        "output_scores": True,
    }
    outputs = model.generate(input_ids=input_ids, **generate_kwargs)

    scores = model.compute_transition_scores(outputs.sequences, outputs.scores, normalize_logits=True)
    scores = scores.cpu().numpy()

    # reference values: one row per prompt, one column per generated token
    reference = np.array(
        [
            [-2.538938, -2.2694316, -2.1580915, -1.572299, -2.6719835],
            [-1.8826028, -2.2461371, -1.7556462, -2.9644494, -1.7996008],
        ]
    )
    scores_match = np.allclose(scores, reference, atol=1e-3)
    self.assertTrue(scores_match)
|
||||
|
||||
def test_transition_scores_beam_search_encoder_decoder(self):
    """
    Test that `compute_transition_scores` is working as expected with beam search and encoder-decoder models
    """
    checkpoint = "hf-internal-testing/tiny-random-bart"
    articles = [
        "Justin Timberlake and Jessica Biel, welcome to parenthood.",
        "Michael Phelps is arguably the most decorated Olympian of all time.",
    ]
    tokenizer = AutoTokenizer.from_pretrained(checkpoint)
    model = AutoModelForSeq2SeqLM.from_pretrained(checkpoint)
    input_ids = tokenizer(articles, return_tensors="pt", padding=True).input_ids
    model = model.to(torch_device)
    input_ids = input_ids.to(torch_device)

    beam_kwargs = {
        "max_length": 10,
        "num_beams": 4,
        "num_return_sequences": 2,
        "eos_token_id": None,
        "return_dict_in_generate": True,
        "output_scores": True,
        "length_penalty": 0.0,
    }
    outputs = model.generate(input_ids=input_ids, **beam_kwargs)

    per_token_scores = model.compute_transition_scores(outputs.sequences, outputs.scores, outputs.beam_indices)
    per_token_scores = per_token_scores.cpu().numpy()
    outputs.sequences_scores = outputs.sequences_scores.cpu().numpy()

    # the per-token scores of each returned sequence must add up to its reported sequence score
    summed_scores = np.sum(per_token_scores, axis=-1)
    self.assertTrue(np.allclose(summed_scores, outputs.sequences_scores, atol=1e-3))
|
||||
|
||||
def test_transition_scores_beam_search_encoder_decoder_with_eos(self):
    """
    Test that `compute_transition_scores` is working as expected with beam search and encoder-decoder models, when
    an EOS token is defined
    """
    checkpoint = "hf-internal-testing/tiny-random-bart"
    articles = [
        "Justin Timberlake and Jessica Biel, welcome to parenthood.",
        "Michael Phelps is arguably the most decorated Olympian of all time.",
    ]
    tokenizer = AutoTokenizer.from_pretrained(checkpoint)

    model = AutoModelForSeq2SeqLM.from_pretrained(checkpoint)
    input_ids = tokenizer(articles, return_tensors="pt", padding=True).input_ids
    model = model.to(torch_device)
    input_ids = input_ids.to(torch_device)

    # unlike the sibling test without EOS, `eos_token_id` is not overridden in this call
    beam_kwargs = {
        "max_length": 10,
        "num_beams": 4,
        "num_return_sequences": 2,
        "return_dict_in_generate": True,
        "output_scores": True,
        "length_penalty": 0.0,
    }
    outputs = model.generate(input_ids=input_ids, **beam_kwargs)

    per_token_scores = model.compute_transition_scores(outputs.sequences, outputs.scores, outputs.beam_indices)
    per_token_scores = per_token_scores.cpu().numpy()
    outputs.sequences_scores = outputs.sequences_scores.cpu().numpy()

    # the per-token scores of each returned sequence must add up to its reported sequence score
    summed_scores = np.sum(per_token_scores, axis=-1)
    self.assertTrue(np.allclose(summed_scores, outputs.sequences_scores, atol=1e-3))
|
||||
|
||||
def test_transition_scores_beam_search_decoder_only(self):
    """
    Test that `compute_transition_scores` is working as expected with beam search and decoder-only models
    """
    checkpoint = "hf-internal-testing/tiny-random-gpt2"
    articles = [
        "Justin Timberlake",
        "Michael Phelps",
    ]
    tokenizer = AutoTokenizer.from_pretrained(checkpoint)
    # reuse the EOS token as the padding token so the batch can be padded
    tokenizer.pad_token = tokenizer.eos_token

    model = AutoModelForCausalLM.from_pretrained(checkpoint)
    input_ids = tokenizer(articles, return_tensors="pt", padding=True).input_ids
    model = model.to(torch_device)
    input_ids = input_ids.to(torch_device)

    beam_kwargs = {
        "max_length": 10,
        "num_beams": 4,
        "num_return_sequences": 2,
        "pad_token_id": tokenizer.eos_token_id,
        "eos_token_id": None,
        "return_dict_in_generate": True,
        "output_scores": True,
        "length_penalty": 0.0,
    }
    outputs = model.generate(input_ids=input_ids, **beam_kwargs)

    per_token_scores = model.compute_transition_scores(outputs.sequences, outputs.scores, outputs.beam_indices)
    per_token_scores = per_token_scores.cpu().numpy()
    outputs.sequences_scores = outputs.sequences_scores.cpu().numpy()

    # the per-token scores of each returned sequence must add up to its reported sequence score
    summed_scores = np.sum(per_token_scores, axis=-1)
    self.assertTrue(np.allclose(summed_scores, outputs.sequences_scores, atol=1e-3))
|
||||
|
||||
@slow
def test_transition_scores_early_stopping(self):
    """
    Test that `compute_transition_scores` is working as expected with beam search and early stopping

    This is an aggressive test that makes sure that `beam_search's`
    transition scores are computed correctly for varying `num_return_sequences`, `num_beams` and `batch_size > 1`
    """
    # 2 x input_ids for "question: How are you? \n context: I had a long day, "
    single_prompt_ids = [822, 10, 571, 33, 25, 58, 2625, 10, 27, 141, 3, 9, 307, 239, 6, 1]
    input_ids = torch.tensor(2 * [single_prompt_ids])
    model = AutoModelForSeq2SeqLM.from_pretrained("google-t5/t5-small")
    model = model.to(torch_device)
    input_ids = input_ids.to(torch_device)

    beam_kwargs = {
        "max_length": 10,
        "return_dict_in_generate": True,
        "output_scores": True,
        "forced_eos_token_id": model.config.eos_token_id,
        "num_beams": 4,
        "do_sample": False,
        "num_return_sequences": 3,
        "length_penalty": 0.0,
    }
    outputs = model.generate(input_ids, **beam_kwargs)

    per_token_scores = model.compute_transition_scores(
        sequences=outputs.sequences, scores=outputs.scores, beam_indices=outputs.beam_indices
    )
    per_token_scores = per_token_scores.cpu().numpy()
    outputs.sequences_scores = outputs.sequences_scores.cpu().numpy()

    # the per-token scores of each returned sequence must add up to its reported sequence score
    summed_scores = np.sum(per_token_scores, axis=-1)
    self.assertTrue(np.allclose(summed_scores, outputs.sequences_scores))
|
||||
|
||||
def test_encoder_decoder_generate_attention_mask(self):
    """
    Test that `generate` automagically creates the correct `attention_mask` for encoder-decoder models (which
    has a different keyword)

    The first article is generated both on its own and as part of a padded batch; the summed sequence scores of
    its beams must agree between the two runs.
    """
    articles = ["Timberlake", "Jessica Biel, welcome to parenthood among other things"]
    tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-bart")
    # need extreme generation values here to force this test
    # to fail when `attention_mask` is not correctly treated in generate
    model = AutoModelForSeq2SeqLM.from_pretrained(
        "hf-internal-testing/tiny-random-bart",
    )
    model.config.eos_token_id = None
    input_ids = tokenizer(articles[0], return_tensors="pt").input_ids
    input_ids_batched = tokenizer(articles, padding=True, return_tensors="pt").input_ids
    model = model.to(torch_device)
    input_ids = input_ids.to(torch_device)
    input_ids_batched = input_ids_batched.to(torch_device)

    generate_kwargs = {
        "return_dict_in_generate": True,
        "output_scores": True,
        "max_length": 50,
        "num_beams": 5,
        "num_return_sequences": 5,
    }

    output_sequences_batched = model.generate(input_ids=input_ids_batched, **generate_kwargs)
    output_sequences = model.generate(input_ids=input_ids, **generate_kwargs)

    batched_out = output_sequences_batched.sequences_scores.cpu().numpy()
    out = output_sequences.sequences_scores.cpu().numpy()

    # only the first 5 batched scores are compared against the 5 scores of the single-article run
    diff = np.abs(np.sum(batched_out[:5]) - np.sum(out))
    # `assertLess` reports the offending value on failure, unlike `assertTrue(diff < ...)`
    self.assertLess(diff, 1e-4)
|
||||
|
||||
def test_generate_input_ids_as_kwarg(self):
    """Test that `input_ids` work equally as a positional and keyword argument in decoder-only models"""
    checkpoint = "hf-internal-testing/tiny-random-gpt2"
    prompt = "I need input_ids to generate"
    tokenizer = AutoTokenizer.from_pretrained(checkpoint)
    model = AutoModelForCausalLM.from_pretrained(checkpoint, max_length=15)
    input_ids = tokenizer(prompt, return_tensors="pt").input_ids
    model = model.to(torch_device)
    input_ids = input_ids.to(torch_device)

    # same input, once by keyword and once positionally
    from_keyword = model.generate(input_ids=input_ids)
    from_positional = model.generate(input_ids)
    from_keyword = from_keyword.cpu().numpy()
    from_positional = from_positional.cpu().numpy()

    outputs_identical = np.array_equal(from_positional, from_keyword)
    self.assertTrue(outputs_identical)
    self.assertEqual(from_positional.shape, (1, 15))
|
||||
|
||||
def test_generate_input_ids_as_encoder_kwarg(self):
    """Test that `input_ids` work equally as a positional and keyword argument in encoder-decoder models"""
    checkpoint = "hf-internal-testing/tiny-random-bart"
    text = "Justin Timberlake and Jessica Biel, welcome to parenthood."
    tokenizer = AutoTokenizer.from_pretrained(checkpoint)
    model = AutoModelForSeq2SeqLM.from_pretrained(checkpoint)
    # unset the EOS token on the model config before generating
    model.config.eos_token_id = None
    input_ids = tokenizer(text, return_tensors="pt").input_ids
    model = model.to(torch_device)
    input_ids = input_ids.to(torch_device)

    # same input, once by keyword and once positionally
    from_keyword = model.generate(input_ids=input_ids, max_length=5)
    from_positional = model.generate(input_ids, max_length=5)
    from_keyword = from_keyword.cpu().numpy()
    from_positional = from_positional.cpu().numpy()

    outputs_identical = np.array_equal(from_positional, from_keyword)
    self.assertTrue(outputs_identical)
    self.assertEqual(from_positional.shape, (1, 5))
|
||||
|
||||
def test_generate_inputs_and_encoder_kwargs(self):
    """
    Test that an exception is thrown if the main tensor (`input_ids` in LLMs) is passed as both a positional and
    keyword argument
    """
    checkpoint = "hf-internal-testing/tiny-random-gpt2"
    prompt = "I need input_ids to generate"
    tokenizer = AutoTokenizer.from_pretrained(checkpoint)
    model = AutoModelForCausalLM.from_pretrained(checkpoint, max_length=10)
    encoded = tokenizer(prompt, return_tensors="pt")
    input_ids = encoded.input_ids

    # the same tensor supplied twice (positionally and by keyword) must be rejected
    with self.assertRaises(ValueError):
        model.generate(input_ids, input_ids=input_ids)
|
||||
|
||||
def test_generate_too_many_encoder_kwargs(self):
    """Test that passing redundant inputs results in an exception (`input_ids` and `inputs_embeds` in LLMs)"""
    checkpoint = "hf-internal-testing/tiny-random-bart"
    prompt = "I need input_ids to generate"
    tokenizer = AutoTokenizer.from_pretrained(checkpoint)
    model = AutoModelForSeq2SeqLM.from_pretrained(checkpoint, max_length=10)
    encoded = tokenizer(prompt, return_tensors="pt")
    input_ids = encoded.input_ids

    # `input_ids` and `inputs_embeds` are both given (the ids tensor is reused for both), which must be rejected
    with self.assertRaises(ValueError):
        model.generate(input_ids=input_ids, inputs_embeds=input_ids)
|
||||
|
||||
def test_generate_input_features_as_encoder_kwarg(self):
    """Test that non-`input_ids` main model inputs are correctly handled as positional arguments

    `input_features` is passed to `generate` once by keyword and once positionally; both calls must produce the
    same sequences, of shape `(3, 5)`.
    """
    input_features = floats_tensor((3, 80, 60))
    model = AutoModelForSpeechSeq2Seq.from_pretrained(
        "hf-internal-testing/tiny-random-WhisperForConditionalGeneration"
    )
    # `Tensor.to` returns the moved tensor rather than modifying it in place, so the result must be kept
    input_features = input_features.to(torch_device)
    model = model.to(torch_device)

    output_sequences_kwargs = model.generate(input_features=input_features, max_length=5)
    output_sequences = model.generate(input_features, max_length=5)
    output_sequences_kwargs = output_sequences_kwargs.cpu().numpy()
    output_sequences = output_sequences.cpu().numpy()

    self.assertTrue(np.array_equal(output_sequences, output_sequences_kwargs))
    self.assertEqual(output_sequences.shape, (3, 5))
|
||||
|
||||
def test_generate_encoder_outputs_attention_mask(self):
    """Test that `generate` can handle attention masks when the encoder outputs are passed

    The encoder is run once and its outputs are fed to `generate` with and without a random 0/1 attention mask;
    the test expects the two generations to differ.
    """
    input_features = floats_tensor((3, 80, 60))
    # random 0/1 mask with the same shape as the features, moved to the test device once
    attention_mask = torch.randint(0, 2, input_features.shape).to(torch_device)
    model = AutoModelForSpeechSeq2Seq.from_pretrained(
        "hf-internal-testing/tiny-random-WhisperForConditionalGeneration"
    )
    input_features = input_features.to(torch_device)
    model = model.to(torch_device)

    encoder = model.get_encoder()
    encoder_outputs = encoder(input_features)

    output_sequences_no_mask = model.generate(encoder_outputs=encoder_outputs)
    output_sequences_with_mask = model.generate(encoder_outputs=encoder_outputs, attention_mask=attention_mask)
    output_sequences_no_mask = output_sequences_no_mask.cpu().numpy()
    output_sequences_with_mask = output_sequences_with_mask.cpu().numpy()

    self.assertFalse(np.array_equal(output_sequences_no_mask, output_sequences_with_mask))
|
||||
|
||||
def test_eos_token_id_int_and_list_greedy_search(self):
    """Test that `generate` can handle multiple EOS tokens

    The same prompt is generated with `eos_token_id` given as a single int and as a list of ints; in both cases
    the first returned sequence must have the expected length.
    """
    generation_kwargs = {
        "do_sample": False,
        "num_beams": 1,
    }
    expectation = 13

    tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-gpt2")
    text = """Hello, my dog is cute and"""
    tokens = tokenizer(text, return_tensors="pt")
    model = AutoModelForCausalLM.from_pretrained("hf-internal-testing/tiny-random-gpt2")
    model = model.to(torch_device)
    tokens = tokens.to(torch_device)

    # int form first, then list form; `subTest` reports each case separately and `assertEqual` shows both
    # lengths on failure
    for eos_token_id in (873, [873, 198]):
        with self.subTest(eos_token_id=eos_token_id):
            generated_tokens = model.generate(**tokens, eos_token_id=eos_token_id, **generation_kwargs)
            self.assertEqual(expectation, len(generated_tokens[0]))
|
||||
|
||||
def test_generate_vision2text_conditioning(self):
    """Test that `decoder_input_ids` can be used to condition the generation in vision-to-text models"""
    pixel_values = floats_tensor((2, 3, 30, 30))
    # this should be the 2nd output token, after the BOS token
    conditioning_input = torch.tensor([[10], [10]])
    model = AutoModelForVision2Seq.from_pretrained(
        "hf-internal-testing/tiny-random-VisionEncoderDecoderModel-vit-gpt2"
    )
    pixel_values = pixel_values.to(torch_device)
    model = model.to(torch_device)
    conditioning_input = conditioning_input.to(torch_device)

    # we can condition on decoder_input_ids (expected decoder input) and input_ids (which we pipe internally as
    # decoder_input_ids, if the encoder is not a model with text input)
    via_decoder_input_ids = model.generate(pixel_values, max_length=5, decoder_input_ids=conditioning_input)
    via_input_ids = model.generate(pixel_values, max_length=5, input_ids=conditioning_input)
    via_decoder_input_ids = via_decoder_input_ids.cpu().numpy()
    via_input_ids = via_input_ids.cpu().numpy()
    conditioning_input = conditioning_input.cpu().numpy()

    # both conditioning routes must agree, and column 1 of the output must be the conditioning token
    routes_agree = np.array_equal(via_decoder_input_ids, via_input_ids)
    self.assertTrue(routes_agree)
    second_token_matches = np.array_equal(via_decoder_input_ids[:, 1:2], conditioning_input)
    self.assertTrue(second_token_matches)
|
||||
|
||||
|
||||
@require_torch
|
||||
class TokenHealingTestCase(unittest.TestCase):
|
||||
|
||||
Reference in New Issue
Block a user