TF XLA greedy generation (#15786)
* First attempt at TF XLA generation * Fix comments * Update XLA greedy generate with direct XLA calls * Support attention mask, prepare_inputs_for_generation no longer hardcoded for greedy * Handle position_ids correctly * make xla generate work for non xla case * force using xla generate * refactor * more fixes * finish cleaning * finish * finish * clean gpt2 tests * add gpt2 tests * correct more cases * up * finish * finish * more fixes * flake 8 stuff * final rag fix * Update src/transformers/models/rag/modeling_tf_rag.py * finish t5 as well * finish * Update src/transformers/generation_utils.py Co-authored-by: Patrick von Platen <patrick.v.platen@gmail.com>
This commit is contained in:
@@ -227,6 +227,23 @@ class TFT5ModelTester:
|
||||
# test that outputs are equal for slice
|
||||
tf.debugging.assert_near(output_from_past_slice, output_from_no_past_slice, rtol=1e-3)
|
||||
|
||||
def create_and_check_t5_xla_generate(self, config, input_ids, *args):
|
||||
config.eos_token_id = None
|
||||
config.max_length = 10
|
||||
config.do_sample = False
|
||||
config.num_beams = 1
|
||||
model = TFT5ForConditionalGeneration(config=config)
|
||||
|
||||
# make sure there are no pad tokens in prompt
|
||||
input_ids = tf.where(input_ids != config.pad_token_id, input_ids, config.pad_token_id + 5)
|
||||
|
||||
generated = model.generate(input_ids)
|
||||
|
||||
generate_xla = tf.function(model.generate, jit_compile=True)
|
||||
generated_xla = generate_xla(input_ids)
|
||||
|
||||
self.parent.assertListEqual(generated.numpy().tolist(), generated_xla.numpy().tolist())
|
||||
|
||||
def prepare_config_and_inputs_for_common(self):
|
||||
config_and_inputs = self.prepare_config_and_inputs()
|
||||
(config, input_ids, input_mask, token_labels) = config_and_inputs
|
||||
@@ -280,6 +297,10 @@ class TFT5ModelTest(TFModelTesterMixin, unittest.TestCase):
|
||||
config_and_inputs = self.model_tester.prepare_config_and_inputs()
|
||||
self.model_tester.create_and_check_t5_decoder_model_past_large_inputs(*config_and_inputs)
|
||||
|
||||
def test_t5_model_xla_generate(self):
|
||||
config_and_inputs = self.model_tester.prepare_config_and_inputs()
|
||||
self.model_tester.create_and_check_t5_xla_generate(*config_and_inputs)
|
||||
|
||||
def test_model_common_attributes(self):
|
||||
config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
|
||||
|
||||
@@ -454,6 +475,27 @@ class TFT5EncoderOnlyModelTest(TFModelTesterMixin, unittest.TestCase):
|
||||
@require_sentencepiece
|
||||
@require_tokenizers
|
||||
class TFT5GenerationIntegrationTests(unittest.TestCase):
|
||||
@slow
|
||||
def test_greedy_xla_generate_simple(self):
|
||||
model = TFT5ForConditionalGeneration.from_pretrained("t5-small")
|
||||
tokenizer = T5Tokenizer.from_pretrained("t5-small")
|
||||
|
||||
sentence = "Translate English to German: Today is a beautiful day."
|
||||
input_ids = tokenizer(sentence, return_tensors="tf", padding=True).input_ids
|
||||
|
||||
xla_generate = tf.function(model.generate, jit_compile=True)
|
||||
|
||||
output_ids = model.generate(input_ids)
|
||||
output_ids_xla = xla_generate(input_ids)
|
||||
|
||||
output_strings = tokenizer.batch_decode(output_ids, skip_special_tokens=True)
|
||||
output_strings_xla = tokenizer.batch_decode(output_ids_xla, skip_special_tokens=True)
|
||||
|
||||
expected_output_string = ["Heute ist ein schöner Tag."]
|
||||
|
||||
self.assertListEqual(expected_output_string, output_strings)
|
||||
self.assertListEqual(expected_output_string, output_strings_xla)
|
||||
|
||||
@slow
|
||||
def test_greedy_generate(self):
|
||||
model = TFT5ForConditionalGeneration.from_pretrained("t5-small")
|
||||
|
||||
Reference in New Issue
Block a user