[TF] Fix Tensorflow XLA Generation on limited seq_len models (#33903)

* fix tf xla generation on limited seq_len models

* [run-slow] opt

* [run-slow] opt
This commit is contained in:
Anton Vlasjuk
2024-10-05 16:20:50 +02:00
committed by GitHub
parent 22e102ad98
commit 5ef432e474

View File

@@ -1715,10 +1715,9 @@ class TFModelTesterMixin:
model.train_on_batch(test_batch, test_batch_labels)
def _test_xla_generate(self, **generate_kwargs):
def _generate_and_check_results(model, inputs_dict):
if "input_ids" in inputs_dict:
inputs = inputs_dict["input_ids"]
def _generate_and_check_results(model, inputs, is_input_ids):
# make sure there are no pad tokens in prompt, which may trigger unwanted behavior
if is_input_ids:
if model.generation_config.pad_token_id is not None:
if config.pad_token_id == 0:
new_pad_token = model.generation_config.pad_token_id + 1
@@ -1727,10 +1726,6 @@ class TFModelTesterMixin:
else:
new_pad_token = None
inputs = tf.where(inputs != model.generation_config.pad_token_id, inputs, new_pad_token)
elif "input_features" in inputs_dict:
inputs = inputs_dict["input_features"]
else:
raise ValueError("No valid generate input found in inputs_dict")
generated = model.generate(inputs, **generate_kwargs).numpy()
generate_xla = tf.function(model.generate, jit_compile=True)
@@ -1753,12 +1748,20 @@ class TFModelTesterMixin:
config.eos_token_id = None # Generate until max length
config.do_sample = False
# extract the input to the model
is_input_ids = "input_ids" in inputs_dict
is_input_features = "input_features" in inputs_dict
if not (is_input_ids or is_input_features):
raise ValueError("No valid generate input found in inputs_dict")
inputs = inputs_dict["input_ids"] if is_input_ids else inputs_dict["input_features"]
# fix config for models with additional sequence-length limiting settings
seq_len = inputs.get_shape()[1]
for var_name in ["max_position_embeddings", "max_target_positions"]:
attr = getattr(config, var_name, None)
if attr is not None and attr < generate_kwargs["max_new_tokens"]:
if attr is not None and attr < seq_len + generate_kwargs["max_new_tokens"]:
try:
setattr(config, var_name, generate_kwargs["max_new_tokens"])
setattr(config, var_name, seq_len + generate_kwargs["max_new_tokens"])
except NotImplementedError:
# xlnet will raise an exception when trying to set
# max_position_embeddings.
@@ -1767,10 +1770,10 @@ class TFModelTesterMixin:
model = model_class(config)
if model.supports_xla_generation:
_generate_and_check_results(model, inputs_dict)
_generate_and_check_results(model, inputs, is_input_ids)
else:
with self.assertRaises(ValueError):
_generate_and_check_results(model, inputs_dict)
_generate_and_check_results(model, inputs, is_input_ids)
def test_xla_generate_fast(self):
"""