[TF] Fix TensorFlow XLA Generation on limited seq_len models (#33903)

* fix tf xla generation on limited seq_len models
* [run-slow] opt
* [run-slow] opt
2024-10-05 16:20:50 +02:00
parent 22e102ad98
commit 5ef432e474
1 changed file with 15 additions and 12 deletions
--- a/tests/test_modeling_tf_common.py
+++ b/tests/test_modeling_tf_common.py
@@ -1715,10 +1715,9 @@ class TFModelTesterMixin:
                model.train_on_batch(test_batch, test_batch_labels)
    def _test_xla_generate(self, **generate_kwargs):
-        def _generate_and_check_results(model, inputs_dict):
+        def _generate_and_check_results(model, inputs, is_input_ids):
-            if "input_ids" in inputs_dict:
-                inputs = inputs_dict["input_ids"]
-                # make sure there are no pad tokens in prompt, which may trigger unwanted behavior
+            # make sure there are no pad tokens in prompt, which may trigger unwanted behavior
+            if is_input_ids:
                if model.generation_config.pad_token_id is not None:
                    if config.pad_token_id == 0:
                        new_pad_token = model.generation_config.pad_token_id + 1
@@ -1727,10 +1726,6 @@ class TFModelTesterMixin:
                else:
                    new_pad_token = None
                inputs = tf.where(inputs != model.generation_config.pad_token_id, inputs, new_pad_token)
-            elif "input_features" in inputs_dict:
-                inputs = inputs_dict["input_features"]
-            else:
-                raise ValueError("No valid generate input found in inputs_dict")
            generated = model.generate(inputs, **generate_kwargs).numpy()
            generate_xla = tf.function(model.generate, jit_compile=True)
@@ -1753,12 +1748,20 @@ class TFModelTesterMixin:
            config.eos_token_id = None  # Generate until max length
            config.do_sample = False
+            # extract the input to the model
+            is_input_ids = "input_ids" in inputs_dict
+            is_input_features = "input_features" in inputs_dict
+            if not (is_input_ids or is_input_features):
+                raise ValueError("No valid generate input found in inputs_dict")
+            inputs = inputs_dict["input_ids"] if is_input_ids else inputs_dict["input_features"]
             # fix config for models with additional sequence-length limiting settings
+            seq_len = inputs.get_shape()[1]
            for var_name in ["max_position_embeddings", "max_target_positions"]:
                attr = getattr(config, var_name, None)
-                if attr is not None and attr < generate_kwargs["max_new_tokens"]:
+                if attr is not None and attr < seq_len + generate_kwargs["max_new_tokens"]:
                    try:
-                        setattr(config, var_name, generate_kwargs["max_new_tokens"])
+                        setattr(config, var_name, seq_len + generate_kwargs["max_new_tokens"])
                    except NotImplementedError:
                        # xlnet will raise an exception when trying to set
                        # max_position_embeddings.
@@ -1767,10 +1770,10 @@ class TFModelTesterMixin:
            model = model_class(config)
            if model.supports_xla_generation:
-                _generate_and_check_results(model, inputs_dict)
+                _generate_and_check_results(model, inputs, is_input_ids)
            else:
                with self.assertRaises(ValueError):
-                    _generate_and_check_results(model, inputs_dict)
+                    _generate_and_check_results(model, inputs, is_input_ids)
    def test_xla_generate_fast(self):
        """