Generate: Add assisted generation (#22211)

* working mvp * remove breakpoint * fix commit * standardize outputs * tmp commit * tests almost ready * tmp commit * skip a few models * Add streaming; Docs and examples * document limitations * PR commits * Amy PR comments
2023-04-18 17:36:56 +01:00
parent 90247d3e01
commit 78cda46f17
6 changed files with 623 additions and 26 deletions
--- a/tests/models/bigbird_pegasus/test_modeling_bigbird_pegasus.py
+++ b/tests/models/bigbird_pegasus/test_modeling_bigbird_pegasus.py
@@ -280,7 +280,7 @@ class BigBirdPegasusModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineT

    # overwrite from GenerationTesterMixin to solve problem
    # with conflicting random seeds
-    def _get_input_ids_and_config(self):
+    def _get_input_ids_and_config(self, batch_size=2):
        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
        config.attention_type = "original_full"

@@ -288,10 +288,9 @@ class BigBirdPegasusModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineT
        attention_mask = torch.ones_like(input_ids, dtype=torch.long)

        # cut to half length & take max batch_size 3
-        max_batch_size = 2
        sequence_length = input_ids.shape[-1] // 2
-        input_ids = input_ids[:max_batch_size, :sequence_length]
-        attention_mask = attention_mask[:max_batch_size, :sequence_length]
+        input_ids = input_ids[:batch_size, :sequence_length]
+        attention_mask = attention_mask[:batch_size, :sequence_length]

        # generate max 3 tokens
        max_length = input_ids.shape[-1] + 3
--- a/tests/models/whisper/test_modeling_tf_whisper.py
+++ b/tests/models/whisper/test_modeling_tf_whisper.py
@@ -303,7 +303,7 @@ class TFWhisperModelTest(TFModelTesterMixin, PipelineTesterMixin, unittest.TestC
        input_ids = input_ids[:max_batch_size, :, :]

        # generate max 3 tokens
-        max_length = input_ids.shape[-1] + 3
+        max_length = 4
        if config.eos_token_id is not None and config.pad_token_id is None:
            # hack to allow generate for models such as GPT2 as is done in `generate()`
            config.pad_token_id = config.eos_token_id
--- a/tests/models/whisper/test_modeling_whisper.py
+++ b/tests/models/whisper/test_modeling_whisper.py
@@ -359,16 +359,15 @@ class WhisperModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMi
        config_and_inputs = self.model_tester.prepare_config_and_inputs_for_common()
        self.model_tester.check_encoder_decoder_model_standalone(*config_and_inputs)

-    def _get_input_ids_and_config(self):
+    def _get_input_ids_and_config(self, batch_size=3):
        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
        input_ids = inputs_dict[self.input_name]

-        # cut to half length & take max batch_size 3
-        max_batch_size = 3
-        input_ids = input_ids[:max_batch_size, :, :]
+        # take at most `batch_size` samples (only the batch dimension is sliced)
+        input_ids = input_ids[:batch_size, :, :]

        # generate max 3 tokens
-        max_length = input_ids.shape[-1] + 3
+        max_length = 4
        if config.eos_token_id is not None and config.pad_token_id is None:
            # hack to allow generate for models such as GPT2 as is done in `generate()`
            config.pad_token_id = config.eos_token_id