Generate: Add assisted generation (#22211)
* working mvp
* remove breakpoint
* fix commit
* standardize outputs
* tmp commit
* tests almost ready
* tmp commit
* skip a few models
* Add streaming; Docs and examples
* document limitations
* PR commits
* Amy PR comments
This commit is contained in:
@@ -280,7 +280,7 @@ class BigBirdPegasusModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineT
|
||||
|
||||
# overwrite from GenerationTesterMixin to solve problem
|
||||
# with conflicting random seeds
|
||||
def _get_input_ids_and_config(self):
|
||||
def _get_input_ids_and_config(self, batch_size=2):
|
||||
config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
|
||||
config.attention_type = "original_full"
|
||||
|
||||
@@ -288,10 +288,9 @@ class BigBirdPegasusModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineT
|
||||
attention_mask = torch.ones_like(input_ids, dtype=torch.long)
|
||||
|
||||
# cut to half length & take max batch_size 3
|
||||
max_batch_size = 2
|
||||
sequence_length = input_ids.shape[-1] // 2
|
||||
input_ids = input_ids[:max_batch_size, :sequence_length]
|
||||
attention_mask = attention_mask[:max_batch_size, :sequence_length]
|
||||
input_ids = input_ids[:batch_size, :sequence_length]
|
||||
attention_mask = attention_mask[:batch_size, :sequence_length]
|
||||
|
||||
# generate max 3 tokens
|
||||
max_length = input_ids.shape[-1] + 3
|
||||
|
||||
@@ -303,7 +303,7 @@ class TFWhisperModelTest(TFModelTesterMixin, PipelineTesterMixin, unittest.TestC
|
||||
input_ids = input_ids[:max_batch_size, :, :]
|
||||
|
||||
# generate max 3 tokens
|
||||
max_length = input_ids.shape[-1] + 3
|
||||
max_length = 4
|
||||
if config.eos_token_id is not None and config.pad_token_id is None:
|
||||
# hack to allow generate for models such as GPT2 as is done in `generate()`
|
||||
config.pad_token_id = config.eos_token_id
|
||||
|
||||
@@ -359,16 +359,15 @@ class WhisperModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMi
|
||||
config_and_inputs = self.model_tester.prepare_config_and_inputs_for_common()
|
||||
self.model_tester.check_encoder_decoder_model_standalone(*config_and_inputs)
|
||||
|
||||
def _get_input_ids_and_config(self):
|
||||
def _get_input_ids_and_config(self, batch_size=3):
|
||||
config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
|
||||
input_ids = inputs_dict[self.input_name]
|
||||
|
||||
# cut to half length & take max batch_size 3
|
||||
max_batch_size = 3
|
||||
input_ids = input_ids[:max_batch_size, :, :]
|
||||
# cut to half length & take max batch_size=batch_size
|
||||
input_ids = input_ids[:batch_size, :, :]
|
||||
|
||||
# generate max 3 tokens
|
||||
max_length = input_ids.shape[-1] + 3
|
||||
max_length = 4
|
||||
if config.eos_token_id is not None and config.pad_token_id is None:
|
||||
# hack to allow generate for models such as GPT2 as is done in `generate()`
|
||||
config.pad_token_id = config.eos_token_id
|
||||
|
||||
Reference in New Issue
Block a user