From 2544c1434f8d831daff3fe6a925dced67bc70c64 Mon Sep 17 00:00:00 2001 From: Patrick von Platen Date: Thu, 7 Jul 2022 15:08:34 +0200 Subject: [PATCH] [Generate Tests] Make sure no tokens are force-generated (#18053) --- tests/models/bart/test_modeling_bart.py | 8 ++++++++ tests/models/blenderbot/test_modeling_blenderbot.py | 8 ++++++++ .../blenderbot_small/test_modeling_blenderbot_small.py | 8 ++++++++ tests/models/marian/test_modeling_marian.py | 8 ++++++++ tests/models/mbart/test_modeling_mbart.py | 8 ++++++++ tests/models/pegasus/test_modeling_pegasus.py | 8 ++++++++ 6 files changed, 48 insertions(+) diff --git a/tests/models/bart/test_modeling_bart.py b/tests/models/bart/test_modeling_bart.py index b36bda3b71..5ef86523eb 100644 --- a/tests/models/bart/test_modeling_bart.py +++ b/tests/models/bart/test_modeling_bart.py @@ -116,6 +116,12 @@ class BartModelTester: self.pad_token_id = pad_token_id self.bos_token_id = bos_token_id + # forcing a certain token to be generated, sets all other tokens to -inf + # if however the token to be generated is already at -inf then it can lead to + # `nan` values and thus break generation + self.forced_bos_token_id = None + self.forced_eos_token_id = None + def prepare_config_and_inputs(self): input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size) input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size).clamp( @@ -145,6 +151,8 @@ class BartModelTester: eos_token_id=self.eos_token_id, bos_token_id=self.bos_token_id, pad_token_id=self.pad_token_id, + forced_bos_token_id=self.forced_bos_token_id, + forced_eos_token_id=self.forced_eos_token_id, ) def get_pipeline_config(self): diff --git a/tests/models/blenderbot/test_modeling_blenderbot.py b/tests/models/blenderbot/test_modeling_blenderbot.py index ee76626ffe..9b10e7690c 100644 --- a/tests/models/blenderbot/test_modeling_blenderbot.py +++ b/tests/models/blenderbot/test_modeling_blenderbot.py @@ -107,6 +107,12 @@ class BlenderbotModelTester: 
self.pad_token_id = pad_token_id self.bos_token_id = bos_token_id + # forcing a certain token to be generated, sets all other tokens to -inf + # if however the token to be generated is already at -inf then it can lead to + # `nan` values and thus break generation + self.forced_bos_token_id = None + self.forced_eos_token_id = None + def prepare_config_and_inputs(self): input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size).clamp( 3, @@ -135,6 +141,8 @@ class BlenderbotModelTester: eos_token_id=self.eos_token_id, bos_token_id=self.bos_token_id, pad_token_id=self.pad_token_id, + forced_bos_token_id=self.forced_bos_token_id, + forced_eos_token_id=self.forced_eos_token_id, ) def get_pipeline_config(self): diff --git a/tests/models/blenderbot_small/test_modeling_blenderbot_small.py b/tests/models/blenderbot_small/test_modeling_blenderbot_small.py index 47503b9c7f..f049fe3769 100644 --- a/tests/models/blenderbot_small/test_modeling_blenderbot_small.py +++ b/tests/models/blenderbot_small/test_modeling_blenderbot_small.py @@ -107,6 +107,12 @@ class BlenderbotSmallModelTester: self.pad_token_id = pad_token_id self.bos_token_id = bos_token_id + # forcing a certain token to be generated, sets all other tokens to -inf + # if however the token to be generated is already at -inf then it can lead to + # `nan` values and thus break generation + self.forced_bos_token_id = None + self.forced_eos_token_id = None + def prepare_config_and_inputs(self): input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size).clamp( 3, @@ -135,6 +141,8 @@ class BlenderbotSmallModelTester: eos_token_id=self.eos_token_id, bos_token_id=self.bos_token_id, pad_token_id=self.pad_token_id, + forced_bos_token_id=self.forced_bos_token_id, + forced_eos_token_id=self.forced_eos_token_id, ) def prepare_config_and_inputs_for_common(self): diff --git a/tests/models/marian/test_modeling_marian.py b/tests/models/marian/test_modeling_marian.py index 1039c4a51d..e454f981b4 
100644 --- a/tests/models/marian/test_modeling_marian.py +++ b/tests/models/marian/test_modeling_marian.py @@ -123,6 +123,12 @@ class MarianModelTester: self.bos_token_id = bos_token_id self.decoder_start_token_id = decoder_start_token_id + # forcing a certain token to be generated, sets all other tokens to -inf + # if however the token to be generated is already at -inf then it can lead to + # `nan` values and thus break generation + self.forced_bos_token_id = None + self.forced_eos_token_id = None + def prepare_config_and_inputs(self): input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size).clamp( 3, @@ -152,6 +158,8 @@ class MarianModelTester: bos_token_id=self.bos_token_id, pad_token_id=self.pad_token_id, decoder_start_token_id=self.decoder_start_token_id, + forced_bos_token_id=self.forced_bos_token_id, + forced_eos_token_id=self.forced_eos_token_id, ) def prepare_config_and_inputs_for_common(self): diff --git a/tests/models/mbart/test_modeling_mbart.py b/tests/models/mbart/test_modeling_mbart.py index 6a8eeed9fb..11f8bd7a0d 100644 --- a/tests/models/mbart/test_modeling_mbart.py +++ b/tests/models/mbart/test_modeling_mbart.py @@ -113,6 +113,12 @@ class MBartModelTester: self.pad_token_id = pad_token_id self.bos_token_id = bos_token_id + # forcing a certain token to be generated, sets all other tokens to -inf + # if however the token to be generated is already at -inf then it can lead to + # `nan` values and thus break generation + self.forced_bos_token_id = None + self.forced_eos_token_id = None + def prepare_config_and_inputs(self): input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size) input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size).clamp( @@ -142,6 +148,8 @@ class MBartModelTester: eos_token_id=self.eos_token_id, bos_token_id=self.bos_token_id, pad_token_id=self.pad_token_id, + forced_bos_token_id=self.forced_bos_token_id, + forced_eos_token_id=self.forced_eos_token_id, ) def 
prepare_config_and_inputs_for_common(self): diff --git a/tests/models/pegasus/test_modeling_pegasus.py b/tests/models/pegasus/test_modeling_pegasus.py index d5e9d22df1..81ed90b8a9 100644 --- a/tests/models/pegasus/test_modeling_pegasus.py +++ b/tests/models/pegasus/test_modeling_pegasus.py @@ -104,6 +104,12 @@ class PegasusModelTester: self.pad_token_id = pad_token_id self.bos_token_id = bos_token_id + # forcing a certain token to be generated, sets all other tokens to -inf + # if however the token to be generated is already at -inf then it can lead to + # `nan` values and thus break generation + self.forced_bos_token_id = None + self.forced_eos_token_id = None + def prepare_config_and_inputs(self): input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size) input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size).clamp( @@ -151,6 +157,8 @@ class PegasusModelTester: eos_token_id=self.eos_token_id, bos_token_id=self.bos_token_id, pad_token_id=self.pad_token_id, + forced_bos_token_id=self.forced_bos_token_id, + forced_eos_token_id=self.forced_eos_token_id, ) def prepare_config_and_inputs_for_common(self):