Improve special_token_id logic in run_generation.py and add tests (#2885)
* improving generation * finalized special token behaviour for no_beam_search generation * solved modeling_utils merge conflict * solve merge conflicts in modeling_utils.py * add run_generation improvements from PR #2749 * adapted language generation to not use hardcoded -1 if no padding token is available * remove the -1 removal as hard coded -1`s are not necessary anymore * add lightweight language generation testing for randomely initialized models - just checking whether no errors are thrown * add slow language generation tests for pretrained models using hardcoded output with pytorch seed * delete ipdb * check that all generated tokens are valid * renaming * renaming Generation -> Generate * make style * updated so that generate_beam_search has same token behavior than generate_no_beam_search * consistent return format for run_generation.py * deleted pretrain lm generate tests -> will be added in another PR * cleaning of unused if statements and renaming * run_generate will always return an iterable * make style * consistent renaming * improve naming, make sure generate function always returns the same tensor, add docstring * add slow tests for all lmhead models * make style and improve example comments modeling_utils * better naming and refactoring in modeling_utils * improving generation * finalized special token behaviour for no_beam_search generation * solved modeling_utils merge conflict * solve merge conflicts in modeling_utils.py * add run_generation improvements from PR #2749 * adapted language generation to not use hardcoded -1 if no padding token is available * remove the -1 removal as hard coded -1`s are not necessary anymore * add lightweight language generation testing for randomely initialized models - just checking whether no errors are thrown * add slow language generation tests for pretrained models using hardcoded output with pytorch seed * delete ipdb * check that all generated tokens are valid * renaming * renaming Generation -> Generate * make style * updated so that generate_beam_search has same token behavior than generate_no_beam_search * consistent return format for run_generation.py * deleted pretrain lm generate tests -> will be added in another PR * cleaning of unused if statements and renaming * run_generate will always return an iterable * make style * consistent renaming * improve naming, make sure generate function always returns the same tensor, add docstring * add slow tests for all lmhead models * make style and improve example comments modeling_utils * better naming and refactoring in modeling_utils * changed fast random lm generation testing design to more general one * delete in old testing design in gpt2 * correct old variable name * temporary fix for encoder_decoder lm generation tests - has to be updated when t5 is fixed * adapted all fast random generate tests to new design * better warning description in modeling_utils * better comment * better comment and error message Co-authored-by: Thomas Wolf <thomwolf@users.noreply.github.com>
This commit is contained in:
committed by
GitHub
parent
c749a543fa
commit
fc38d4c86f
@@ -13,7 +13,6 @@
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
|
||||
import copy
|
||||
import logging
|
||||
import os.path
|
||||
@@ -53,6 +52,7 @@ class ModelTesterMixin:
|
||||
|
||||
model_tester = None
|
||||
all_model_classes = ()
|
||||
all_generative_model_classes = ()
|
||||
test_torchscript = True
|
||||
test_pruning = True
|
||||
test_resize_embeddings = True
|
||||
@@ -595,6 +595,47 @@ class ModelTesterMixin:
|
||||
with torch.no_grad():
|
||||
model(**inputs_dict)
|
||||
|
||||
def test_lm_head_model_random_generate(self):
|
||||
|
||||
config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
|
||||
input_ids = inputs_dict.get(
|
||||
"input_ids", None
|
||||
) # TODO (PVP): ugly workaround to make code work for t5 for the moment - has to changed when t5 is fixed.
|
||||
|
||||
for model_class in self.all_generative_model_classes:
|
||||
model = model_class(config)
|
||||
model.to(torch_device)
|
||||
model.eval()
|
||||
|
||||
if config.bos_token_id is None:
|
||||
with self.assertRaises(AssertionError):
|
||||
model.generate(max_length=5)
|
||||
# batch_size = 1
|
||||
self._check_generated_tokens(model.generate(input_ids))
|
||||
# batch_size = 1, num_beams > 1
|
||||
self._check_generated_tokens(model.generate(input_ids, num_beams=3))
|
||||
else:
|
||||
# batch_size = 1
|
||||
self._check_generated_tokens(model.generate(max_length=5))
|
||||
# batch_size = 1, num_beams > 1
|
||||
self._check_generated_tokens(model.generate(max_length=5, num_beams=3))
|
||||
|
||||
# batch_size > 1, sample
|
||||
self._check_generated_tokens(model.generate(input_ids, num_return_sequences=3))
|
||||
# batch_size > 1, greedy
|
||||
self._check_generated_tokens(model.generate(input_ids, do_sample=False, num_return_sequences=3))
|
||||
# batch_size > 1, num_beams > 1, sample
|
||||
self._check_generated_tokens(model.generate(input_ids, num_beams=3, num_return_sequences=3,))
|
||||
# batch_size > 1, num_beams > 1, greedy
|
||||
self._check_generated_tokens(
|
||||
model.generate(input_ids, do_sample=False, num_beams=3, num_return_sequences=3)
|
||||
)
|
||||
|
||||
def _check_generated_tokens(self, output_ids):
|
||||
for token_id in output_ids[0].tolist():
|
||||
self.assertGreaterEqual(token_id, 0)
|
||||
self.assertLess(token_id, self.model_tester.vocab_size)
|
||||
|
||||
|
||||
global_rng = random.Random()
|
||||
|
||||
|
||||
@@ -30,6 +30,7 @@ if is_torch_available():
|
||||
class CTRLModelTest(ModelTesterMixin, unittest.TestCase):
|
||||
|
||||
all_model_classes = (CTRLModel, CTRLLMHeadModel) if is_torch_available() else ()
|
||||
all_generative_model_classes = (CTRLLMHeadModel,) if is_torch_available() else ()
|
||||
test_pruning = False
|
||||
test_torchscript = False
|
||||
test_resize_embeddings = False
|
||||
|
||||
@@ -37,6 +37,9 @@ if is_torch_available():
|
||||
class GPT2ModelTest(ModelTesterMixin, unittest.TestCase):
|
||||
|
||||
all_model_classes = (GPT2Model, GPT2LMHeadModel, GPT2DoubleHeadsModel) if is_torch_available() else ()
|
||||
all_generative_model_classes = (
|
||||
(GPT2LMHeadModel,) if is_torch_available() else ()
|
||||
) # TODO (PVP): Add Double HeadsModel when generate() function is changed accordingly
|
||||
|
||||
class GPT2ModelTester(object):
|
||||
def __init__(
|
||||
@@ -88,6 +91,8 @@ class GPT2ModelTest(ModelTesterMixin, unittest.TestCase):
|
||||
self.num_labels = num_labels
|
||||
self.num_choices = num_choices
|
||||
self.scope = scope
|
||||
self.bos_token_id = vocab_size - 1
|
||||
self.eos_token_id = vocab_size - 1
|
||||
|
||||
def prepare_config_and_inputs(self):
|
||||
input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)
|
||||
@@ -122,9 +127,11 @@ class GPT2ModelTest(ModelTesterMixin, unittest.TestCase):
|
||||
# hidden_dropout_prob=self.hidden_dropout_prob,
|
||||
# attention_probs_dropout_prob=self.attention_probs_dropout_prob,
|
||||
n_positions=self.max_position_embeddings,
|
||||
n_ctx=self.max_position_embeddings
|
||||
n_ctx=self.max_position_embeddings,
|
||||
# type_vocab_size=self.type_vocab_size,
|
||||
# initializer_range=self.initializer_range
|
||||
bos_token_id=self.bos_token_id,
|
||||
eos_token_ids=self.eos_token_id,
|
||||
)
|
||||
|
||||
head_mask = ids_tensor([self.num_hidden_layers, self.num_attention_heads], 2)
|
||||
|
||||
@@ -39,6 +39,9 @@ class OpenAIGPTModelTest(ModelTesterMixin, unittest.TestCase):
|
||||
all_model_classes = (
|
||||
(OpenAIGPTModel, OpenAIGPTLMHeadModel, OpenAIGPTDoubleHeadsModel) if is_torch_available() else ()
|
||||
)
|
||||
all_generative_model_classes = (
|
||||
(OpenAIGPTLMHeadModel,) if is_torch_available() else ()
|
||||
) # TODO (PVP): Add Double HeadsModel when generate() function is changed accordingly
|
||||
|
||||
class OpenAIGPTModelTester(object):
|
||||
def __init__(
|
||||
|
||||
@@ -34,6 +34,7 @@ if is_torch_available():
|
||||
class TransfoXLModelTest(ModelTesterMixin, unittest.TestCase):
|
||||
|
||||
all_model_classes = (TransfoXLModel, TransfoXLLMHeadModel) if is_torch_available() else ()
|
||||
all_generative_model_classes = (TransfoXLLMHeadModel,) if is_torch_available() else ()
|
||||
test_pruning = False
|
||||
test_torchscript = False
|
||||
test_resize_embeddings = False
|
||||
@@ -59,6 +60,7 @@ class TransfoXLModelTest(ModelTesterMixin, unittest.TestCase):
|
||||
num_hidden_layers=5,
|
||||
scope=None,
|
||||
seed=1,
|
||||
eos_token_id=0,
|
||||
):
|
||||
self.parent = parent
|
||||
self.batch_size = batch_size
|
||||
@@ -79,6 +81,7 @@ class TransfoXLModelTest(ModelTesterMixin, unittest.TestCase):
|
||||
self.num_hidden_layers = num_hidden_layers
|
||||
self.scope = scope
|
||||
self.seed = seed
|
||||
self.eos_token_id = eos_token_id
|
||||
|
||||
def prepare_config_and_inputs(self):
|
||||
input_ids_1 = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)
|
||||
@@ -100,6 +103,7 @@ class TransfoXLModelTest(ModelTesterMixin, unittest.TestCase):
|
||||
d_inner=self.d_inner,
|
||||
div_val=self.div_val,
|
||||
n_layer=self.num_hidden_layers,
|
||||
eos_token_ids=self.eos_token_id,
|
||||
)
|
||||
|
||||
return (config, input_ids_1, input_ids_2, lm_labels)
|
||||
|
||||
@@ -49,6 +49,9 @@ class XLMModelTest(ModelTesterMixin, unittest.TestCase):
|
||||
if is_torch_available()
|
||||
else ()
|
||||
)
|
||||
all_generative_model_classes = (
|
||||
(XLMWithLMHeadModel,) if is_torch_available() else ()
|
||||
) # TODO (PVP): Check other models whether language generation is also applicable
|
||||
|
||||
class XLMModelTester(object):
|
||||
def __init__(
|
||||
@@ -81,6 +84,7 @@ class XLMModelTest(ModelTesterMixin, unittest.TestCase):
|
||||
summary_type="last",
|
||||
use_proj=True,
|
||||
scope=None,
|
||||
bos_token_id=0,
|
||||
):
|
||||
self.parent = parent
|
||||
self.batch_size = batch_size
|
||||
@@ -111,6 +115,7 @@ class XLMModelTest(ModelTesterMixin, unittest.TestCase):
|
||||
self.num_labels = num_labels
|
||||
self.num_choices = num_choices
|
||||
self.scope = scope
|
||||
self.bos_token_id = bos_token_id
|
||||
|
||||
def prepare_config_and_inputs(self):
|
||||
input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)
|
||||
@@ -151,6 +156,7 @@ class XLMModelTest(ModelTesterMixin, unittest.TestCase):
|
||||
initializer_range=self.initializer_range,
|
||||
summary_type=self.summary_type,
|
||||
use_proj=self.use_proj,
|
||||
bos_token_id=self.bos_token_id,
|
||||
)
|
||||
|
||||
return (
|
||||
|
||||
@@ -52,6 +52,9 @@ class XLNetModelTest(ModelTesterMixin, unittest.TestCase):
|
||||
if is_torch_available()
|
||||
else ()
|
||||
)
|
||||
all_generative_model_classes = (
|
||||
(XLNetLMHeadModel,) if is_torch_available() else ()
|
||||
) # TODO (PVP): Check other models whether language generation is also applicable
|
||||
test_pruning = False
|
||||
|
||||
class XLNetModelTester(object):
|
||||
@@ -78,6 +81,9 @@ class XLNetModelTest(ModelTesterMixin, unittest.TestCase):
|
||||
initializer_range=0.05,
|
||||
seed=1,
|
||||
type_vocab_size=2,
|
||||
bos_token_id=1,
|
||||
eos_token_id=2,
|
||||
pad_token_id=5,
|
||||
):
|
||||
self.parent = parent
|
||||
self.batch_size = batch_size
|
||||
@@ -101,6 +107,9 @@ class XLNetModelTest(ModelTesterMixin, unittest.TestCase):
|
||||
self.seed = seed
|
||||
self.type_vocab_size = type_vocab_size
|
||||
self.type_sequence_label_size = type_sequence_label_size
|
||||
self.bos_token_id = bos_token_id
|
||||
self.pad_token_id = pad_token_id
|
||||
self.eos_token_id = eos_token_id
|
||||
|
||||
def prepare_config_and_inputs(self):
|
||||
input_ids_1 = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)
|
||||
@@ -142,6 +151,9 @@ class XLNetModelTest(ModelTesterMixin, unittest.TestCase):
|
||||
bi_data=self.bi_data,
|
||||
initializer_range=self.initializer_range,
|
||||
num_labels=self.type_sequence_label_size,
|
||||
bos_token_id=self.bos_token_id,
|
||||
pad_token_id=self.pad_token_id,
|
||||
eos_token_id=self.eos_token_id,
|
||||
)
|
||||
|
||||
return (
|
||||
|
||||
Reference in New Issue
Block a user