From 822f725a07fda22042bb50b71c66be83007a9d1c Mon Sep 17 00:00:00 2001 From: patrickvonplaten Date: Mon, 23 Dec 2019 21:49:32 +0100 Subject: [PATCH 1/4] duplicated line for repeating_words_penalty_for_language_generation --- src/transformers/modeling_utils.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/transformers/modeling_utils.py b/src/transformers/modeling_utils.py index 8722d578fd..5ae89ff691 100644 --- a/src/transformers/modeling_utils.py +++ b/src/transformers/modeling_utils.py @@ -585,8 +585,6 @@ class PreTrainedModel(nn.Module): Beginning of sentence token if no prompt is provided. Default to 0. **eos_token_ids**: (`optional`) int or list of int End of sequence token or list of tokens to stop the generation. Default to 0. - **length_penalty**: (`optional`) int - Exponential penalty to the length. Default to 0. **length_penalty**: (`optional`) float Exponential penalty to the length. Default to 1. **num_return_sequences**: (`optional`) int From 88def24c4583d19c7338aa0c8215826093aabf75 Mon Sep 17 00:00:00 2001 From: patrickvonplaten Date: Thu, 26 Dec 2019 00:27:16 +0100 Subject: [PATCH 2/4] merge conflicts - renamed to previous_token singular --- src/transformers/modeling_utils.py | 105 +++++++++++++++++++---------- 1 file changed, 70 insertions(+), 35 deletions(-) diff --git a/src/transformers/modeling_utils.py b/src/transformers/modeling_utils.py index 5ae89ff691..d4421a05a6 100644 --- a/src/transformers/modeling_utils.py +++ b/src/transformers/modeling_utils.py @@ -556,46 +556,81 @@ class PreTrainedModel(nn.Module): length_penalty=None, num_return_sequences=None, ): - """ Sequence generator for models with a LM head. - - The method currently supports greedy or penalized greedy decoding, sampling with top-k or nucleus sampling + r""" Generates sequences for models with a LM head. The method currently supports greedy or penalized greedy decoding, sampling with top-k or nucleus sampling and beam-search. 
- Adapted in part from Facebook's XLM beam search code: https://github.com/facebookresearch/XLM + Adapted in part from `Facebook's XLM beam search code`_. - Params: - **input_ids**: (`optional`) `torch.LongTensor` of shape (1, sequence_length) + .. _`Facebook's XLM beam search code`: + https://github.com/facebookresearch/XLM/blob/9e6f6814d17be4fe5b15f2e6c43eb2b2d76daeb4/src/model/transformer.py#L529 + + + Parameters: + + input_ids: (`optional`) `torch.LongTensor` of shape `(batch_size, sequence_length)` The sequence used as a prompt for the generation. If `None` the method initializes - it as an empty `torch.LongTensor` of shape (1,) - **max_length**: (`optional`) int + it as an empty `torch.LongTensor` of shape `(1,)`. + + max_length: (`optional`) int The max length of the sequence to be generated. Between 1 and infinity. Default to 20. - **do_sample**: (`optional`) bool - If set to `False` we use greedy decoding; otherwise sampling. Default to greedy sampling. - **num_beams**: (`optional`) int - Number of beams for beam search. 1 means no beam serach. Default to 1. - **temperature**: (`optional`) float - The value used to module the next token probabilities. - **top_k**: (`optional`) int + + do_sample: (`optional`) bool + If set to `False` greedy decoding is used. Otherwise sampling is used. Default to greedy sampling. + + num_beams: (`optional`) int + Number of beams for beam search. Must be between 1 and infinity. 1 means no beam search. Default to 1. + + temperature: (`optional`) float + The value used to modulate the next token probabilities. Must be strictly positive. Default to 1.0. + + top_k: (`optional`) int The number of highest probability vocabulary tokens to keep for top-k-filtering. Between 1 and infinity. Default to 50. - **top_p**: (`optional`) float + + top_p: (`optional`) float The cumulative probability of parameter highest probability vocabulary tokens to keep for nucleus sampling. Must be between 0 and 1. Default to 1.
- **repetition_penalty**: (`optional`) float - The parameter for repetition penalty. Between 1.0 and + infinity. 1.0 means no penalty. Default to 1. - **bos_token_id**: (`optional`) int + + repetition_penalty: (`optional`) float + The parameter for repetition penalty. Between 1.0 and infinity. 1.0 means no penalty. Default to 1.0. + + bos_token_id: (`optional`) int Beginning of sentence token if no prompt is provided. Default to 0. - **eos_token_ids**: (`optional`) int or list of int + + eos_token_ids: (`optional`) int or list of int End of sequence token or list of tokens to stop the generation. Default to 0. - **length_penalty**: (`optional`) float + length_penalty: (`optional`) float Exponential penalty to the length. Default to 1. - **num_return_sequences**: (`optional`) int - The number of independantly computed returned sequences for each element in the batch. Default to 1. + + num_return_sequences: (`optional`) int + The number of independently computed returned sequences for each element in the batch. Default to 1. + + Examples:: + + tokenizer = AutoTokenizer.from_pretrained('distilgpt2') # Initialize tokenizer + model = AutoModelWithLMHead.from_pretrained('distilgpt2') # Download model and configuration from S3 and cache. + outputs = model.generate(max_length=40, bos_token_id=tokenizer.bos_token_id, eos_token_ids=tokenizer.eos_token_id) # do greedy decoding without beam search + print('Generated: {}'.format(tokenizer.decode(outputs[0], skip_special_tokens=True))) + + tokenizer = AutoTokenizer.from_pretrained('openai-gpt') # Initialize tokenizer + model = AutoModelWithLMHead.from_pretrained('openai-gpt') # Download model and configuration from S3 and cache. 
+ input_context = 'The dog' + input_ids = torch.tensor(tokenizer.encode(input_context)).unsqueeze(0) # encode input context + outputs = model.generate(input_ids=input_ids, do_sample=True, num_beams=5, num_return_sequences=3) # generate 3 independent sequences using beam search decoding (5 beams) from initial context 'The dog' + for i in range(3): # 3 output sequences were generated + print('Generated {}: {}'.format(i, tokenizer.decode(outputs[0][i], skip_special_tokens=True))) + + tokenizer = AutoTokenizer.from_pretrained('distilgpt2') # Initialize tokenizer + model = AutoModelWithLMHead.from_pretrained('distilgpt2') # Download model and configuration from S3 and cache. + input_context = 'The dog' + input_ids = torch.tensor(tokenizer.encode(input_context)).unsqueeze(0) # encode input context + outputs = model.generate(input_ids=input_ids, max_length=40, do_sample=True, temperature=0.7, bos_token_id=tokenizer.bos_token_id, eos_token_ids=tokenizer.eos_token_id, num_beams=3) # generate sequences using beam search decoding (3 beams) + print('Generated: {}'.format(tokenizer.decode(outputs[0], skip_special_tokens=True))) """ # We cannot generate if the model does not have a LM head if self.get_output_embeddings() is None: raise AttributeError( "You tried to generate sequences with a model that does not have a LM Head." - "Please use another model class (e.g. `OpenAIGPTLMHeadModel`)" + "Please use another model class (e.g. `OpenAIGPTLMHeadModel`, `XLNetLMHeadModel`, `GPT2LMHeadModel`, `CTRLLMHeadModel`, `T5WithLMHeadModel`, `TransfoXLLMHeadModel`)" ) max_length = max_length if max_length is not None else self.config.max_length @@ -623,7 +658,7 @@ class PreTrainedModel(nn.Module): assert isinstance(max_length, int) and max_length > 0, "`max_length` should be a strictely positive integer." assert isinstance(do_sample, bool), "`do_sample` should be a boolean." assert isinstance(num_beams, int) and num_beams > 0, "`num_beams` should be a strictely positive integer." 
- # assert temperature >= 0, "`temperature` should be positive." + assert temperature > 0, "`temperature` should be strictely positive." assert isinstance(top_k, int) and top_k >= 0, "`top_k` should be a positive integer." assert 0 <= top_p <= 1, "`top_p` should be between 0 and 1." assert repetition_penalty >= 1.0, "`repetition_penalty` should be >= 1." @@ -725,16 +760,16 @@ class PreTrainedModel(nn.Module): # repetition penalty from CTRL paper (https://arxiv.org/abs/1909.05858) if repetition_penalty != 1.0: for i in range(batch_size): - for previous_tokens in set(input_ids[i].tolist()): + for previous_token in set(input_ids[i].tolist()): # if score < 0 then repetition penalty has to multiplied to reduce the previous token probability - if next_token_logits[i, previous_tokens] < 0: - next_token_logits[i, previous_tokens] *= repetition_penalty + if next_token_logits[i, previous_token] < 0: + next_token_logits[i, previous_token] *= repetition_penalty else: - next_token_logits[i, previous_tokens] /= repetition_penalty + next_token_logits[i, previous_token] /= repetition_penalty if do_sample: # Temperature (higher temperature => more likely to sample low probability tokens) - if temperature > 0 and temperature != 1.0: + if temperature != 1.0: next_token_logits = next_token_logits / temperature # Top-p/top-k filtering next_token_logits = top_k_top_p_filtering(next_token_logits, top_k=top_k, top_p=top_p) @@ -808,16 +843,16 @@ class PreTrainedModel(nn.Module): # repetition penalty (from CTRL paper https://arxiv.org/abs/1909.05858) if repetition_penalty != 1.0: for i in range(batch_size * num_beams): - for previous_tokens in set(input_ids[i].tolist()): + for previous_token in set(input_ids[i].tolist()): # if score < 0 then repetition penalty has to multiplied to reduce the previous token probability - if scores[i, previous_tokens] < 0: - scores[i, previous_tokens] *= repetition_penalty + if scores[i, previous_token] < 0: + scores[i, previous_token] *= repetition_penalty 
else: - scores[i, previous_tokens] /= repetition_penalty + scores[i, previous_token] /= repetition_penalty if do_sample: # Temperature (higher temperature => more likely to sample low probability tokens) - if temperature > 0 and temperature != 1.0: + if temperature != 1.0: scores = scores / temperature # Top-p/top-k filtering scores = top_k_top_p_filtering( From 87c8fca9bc39435f518e0b60e44aafc374333886 Mon Sep 17 00:00:00 2001 From: patrickvonplaten Date: Wed, 25 Dec 2019 02:27:25 +0100 Subject: [PATCH 3/4] add example for ctrl text generation in docs --- src/transformers/modeling_utils.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/transformers/modeling_utils.py b/src/transformers/modeling_utils.py index d4421a05a6..5a36b436be 100644 --- a/src/transformers/modeling_utils.py +++ b/src/transformers/modeling_utils.py @@ -624,6 +624,14 @@ class PreTrainedModel(nn.Module): input_ids = torch.tensor(tokenizer.encode(input_context)).unsqueeze(0) # encode input context outputs = model.generate(input_ids=input_ids, max_length=40, do_sample=True, temperature=0.7, bos_token_id=tokenizer.bos_token_id, eos_token_ids=tokenizer.eos_token_id, num_beams=3) # generate sequences using beam search decoding (3 beams) print('Generated: {}'.format(tokenizer.decode(outputs[0], skip_special_tokens=True))) + + tokenizer = AutoTokenizer.from_pretrained('ctrl') # Initialize tokenizer + model = AutoModelWithLMHead.from_pretrained('ctrl') # Download model and configuration from S3 and cache. 
+ input_context = 'Legal My neighbor is' # "Legal" is one of the control codes for ctrl + input_ids = torch.tensor(tokenizer.encode(input_context)).unsqueeze(0) # encode input context + outputs = model.generate(input_ids=input_ids, max_length=50, temperature=0.7, repetition_penalty=1.2) # generate sequences using greedy search + print('Generated: {}'.format(tokenizer.decode(outputs[0], skip_special_tokens=True))) + """ # We cannot generate if the model does not have a LM head From 0f6017bee3668222000ea788f552fac700362963 Mon Sep 17 00:00:00 2001 From: patrickvonplaten Date: Thu, 26 Dec 2019 00:35:11 +0100 Subject: [PATCH 4/4] improve comments for examples --- src/transformers/modeling_utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/transformers/modeling_utils.py b/src/transformers/modeling_utils.py index 5a36b436be..f2d3ca39f1 100644 --- a/src/transformers/modeling_utils.py +++ b/src/transformers/modeling_utils.py @@ -614,7 +614,7 @@ class PreTrainedModel(nn.Module): model = AutoModelWithLMHead.from_pretrained('openai-gpt') # Download model and configuration from S3 and cache.
input_context = 'The dog' input_ids = torch.tensor(tokenizer.encode(input_context)).unsqueeze(0) # encode input context - outputs = model.generate(input_ids=input_ids, do_sample=True, num_beams=5, num_return_sequences=3) # generate 3 independent sequences using beam search decoding (5 beams) from initial context 'The dog' + outputs = model.generate(input_ids=input_ids, do_sample=True, num_beams=5, num_return_sequences=3, temperature=1.5) # generate 3 independent sequences using beam search decoding (5 beams) with sampling from initial context 'The dog' for i in range(3): # 3 output sequences were generated print('Generated {}: {}'.format(i, tokenizer.decode(outputs[0][i], skip_special_tokens=True))) @@ -622,7 +622,7 @@ class PreTrainedModel(nn.Module): model = AutoModelWithLMHead.from_pretrained('distilgpt2') # Download model and configuration from S3 and cache. input_context = 'The dog' input_ids = torch.tensor(tokenizer.encode(input_context)).unsqueeze(0) # encode input context - outputs = model.generate(input_ids=input_ids, max_length=40, do_sample=True, temperature=0.7, bos_token_id=tokenizer.bos_token_id, eos_token_ids=tokenizer.eos_token_id, num_beams=3) # generate sequences using beam search decoding (3 beams) + outputs = model.generate(input_ids=input_ids, max_length=40, temperature=0.7, bos_token_id=tokenizer.bos_token_id, eos_token_ids=tokenizer.eos_token_id, num_beams=3) # generate sequences using greedy beam search decoding (3 beams) print('Generated: {}'.format(tokenizer.decode(outputs[0], skip_special_tokens=True))) tokenizer = AutoTokenizer.from_pretrained('ctrl') # Initialize tokenizer