From 4a82f4f85685c22b995108909485d822f3e3c607 Mon Sep 17 00:00:00 2001 From: thomwolf Date: Thu, 11 Apr 2019 13:11:22 +0200 Subject: [PATCH 1/5] update special token addition --- pytorch_pretrained_bert/modeling_openai.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pytorch_pretrained_bert/modeling_openai.py b/pytorch_pretrained_bert/modeling_openai.py index fb3d0cadb7..feae95d962 100644 --- a/pytorch_pretrained_bert/modeling_openai.py +++ b/pytorch_pretrained_bert/modeling_openai.py @@ -608,6 +608,7 @@ class OpenAIGPTModel(OpenAIGPTPreTrainedModel): # Build new embeddings and initialize all new embeddings (in particular the special tokens) old_embed = self.tokens_embed self.tokens_embed = nn.Embedding(self.config.total_tokens_embeddings, self.config.n_embd) + self.tokens_embed.to(old_embed.device.weight.device) self.init_weights(self.tokens_embed) # Copy word embeddings from the previous weights self.tokens_embed.weight.data[:self.config.vocab_size, :] = old_embed.weight.data[:self.config.vocab_size, :] From a05fad8dcee87087368ad996fe2d76599b406e34 Mon Sep 17 00:00:00 2001 From: thomwolf Date: Thu, 11 Apr 2019 13:16:17 +0200 Subject: [PATCH 2/5] fix typo --- pytorch_pretrained_bert/modeling_openai.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pytorch_pretrained_bert/modeling_openai.py b/pytorch_pretrained_bert/modeling_openai.py index feae95d962..1a2a3feb20 100644 --- a/pytorch_pretrained_bert/modeling_openai.py +++ b/pytorch_pretrained_bert/modeling_openai.py @@ -608,7 +608,7 @@ class OpenAIGPTModel(OpenAIGPTPreTrainedModel): # Build new embeddings and initialize all new embeddings (in particular the special tokens) old_embed = self.tokens_embed self.tokens_embed = nn.Embedding(self.config.total_tokens_embeddings, self.config.n_embd) - self.tokens_embed.to(old_embed.device.weight.device) + self.tokens_embed.to(old_embed.weight.device) self.init_weights(self.tokens_embed) # Copy word embeddings from the previous weights self.tokens_embed.weight.data[:self.config.vocab_size, :] = old_embed.weight.data[:self.config.vocab_size, :] From 1d203a34c06fb8b2c1de856d58950f9d193cc1fc Mon Sep 17 00:00:00 2001 From: thomwolf Date: Thu, 11 Apr 2019 23:51:03 +0200 Subject: [PATCH 3/5] back to simple indexing --- pytorch_pretrained_bert/modeling_openai.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pytorch_pretrained_bert/modeling_openai.py b/pytorch_pretrained_bert/modeling_openai.py index 1a2a3feb20..be4f959485 100644 --- a/pytorch_pretrained_bert/modeling_openai.py +++ b/pytorch_pretrained_bert/modeling_openai.py @@ -372,7 +372,7 @@ class OpenAIGPTMultipleChoiceHead(nn.Module): # Classification logits # hidden_state (bsz, num_choices, seq_length, hidden_size) # mc_token_ids (bsz, num_choices, 1) - mc_token_ids = mc_token_ids.unsqueeze(-1).expand(-1, -1, -1, hidden_states.size(-1)) + mc_token_ids = mc_token_ids.unsqueeze(-1).unsqueeze(-1).expand(-1, -1, -1, hidden_states.size(-1)) # (bsz, num_choices, 1, hidden_size) multiple_choice_h = hidden_states.gather(2, mc_token_ids).squeeze(2) # (bsz, num_choices, hidden_size) From b509bf765574852648020d60690386b80e970cf6 Mon Sep 17 00:00:00 2001 From: thomwolf Date: Fri, 12 Apr 2019 12:12:33 +0200 Subject: [PATCH 4/5] updating loss computation --- pytorch_pretrained_bert/modeling_openai.py | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/pytorch_pretrained_bert/modeling_openai.py b/pytorch_pretrained_bert/modeling_openai.py index be4f959485..c4d20c331e 100644 --- a/pytorch_pretrained_bert/modeling_openai.py +++ b/pytorch_pretrained_bert/modeling_openai.py @@ -716,9 +716,8 @@ class OpenAIGPTLMHeadModel(OpenAIGPTPreTrainedModel): lm_logits = self.lm_head(hidden_states) if lm_labels is not None: # Shift so that tokens < n predict n - shift_logits = lm_logits[:, :-1].contiguous() - shift_labels = lm_labels[:, 1:].contiguous() - + shift_logits = lm_logits[..., :-1, :].contiguous() + shift_labels = lm_labels[..., 1:].contiguous() # Flatten the tokens loss_fct = CrossEntropyLoss(ignore_index=-1) loss = loss_fct(shift_logits.view(-1, shift_logits.size(-1)), @@ -808,11 +807,10 @@ class OpenAIGPTDoubleHeadsModel(OpenAIGPTPreTrainedModel): mc_logits = self.multiple_choice_head(hidden_states, mc_token_ids) losses = [] if lm_labels is not None: - shift_logits = lm_logits[:, :-1].contiguous() - shift_labels = lm_labels[:, 1:].contiguous() + shift_logits = lm_logits[..., :-1, :].contiguous() + shift_labels = lm_labels[..., 1:].contiguous() loss_fct = CrossEntropyLoss(ignore_index=-1) - losses.append(loss_fct(shift_logits.view(-1, - shift_logits.size(-1)), shift_labels.view(-1))) + losses.append(loss_fct(shift_logits.view(-1, shift_logits.size(-1)), shift_labels.view(-1))) if mc_labels is not None: loss_fct = CrossEntropyLoss() losses.append(loss_fct(mc_logits.view(-1, mc_logits.size(-1)), mc_labels.view(-1))) From fe2756ff41147ea6de14d8f81ecc5304382af91d Mon Sep 17 00:00:00 2001 From: thomwolf Date: Mon, 15 Apr 2019 10:04:05 +0200 Subject: [PATCH 5/5] update double head model --- pytorch_pretrained_bert/modeling_openai.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pytorch_pretrained_bert/modeling_openai.py b/pytorch_pretrained_bert/modeling_openai.py index c4d20c331e..7b95d74f7c 100644 --- a/pytorch_pretrained_bert/modeling_openai.py +++ b/pytorch_pretrained_bert/modeling_openai.py @@ -371,7 +371,7 @@ class OpenAIGPTMultipleChoiceHead(nn.Module): def forward(self, hidden_states, mc_token_ids): # Classification logits # hidden_state (bsz, num_choices, seq_length, hidden_size) - # mc_token_ids (bsz, num_choices, 1) + # mc_token_ids (bsz, num_choices) mc_token_ids = mc_token_ids.unsqueeze(-1).unsqueeze(-1).expand(-1, -1, -1, hidden_states.size(-1)) # (bsz, num_choices, 1, hidden_size) multiple_choice_h = hidden_states.gather(2, mc_token_ids).squeeze(2)