From f1e4db2aa80d72bfe9992476ccca52348a789db0 Mon Sep 17 00:00:00 2001
From: thomwolf <thomwolf@gmail.com>
Date: Tue, 5 Nov 2019 09:38:00 +0100
Subject: [PATCH] Fix #1686

---
 transformers/modeling_openai.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/transformers/modeling_openai.py b/transformers/modeling_openai.py
index 36c1560f1a..12c0f82e60 100644
--- a/transformers/modeling_openai.py
+++ b/transformers/modeling_openai.py
@@ -565,9 +565,12 @@ class OpenAIGPTDoubleHeadsModel(OpenAIGPTPreTrainedModel):
         tokenizer = OpenAIGPTTokenizer.from_pretrained('openai-gpt')
         model = OpenAIGPTDoubleHeadsModel.from_pretrained('openai-gpt')
         tokenizer.add_special_tokens({'cls_token': '[CLS]'})  # Add a [CLS] to the vocabulary (we should train it also!)
+        model.resize_token_embeddings(len(tokenizer))
+
         choices = ["Hello, my dog is cute [CLS]", "Hello, my cat is cute [CLS]"]
         input_ids = torch.tensor([tokenizer.encode(s) for s in choices]).unsqueeze(0)  # Batch size 1, 2 choices
-        mc_token_ids = torch.tensor([input_ids.size(-1), input_ids.size(-1)]).unsqueeze(0)  # Batch size 1
+        mc_token_ids = torch.tensor([input_ids.size(-1)-1, input_ids.size(-1)-1]).unsqueeze(0)  # Batch size 1
+
         outputs = model(input_ids, mc_token_ids=mc_token_ids)
         lm_prediction_scores, mc_prediction_scores = outputs[:2]