From c683c3d5a528c3cb66c6f0e497ccde18875048e0 Mon Sep 17 00:00:00 2001 From: thomwolf Date: Sat, 10 Aug 2019 20:04:35 +0200 Subject: [PATCH] fix #993 --- pytorch_transformers/modeling_gpt2.py | 5 +++-- pytorch_transformers/modeling_openai.py | 5 +++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/pytorch_transformers/modeling_gpt2.py b/pytorch_transformers/modeling_gpt2.py index 0ecc20516c..148b4a82ae 100644 --- a/pytorch_transformers/modeling_gpt2.py +++ b/pytorch_transformers/modeling_gpt2.py @@ -683,9 +683,10 @@ class GPT2DoubleHeadsModel(GPT2PreTrainedModel): tokenizer = GPT2Tokenizer.from_pretrained('gpt2') model = GPT2DoubleHeadsModel.from_pretrained('gpt2') - choices = ["Hello, my dog is cute [CLS]", "Hello, my cat is cute [CLS]"] # Assume you've added [CLS] to the vocabulary + tokenizer.add_special_tokens({'cls_token': '[CLS]'}) # Add a [CLS] to the vocabulary (we should train it also!) + choices = ["Hello, my dog is cute [CLS]", "Hello, my cat is cute [CLS]"] input_ids = torch.tensor([tokenizer.encode(s) for s in choices]).unsqueeze(0) # Batch size 1, 2 choices - mc_token_ids = torch.tensor([-1, -1]).unsqueeze(0) # Batch size 1 + mc_token_ids = torch.tensor([input_ids.size(-1) - 1, input_ids.size(-1) - 1]).unsqueeze(0) # Batch size 1 outputs = model(input_ids, mc_token_ids) lm_prediction_scores, mc_prediction_scores = outputs[:2] diff --git a/pytorch_transformers/modeling_openai.py b/pytorch_transformers/modeling_openai.py index 536b0e2432..364923b0af 100644 --- a/pytorch_transformers/modeling_openai.py +++ b/pytorch_transformers/modeling_openai.py @@ -665,9 +665,10 @@ class OpenAIGPTDoubleHeadsModel(OpenAIGPTPreTrainedModel): tokenizer = OpenAIGPTTokenizer.from_pretrained('openai-gpt') model = OpenAIGPTDoubleHeadsModel.from_pretrained('openai-gpt') - choices = ["Hello, my dog is cute [CLS]", "Hello, my cat is cute [CLS]"] # Assume you've added [CLS] to the 
vocabulary (we should train it also!) + choices = ["Hello, my dog is cute [CLS]", "Hello, my cat is cute [CLS]"] input_ids = torch.tensor([tokenizer.encode(s) for s in choices]).unsqueeze(0) # Batch size 1, 2 choices - mc_token_ids = torch.tensor([-1, -1]).unsqueeze(0) # Batch size 1 + mc_token_ids = torch.tensor([input_ids.size(-1) - 1, input_ids.size(-1) - 1]).unsqueeze(0) # Batch size 1 outputs = model(input_ids, mc_token_ids) lm_prediction_scores, mc_prediction_scores = outputs[:2]