From 365fb34c6c82c6bd0e63e10f079b635227c675e8 Mon Sep 17 00:00:00 2001 From: Aneesh Pappu Date: Tue, 30 Apr 2019 13:53:04 -0700 Subject: [PATCH] small fix to remove shifting of lm labels during pre process of roc stories, as this shifting happens internally in the model --- examples/run_openai_gpt.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/run_openai_gpt.py b/examples/run_openai_gpt.py index cb5aa8d9cb..e9183a79ae 100644 --- a/examples/run_openai_gpt.py +++ b/examples/run_openai_gpt.py @@ -83,8 +83,8 @@ def pre_process_datasets(encoded_datasets, input_len, cap_length, start_token, d input_ids[i, 1, :len(with_cont2)] = with_cont2 mc_token_ids[i, 0] = len(with_cont1) - 1 mc_token_ids[i, 1] = len(with_cont2) - 1 - lm_labels[i, 0, :len(with_cont1)-1] = with_cont1[1:] - lm_labels[i, 1, :len(with_cont2)-1] = with_cont2[1:] + lm_labels[i, 0, :len(with_cont1)] = with_cont1 + lm_labels[i, 1, :len(with_cont2)] = with_cont2 mc_labels[i] = mc_label all_inputs = (input_ids, mc_token_ids, lm_labels, mc_labels) tensor_datasets.append(tuple(torch.tensor(t) for t in all_inputs))