From e52d1258e010b88d3507a4f527c6201616c119ad Mon Sep 17 00:00:00 2001 From: Ethan Perez Date: Mon, 6 Apr 2020 15:52:22 -0500 Subject: [PATCH] Fix RoBERTa/XLNet Pad Token in run_multiple_choice.py (#3631) * Fix RoBERTa/XLNet Pad Token in run_multiple_choice.py `convert_examples_to_features` sets `pad_token=0` by default, which is correct for BERT but incorrect for RoBERTa (`pad_token=1`) and XLNet (`pad_token=5`). I think the other arguments to `convert_examples_to_features` are correct, but it might be helpful if someone checked who is more familiar with this part of the codebase. * Simplifying change to match recent commits --- examples/run_multiple_choice.py | 1 + 1 file changed, 1 insertion(+) diff --git a/examples/run_multiple_choice.py b/examples/run_multiple_choice.py index dbeae2b689..578ce01226 100644 --- a/examples/run_multiple_choice.py +++ b/examples/run_multiple_choice.py @@ -361,6 +361,7 @@ def load_and_cache_examples(args, task, tokenizer, evaluate=False, test=False): args.max_seq_length, tokenizer, pad_on_left=bool(args.model_type in ["xlnet"]), # pad on the left for xlnet + pad_token=tokenizer.pad_token_id, pad_token_segment_id=tokenizer.pad_token_type_id, ) if args.local_rank in [-1, 0]: