Tokenizers: Start cleaning examples a little (#3455)

* Start cleaning examples

* Fixup
This commit is contained in:
Julien Chaumond
2020-04-01 07:13:40 -04:00
committed by GitHub
parent b38d552a92
commit 50e15c825c
8 changed files with 14 additions and 14 deletions

View File

@@ -361,7 +361,7 @@ def load_and_cache_examples(args, task, tokenizer, evaluate=False, test=False):
args.max_seq_length,
tokenizer,
pad_on_left=bool(args.model_type in ["xlnet"]), # pad on the left for xlnet
-pad_token_segment_id=4 if args.model_type in ["xlnet"] else 0,
+pad_token_segment_id=tokenizer.pad_token_type_id,
)
if args.local_rank in [-1, 0]:
logger.info("Saving features into cached file %s", cached_features_file)