Big cleanup of glue_convert_examples_to_features (#3688)

* Big cleanup of `glue_convert_examples_to_features`

* Use batch_encode_plus

* Cleaner wrapping of glue_convert_examples_to_features for TF

@lysandrejik

* Cleanup syntax, thanks to @mfuntowicz

* Raise explicit error in case of user error
This commit is contained in:
Julien Chaumond
2020-04-10 10:20:18 -04:00
committed by GitHub
parent ce2298fb5f
commit f98d0ef2a2
6 changed files with 78 additions and 111 deletions

View File

@@ -354,14 +354,7 @@ def load_and_cache_examples(args, task, tokenizer, evaluate=False):
processor.get_dev_examples(args.data_dir) if evaluate else processor.get_train_examples(args.data_dir)
)
features = convert_examples_to_features(
-examples,
-tokenizer,
-label_list=label_list,
-max_length=args.max_seq_length,
-output_mode=output_mode,
-pad_on_left=bool(args.model_type in ["xlnet"]), # pad on the left for xlnet
-pad_token=tokenizer.pad_token_id,
-pad_token_segment_id=tokenizer.pad_token_type_id,
+examples, tokenizer, max_length=args.max_seq_length, label_list=label_list, output_mode=output_mode,
)
if args.local_rank in [-1, 0]:
logger.info("Saving features into cached file %s", cached_features_file)