Big cleanup of glue_convert_examples_to_features (#3688)

* Big cleanup of `glue_convert_examples_to_features`

* Use `batch_encode_plus`

* Cleaner wrapping of `glue_convert_examples_to_features` for TF

@lysandrejik

* Cleanup syntax, thanks to @mfuntowicz

* Raise explicit error in case of user error
This commit is contained in:
Julien Chaumond
2020-04-10 10:20:18 -04:00
committed by GitHub
parent ce2298fb5f
commit f98d0ef2a2
6 changed files with 78 additions and 111 deletions

View File

@@ -63,12 +63,8 @@ class GLUETransformer(BaseTransformer):
examples,
self.tokenizer,
max_length=args.max_seq_length,
task=args.task,
label_list=self.labels,
output_mode=args.glue_output_mode,
pad_on_left=bool(args.model_type in ["xlnet"]), # pad on the left for xlnet
pad_token=self.tokenizer.convert_tokens_to_ids([self.tokenizer.pad_token])[0],
pad_token_segment_id=self.tokenizer.pad_token_type_id,
)
logger.info("Saving features into cached file %s", cached_features_file)
torch.save(features, cached_features_file)