[Proposal] GLUE processors included in library

This commit is contained in:
LysandreJik
2019-09-24 09:47:34 -04:00
parent 72402d1acd
commit f09e5ecef0
4 changed files with 230 additions and 204 deletions

View File

@@ -46,8 +46,7 @@ from pytorch_transformers import (WEIGHTS_NAME, BertConfig,
from pytorch_transformers import AdamW, WarmupLinearSchedule
-from utils_glue import (compute_metrics, convert_examples_to_features,
-output_modes, processors)
+from pytorch_transformers.preprocessing import (compute_metrics, output_modes, processors, convert_examples_to_glue_features)
logger = logging.getLogger(__name__)
@@ -276,7 +275,7 @@ def load_and_cache_examples(args, task, tokenizer, evaluate=False):
# HACK(label indices are swapped in RoBERTa pretrained model)
label_list[1], label_list[2] = label_list[2], label_list[1]
examples = processor.get_dev_examples(args.data_dir) if evaluate else processor.get_train_examples(args.data_dir)
-features = convert_examples_to_features(examples, label_list, args.max_seq_length, tokenizer, output_mode,
+features = convert_examples_to_glue_features(examples, label_list, args.max_seq_length, tokenizer, output_mode,
pad_on_left=bool(args.model_type in ['xlnet']), # pad on the left for xlnet
pad_token=tokenizer.convert_tokens_to_ids([tokenizer.pad_token])[0],
pad_token_segment_id=4 if args.model_type in ['xlnet'] else 0,