From e0bf01d9a925e2a2280fea77c48ebd696b66e83e Mon Sep 17 00:00:00 2001 From: Ananya Harsh Jha Date: Sat, 16 Mar 2019 14:10:48 -0400 Subject: [PATCH] added hack for mismatched MNLI --- examples/run_classifier.py | 66 +++++++++++++++++++++++++++++++++++++- 1 file changed, 65 insertions(+), 1 deletion(-) diff --git a/examples/run_classifier.py b/examples/run_classifier.py index 51d57be4dc..588e876af2 100644 --- a/examples/run_classifier.py +++ b/examples/run_classifier.py @@ -679,7 +679,6 @@ def main(): output_modes = { "cola": "classification", "mnli": "classification", - "mnli-mm": "classification", "mrpc": "classification", "sst-2": "classification", "sts-b": "regression", @@ -930,6 +929,8 @@ def main(): preds = preds[0] if output_mode == "classification": preds = np.argmax(preds, axis=1) + elif output_mode == "regression": + preds = np.squeeze(preds) result = compute_metrics(task_name, preds, all_label_ids.numpy()) loss = tr_loss/nb_tr_steps if args.do_train else None @@ -943,6 +944,69 @@ def main(): for key in sorted(result.keys()): logger.info(" %s = %s", key, str(result[key])) writer.write("%s = %s\n" % (key, str(result[key]))) + + # hack for MNLI-MM + if task_name == "mnli": + task_name = "mnli-mm" + processor = processors[task_name]() + + eval_examples = processor.get_dev_examples(args.data_dir) + eval_features = convert_examples_to_features( + eval_examples, label_list, args.max_seq_length, tokenizer, output_mode) + logger.info("***** Running evaluation *****") + logger.info(" Num examples = %d", len(eval_examples)) + logger.info(" Batch size = %d", args.eval_batch_size) + all_input_ids = torch.tensor([f.input_ids for f in eval_features], dtype=torch.long) + all_input_mask = torch.tensor([f.input_mask for f in eval_features], dtype=torch.long) + all_segment_ids = torch.tensor([f.segment_ids for f in eval_features], dtype=torch.long) + all_label_ids = torch.tensor([f.label_id for f in eval_features], dtype=torch.long) + + eval_data = TensorDataset(all_input_ids, all_input_mask, all_segment_ids, all_label_ids) + # Run prediction for full data + eval_sampler = SequentialSampler(eval_data) + eval_dataloader = DataLoader(eval_data, sampler=eval_sampler, batch_size=args.eval_batch_size) + + model.eval() + eval_loss = 0 + nb_eval_steps = 0 + preds = [] + + for input_ids, input_mask, segment_ids, label_ids in tqdm(eval_dataloader, desc="Evaluating"): + input_ids = input_ids.to(device) + input_mask = input_mask.to(device) + segment_ids = segment_ids.to(device) + label_ids = label_ids.to(device) + + with torch.no_grad(): + logits = model(input_ids, segment_ids, input_mask, labels=None) + + loss_fct = CrossEntropyLoss() + tmp_eval_loss = loss_fct(logits.view(-1, num_labels), label_ids.view(-1)) + + eval_loss += tmp_eval_loss.mean().item() + nb_eval_steps += 1 + if len(preds) == 0: + preds.append(logits.detach().cpu().numpy()) + else: + preds[0] = np.append( + preds[0], logits.detach().cpu().numpy(), axis=0) + + eval_loss = eval_loss / nb_eval_steps + preds = preds[0] + preds = np.argmax(preds, axis=1) + result = compute_metrics(task_name, preds, all_label_ids.numpy()) + loss = tr_loss/nb_tr_steps if args.do_train else None + + result['eval_loss'] = eval_loss + result['global_step'] = global_step + result['loss'] = loss + + output_eval_file = os.path.join(args.output_dir + '-MM', "eval_results.txt") + with open(output_eval_file, "w") as writer: + logger.info("***** Eval results *****") + for key in sorted(result.keys()): + logger.info(" %s = %s", key, str(result[key])) + writer.write("%s = %s\n" % (key, str(result[key]))) if __name__ == "__main__": main()