From be3b9bcf4db4f7e942c7f71eb1d7de3a8d476ad0 Mon Sep 17 00:00:00 2001 From: Jade Abbott Date: Thu, 3 Jan 2019 09:02:33 +0200 Subject: [PATCH 1/4] Allow one to use the pretrained model in evaluation when do_train is not selected --- examples/run_classifier.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/examples/run_classifier.py b/examples/run_classifier.py index adf81f4e28..9236c6a252 100644 --- a/examples/run_classifier.py +++ b/examples/run_classifier.py @@ -430,8 +430,8 @@ def main(): if not args.do_train and not args.do_eval: raise ValueError("At least one of `do_train` or `do_eval` must be True.") - - if os.path.exists(args.output_dir) and os.listdir(args.output_dir): + + if os.path.exists(args.output_dir) and os.listdir(args.output_dir) and args.do_train: raise ValueError("Output directory ({}) already exists and is not empty.".format(args.output_dir)) os.makedirs(args.output_dir, exist_ok=True) @@ -554,7 +554,8 @@ def main(): # Save a trained model model_to_save = model.module if hasattr(model, 'module') else model # Only save the model it-self output_model_file = os.path.join(args.output_dir, "pytorch_model.bin") - torch.save(model_to_save.state_dict(), output_model_file) + if args.do_train: + torch.save(model_to_save.state_dict(), output_model_file) # Load a trained model that you have fine-tuned model_state_dict = torch.load(output_model_file) From b96149a19b225cc2eabd14c3227b8acc9b268b49 Mon Sep 17 00:00:00 2001 From: Jade Abbott Date: Thu, 3 Jan 2019 10:31:56 +0200 Subject: [PATCH 2/4] Training loss is not initialized if only do_eval is specified --- examples/run_classifier.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/examples/run_classifier.py b/examples/run_classifier.py index 9236c6a252..c99cc0e12a 100644 --- a/examples/run_classifier.py +++ b/examples/run_classifier.py @@ -430,7 +430,7 @@ def main(): if not args.do_train and not args.do_eval: raise ValueError("At least one of `do_train` or `do_eval` must be True.") - + if os.path.exists(args.output_dir) and os.listdir(args.output_dir) and args.do_train: raise ValueError("Output directory ({}) already exists and is not empty.".format(args.output_dir)) os.makedirs(args.output_dir, exist_ok=True) @@ -503,6 +503,7 @@ def main(): t_total=t_total) global_step = 0 + tr_loss = 0 if args.do_train: train_features = convert_examples_to_features( train_examples, label_list, args.max_seq_length, tokenizer) @@ -581,7 +582,8 @@ def main(): model.eval() eval_loss, eval_accuracy = 0, 0 nb_eval_steps, nb_eval_examples = 0, 0 - for input_ids, input_mask, segment_ids, label_ids in eval_dataloader: + + for input_ids, input_mask, segment_ids, label_ids in tqdm(eval_dataloader, desc="Evaluating"): input_ids = input_ids.to(device) input_mask = input_mask.to(device) segment_ids = segment_ids.to(device) @@ -603,11 +605,11 @@ def main(): eval_loss = eval_loss / nb_eval_steps eval_accuracy = eval_accuracy / nb_eval_examples - + loss = tr_loss/nb_tr_steps if args.do_train else None result = {'eval_loss': eval_loss, 'eval_accuracy': eval_accuracy, 'global_step': global_step, - 'loss': tr_loss/nb_tr_steps} + 'loss': loss} output_eval_file = os.path.join(args.output_dir, "eval_results.txt") with open(output_eval_file, "w") as writer: From c64de50ea4b4cec7e87732abb621bf70c8fa8763 Mon Sep 17 00:00:00 2001 From: Jade Abbott Date: Thu, 3 Jan 2019 12:34:57 +0200 Subject: [PATCH 3/4] nb_tr_steps is not initialized --- examples/run_classifier.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/examples/run_classifier.py b/examples/run_classifier.py index c99cc0e12a..8441c86937 100644 --- a/examples/run_classifier.py +++ b/examples/run_classifier.py @@ -503,6 +503,7 @@ def main(): t_total=t_total) global_step = 0 + nb_tr_steps = 0 tr_loss = 0 if args.do_train: train_features = convert_examples_to_features( @@ -565,6 +566,7 @@ def main(): if args.do_eval and (args.local_rank == -1 or torch.distributed.get_rank() == 0): eval_examples = processor.get_dev_examples(args.data_dir) + # should tokenize this too. eval_features = convert_examples_to_features( eval_examples, label_list, args.max_seq_length, tokenizer) logger.info("***** Running evaluation *****") From 193e2df8ba95efd6e3326cb0907576a0c74f1d74 Mon Sep 17 00:00:00 2001 From: Jade Abbott Date: Thu, 3 Jan 2019 13:13:06 +0200 Subject: [PATCH 4/4] Remove rogue comment --- examples/run_classifier.py | 1 - 1 file changed, 1 deletion(-) diff --git a/examples/run_classifier.py b/examples/run_classifier.py index 8441c86937..be212edc1b 100644 --- a/examples/run_classifier.py +++ b/examples/run_classifier.py @@ -566,7 +566,6 @@ def main(): if args.do_eval and (args.local_rank == -1 or torch.distributed.get_rank() == 0): eval_examples = processor.get_dev_examples(args.data_dir) - # should tokenize this too. eval_features = convert_examples_to_features( eval_examples, label_list, args.max_seq_length, tokenizer) logger.info("***** Running evaluation *****")