diff --git a/examples/run_ner.py b/examples/run_ner.py index aeb6a9a292..3649b84d34 100644 --- a/examples/run_ner.py +++ b/examples/run_ner.py @@ -153,7 +153,7 @@ def train(args, train_dataset, model, tokenizer, labels, pad_token_label_id): if args.local_rank in [-1, 0] and args.logging_steps > 0 and global_step % args.logging_steps == 0: # Log metrics if args.local_rank == -1 and args.evaluate_during_training: # Only evaluate when single GPU otherwise metrics may not average well - results, _ = evaluate(args, model, tokenizer, labels, pad_token_label_id) + results, _ = evaluate(args, model, tokenizer, labels, pad_token_label_id, mode="dev") for key, value in results.items(): tb_writer.add_scalar("eval_{}".format(key), value, global_step) tb_writer.add_scalar("lr", scheduler.get_lr()[0], global_step)