From 2cf3447e0a3e3fd04b715f1e6f4ee43575e1e7c9 Mon Sep 17 00:00:00 2001
From: Juha Kiili
Date: Thu, 21 Nov 2019 12:35:25 +0200
Subject: [PATCH] Glue: log in Valohai-compatible JSON format too

---
Review notes (this section is ignored by git-am):
- The context line `logging_loss = tr_loss` now sits after the added
  loss/learning-rate logging lines. When it sat above
  `loss_scalar = (tr_loss - logging_loss) / args.logging_steps`, the
  numerator was always 0, so every logged loss was 0.
- NOTE(review): hunk indentation was reconstructed from whitespace-collapsed
  text; confirm it against examples/run_glue.py before applying.
- NOTE(review): the post-image blob id on the `index` line predates the
  reorder and is informational only.

 examples/run_glue.py | 15 ++++++++++++---
 1 file changed, 12 insertions(+), 3 deletions(-)

diff --git a/examples/run_glue.py b/examples/run_glue.py
index 527e440075..ea5ac5bbb7 100644
--- a/examples/run_glue.py
+++ b/examples/run_glue.py
@@ -22,6 +22,7 @@ import glob
 import logging
 import os
 import random
+import json
 
 import numpy as np
 import torch
@@ -171,13 +172,21 @@ def train(args, train_dataset, model, tokenizer):
 
                 if args.local_rank in [-1, 0] and args.logging_steps > 0 and global_step % args.logging_steps == 0:
                     # Log metrics
+                    logs = {'step': global_step}
                     if args.local_rank == -1 and args.evaluate_during_training:  # Only evaluate when single GPU otherwise metrics may not average well
                         results = evaluate(args, model, tokenizer)
                         for key, value in results.items():
-                            tb_writer.add_scalar('eval_{}'.format(key), value, global_step)
-                    tb_writer.add_scalar('lr', scheduler.get_lr()[0], global_step)
-                    tb_writer.add_scalar('loss', (tr_loss - logging_loss)/args.logging_steps, global_step)
+                            eval_key = 'eval_{}'.format(key)
+                            tb_writer.add_scalar(eval_key, value, global_step)
+                            logs[eval_key] = str(value)
+                    loss_scalar = (tr_loss - logging_loss) / args.logging_steps
+                    learning_rate_scalar = scheduler.get_lr()[0]
+                    tb_writer.add_scalar('lr', learning_rate_scalar, global_step)
+                    tb_writer.add_scalar('loss', loss_scalar, global_step)
+                    logs['learning_rate'] = learning_rate_scalar
+                    logs['loss'] = loss_scalar
                     logging_loss = tr_loss
+                    print(json.dumps(logs))
 
                 if args.local_rank in [-1, 0] and args.save_steps > 0 and global_step % args.save_steps == 0:
                     # Save model checkpoint