From d55c3ae83f5d1c22192142e4a7d4da2f4f74c790 Mon Sep 17 00:00:00 2001
From: VictorSanh
Date: Sun, 4 Nov 2018 16:28:10 -0500
Subject: [PATCH] Small logger bug (multi-gpu, distribution) in training

---
 run_classifier.py | 2 +-
 run_squad.py      | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/run_classifier.py b/run_classifier.py
index f1a102253a..58eb039d93 100644
--- a/run_classifier.py
+++ b/run_classifier.py
@@ -420,7 +420,7 @@ def main():
         n_gpu = 1
         # Initializes the distributed backend which will take care of sychronizing nodes/GPUs
         torch.distributed.init_process_group(backend='nccl')
-    logger.info("device", device, "n_gpu", n_gpu, "distributed training", bool(args.local_rank != -1))
+    logger.info("device %s n_gpu %d distributed training %r", device, n_gpu, bool(args.local_rank != -1))
 
     if args.accumulate_gradients < 1:
         raise ValueError("Invalid accumulate_gradients parameter: {}, should be >= 1".format(
diff --git a/run_squad.py b/run_squad.py
index a1952244e0..868dc99a23 100644
--- a/run_squad.py
+++ b/run_squad.py
@@ -750,7 +750,7 @@ def main():
         n_gpu = 1
         # Initializes the distributed backend which will take care of sychronizing nodes/GPUs
         torch.distributed.init_process_group(backend='nccl')
-    logger.info("device", device, "n_gpu", n_gpu, "distributed training", bool(args.local_rank != -1))
+    logger.info("device %s n_gpu %d distributed training %r", device, n_gpu, bool(args.local_rank != -1))
 
     if args.accumulate_gradients < 1:
         raise ValueError("Invalid accumulate_gradients parameter: {}, should be >= 1".format(