From 4faeb38b51055d329f4cc5839cd1fefbe27f9d8f Mon Sep 17 00:00:00 2001
From: Ubuntu <hf@fatguy.qosot1f4rqdedg2kd4xr3fsx3d.bx.internal.cloudapp.net>
Date: Sat, 3 Nov 2018 17:52:51 +0000
Subject: [PATCH 1/2] Fix loss logging for multi-gpu compatibility

---
 run_classifier_pytorch.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/run_classifier_pytorch.py b/run_classifier_pytorch.py
index f8cf4af808..c8ec8ab6e2 100644
--- a/run_classifier_pytorch.py
+++ b/run_classifier_pytorch.py
@@ -529,10 +529,10 @@ def main():
                 label_ids = label_ids.to(device)
 
                 loss, _ = model(input_ids, segment_ids, input_mask, label_ids)
-                total_tr_loss += loss.item()
+                total_tr_loss += loss.sum().item() # sum() is to account for multi-gpu support.
                 nb_tr_examples += input_ids.size(0)
                 model.zero_grad()
-                loss.backward()
+                loss.sum().backward() # sum() is to account for multi-gpu support.
                 optimizer.step()
                 global_step += 1
 
@@ -573,7 +573,7 @@ def main():
             label_ids = label_ids.to('cpu').numpy()
             tmp_eval_accuracy = accuracy(logits, label_ids)
 
-            eval_loss += tmp_eval_loss.item()
+            eval_loss += tmp_eval_loss.sum().item()
             eval_accuracy += tmp_eval_accuracy
             
             nb_eval_examples += input_ids.size(0)

From a1af5247e171354e8f39e577d861e63d7fa67a1e Mon Sep 17 00:00:00 2001
From: VictorSanh <victorsanh@gmail.com>
Date: Sat, 3 Nov 2018 14:00:36 -0400
Subject: [PATCH 2/2] Add seed in initialization

---
 run_classifier_pytorch.py | 12 ++++++++++--
 run_squad_pytorch.py      |  9 +++++++++
 2 files changed, 19 insertions(+), 2 deletions(-)

diff --git a/run_classifier_pytorch.py b/run_classifier_pytorch.py
index c8ec8ab6e2..3410a09b04 100644
--- a/run_classifier_pytorch.py
+++ b/run_classifier_pytorch.py
@@ -427,7 +427,10 @@ def main():
                         type=int,
                         default=-1,
                         help="local_rank for distributed training on gpus")
-
+    parser.add_argument('--seed', 
+                        type=int, 
+                        default=42,
+                        help="random seed for initialization")
     args = parser.parse_args()
 
     processors = {
@@ -444,7 +447,12 @@ def main():
         n_gpu = 1
         # print("Initializing the distributed backend: NCCL")
     print("device", device, "n_gpu", n_gpu)
-
+    
+    random.seed(args.seed)
+    np.random.seed(args.seed)
+    torch.manual_seed(args.seed)
+    if n_gpu>0: torch.cuda.manual_seed_all(args.seed)
+    
     if not args.do_train and not args.do_eval:
         raise ValueError("At least one of `do_train` or `do_eval` must be True.")
 
diff --git a/run_squad_pytorch.py b/run_squad_pytorch.py
index 2a67262d96..a1db682cd4 100644
--- a/run_squad_pytorch.py
+++ b/run_squad_pytorch.py
@@ -745,6 +745,10 @@ def main():
                         type=int,
                         default=-1,
                         help="local_rank for distributed training on gpus")
+    parser.add_argument('--seed', 
+                    type=int, 
+                    default=42,
+                    help="random seed for initialization")
 
     args = parser.parse_args()
 
@@ -757,6 +761,11 @@ def main():
         # print("Initializing the distributed backend: NCCL")
     print("device", device, "n_gpu", n_gpu)
 
+    random.seed(args.seed)
+    np.random.seed(args.seed)
+    torch.manual_seed(args.seed)
+    if n_gpu>0: torch.cuda.manual_seed_all(args.seed)
+
     if not args.do_train and not args.do_predict:
         raise ValueError("At least one of `do_train` or `do_predict` must be True.")