Merge branch 'master' of https://github.com/huggingface/pytorch-pretrained-BERT

2018-11-03 19:06:17 +01:00
parent 04287a4d68 a1af5247e1
commit 139873f6e3
2 changed files with 22 additions and 5 deletions
--- a/run_classifier_pytorch.py
+++ b/run_classifier_pytorch.py
@@ -427,7 +427,10 @@ def main():
                        type=int,
                        default=-1,
                        help="local_rank for distributed training on gpus")
-
+    parser.add_argument('--seed', 
+                        type=int, 
+                        default=42,
+                        help="random seed for initialization")
    args = parser.parse_args()

    processors = {
@@ -444,7 +447,12 @@ def main():
        n_gpu = 1
        # print("Initializing the distributed backend: NCCL")
    print("device", device, "n_gpu", n_gpu)
-
+    
+    random.seed(args.seed)
+    np.random.seed(args.seed)
+    torch.manual_seed(args.seed)
+    if n_gpu>0: torch.cuda.manual_seed_all(args.seed)
+    
    if not args.do_train and not args.do_eval:
        raise ValueError("At least one of `do_train` or `do_eval` must be True.")

@@ -529,10 +537,10 @@ def main():
                label_ids = label_ids.to(device)

                loss, _ = model(input_ids, segment_ids, input_mask, label_ids)
-                total_tr_loss += loss.item()
+                total_tr_loss += loss.sum().item() # sum() is to account for multi-gpu support.
                nb_tr_examples += input_ids.size(0)
                model.zero_grad()
-                loss.backward()
+                loss.sum().backward() # sum() is to account for multi-gpu support.
                optimizer.step()
                global_step += 1

@@ -573,7 +581,7 @@ def main():
            label_ids = label_ids.to('cpu').numpy()
            tmp_eval_accuracy = accuracy(logits, label_ids)

-            eval_loss += tmp_eval_loss.item()
+            eval_loss += tmp_eval_loss.sum().item()
            eval_accuracy += tmp_eval_accuracy

            nb_eval_examples += input_ids.size(0)