From ed8fad73903c670d41a9dff173bc44995cda2d2f Mon Sep 17 00:00:00 2001
From: Mathieu Prouveur <mathieu@sancare.fr>
Date: Wed, 24 Apr 2019 14:07:00 +0200
Subject: [PATCH 1/2] Update example files so that tr_loss is not affected by
 args.gradient_accumulation_steps

---
 examples/run_classifier.py | 2 +-
 examples/run_swag.py       | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/examples/run_classifier.py b/examples/run_classifier.py
index b90ac494e4..e14788cacb 100644
--- a/examples/run_classifier.py
+++ b/examples/run_classifier.py
@@ -845,7 +845,7 @@ def main():
                 else:
                     loss.backward()
 
-                tr_loss += loss.item()
+                tr_loss += loss.item() * args.gradient_accumulation_steps
                 nb_tr_examples += input_ids.size(0)
                 nb_tr_steps += 1
                 if (step + 1) % args.gradient_accumulation_steps == 0:
diff --git a/examples/run_swag.py b/examples/run_swag.py
index a6cfdbe311..5a65d7a748 100644
--- a/examples/run_swag.py
+++ b/examples/run_swag.py
@@ -452,7 +452,7 @@ def main():
                     loss = loss * args.loss_scale
                 if args.gradient_accumulation_steps > 1:
                     loss = loss / args.gradient_accumulation_steps
-                tr_loss += loss.item()
+                tr_loss += loss.item() * args.gradient_accumulation_steps
                 nb_tr_examples += input_ids.size(0)
                 nb_tr_steps += 1
 

From 87b9ec3843f7f9a81253075f92c9e6537ecefe1c Mon Sep 17 00:00:00 2001
From: Mathieu Prouveur <mathieu@sancare.fr>
Date: Mon, 29 Apr 2019 12:58:29 +0200
Subject: [PATCH 2/2] Fix tr_loss rescaling factor using global_step

---
 examples/run_classifier.py | 6 +++---
 examples/run_swag.py       | 4 ++--
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/examples/run_classifier.py b/examples/run_classifier.py
index e14788cacb..f678525b15 100644
--- a/examples/run_classifier.py
+++ b/examples/run_classifier.py
@@ -845,7 +845,7 @@ def main():
                 else:
                     loss.backward()
 
-                tr_loss += loss.item() * args.gradient_accumulation_steps
+                tr_loss += loss.item()
                 nb_tr_examples += input_ids.size(0)
                 nb_tr_steps += 1
                 if (step + 1) % args.gradient_accumulation_steps == 0:
@@ -936,7 +936,7 @@ def main():
         elif output_mode == "regression":
             preds = np.squeeze(preds)
         result = compute_metrics(task_name, preds, all_label_ids.numpy())
-        loss = tr_loss/nb_tr_steps if args.do_train else None
+        loss = tr_loss/global_step if args.do_train else None
 
         result['eval_loss'] = eval_loss
         result['global_step'] = global_step
@@ -1004,7 +1004,7 @@ def main():
             preds = preds[0]
             preds = np.argmax(preds, axis=1)
             result = compute_metrics(task_name, preds, all_label_ids.numpy())
-            loss = tr_loss/nb_tr_steps if args.do_train else None
+            loss = tr_loss/global_step if args.do_train else None
 
             result['eval_loss'] = eval_loss
             result['global_step'] = global_step
diff --git a/examples/run_swag.py b/examples/run_swag.py
index 5a65d7a748..4fb32549cb 100644
--- a/examples/run_swag.py
+++ b/examples/run_swag.py
@@ -452,7 +452,7 @@ def main():
                     loss = loss * args.loss_scale
                 if args.gradient_accumulation_steps > 1:
                     loss = loss / args.gradient_accumulation_steps
-                tr_loss += loss.item() * args.gradient_accumulation_steps
+                tr_loss += loss.item()
                 nb_tr_examples += input_ids.size(0)
                 nb_tr_steps += 1
 
@@ -537,7 +537,7 @@ def main():
         result = {'eval_loss': eval_loss,
                   'eval_accuracy': eval_accuracy,
                   'global_step': global_step,
-                  'loss': tr_loss/nb_tr_steps}
+                  'loss': tr_loss/global_step}
 
         output_eval_file = os.path.join(args.output_dir, "eval_results.txt")
         with open(output_eval_file, "w") as writer: