Smp grad accum (#10488)

* Fix gradient accumulation for SM Model Parallelism
* Style and divide loss by grad accum steps
2021-03-03 12:13:29 -05:00
parent d064fb5647
commit b70f441b72
4 changed files with 16 additions and 6 deletions
--- a/src/transformers/training_args.py
+++ b/src/transformers/training_args.py
@@ -737,6 +737,13 @@ class TrainingArguments:
        """
        return True

+    @property
+    def _no_sync_in_gradient_accumulation(self):
+        """
+        Whether to use no_sync for the gradients during gradient accumulation (False if `self.deepspeed` is truthy).
+        """
+        return not self.deepspeed
+
    def to_dict(self):
        """
        Serializes this instance while replace `Enum` by their values (for JSON serialization support).