Smp grad accum (#10488)

* Fix gradient accumulation for SM Model Parallelism

* Style and divide loss by grad accum steps
This commit is contained in:
Sylvain Gugger
2021-03-03 12:13:29 -05:00
committed by GitHub
parent d064fb5647
commit b70f441b72
4 changed files with 16 additions and 6 deletions

View File

@@ -737,6 +737,13 @@ class TrainingArguments:
"""
return True
@property
def _no_sync_in_gradient_accumulation(self):
    """
    Tell whether ``no_sync`` should be used for the gradients during gradient accumulation.

    Returns:
        :obj:`bool`: :obj:`False` when ``self.deepspeed`` is truthy, :obj:`True` otherwise.
    """
    # A truthy `deepspeed` setting switches `no_sync` off; any falsy value leaves it on.
    if self.deepspeed:
        return False
    return True
def to_dict(self):
"""
Serializes this instance while replace `Enum` by their values (for JSON serialization support).