Experimental support for fairscale ShardedDDP (#9139)

* Experimental support for fairscale ShardedDDP

* Add import error if fairscale not available

* Address review comments

* Fix seq2seq trainer
This commit is contained in:
Sylvain Gugger
2020-12-16 13:47:48 -05:00
committed by GitHub
parent 1c1a2ffbff
commit 9a67185344
4 changed files with 78 additions and 19 deletions

View File

@@ -215,6 +215,9 @@ class TrainingArguments:
The backend to use for mixed precision training. Must be one of :obj:`"auto"`, :obj:`"amp"` or
:obj:`"apex"`. :obj:`"auto"` will use AMP or APEX depending on the PyTorch version detected, while the
other choices will force the requested backend.
sharded_ddp (:obj:`bool`, `optional`, defaults to :obj:`False`):
Use Sharded DDP training from `FairScale <https://github.com/facebookresearch/fairscale>`__ (in distributed
training only). This is an experimental feature.
"""
output_dir: str = field(
@@ -386,6 +389,10 @@ class TrainingArguments:
default="auto",
metadata={"help": "The backend to be used for mixed precision.", "choices": ["auto", "amp", "apex"]},
)
sharded_ddp: bool = field(
default=False,
metadata={"help": "Whether or not to use sharded DDP training (in distributed training only)."},
)
def __post_init__(self):
if self.disable_tqdm is None: