[trainer] Disallow using --deepspeed and --sharded_ddp together (#9712)

* Raise an error when --deepspeed and --sharded_ddp are used together

* Update src/transformers/trainer.py

Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com>

* Apply code style fixes (formatting only)

Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com>
This commit is contained in:
Stas Bekman
2021-01-20 16:50:21 -08:00
committed by GitHub
parent 7acfa95afb
commit 4a20b7c450

View File

@@ -337,12 +337,15 @@ class Trainer:
# Setup Sharded DDP training # Setup Sharded DDP training
self.sharded_dpp = False self.sharded_dpp = False
if args.sharded_ddp: if args.sharded_ddp:
if args.deepspeed:
raise ValueError(
"Using --sharded_ddp together with --deepspeed is not possible, deactivate one of those flags."
)
if args.local_rank == -1: if args.local_rank == -1:
raise ValueError("Using sharded DDP only works in distributed training.") raise ValueError("Using sharded DDP only works in distributed training.")
elif not is_fairscale_available(): elif not is_fairscale_available():
raise ImportError("Sharded DDP training requires fairscale: `pip install fairscale`.") raise ImportError("Sharded DDP training requires fairscale: `pip install fairscale`.")
elif args.deepspeed:
raise ValueError("can't use --sharded_ddp together with --deepspeed.")
else: else:
self.sharded_dpp = True self.sharded_dpp = True