[trainer] no --deepspeed and --sharded_ddp together (#9712)
* no --deepspeed and --sharded_ddp together
* Update src/transformers/trainer.py
  Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com>
* style
  Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com>
This commit is contained in:
@@ -337,12 +337,15 @@ class Trainer:
|
||||
# Setup Sharded DDP training
|
||||
self.sharded_dpp = False
|
||||
if args.sharded_ddp:
|
||||
if args.deepspeed:
|
||||
raise ValueError(
|
||||
"Using --sharded_ddp together with --deepspeed is not possible, deactivate one of those flags."
|
||||
)
|
||||
|
||||
if args.local_rank == -1:
|
||||
raise ValueError("Using sharded DDP only works in distributed training.")
|
||||
elif not is_fairscale_available():
|
||||
raise ImportError("Sharded DDP training requires fairscale: `pip install fairscale`.")
|
||||
elif args.deepspeed:
|
||||
raise ValueError("can't use --sharded_ddp together with --deepspeed.")
|
||||
else:
|
||||
self.sharded_dpp = True
|
||||
|
||||
|
||||
Reference in New Issue
Block a user