[trainer] no --deepspeed and --sharded_ddp together (#9712)
* no --deepspeed and --sharded_ddp together
* Update src/transformers/trainer.py
  Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com>
* style
Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com>
This commit is contained in:
@@ -337,12 +337,15 @@ class Trainer:
|
|||||||
# Setup Sharded DDP training
|
# Setup Sharded DDP training
|
||||||
self.sharded_dpp = False
|
self.sharded_dpp = False
|
||||||
if args.sharded_ddp:
|
if args.sharded_ddp:
|
||||||
|
if args.deepspeed:
|
||||||
|
raise ValueError(
|
||||||
|
"Using --sharded_ddp together with --deepspeed is not possible, deactivate one of those flags."
|
||||||
|
)
|
||||||
|
|
||||||
if args.local_rank == -1:
|
if args.local_rank == -1:
|
||||||
raise ValueError("Using sharded DDP only works in distributed training.")
|
raise ValueError("Using sharded DDP only works in distributed training.")
|
||||||
elif not is_fairscale_available():
|
elif not is_fairscale_available():
|
||||||
raise ImportError("Sharded DDP training requires fairscale: `pip install fairscale`.")
|
raise ImportError("Sharded DDP training requires fairscale: `pip install fairscale`.")
|
||||||
elif args.deepspeed:
|
|
||||||
raise ValueError("can't use --sharded_ddp together with --deepspeed.")
|
|
||||||
else:
|
else:
|
||||||
self.sharded_dpp = True
|
self.sharded_dpp = True
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user