[trainer] no --deepspeed and --sharded_ddp together (#9712)

* no --deepspeed and --sharded_ddp together

* Update src/transformers/trainer.py

Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com>

* style

Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com>
2021-01-20 16:50:21 -08:00
parent 7acfa95afb
commit 4a20b7c450
1 changed file with 5 additions and 2 deletions
--- a/src/transformers/trainer.py
+++ b/src/transformers/trainer.py
@@ -337,12 +337,15 @@ class Trainer:
        # Setup Sharded DDP training
        self.sharded_dpp = False
        if args.sharded_ddp:
+            if args.deepspeed:
+                raise ValueError(
+                    "Using --sharded_ddp together with --deepspeed is not possible, deactivate one of those flags."
+                )
+
            if args.local_rank == -1:
                raise ValueError("Using sharded DDP only works in distributed training.")
            elif not is_fairscale_available():
                raise ImportError("Sharded DDP training requires fairscale: `pip install fairscale`.")
-            elif args.deepspeed:
-                raise ValueError("can't use --sharded_ddp together with --deepspeed.")
            else:
                self.sharded_dpp = True