From 4a20b7c450093a5521a9481c8d26277084b22664 Mon Sep 17 00:00:00 2001
From: Stas Bekman
Date: Wed, 20 Jan 2021 16:50:21 -0800
Subject: [PATCH] [trainer] no --deepspeed and --sharded_ddp together (#9712)

* no --deepspeed and --sharded_ddp together

* Update src/transformers/trainer.py

Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com>

* style

Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com>
---
 src/transformers/trainer.py | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/src/transformers/trainer.py b/src/transformers/trainer.py
index c5e745577e..edc7a09cec 100755
--- a/src/transformers/trainer.py
+++ b/src/transformers/trainer.py
@@ -337,12 +337,15 @@ class Trainer:
         # Setup Sharded DDP training
         self.sharded_dpp = False
         if args.sharded_ddp:
+            if args.deepspeed:
+                raise ValueError(
+                    "Using --sharded_ddp together with --deepspeed is not possible, deactivate one of those flags."
+                )
+
             if args.local_rank == -1:
                 raise ValueError("Using sharded DDP only works in distributed training.")
             elif not is_fairscale_available():
                 raise ImportError("Sharded DDP training requires fairscale: `pip install fairscale`.")
-            elif args.deepspeed:
-                raise ValueError("can't use --sharded_ddp together with --deepspeed.")
             else:
                 self.sharded_dpp = True
 