[DeepSpeed in notebooks] Jupyter + Colab (#10130)

* init devices/setup explicitly
* docs + test
* simplify
* cleanup
* cleanup
* cleanup
* correct the required dist setup
* derive local_rank from env LOCAL_RANK
2021-02-11 14:02:05 -08:00
parent 6710d1d5ef
commit b54cb0bd82
4 changed files with 108 additions and 0 deletions
--- a/src/transformers/trainer.py
+++ b/src/transformers/trainer.py
@@ -239,6 +239,9 @@ class Trainer:
        self.hp_name = None
        self.deepspeed = None

+        # force device and distributed setup init explicitly; the bare attribute
+        # access below is intentional - it is evaluated only for its side effect
+        args._setup_devices
+
        if model is None:
            if model_init is not None:
                self.model_init = model_init
--- a/src/transformers/training_args.py
+++ b/src/transformers/training_args.py
@@ -561,6 +561,12 @@ class TrainingArguments:
            import deepspeed

            deepspeed.init_distributed()
+
+            # workaround for setups like notebooks where the launcher can't be used,
+            # but deepspeed still requires a distributed environment.
+            # env LOCAL_RANK could be set manually by the user, or via init_distributed if mpi4py is installed;
+            # when LOCAL_RANK is not set at all, local_rank falls back to -1
+            self.local_rank = int(os.environ.get("LOCAL_RANK", "-1"))
+
            device = torch.device("cuda", self.local_rank)
            self._n_gpu = 1
        elif self.local_rank == -1: