[DeepSpeed in notebooks] Jupyter + Colab (#10130)
* init devices/setup explicitly
* docs + test
* simplify
* cleanup
* cleanup
* cleanup
* correct the required dist setup
* derive local_rank from env LOCAL_RANK
This commit is contained in:
@@ -239,6 +239,9 @@ class Trainer:
|
||||
self.hp_name = None
|
||||
self.deepspeed = None
|
||||
|
||||
# explicitly force the device and distributed setup to be initialized
|
||||
args._setup_devices
|
||||
|
||||
if model is None:
|
||||
if model_init is not None:
|
||||
self.model_init = model_init
|
||||
|
||||
@@ -561,6 +561,12 @@ class TrainingArguments:
|
||||
import deepspeed
|
||||
|
||||
deepspeed.init_distributed()
|
||||
|
||||
# workaround for setups like notebooks where the launcher can't be used,
|
||||
# but deepspeed requires a dist env.
|
||||
# The LOCAL_RANK env variable can be set manually by the user, or by deepspeed.init_distributed() if mpi4py is installed
|
||||
self.local_rank = int(os.environ.get("LOCAL_RANK", "-1"))
|
||||
|
||||
device = torch.device("cuda", self.local_rank)
|
||||
self._n_gpu = 1
|
||||
elif self.local_rank == -1:
|
||||
|
||||
Reference in New Issue
Block a user