[deepspeed] Enable multiple test runs on single box, defer to DS_TEST_PORT if set (#14331)

* defer to DS_TEST_PORT if set

* style

Co-authored-by: Stas Bekman <stas@stason.org>
This commit is contained in:
Jeff Rasley
2021-11-08 12:40:29 -08:00
committed by GitHub
parent dfb00bf644
commit d0e96c6de6
2 changed files with 14 additions and 4 deletions

View File

@@ -41,6 +41,9 @@ with ExtendSysPath(tests_dir):
set_seed(42)
# default torch.distributed port
DEFAULT_MASTER_PORT = "10999"
# translation
FSMT_TINY = "stas/tiny-wmt19-en-de"
BART_TINY = "sshleifer/bart-tiny-random"
@@ -89,7 +92,8 @@ def get_launcher(distributed=False):
# 2. for now testing with just 2 gpus max (since some quality tests may give different
# results with more gpus because we use very little data)
num_gpus = min(2, get_gpu_count()) if distributed else 1
return f"deepspeed --num_nodes 1 --num_gpus {num_gpus}".split()
master_port = os.environ.get("DS_TEST_PORT", DEFAULT_MASTER_PORT)
return f"deepspeed --num_nodes 1 --num_gpus {num_gpus} --master_port {master_port}".split()
def make_task_cmds():