[tests] make TestDeepSpeedModelZoo device-agnostic (#31402)

* fix

* use accelerator device count

* ci fix
This commit is contained in:
Fanli Lin
2024-06-17 22:42:57 +08:00
committed by GitHub
parent 7977f206dc
commit 9454f437b0
2 changed files with 9 additions and 4 deletions

View File

@@ -23,12 +23,13 @@ from tests.trainer.test_trainer import TrainerIntegrationCommon # noqa
from transformers import is_torch_available
from transformers.testing_utils import (
TestCasePlus,
backend_device_count,
execute_subprocess_async,
get_gpu_count,
get_tests_dir,
require_deepspeed,
require_torch_gpu,
require_torch_accelerator,
slow,
torch_device,
)
from transformers.trainer_utils import set_seed
@@ -143,7 +144,7 @@ def get_launcher(distributed=False):
# - it won't be able to handle that
# 2. for now testing with just 2 gpus max (since some quality tests may give different
# results with more gpus because we use very little data)
num_gpus = min(2, get_gpu_count()) if distributed else 1
num_gpus = min(2, backend_device_count(torch_device)) if distributed else 1
master_port = os.environ.get("DS_TEST_PORT", DEFAULT_MASTER_PORT)
return f"deepspeed --num_nodes 1 --num_gpus {num_gpus} --master_port {master_port}".split()
@@ -326,7 +327,7 @@ params = list(itertools.product(stages, task_cmds.keys()))
@slow
@require_deepspeed
@require_torch_gpu
@require_torch_accelerator
class TestDeepSpeedModelZoo(TestCasePlus):
"""This class is for testing via an external script - can do multiple gpus"""