Patch with accelerate xpu (#25714)

* patch with accelerate xpu

* patch with accelerate xpu

* formatting

* fix tests

* revert ruff unrelated fixes

* revert ruff unrelated fixes

* revert ruff unrelated fixes

* fix test

* review fixes

* review fixes

* black fixed

* review commits

* review commits

* style fix

* use pytorch_utils

* revert markuplm test
This commit is contained in:
Abhilash Majumder
2023-09-05 20:11:42 +05:30
committed by GitHub
parent aa5c94d38d
commit 70a98024b1
7 changed files with 127 additions and 14 deletions

View File

@@ -22,6 +22,7 @@ from transformers.testing_utils import (
execute_subprocess_async,
get_torch_dist_unique_port,
require_torch_multi_gpu,
require_torch_multi_xpu,
require_torch_neuroncore,
require_torch_npu,
)
@@ -158,6 +159,20 @@ class TestTrainerDistributed(TestCasePlus):
# successful return here == success - any errors would have caused an error in the sub-call
@require_torch_multi_xpu
class TestTrainerDistributedXPU(TestCasePlus):
def test_trainer(self):
distributed_args = f"""--nproc_per_node={torch.xpu.device_count()}
--master_port={get_torch_dist_unique_port()}
{self.test_file_dir}/test_trainer_distributed.py
""".split()
output_dir = self.get_auto_remove_tmp_dir()
args = f"--output_dir {output_dir}".split()
cmd = ["torchrun"] + distributed_args + args
execute_subprocess_async(cmd, env=self.get_env())
# successful return here == success - any errors would have caused an error in the sub-call
if __name__ == "__main__":
# The script below is meant to be run under torch.distributed, on a machine with multiple GPUs:
#