Deprecate xpu_backend for ddp_backend (#23085)

* Deprecate xpu_backend for ddp_backend

* Typo

* Only do a minor deprecation, no need for major

Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com>

---------

Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com>
This commit is contained in:
Zachary Mueller
2023-05-01 09:44:47 -04:00
committed by GitHub
parent 95cf3725b4
commit 9884862383
3 changed files with 29 additions and 13 deletions

View File

@@ -325,8 +325,8 @@ class TrainingArguments:
experimental API and it may change.
local_rank (`int`, *optional*, defaults to -1):
Rank of the process during distributed training.
xpu_backend (`str`, *optional*):
The backend to use for xpu distributed training. Must be one of `"mpi"` or `"ccl"` or `"gloo"`.
ddp_backend (`str`, *optional*):
The backend to use for distributed training. Must be one of `"nccl"`, `"mpi"`, `"ccl"`, `"gloo"`.
tpu_num_cores (`int`, *optional*):
When training on TPU, the number of TPU cores (automatically passed by launcher script).
dataloader_drop_last (`bool`, *optional*, defaults to `False`):
@@ -822,11 +822,11 @@ class TrainingArguments:
},
)
local_rank: int = field(default=-1, metadata={"help": "For distributed training: local_rank"})
xpu_backend: Optional[str] = field(
ddp_backend: Optional[str] = field(
default=None,
metadata={
"help": "The backend to be used for distributed training on Intel XPU.",
"choices": ["mpi", "ccl", "gloo"],
"help": "The backend to be used for distributed training",
"choices": ["nccl", "gloo", "mpi", "ccl"],
},
)
tpu_num_cores: Optional[int] = field(
@@ -1123,6 +1123,14 @@ class TrainingArguments:
},
)
xpu_backend: Optional[str] = field(
default=None,
metadata={
"help": "The backend to be used for distributed training on Intel XPU.",
"choices": ["mpi", "ccl", "gloo"],
},
)
def __post_init__(self):
# expand paths, if not os.makedirs("~/bar") will make directory
# in the current directory instead of the actual home
@@ -1146,6 +1154,14 @@ class TrainingArguments:
# Go back to the underlying string or we won't be able to instantiate `IntervalStrategy` on it.
self.evaluation_strategy = self.evaluation_strategy.value
if self.xpu_backend is not None:
warnings.warn(
"using `xpu_backend` is deprecated and will be removed in version 4.31"
" of 🤗 Transformers. Use `ddp_backend` instead",
FutureWarning,
)
self.ddp_backend = self.xpu_backend
self.evaluation_strategy = IntervalStrategy(self.evaluation_strategy)
self.logging_strategy = IntervalStrategy(self.logging_strategy)
self.save_strategy = IntervalStrategy(self.save_strategy)
@@ -1544,7 +1560,7 @@ class TrainingArguments:
"Using the `Trainer` with `PyTorch` requires `accelerate`: Run `pip install --upgrade accelerate`"
)
if self.no_cuda:
self.distributed_state = PartialState(cpu=True)
self.distributed_state = PartialState(cpu=True, backend=self.ddp_backend)
self._n_gpu = 0
elif is_sagemaker_mp_enabled():
local_rank = smp.local_rank()
@@ -1558,7 +1574,7 @@ class TrainingArguments:
del os.environ["ACCELERATE_USE_DEEPSPEED"]
self._n_gpu = 1
else:
self.distributed_state = PartialState(backend=self.xpu_backend)
self.distributed_state = PartialState(backend=self.ddp_backend)
self._n_gpu = 1
if not is_sagemaker_mp_enabled():
device = self.distributed_state.device