Deprecate xpu_backend in favor of ddp_backend (#23085)

* Deprecate xpu_backend in favor of ddp_backend
* Typo
* Only do a minor deprecation, no need for major
Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com>
---------
Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com>
2023-05-01 09:44:47 -04:00
parent 95cf3725b4
commit 9884862383
3 changed files with 29 additions and 13 deletions
--- a/src/transformers/training_args.py
+++ b/src/transformers/training_args.py
@@ -325,8 +325,8 @@ class TrainingArguments:
            experimental API and it may change.
        local_rank (`int`, *optional*, defaults to -1):
            Rank of the process during distributed training.
-        xpu_backend (`str`, *optional*):
-            The backend to use for xpu distributed training. Must be one of `"mpi"` or `"ccl"` or `"gloo"`.
+        ddp_backend (`str`, *optional*):
+            The backend to use for distributed training. Must be one of `"nccl"`, `"mpi"`, `"ccl"`, `"gloo"`.
        tpu_num_cores (`int`, *optional*):
            When training on TPU, the number of TPU cores (automatically passed by launcher script).
        dataloader_drop_last (`bool`, *optional*, defaults to `False`):
@@ -822,11 +822,11 @@ class TrainingArguments:
        },
    )
    local_rank: int = field(default=-1, metadata={"help": "For distributed training: local_rank"})
-    xpu_backend: Optional[str] = field(
+    ddp_backend: Optional[str] = field(
        default=None,
        metadata={
-            "help": "The backend to be used for distributed training on Intel XPU.",
-            "choices": ["mpi", "ccl", "gloo"],
+            "help": "The backend to be used for distributed training",
+            "choices": ["nccl", "gloo", "mpi", "ccl"],
        },
    )
    tpu_num_cores: Optional[int] = field(
@@ -1123,6 +1123,14 @@ class TrainingArguments:
        },
    )

+    xpu_backend: Optional[str] = field(
+        default=None,
+        metadata={
+            "help": "The backend to be used for distributed training on Intel XPU.",
+            "choices": ["mpi", "ccl", "gloo"],
+        },
+    )
+
    def __post_init__(self):
        # expand paths, if not os.makedirs("~/bar") will make directory
        # in the current directory instead of the actual home
@@ -1146,6 +1154,14 @@ class TrainingArguments:
            # Go back to the underlying string or we won't be able to instantiate `IntervalStrategy` on it.
            self.evaluation_strategy = self.evaluation_strategy.value

+        if self.xpu_backend is not None:
+            warnings.warn(
+                "using `xpu_backend` is deprecated and will be removed in version 4.31"
+                " of 🤗 Transformers. Use `ddp_backend` instead",
+                FutureWarning,
+            )
+            self.ddp_backend = self.xpu_backend
+
        self.evaluation_strategy = IntervalStrategy(self.evaluation_strategy)
        self.logging_strategy = IntervalStrategy(self.logging_strategy)
        self.save_strategy = IntervalStrategy(self.save_strategy)
@@ -1544,7 +1560,7 @@ class TrainingArguments:
                "Using the `Trainer` with `PyTorch` requires `accelerate`: Run `pip install --upgrade accelerate`"
            )
        if self.no_cuda:
-            self.distributed_state = PartialState(cpu=True)
+            self.distributed_state = PartialState(cpu=True, backend=self.ddp_backend)
            self._n_gpu = 0
        elif is_sagemaker_mp_enabled():
            local_rank = smp.local_rank()
@@ -1558,7 +1574,7 @@ class TrainingArguments:
            del os.environ["ACCELERATE_USE_DEEPSPEED"]
            self._n_gpu = 1
        else:
-            self.distributed_state = PartialState(backend=self.xpu_backend)
+            self.distributed_state = PartialState(backend=self.ddp_backend)
            self._n_gpu = 1
        if not is_sagemaker_mp_enabled():
            device = self.distributed_state.device