diff --git a/src/transformers/trainer.py b/src/transformers/trainer.py index 8ed88d931e..e401a0b8d3 100755 --- a/src/transformers/trainer.py +++ b/src/transformers/trainer.py @@ -629,20 +629,7 @@ class Trainer: # bf16 does not need grad scaling self.do_grad_scaling = self.amp_dtype == torch.float16 if self.do_grad_scaling: - if self.sharded_ddp is not None: - self.scaler = ShardedGradScaler() - elif self.fsdp is not None: - from torch.distributed.fsdp.sharded_grad_scaler import ( - ShardedGradScaler as FSDPShardedGradScaler, - ) - - self.scaler = FSDPShardedGradScaler() - elif is_torch_tpu_available(): - from torch_xla.amp import GradScaler - - self.scaler = GradScaler() - else: - self.scaler = torch.cuda.amp.GradScaler() + self.scaler = ShardedGradScaler() elif args.half_precision_backend == "cpu_amp": self.use_cpu_amp = True self.amp_dtype = torch.bfloat16 @@ -2621,7 +2608,7 @@ class Trainer: else torch.cuda.amp.autocast(cache_enabled=cache_enabled, dtype=self.amp_dtype) ) else: - ctx_manager = contextlib.nullcontext() if sys.version_info >= (3, 7) else contextlib.suppress() + ctx_manager = contextlib.nullcontext() return ctx_manager diff --git a/src/transformers/training_args.py b/src/transformers/training_args.py index 0527b40114..8097237f58 100644 --- a/src/transformers/training_args.py +++ b/src/transformers/training_args.py @@ -1189,14 +1189,6 @@ class TrainingArguments: }, ) - xpu_backend: Optional[str] = field( - default=None, - metadata={ - "help": "The backend to be used for distributed training on Intel XPU.", - "choices": ["mpi", "ccl", "gloo"], - }, - ) - def __post_init__(self): # expand paths, if not os.makedirs("~/bar") will make directory # in the current directory instead of the actual home @@ -1220,14 +1212,6 @@ class TrainingArguments: # Go back to the underlying string or we won't be able to instantiate `IntervalStrategy` on it. self.evaluation_strategy = self.evaluation_strategy.value - if self.xpu_backend is not None: - warnings.warn( - "using `xpu_backend` is deprecated and will be removed in version 4.31" - " of 🤗 Transformers. Use `ddp_backend` instead", - FutureWarning, - ) - self.ddp_backend = self.xpu_backend - self.evaluation_strategy = IntervalStrategy(self.evaluation_strategy) self.logging_strategy = IntervalStrategy(self.logging_strategy) self.save_strategy = IntervalStrategy(self.save_strategy)