From cad9f5c6cc1198d3fa58fd16f1080f2c07f0c4df Mon Sep 17 00:00:00 2001 From: Dean Wyatte <2512762+dwyatte@users.noreply.github.com> Date: Tue, 2 Jan 2024 04:48:17 -0700 Subject: [PATCH] Update docs around mixing hf scheduler with deepspeed optimizer (#28223) update docs around mixing hf scheduler with deepspeed optimizer --- docs/source/en/main_classes/deepspeed.md | 7 +------ src/transformers/integrations/deepspeed.py | 9 +-------- 2 files changed, 2 insertions(+), 14 deletions(-) diff --git a/docs/source/en/main_classes/deepspeed.md b/docs/source/en/main_classes/deepspeed.md index 8133f6c097..29ca9ebea2 100644 --- a/docs/source/en/main_classes/deepspeed.md +++ b/docs/source/en/main_classes/deepspeed.md @@ -1221,12 +1221,7 @@ Therefore you have two ways to take advantage of this very beneficial feature: ### Optimizer and Scheduler As long as you don't enable `offload_optimizer` you can mix and match DeepSpeed and HuggingFace schedulers and -optimizers, with the exception of using the combination of HuggingFace scheduler and DeepSpeed optimizer: - -| Combos | HF Scheduler | DS Scheduler | -|:-------------|:-------------|:-------------| -| HF Optimizer | Yes | Yes | -| DS Optimizer | No | Yes | +optimizers. It is possible to use a non-DeepSpeed optimizer when `offload_optimizer` is enabled, as long as it has both CPU and GPU implementation (except LAMB). diff --git a/src/transformers/integrations/deepspeed.py b/src/transformers/integrations/deepspeed.py index fb9c022b0f..101610af55 100644 --- a/src/transformers/integrations/deepspeed.py +++ b/src/transformers/integrations/deepspeed.py @@ -275,14 +275,7 @@ def deepspeed_optim_sched(trainer, hf_deepspeed_config, args, num_training_steps config = hf_deepspeed_config.config - # Optimizer + Scheduler - # Currently supported combos: - # 1. DS scheduler + DS optimizer: Yes - # 2. HF scheduler + HF optimizer: Yes - # 3. DS scheduler + HF optimizer: Yes - # 4. 
HF scheduler + DS optimizer: No - # - # Unless Offload is enabled in which case it's: + # Mixing and matching DS and HF schedulers and optimizers is supported, unless Offload is enabled, in which case it's: # 1. DS scheduler + DS optimizer: Yes # 2. HF scheduler + HF optimizer: Mostly* # 3. DS scheduler + HF optimizer: Mostly*