Use DS callable API to allow hf_scheduler + ds_optimizer (#13216)

* Use DS callable API to allow hf_scheduler + ds_optimizer

* Preserve backward-compatibility

* Restore backward compatibility

* Tweak arg positioning

* Tweak arg positioning

* bump the required version

* Undo indent

* Update src/transformers/trainer.py

* style

Co-authored-by: Stas Bekman <stas@stason.org>
Co-authored-by: Stas Bekman <stas00@users.noreply.github.com>
This commit is contained in:
Olatunji Ruwase
2021-08-30 10:01:06 -07:00
committed by GitHub
parent 35236b870e
commit 42f359d015
5 changed files with 24 additions and 31 deletions

View File

@@ -292,19 +292,16 @@ class TrainerIntegrationDeepSpeed(TestCasePlus, TrainerIntegrationCommon):
self.assertNotEqual(new_a, a)
def test_hf_scheduler_ds_optimizer(self):
# this combo is not possible at the moment
a = 0
with mockenv_context(**self.dist_env_1_gpu):
ds_config_zero2_dict = self.get_config_dict(ZERO2)
del ds_config_zero2_dict["scheduler"] # force default HF Trainer scheduler
ds_config_zero2_dict["zero_optimization"]["offload_optimizer"]["device"] = "none"
ds_config_zero2_dict["fp16"]["initial_scale_power"] = 1 # force optimizer on the first step
trainer = get_regression_trainer(local_rank=0, fp16=True, deepspeed=ds_config_zero2_dict)
with self.assertRaises(Exception) as context:
trainer.train()
self.assertTrue(
"HF scheduler + DeepSpeed optimizer combination is not possible" in str(context.exception),
f"got exception: {context.exception}",
)
trainer.train()
new_a = trainer.model.a.item()
self.assertNotEqual(new_a, a)
@require_deepspeed_aio
def test_stage3_nvme_offload(self):