[deepspeed] zero inference (#14253)

* [deepspeed] zero inference

* only z3 makes sense for inference

* fix and style

* docs

* rework

* fix test

* Apply suggestions from code review

Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com>

* responding to suggestions

Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com>
This commit is contained in:
Stas Bekman
2021-11-23 14:09:15 -08:00
committed by GitHub
parent 69e16abf98
commit 956a483173
6 changed files with 149 additions and 38 deletions

View File

@@ -697,11 +697,10 @@ class TestDeepSpeedWithLauncher(TestCasePlus):
def test_basic_distributed(self, stage):
self.run_and_check(stage=stage, distributed=True)
@parameterized.expand(stages)
def test_do_eval_no_train(self, stage):
# we should not fail if train is skipped
def test_do_eval_no_train(self):
# testing only zero3 since zero2 makes no sense with inference
self.run_and_check(
stage=stage,
stage=ZERO3,
eval_steps=1,
distributed=False,
do_train=False,
@@ -755,6 +754,22 @@ class TestDeepSpeedWithLauncher(TestCasePlus):
self.do_checks(output_dir, do_train=do_train, do_eval=do_eval)
@require_torch_multi_gpu
@parameterized.expand(["fp16", "fp32"])
def test_inference(self, dtype):
# this is just inference, so no optimizer should be loaded
# it only works for z3 (makes no sense with z1-z2)
fp16 = True if dtype == "fp16" else False
self.run_and_check(
stage=ZERO3,
model_name=T5_TINY,
distributed=True,
do_train=False,
do_eval=True,
quality_checks=False,
fp16=fp16,
)
def do_checks(self, output_dir, do_train=True, do_eval=True, quality_checks=True):
if do_train: