[trainer / deepspeed] fix hyperparameter_search (#16740)
* [trainer / deepspeed] fix hyperparameter_search
* require optuna
* style
* oops
* add dep in the right place
* create deepspeed-testing dep group
* Trigger CI
This commit is contained in:
@@ -34,6 +34,7 @@ from transformers.testing_utils import (
|
||||
get_gpu_count,
|
||||
mockenv_context,
|
||||
require_deepspeed,
|
||||
require_optuna,
|
||||
require_torch_gpu,
|
||||
require_torch_multi_gpu,
|
||||
slow,
|
||||
@@ -363,6 +364,33 @@ class TrainerIntegrationDeepSpeed(TestCasePlus, TrainerIntegrationCommon):
|
||||
trainer.train()
|
||||
self.assertIn("DeepSpeed info", cl.out, "expected DeepSpeed logger output but got none")
|
||||
|
||||
@require_optuna
def test_hyperparameter_search(self):
    """Run a hyperparameter search through a DeepSpeed ZeRO-3 regression trainer.

    Builds a regression trainer from the ZERO3 config dict under the
    single-GPU distributed environment, runs ``hyperparameter_search`` for
    a fixed number of trials, and checks that the captured DeepSpeed logger
    output and captured stderr contain the expected messages.
    """
    with mockenv_context(**self.dist_env_1_gpu):
        zero3_config = self.get_config_dict(ZERO3)

        # The search recreates the model for every trial, so the trainer is
        # given a factory rather than a ready-made model instance.
        def model_init():
            return RegressionPreTrainedModel(RegressionModelConfig(a=0, b=0, double_output=False))

        trainer = get_regression_trainer(
            local_rank=0,
            fp16=True,
            model_init=model_init,
            deepspeed=zero3_config,
        )

        num_trials = 3
        last_trial = num_trials - 1

        # Entered left to right and exited right to left, exactly like the
        # equivalent pair of nested ``with`` statements.
        with CaptureLogger(deepspeed_logger) as ds_log, CaptureStd() as std_capture:
            trainer.hyperparameter_search(direction="maximize", n_trials=num_trials)

        # Captured text is inspected only after both capture contexts have closed.
        self.assertIn("DeepSpeed info", ds_log.out, "expected DeepSpeed logger output but got none")
        captured_err = std_capture.err
        self.assertIn(f"Trial {last_trial} finished with value", captured_err, "expected hyperparameter_search output")
        self.assertIn("Best is trial", captured_err, "expected hyperparameter_search output")
|
||||
|
||||
# --- These tests need to run on both zero stages --- #
|
||||
|
||||
@parameterized.expand(params, name_func=parameterized_custom_name_func)
|
||||
|
||||
Reference in New Issue
Block a user