Revert PR 32299, flag users when Zero-3 was missed (#32851)
Revert PR 32299
This commit is contained in:
@@ -709,30 +709,34 @@ class TrainerIntegrationDeepSpeed(TrainerIntegrationDeepSpeedWithCustomConfig, T
|
||||
# Relative difference. See the note above how to get identical loss on a small bs
|
||||
self.assertTrue((no_grad_accum_loss - yes_grad_accum_loss) / (no_grad_accum_loss + 1e-15) <= 1e-3)
|
||||
|
||||
def test_missed_zero3_init(self):
|
||||
from transformers import Trainer # noqa
|
||||
# NOTE: Currently a disabled test. In the future we should re-enable it.
|
||||
# Issue revolves around Zero-3 w/ DPO/TRL + DeepSpeed
|
||||
# As well as Zero-3 inference
|
||||
# Related PR: https://github.com/huggingface/transformers/pull/32299
|
||||
# def test_missed_zero3_init(self):
|
||||
# from transformers import Trainer # noqa
|
||||
|
||||
with mockenv_context(**self.dist_env_1_gpu):
|
||||
model = AutoModel.from_pretrained(T5_TINY)
|
||||
training_args = TrainingArguments(
|
||||
output_dir="./test_missed_zero3_init",
|
||||
deepspeed=self.get_config_dict(ZERO3),
|
||||
)
|
||||
with self.assertRaises(
|
||||
ValueError, msg="Model was not initialized with `Zero-3` despite being configured."
|
||||
):
|
||||
_ = Trainer(
|
||||
model=model,
|
||||
args=training_args,
|
||||
)
|
||||
# Now do it properly, triggered from our `TrainingArguments` earlier
|
||||
model = AutoModel.from_pretrained(T5_TINY)
|
||||
trainer = Trainer(
|
||||
model=model,
|
||||
args=training_args,
|
||||
)
|
||||
assert trainer.is_deepspeed_enabled
|
||||
assert model._transformers_zero3_init_used
|
||||
# with mockenv_context(**self.dist_env_1_gpu):
|
||||
# model = AutoModel.from_pretrained(T5_TINY)
|
||||
# training_args = TrainingArguments(
|
||||
# output_dir="./test_missed_zero3_init",
|
||||
# deepspeed=self.get_config_dict(ZERO3),
|
||||
# )
|
||||
# with self.assertRaises(
|
||||
# ValueError, msg="Model was not initialized with `Zero-3` despite being configured."
|
||||
# ):
|
||||
# _ = Trainer(
|
||||
# model=model,
|
||||
# args=training_args,
|
||||
# )
|
||||
# # Now do it properly, triggered from our `TrainingArguments` earlier
|
||||
# model = AutoModel.from_pretrained(T5_TINY)
|
||||
# trainer = Trainer(
|
||||
# model=model,
|
||||
# args=training_args,
|
||||
# )
|
||||
# assert trainer.is_deepspeed_enabled
|
||||
# assert model._transformers_zero3_init_used
|
||||
|
||||
def check_saved_checkpoints_deepspeed(self, output_dir, freq, total, stage, dtype):
|
||||
# adapted from TrainerIntegrationCommon.check_saved_checkpoints
|
||||
|
||||
Reference in New Issue
Block a user