@@ -898,7 +898,7 @@ class TrainerIntegrationDeepSpeed(TrainerIntegrationDeepSpeedWithCustomConfig, T
|
||||
self.check_trainer_state_are_the_same(state, state1)
|
||||
|
||||
# Finally, should be able to resume with the same trainer/same deepspeed engine instance
|
||||
# XXX: but currently this not possible due DS bug: https://github.com/microsoft/DeepSpeed/issues/1612
|
||||
# XXX: but currently this is not possible due to a DS bug: https://github.com/deepspeedai/DeepSpeed/issues/1612
|
||||
# trainer.train(resume_from_checkpoint=checkpoint)
|
||||
# a workaround needs to be used that re-creates the deepspeed engine
|
||||
|
||||
@@ -975,7 +975,7 @@ class TrainerIntegrationDeepSpeed(TrainerIntegrationDeepSpeedWithCustomConfig, T
|
||||
def test_load_best_model(self, stage, dtype):
|
||||
# Test that forced deepspeed reinit doesn't break the model. The forced re-init after
|
||||
# loading the best model in Trainer is there to work around this bug in DeepSpeed
|
||||
# https://github.com/microsoft/DeepSpeed/issues/1612
|
||||
# https://github.com/deepspeedai/DeepSpeed/issues/1612
|
||||
#
|
||||
# The test is derived from a repro script submitted in this Issue:
|
||||
# https://github.com/huggingface/transformers/issues/17114
|
||||
|
||||
Reference in New Issue
Block a user