🚨 Fully revert atomic checkpointing 🚨 (#29370)

Fully revert atomic checkpointing
2024-03-04 06:17:42 -05:00
parent 8ef9862864
commit 1681a6d452
3 changed files with 11 additions and 71 deletions
--- a/tests/trainer/test_trainer_distributed.py
+++ b/tests/trainer/test_trainer_distributed.py
@@ -12,7 +12,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

-from pathlib import Path
 from typing import Dict

 import numpy as np
@@ -237,20 +236,6 @@ if __name__ == "__main__":

        trainer.args.eval_accumulation_steps = None

-    # Check that saving does indeed work with temp dir rotation
-    # If this fails, will see a FileNotFoundError
-    model = RegressionModel()
-    training_args.max_steps = 1
-    opt = torch.optim.Adam(model.parameters(), lr=1e-3)
-    sched = torch.optim.lr_scheduler.LambdaLR(opt, lambda x: 1)
-    trainer = Trainer(
-        model, training_args, optimizers=(opt, sched), data_collator=DummyDataCollator(), eval_dataset=dataset
-    )
-    trainer._save_checkpoint(model=None, trial=None)
-    # Check that the temp folder does not exist
-    assert not (Path(training_args.output_dir) / "tmp-checkpoint-0").exists()
-    assert (Path(training_args.output_dir) / "checkpoint-0").exists()
-
    # Check that `dispatch_batches=False` will work on a finite iterable dataset

    train_dataset = FiniteIterableDataset(label_names=["labels", "extra"], length=1)