🚨 Fully revert atomic checkpointing 🚨 (#29370)

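Fully revert atomic checkpointing. In tests/trainer/test_trainer.py this
removes `test_save_checkpoints_is_atomic` together with the imports that test
used (`PreTrainedTokenizerBase`, `get_last_checkpoint`).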
2024-03-04 06:17:42 -05:00
parent 8ef9862864
commit 1681a6d452
3 changed files with 11 additions and 71 deletions
--- a/tests/trainer/test_trainer.py
+++ b/tests/trainer/test_trainer.py
@@ -84,8 +84,7 @@ from transformers.testing_utils import (
    slow,
    torch_device,
 )
-from transformers.tokenization_utils_base import PreTrainedTokenizerBase
-from transformers.trainer_utils import PREFIX_CHECKPOINT_DIR, HPSearchBackend, get_last_checkpoint
+from transformers.trainer_utils import PREFIX_CHECKPOINT_DIR, HPSearchBackend
 from transformers.training_args import OptimizerNames
 from transformers.utils import (
    SAFE_WEIGHTS_INDEX_NAME,
@@ -1406,19 +1405,6 @@ class TrainerIntegrationTest(TestCasePlus, TrainerIntegrationCommon):
            trainer.train()
            self.check_saved_checkpoints(tmpdir, 5, int(self.n_epochs * 64 / self.batch_size), False)

-    def test_save_checkpoints_is_atomic(self):
-        class UnsaveableTokenizer(PreTrainedTokenizerBase):
-            def save_pretrained(self, *args, **kwargs):
-                raise OSError("simulated file write error")
-
-        with tempfile.TemporaryDirectory() as tmpdir:
-            trainer = get_regression_trainer(output_dir=tmpdir, save_steps=5)
-            # Attach unsaveable tokenizer to partially fail checkpointing
-            trainer.tokenizer = UnsaveableTokenizer()
-            with self.assertRaises(OSError) as _context:
-                trainer.train()
-            assert get_last_checkpoint(tmpdir) is None
-
    @require_safetensors
    def test_safe_checkpoints(self):
        for save_safetensors in [True, False]: