From 08a5f57567d8a975d900b66658bfd3c28c9dbec5 Mon Sep 17 00:00:00 2001 From: Sylvain Gugger <35901082+sgugger@users.noreply.github.com> Date: Fri, 5 Nov 2021 18:58:51 -0400 Subject: [PATCH] Add new LFS prune API (#14294) --- .../pytorch/language-modeling/run_clm_no_trainer.py | 6 ++++-- .../pytorch/language-modeling/run_mlm_no_trainer.py | 6 ++++-- .../pytorch/multiple-choice/run_swag_no_trainer.py | 6 ++++-- .../run_qa_beam_search_no_trainer.py | 6 ++++-- .../pytorch/question-answering/run_qa_no_trainer.py | 6 ++++-- .../run_wav2vec2_pretraining_no_trainer.py | 8 ++++++-- .../summarization/run_summarization_no_trainer.py | 6 ++++-- .../text-classification/run_glue_no_trainer.py | 6 ++++-- .../token-classification/run_ner_no_trainer.py | 6 ++++-- .../translation/run_translation_no_trainer.py | 6 ++++-- src/transformers/trainer.py | 12 +++++++++--- 11 files changed, 51 insertions(+), 23 deletions(-) diff --git a/examples/pytorch/language-modeling/run_clm_no_trainer.py b/examples/pytorch/language-modeling/run_clm_no_trainer.py index 4ed7bd1bd5..ef9edffb34 100755 --- a/examples/pytorch/language-modeling/run_clm_no_trainer.py +++ b/examples/pytorch/language-modeling/run_clm_no_trainer.py @@ -507,7 +507,9 @@ def main(): unwrapped_model.save_pretrained(args.output_dir, save_function=accelerator.save) if accelerator.is_main_process: tokenizer.save_pretrained(args.output_dir) - repo.push_to_hub(commit_message=f"Training in progress epoch {epoch}", blocking=False) + repo.push_to_hub( + commit_message=f"Training in progress epoch {epoch}", blocking=False, auto_lfs_prune=True + ) if args.output_dir is not None: accelerator.wait_for_everyone() @@ -516,7 +518,7 @@ def main(): if accelerator.is_main_process: tokenizer.save_pretrained(args.output_dir) if args.push_to_hub: - repo.push_to_hub(commit_message="End of training") + repo.push_to_hub(commit_message="End of training", auto_lfs_prune=True) if __name__ == "__main__": diff --git a/examples/pytorch/language-modeling/run_mlm_no_trainer.py b/examples/pytorch/language-modeling/run_mlm_no_trainer.py index cf2841ab5f..e356741daf 100755 --- a/examples/pytorch/language-modeling/run_mlm_no_trainer.py +++ b/examples/pytorch/language-modeling/run_mlm_no_trainer.py @@ -548,7 +548,9 @@ def main(): unwrapped_model.save_pretrained(args.output_dir, save_function=accelerator.save) if accelerator.is_main_process: tokenizer.save_pretrained(args.output_dir) - repo.push_to_hub(commit_message=f"Training in progress epoch {epoch}", blocking=False) + repo.push_to_hub( + commit_message=f"Training in progress epoch {epoch}", blocking=False, auto_lfs_prune=True + ) if args.output_dir is not None: accelerator.wait_for_everyone() @@ -557,7 +559,7 @@ def main(): if accelerator.is_main_process: tokenizer.save_pretrained(args.output_dir) if args.push_to_hub: - repo.push_to_hub(commit_message="End of training") + repo.push_to_hub(commit_message="End of training", auto_lfs_prune=True) if __name__ == "__main__": diff --git a/examples/pytorch/multiple-choice/run_swag_no_trainer.py b/examples/pytorch/multiple-choice/run_swag_no_trainer.py index de8094ee35..07d212a65a 100755 --- a/examples/pytorch/multiple-choice/run_swag_no_trainer.py +++ b/examples/pytorch/multiple-choice/run_swag_no_trainer.py @@ -505,7 +505,9 @@ def main(): unwrapped_model.save_pretrained(args.output_dir, save_function=accelerator.save) if accelerator.is_main_process: tokenizer.save_pretrained(args.output_dir) - repo.push_to_hub(commit_message=f"Training in progress epoch {epoch}", blocking=False) + repo.push_to_hub( + commit_message=f"Training in progress epoch {epoch}", blocking=False, auto_lfs_prune=True + ) if args.output_dir is not None: accelerator.wait_for_everyone() @@ -514,7 +516,7 @@ def main(): if accelerator.is_main_process: tokenizer.save_pretrained(args.output_dir) if args.push_to_hub: - repo.push_to_hub(commit_message="End of training") + repo.push_to_hub(commit_message="End of training", auto_lfs_prune=True) if __name__ == "__main__": diff --git a/examples/pytorch/question-answering/run_qa_beam_search_no_trainer.py b/examples/pytorch/question-answering/run_qa_beam_search_no_trainer.py index 097de91e50..5f2ac9c8c9 100644 --- a/examples/pytorch/question-answering/run_qa_beam_search_no_trainer.py +++ b/examples/pytorch/question-answering/run_qa_beam_search_no_trainer.py @@ -731,7 +731,9 @@ def main(): unwrapped_model.save_pretrained(args.output_dir, save_function=accelerator.save) if accelerator.is_main_process: tokenizer.save_pretrained(args.output_dir) - repo.push_to_hub(commit_message=f"Training in progress epoch {epoch}", blocking=False) + repo.push_to_hub( + commit_message=f"Training in progress epoch {epoch}", blocking=False, auto_lfs_prune=True + ) # intialize all lists to collect the batches all_start_top_log_probs = [] @@ -853,7 +855,7 @@ def main(): if accelerator.is_main_process: tokenizer.save_pretrained(args.output_dir) if args.push_to_hub: - repo.push_to_hub(commit_message="End of training") + repo.push_to_hub(commit_message="End of training", auto_lfs_prune=True) if __name__ == "__main__": diff --git a/examples/pytorch/question-answering/run_qa_no_trainer.py b/examples/pytorch/question-answering/run_qa_no_trainer.py index 42bbecf008..1d2b3ee08b 100755 --- a/examples/pytorch/question-answering/run_qa_no_trainer.py +++ b/examples/pytorch/question-answering/run_qa_no_trainer.py @@ -737,7 +737,9 @@ def main(): unwrapped_model.save_pretrained(args.output_dir, save_function=accelerator.save) if accelerator.is_main_process: tokenizer.save_pretrained(args.output_dir) - repo.push_to_hub(commit_message=f"Training in progress epoch {epoch}", blocking=False) + repo.push_to_hub( + commit_message=f"Training in progress epoch {epoch}", blocking=False, auto_lfs_prune=True + ) # Evaluation logger.info("***** Running Evaluation *****") @@ -816,7 +818,7 @@ def main(): if accelerator.is_main_process: tokenizer.save_pretrained(args.output_dir) if args.push_to_hub: - repo.push_to_hub(commit_message="End of training") + repo.push_to_hub(commit_message="End of training", auto_lfs_prune=True) if __name__ == "__main__": diff --git a/examples/pytorch/speech-pretraining/run_wav2vec2_pretraining_no_trainer.py b/examples/pytorch/speech-pretraining/run_wav2vec2_pretraining_no_trainer.py index 755581b42c..2a3883a2d3 100755 --- a/examples/pytorch/speech-pretraining/run_wav2vec2_pretraining_no_trainer.py +++ b/examples/pytorch/speech-pretraining/run_wav2vec2_pretraining_no_trainer.py @@ -667,7 +667,11 @@ def main(): unwrapped_model.save_pretrained(args.output_dir, save_function=accelerator.save) if (args.push_to_hub and epoch < args.num_train_epochs - 1) and accelerator.is_main_process: - repo.push_to_hub(commit_message=f"Training in progress step {completed_steps}", blocking=False) + repo.push_to_hub( + commit_message=f"Training in progress step {completed_steps}", + blocking=False, + auto_lfs_prune=True, + ) # if completed steps > `args.max_train_steps` stop if completed_steps >= args.max_train_steps: @@ -714,7 +718,7 @@ def main(): unwrapped_model.save_pretrained(args.output_dir, save_function=accelerator.save) if accelerator.is_main_process: if args.push_to_hub: - repo.push_to_hub(commit_message="End of training") + repo.push_to_hub(commit_message="End of training", auto_lfs_prune=True) if __name__ == "__main__": diff --git a/examples/pytorch/summarization/run_summarization_no_trainer.py b/examples/pytorch/summarization/run_summarization_no_trainer.py index 054efbfc52..7c36898fa5 100644 --- a/examples/pytorch/summarization/run_summarization_no_trainer.py +++ b/examples/pytorch/summarization/run_summarization_no_trainer.py @@ -601,7 +601,9 @@ def main(): unwrapped_model.save_pretrained(args.output_dir, save_function=accelerator.save) if accelerator.is_main_process: tokenizer.save_pretrained(args.output_dir) - repo.push_to_hub(commit_message=f"Training in progress epoch {epoch}", blocking=False) + repo.push_to_hub( + commit_message=f"Training in progress epoch {epoch}", blocking=False, auto_lfs_prune=True + ) if args.output_dir is not None: accelerator.wait_for_everyone() @@ -610,7 +612,7 @@ def main(): if accelerator.is_main_process: tokenizer.save_pretrained(args.output_dir) if args.push_to_hub: - repo.push_to_hub(commit_message="End of training") + repo.push_to_hub(commit_message="End of training", auto_lfs_prune=True) if __name__ == "__main__": diff --git a/examples/pytorch/text-classification/run_glue_no_trainer.py b/examples/pytorch/text-classification/run_glue_no_trainer.py index ba2482e58a..43de2c4060 100644 --- a/examples/pytorch/text-classification/run_glue_no_trainer.py +++ b/examples/pytorch/text-classification/run_glue_no_trainer.py @@ -453,7 +453,9 @@ def main(): unwrapped_model.save_pretrained(args.output_dir, save_function=accelerator.save) if accelerator.is_main_process: tokenizer.save_pretrained(args.output_dir) - repo.push_to_hub(commit_message=f"Training in progress epoch {epoch}", blocking=False) + repo.push_to_hub( + commit_message=f"Training in progress epoch {epoch}", blocking=False, auto_lfs_prune=True + ) if args.output_dir is not None: accelerator.wait_for_everyone() @@ -462,7 +464,7 @@ def main(): if accelerator.is_main_process: tokenizer.save_pretrained(args.output_dir) if args.push_to_hub: - repo.push_to_hub(commit_message="End of training") + repo.push_to_hub(commit_message="End of training", auto_lfs_prune=True) if args.task_name == "mnli": # Final evaluation on mismatched validation set diff --git a/examples/pytorch/token-classification/run_ner_no_trainer.py b/examples/pytorch/token-classification/run_ner_no_trainer.py index d485c17384..50b7645182 100755 --- a/examples/pytorch/token-classification/run_ner_no_trainer.py +++ b/examples/pytorch/token-classification/run_ner_no_trainer.py @@ -590,7 +590,9 @@ def main(): unwrapped_model.save_pretrained(args.output_dir, save_function=accelerator.save) if accelerator.is_main_process: tokenizer.save_pretrained(args.output_dir) - repo.push_to_hub(commit_message=f"Training in progress epoch {epoch}", blocking=False) + repo.push_to_hub( + commit_message=f"Training in progress epoch {epoch}", blocking=False, auto_lfs_prune=True + ) if args.output_dir is not None: accelerator.wait_for_everyone() @@ -599,7 +601,7 @@ def main(): if accelerator.is_main_process: tokenizer.save_pretrained(args.output_dir) if args.push_to_hub: - repo.push_to_hub(commit_message="End of training") + repo.push_to_hub(commit_message="End of training", auto_lfs_prune=True) if __name__ == "__main__": diff --git a/examples/pytorch/translation/run_translation_no_trainer.py b/examples/pytorch/translation/run_translation_no_trainer.py index 4b5c025131..838957a8f1 100644 --- a/examples/pytorch/translation/run_translation_no_trainer.py +++ b/examples/pytorch/translation/run_translation_no_trainer.py @@ -580,7 +580,9 @@ def main(): unwrapped_model.save_pretrained(args.output_dir, save_function=accelerator.save) if accelerator.is_main_process: tokenizer.save_pretrained(args.output_dir) - repo.push_to_hub(commit_message=f"Training in progress epoch {epoch}", blocking=False) + repo.push_to_hub( + commit_message=f"Training in progress epoch {epoch}", blocking=False, auto_lfs_prune=True + ) if args.output_dir is not None: accelerator.wait_for_everyone() @@ -589,7 +591,7 @@ def main(): if accelerator.is_main_process: tokenizer.save_pretrained(args.output_dir) if args.push_to_hub: - repo.push_to_hub(commit_message="End of training") + repo.push_to_hub(commit_message="End of training", auto_lfs_prune=True) if __name__ == "__main__": diff --git a/src/transformers/trainer.py b/src/transformers/trainer.py index 4cda1dcb84..a39ce6bbfd 100755 --- a/src/transformers/trainer.py +++ b/src/transformers/trainer.py @@ -2644,7 +2644,9 @@ class Trainer: commit_message = f"Training in progress, step {self.state.global_step}" else: commit_message = f"Training in progress, epoch {int(self.state.epoch)}" - _, self.push_in_progress = self.repo.push_to_hub(commit_message=commit_message, blocking=False) + _, self.push_in_progress = self.repo.push_to_hub( + commit_message=commit_message, blocking=False, auto_lfs_prune=True + ) finally: if self.args.hub_strategy == HubStrategy.CHECKPOINT: # Move back the checkpoint to its place @@ -2680,12 +2682,16 @@ class Trainer: if not self.is_world_process_zero(): return - git_head_commit_url = self.repo.push_to_hub(commit_message=commit_message, blocking=blocking) + git_head_commit_url = self.repo.push_to_hub( + commit_message=commit_message, blocking=blocking, auto_lfs_prune=True + ) # push separately the model card to be independant from the rest of the model if self.args.should_save: self.create_model_card(model_name=model_name, **kwargs) try: - self.repo.push_to_hub(commit_message="update model card README.md", blocking=blocking) + self.repo.push_to_hub( + commit_message="update model card README.md", blocking=blocking, auto_lfs_prune=True + ) except EnvironmentError as exc: logger.error(f"Error pushing update to the model card. Please read logs and retry.\n${exc}")