Add new LFS prune API (#14294)
This commit is contained in:
@@ -507,7 +507,9 @@ def main():
|
|||||||
unwrapped_model.save_pretrained(args.output_dir, save_function=accelerator.save)
|
unwrapped_model.save_pretrained(args.output_dir, save_function=accelerator.save)
|
||||||
if accelerator.is_main_process:
|
if accelerator.is_main_process:
|
||||||
tokenizer.save_pretrained(args.output_dir)
|
tokenizer.save_pretrained(args.output_dir)
|
||||||
repo.push_to_hub(commit_message=f"Training in progress epoch {epoch}", blocking=False)
|
repo.push_to_hub(
|
||||||
|
commit_message=f"Training in progress epoch {epoch}", blocking=False, auto_lfs_prune=True
|
||||||
|
)
|
||||||
|
|
||||||
if args.output_dir is not None:
|
if args.output_dir is not None:
|
||||||
accelerator.wait_for_everyone()
|
accelerator.wait_for_everyone()
|
||||||
@@ -516,7 +518,7 @@ def main():
|
|||||||
if accelerator.is_main_process:
|
if accelerator.is_main_process:
|
||||||
tokenizer.save_pretrained(args.output_dir)
|
tokenizer.save_pretrained(args.output_dir)
|
||||||
if args.push_to_hub:
|
if args.push_to_hub:
|
||||||
repo.push_to_hub(commit_message="End of training")
|
repo.push_to_hub(commit_message="End of training", auto_lfs_prune=True)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
|||||||
@@ -548,7 +548,9 @@ def main():
|
|||||||
unwrapped_model.save_pretrained(args.output_dir, save_function=accelerator.save)
|
unwrapped_model.save_pretrained(args.output_dir, save_function=accelerator.save)
|
||||||
if accelerator.is_main_process:
|
if accelerator.is_main_process:
|
||||||
tokenizer.save_pretrained(args.output_dir)
|
tokenizer.save_pretrained(args.output_dir)
|
||||||
repo.push_to_hub(commit_message=f"Training in progress epoch {epoch}", blocking=False)
|
repo.push_to_hub(
|
||||||
|
commit_message=f"Training in progress epoch {epoch}", blocking=False, auto_lfs_prune=True
|
||||||
|
)
|
||||||
|
|
||||||
if args.output_dir is not None:
|
if args.output_dir is not None:
|
||||||
accelerator.wait_for_everyone()
|
accelerator.wait_for_everyone()
|
||||||
@@ -557,7 +559,7 @@ def main():
|
|||||||
if accelerator.is_main_process:
|
if accelerator.is_main_process:
|
||||||
tokenizer.save_pretrained(args.output_dir)
|
tokenizer.save_pretrained(args.output_dir)
|
||||||
if args.push_to_hub:
|
if args.push_to_hub:
|
||||||
repo.push_to_hub(commit_message="End of training")
|
repo.push_to_hub(commit_message="End of training", auto_lfs_prune=True)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
|||||||
@@ -505,7 +505,9 @@ def main():
|
|||||||
unwrapped_model.save_pretrained(args.output_dir, save_function=accelerator.save)
|
unwrapped_model.save_pretrained(args.output_dir, save_function=accelerator.save)
|
||||||
if accelerator.is_main_process:
|
if accelerator.is_main_process:
|
||||||
tokenizer.save_pretrained(args.output_dir)
|
tokenizer.save_pretrained(args.output_dir)
|
||||||
repo.push_to_hub(commit_message=f"Training in progress epoch {epoch}", blocking=False)
|
repo.push_to_hub(
|
||||||
|
commit_message=f"Training in progress epoch {epoch}", blocking=False, auto_lfs_prune=True
|
||||||
|
)
|
||||||
|
|
||||||
if args.output_dir is not None:
|
if args.output_dir is not None:
|
||||||
accelerator.wait_for_everyone()
|
accelerator.wait_for_everyone()
|
||||||
@@ -514,7 +516,7 @@ def main():
|
|||||||
if accelerator.is_main_process:
|
if accelerator.is_main_process:
|
||||||
tokenizer.save_pretrained(args.output_dir)
|
tokenizer.save_pretrained(args.output_dir)
|
||||||
if args.push_to_hub:
|
if args.push_to_hub:
|
||||||
repo.push_to_hub(commit_message="End of training")
|
repo.push_to_hub(commit_message="End of training", auto_lfs_prune=True)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
|||||||
@@ -731,7 +731,9 @@ def main():
|
|||||||
unwrapped_model.save_pretrained(args.output_dir, save_function=accelerator.save)
|
unwrapped_model.save_pretrained(args.output_dir, save_function=accelerator.save)
|
||||||
if accelerator.is_main_process:
|
if accelerator.is_main_process:
|
||||||
tokenizer.save_pretrained(args.output_dir)
|
tokenizer.save_pretrained(args.output_dir)
|
||||||
repo.push_to_hub(commit_message=f"Training in progress epoch {epoch}", blocking=False)
|
repo.push_to_hub(
|
||||||
|
commit_message=f"Training in progress epoch {epoch}", blocking=False, auto_lfs_prune=True
|
||||||
|
)
|
||||||
|
|
||||||
# initialize all lists to collect the batches
|
# initialize all lists to collect the batches
|
||||||
all_start_top_log_probs = []
|
all_start_top_log_probs = []
|
||||||
@@ -853,7 +855,7 @@ def main():
|
|||||||
if accelerator.is_main_process:
|
if accelerator.is_main_process:
|
||||||
tokenizer.save_pretrained(args.output_dir)
|
tokenizer.save_pretrained(args.output_dir)
|
||||||
if args.push_to_hub:
|
if args.push_to_hub:
|
||||||
repo.push_to_hub(commit_message="End of training")
|
repo.push_to_hub(commit_message="End of training", auto_lfs_prune=True)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
|||||||
@@ -737,7 +737,9 @@ def main():
|
|||||||
unwrapped_model.save_pretrained(args.output_dir, save_function=accelerator.save)
|
unwrapped_model.save_pretrained(args.output_dir, save_function=accelerator.save)
|
||||||
if accelerator.is_main_process:
|
if accelerator.is_main_process:
|
||||||
tokenizer.save_pretrained(args.output_dir)
|
tokenizer.save_pretrained(args.output_dir)
|
||||||
repo.push_to_hub(commit_message=f"Training in progress epoch {epoch}", blocking=False)
|
repo.push_to_hub(
|
||||||
|
commit_message=f"Training in progress epoch {epoch}", blocking=False, auto_lfs_prune=True
|
||||||
|
)
|
||||||
|
|
||||||
# Evaluation
|
# Evaluation
|
||||||
logger.info("***** Running Evaluation *****")
|
logger.info("***** Running Evaluation *****")
|
||||||
@@ -816,7 +818,7 @@ def main():
|
|||||||
if accelerator.is_main_process:
|
if accelerator.is_main_process:
|
||||||
tokenizer.save_pretrained(args.output_dir)
|
tokenizer.save_pretrained(args.output_dir)
|
||||||
if args.push_to_hub:
|
if args.push_to_hub:
|
||||||
repo.push_to_hub(commit_message="End of training")
|
repo.push_to_hub(commit_message="End of training", auto_lfs_prune=True)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
|||||||
@@ -667,7 +667,11 @@ def main():
|
|||||||
unwrapped_model.save_pretrained(args.output_dir, save_function=accelerator.save)
|
unwrapped_model.save_pretrained(args.output_dir, save_function=accelerator.save)
|
||||||
|
|
||||||
if (args.push_to_hub and epoch < args.num_train_epochs - 1) and accelerator.is_main_process:
|
if (args.push_to_hub and epoch < args.num_train_epochs - 1) and accelerator.is_main_process:
|
||||||
repo.push_to_hub(commit_message=f"Training in progress step {completed_steps}", blocking=False)
|
repo.push_to_hub(
|
||||||
|
commit_message=f"Training in progress step {completed_steps}",
|
||||||
|
blocking=False,
|
||||||
|
auto_lfs_prune=True,
|
||||||
|
)
|
||||||
|
|
||||||
# if completed steps > `args.max_train_steps` stop
|
# if completed steps > `args.max_train_steps` stop
|
||||||
if completed_steps >= args.max_train_steps:
|
if completed_steps >= args.max_train_steps:
|
||||||
@@ -714,7 +718,7 @@ def main():
|
|||||||
unwrapped_model.save_pretrained(args.output_dir, save_function=accelerator.save)
|
unwrapped_model.save_pretrained(args.output_dir, save_function=accelerator.save)
|
||||||
if accelerator.is_main_process:
|
if accelerator.is_main_process:
|
||||||
if args.push_to_hub:
|
if args.push_to_hub:
|
||||||
repo.push_to_hub(commit_message="End of training")
|
repo.push_to_hub(commit_message="End of training", auto_lfs_prune=True)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
|||||||
@@ -601,7 +601,9 @@ def main():
|
|||||||
unwrapped_model.save_pretrained(args.output_dir, save_function=accelerator.save)
|
unwrapped_model.save_pretrained(args.output_dir, save_function=accelerator.save)
|
||||||
if accelerator.is_main_process:
|
if accelerator.is_main_process:
|
||||||
tokenizer.save_pretrained(args.output_dir)
|
tokenizer.save_pretrained(args.output_dir)
|
||||||
repo.push_to_hub(commit_message=f"Training in progress epoch {epoch}", blocking=False)
|
repo.push_to_hub(
|
||||||
|
commit_message=f"Training in progress epoch {epoch}", blocking=False, auto_lfs_prune=True
|
||||||
|
)
|
||||||
|
|
||||||
if args.output_dir is not None:
|
if args.output_dir is not None:
|
||||||
accelerator.wait_for_everyone()
|
accelerator.wait_for_everyone()
|
||||||
@@ -610,7 +612,7 @@ def main():
|
|||||||
if accelerator.is_main_process:
|
if accelerator.is_main_process:
|
||||||
tokenizer.save_pretrained(args.output_dir)
|
tokenizer.save_pretrained(args.output_dir)
|
||||||
if args.push_to_hub:
|
if args.push_to_hub:
|
||||||
repo.push_to_hub(commit_message="End of training")
|
repo.push_to_hub(commit_message="End of training", auto_lfs_prune=True)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
|||||||
@@ -453,7 +453,9 @@ def main():
|
|||||||
unwrapped_model.save_pretrained(args.output_dir, save_function=accelerator.save)
|
unwrapped_model.save_pretrained(args.output_dir, save_function=accelerator.save)
|
||||||
if accelerator.is_main_process:
|
if accelerator.is_main_process:
|
||||||
tokenizer.save_pretrained(args.output_dir)
|
tokenizer.save_pretrained(args.output_dir)
|
||||||
repo.push_to_hub(commit_message=f"Training in progress epoch {epoch}", blocking=False)
|
repo.push_to_hub(
|
||||||
|
commit_message=f"Training in progress epoch {epoch}", blocking=False, auto_lfs_prune=True
|
||||||
|
)
|
||||||
|
|
||||||
if args.output_dir is not None:
|
if args.output_dir is not None:
|
||||||
accelerator.wait_for_everyone()
|
accelerator.wait_for_everyone()
|
||||||
@@ -462,7 +464,7 @@ def main():
|
|||||||
if accelerator.is_main_process:
|
if accelerator.is_main_process:
|
||||||
tokenizer.save_pretrained(args.output_dir)
|
tokenizer.save_pretrained(args.output_dir)
|
||||||
if args.push_to_hub:
|
if args.push_to_hub:
|
||||||
repo.push_to_hub(commit_message="End of training")
|
repo.push_to_hub(commit_message="End of training", auto_lfs_prune=True)
|
||||||
|
|
||||||
if args.task_name == "mnli":
|
if args.task_name == "mnli":
|
||||||
# Final evaluation on mismatched validation set
|
# Final evaluation on mismatched validation set
|
||||||
|
|||||||
@@ -590,7 +590,9 @@ def main():
|
|||||||
unwrapped_model.save_pretrained(args.output_dir, save_function=accelerator.save)
|
unwrapped_model.save_pretrained(args.output_dir, save_function=accelerator.save)
|
||||||
if accelerator.is_main_process:
|
if accelerator.is_main_process:
|
||||||
tokenizer.save_pretrained(args.output_dir)
|
tokenizer.save_pretrained(args.output_dir)
|
||||||
repo.push_to_hub(commit_message=f"Training in progress epoch {epoch}", blocking=False)
|
repo.push_to_hub(
|
||||||
|
commit_message=f"Training in progress epoch {epoch}", blocking=False, auto_lfs_prune=True
|
||||||
|
)
|
||||||
|
|
||||||
if args.output_dir is not None:
|
if args.output_dir is not None:
|
||||||
accelerator.wait_for_everyone()
|
accelerator.wait_for_everyone()
|
||||||
@@ -599,7 +601,7 @@ def main():
|
|||||||
if accelerator.is_main_process:
|
if accelerator.is_main_process:
|
||||||
tokenizer.save_pretrained(args.output_dir)
|
tokenizer.save_pretrained(args.output_dir)
|
||||||
if args.push_to_hub:
|
if args.push_to_hub:
|
||||||
repo.push_to_hub(commit_message="End of training")
|
repo.push_to_hub(commit_message="End of training", auto_lfs_prune=True)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
|||||||
@@ -580,7 +580,9 @@ def main():
|
|||||||
unwrapped_model.save_pretrained(args.output_dir, save_function=accelerator.save)
|
unwrapped_model.save_pretrained(args.output_dir, save_function=accelerator.save)
|
||||||
if accelerator.is_main_process:
|
if accelerator.is_main_process:
|
||||||
tokenizer.save_pretrained(args.output_dir)
|
tokenizer.save_pretrained(args.output_dir)
|
||||||
repo.push_to_hub(commit_message=f"Training in progress epoch {epoch}", blocking=False)
|
repo.push_to_hub(
|
||||||
|
commit_message=f"Training in progress epoch {epoch}", blocking=False, auto_lfs_prune=True
|
||||||
|
)
|
||||||
|
|
||||||
if args.output_dir is not None:
|
if args.output_dir is not None:
|
||||||
accelerator.wait_for_everyone()
|
accelerator.wait_for_everyone()
|
||||||
@@ -589,7 +591,7 @@ def main():
|
|||||||
if accelerator.is_main_process:
|
if accelerator.is_main_process:
|
||||||
tokenizer.save_pretrained(args.output_dir)
|
tokenizer.save_pretrained(args.output_dir)
|
||||||
if args.push_to_hub:
|
if args.push_to_hub:
|
||||||
repo.push_to_hub(commit_message="End of training")
|
repo.push_to_hub(commit_message="End of training", auto_lfs_prune=True)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
|||||||
@@ -2644,7 +2644,9 @@ class Trainer:
|
|||||||
commit_message = f"Training in progress, step {self.state.global_step}"
|
commit_message = f"Training in progress, step {self.state.global_step}"
|
||||||
else:
|
else:
|
||||||
commit_message = f"Training in progress, epoch {int(self.state.epoch)}"
|
commit_message = f"Training in progress, epoch {int(self.state.epoch)}"
|
||||||
_, self.push_in_progress = self.repo.push_to_hub(commit_message=commit_message, blocking=False)
|
_, self.push_in_progress = self.repo.push_to_hub(
|
||||||
|
commit_message=commit_message, blocking=False, auto_lfs_prune=True
|
||||||
|
)
|
||||||
finally:
|
finally:
|
||||||
if self.args.hub_strategy == HubStrategy.CHECKPOINT:
|
if self.args.hub_strategy == HubStrategy.CHECKPOINT:
|
||||||
# Move back the checkpoint to its place
|
# Move back the checkpoint to its place
|
||||||
@@ -2680,12 +2682,16 @@ class Trainer:
|
|||||||
if not self.is_world_process_zero():
|
if not self.is_world_process_zero():
|
||||||
return
|
return
|
||||||
|
|
||||||
git_head_commit_url = self.repo.push_to_hub(commit_message=commit_message, blocking=blocking)
|
git_head_commit_url = self.repo.push_to_hub(
|
||||||
|
commit_message=commit_message, blocking=blocking, auto_lfs_prune=True
|
||||||
|
)
|
||||||
# push the model card separately to be independent from the rest of the model
|
# push the model card separately to be independent from the rest of the model
|
||||||
if self.args.should_save:
|
if self.args.should_save:
|
||||||
self.create_model_card(model_name=model_name, **kwargs)
|
self.create_model_card(model_name=model_name, **kwargs)
|
||||||
try:
|
try:
|
||||||
self.repo.push_to_hub(commit_message="update model card README.md", blocking=blocking)
|
self.repo.push_to_hub(
|
||||||
|
commit_message="update model card README.md", blocking=blocking, auto_lfs_prune=True
|
||||||
|
)
|
||||||
except EnvironmentError as exc:
|
except EnvironmentError as exc:
|
||||||
logger.error(f"Error pushing update to the model card. Please read logs and retry.\n${exc}")
|
logger.error(f"Error pushing update to the model card. Please read logs and retry.\n${exc}")
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user