Add tests for no_trainer and fix existing examples (#16656)
* Fixed some bugs involving saving during epochs * Added tests mimicking the existing examples tests * Added in json exporting to all `no_trainer` examples for consistency
This commit is contained in:
@@ -23,6 +23,7 @@ https://huggingface.co/models?filter=text-generation
|
||||
# You can also adapt this script on your own causal language modeling task. Pointers for this are left as comments.
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import logging
|
||||
import math
|
||||
import os
|
||||
@@ -537,7 +538,10 @@ def main():
|
||||
|
||||
if isinstance(checkpointing_steps, int):
|
||||
if completed_steps % checkpointing_steps == 0:
|
||||
accelerator.save_state(f"step_{completed_steps}")
|
||||
output_dir = f"step_{completed_steps}"
|
||||
if args.output_dir is not None:
|
||||
output_dir = os.path.join(args.output_dir, output_dir)
|
||||
accelerator.save_state(output_dir)
|
||||
|
||||
if completed_steps >= args.max_train_steps:
|
||||
break
|
||||
@@ -581,7 +585,10 @@ def main():
|
||||
)
|
||||
|
||||
if args.checkpointing_steps == "epoch":
|
||||
accelerator.save_state(f"epoch_{epoch}")
|
||||
output_dir = f"epoch_{epoch}"
|
||||
if args.output_dir is not None:
|
||||
output_dir = os.path.join(args.output_dir, output_dir)
|
||||
accelerator.save_state(output_dir)
|
||||
|
||||
if args.output_dir is not None:
|
||||
accelerator.wait_for_everyone()
|
||||
@@ -592,6 +599,9 @@ def main():
|
||||
if args.push_to_hub:
|
||||
repo.push_to_hub(commit_message="End of training", auto_lfs_prune=True)
|
||||
|
||||
with open(os.path.join(args.output_dir, "all_results.json"), "w") as f:
|
||||
json.dump({"perplexity": perplexity}, f)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
|
||||
@@ -23,6 +23,7 @@ https://huggingface.co/models?filter=fill-mask
|
||||
# You can also adapt this script on your own mlm task. Pointers for this are left as comments.
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import logging
|
||||
import math
|
||||
import os
|
||||
@@ -457,9 +458,11 @@ def main():
|
||||
train_dataset = tokenized_datasets["train"]
|
||||
eval_dataset = tokenized_datasets["validation"]
|
||||
|
||||
# Log a few random samples from the training set:
|
||||
for index in random.sample(range(len(train_dataset)), 3):
|
||||
logger.info(f"Sample {index} of the training set: {train_dataset[index]}.")
|
||||
# Conditional for small test subsets
|
||||
if len(train_dataset) > 3:
|
||||
# Log a few random samples from the training set:
|
||||
for index in random.sample(range(len(train_dataset)), 3):
|
||||
logger.info(f"Sample {index} of the training set: {train_dataset[index]}.")
|
||||
|
||||
# Data collator
|
||||
# This one will take care of randomly masking the tokens.
|
||||
@@ -581,7 +584,10 @@ def main():
|
||||
|
||||
if isinstance(checkpointing_steps, int):
|
||||
if completed_steps % checkpointing_steps == 0:
|
||||
accelerator.save_state(f"step_{completed_steps}")
|
||||
output_dir = f"step_{completed_steps}"
|
||||
if args.output_dir is not None:
|
||||
output_dir = os.path.join(args.output_dir, output_dir)
|
||||
accelerator.save_state(output_dir)
|
||||
|
||||
if completed_steps >= args.max_train_steps:
|
||||
break
|
||||
@@ -625,7 +631,10 @@ def main():
|
||||
)
|
||||
|
||||
if args.checkpointing_steps == "epoch":
|
||||
accelerator.save_state(f"epoch_{epoch}")
|
||||
output_dir = f"epoch_{epoch}"
|
||||
if args.output_dir is not None:
|
||||
output_dir = os.path.join(args.output_dir, output_dir)
|
||||
accelerator.save_state(output_dir)
|
||||
|
||||
if args.output_dir is not None:
|
||||
accelerator.wait_for_everyone()
|
||||
@@ -636,6 +645,9 @@ def main():
|
||||
if args.push_to_hub:
|
||||
repo.push_to_hub(commit_message="End of training", auto_lfs_prune=True)
|
||||
|
||||
with open(os.path.join(args.output_dir, "all_results.json"), "w") as f:
|
||||
json.dump({"perplexity": perplexity}, f)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
|
||||
Reference in New Issue
Block a user