From a03fcf570de4a90218efd4b3de253d4648fe24b1 Mon Sep 17 00:00:00 2001 From: Bilal Khan Date: Wed, 27 Nov 2019 18:42:07 -0600 Subject: [PATCH] Save tokenizer after each epoch to be able to resume training from a checkpoint --- examples/run_lm_finetuning.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/examples/run_lm_finetuning.py b/examples/run_lm_finetuning.py index 3cae206460..1d93aa4381 100644 --- a/examples/run_lm_finetuning.py +++ b/examples/run_lm_finetuning.py @@ -274,6 +274,8 @@ def train(args, train_dataset, model, tokenizer): os.makedirs(output_dir) model_to_save = model.module if hasattr(model, 'module') else model # Take care of distributed/parallel training model_to_save.save_pretrained(output_dir) + tokenizer.save_pretrained(output_dir) + torch.save(args, os.path.join(output_dir, 'training_args.bin')) logger.info("Saving model checkpoint to %s", output_dir) @@ -282,6 +284,7 @@ def train(args, train_dataset, model, tokenizer): torch.save(optimizer.state_dict(), os.path.join(output_dir, 'optimizer.pt')) torch.save(scheduler.state_dict(), os.path.join(output_dir, 'scheduler.pt')) torch.save(epoch, os.path.join(output_dir, 'training_state.pt')) + logger.info("Saving training state to %s", output_dir) if args.max_steps > 0 and global_step > args.max_steps: epoch_iterator.close()