From ab17758874f62c03b6e5627f846a697920b16dd8 Mon Sep 17 00:00:00 2001 From: Sylvain Gugger <35901082+sgugger@users.noreply.github.com> Date: Tue, 22 Dec 2020 14:02:26 -0500 Subject: [PATCH] Add speed metrics to all example scripts + template (#9260) --- examples/language-modeling/run_clm.py | 15 +++++++++++++-- examples/language-modeling/run_mlm.py | 15 +++++++++++++-- examples/language-modeling/run_mlm_wwm.py | 15 +++++++++++++-- examples/language-modeling/run_plm.py | 15 +++++++++++++-- examples/multiple-choice/run_swag.py | 15 +++++++++++++-- examples/question-answering/run_qa.py | 15 +++++++++++++-- .../question-answering/run_qa_beam_search.py | 15 +++++++++++++-- examples/token-classification/run_ner.py | 15 +++++++++++++-- .../run_{{cookiecutter.example_shortcut}}.py | 17 ++++++++++++++--- 9 files changed, 118 insertions(+), 19 deletions(-) diff --git a/examples/language-modeling/run_clm.py b/examples/language-modeling/run_clm.py index 4e07441c14..1dfd835493 100644 --- a/examples/language-modeling/run_clm.py +++ b/examples/language-modeling/run_clm.py @@ -341,9 +341,20 @@ def main(): if (model_args.model_name_or_path is not None and os.path.isdir(model_args.model_name_or_path)) else None ) - trainer.train(model_path=model_path) + train_result = trainer.train(model_path=model_path) trainer.save_model() # Saves the tokenizer too for easy upload + output_train_file = os.path.join(training_args.output_dir, "train_results.txt") + if trainer.is_world_process_zero(): + with open(output_train_file, "w") as writer: + logger.info("***** Train results *****") + for key, value in sorted(train_result.metrics.items()): + logger.info(f" {key} = {value}") + writer.write(f"{key} = {value}\n") + + # Need to save the state, since Trainer.save_model saves only the tokenizer with the model + trainer.state.save_to_json(os.path.join(training_args.output_dir, "trainer_state.json")) + # Evaluation results = {} if training_args.do_eval: @@ -358,7 +369,7 @@ def main(): if trainer.is_world_process_zero(): with open(output_eval_file, "w") as writer: logger.info("***** Eval results *****") - for key, value in results.items(): + for key, value in sorted(results.items()): logger.info(f" {key} = {value}") writer.write(f"{key} = {value}\n") diff --git a/examples/language-modeling/run_mlm.py b/examples/language-modeling/run_mlm.py index 4ae0b62269..8e75701120 100644 --- a/examples/language-modeling/run_mlm.py +++ b/examples/language-modeling/run_mlm.py @@ -376,9 +376,20 @@ def main(): if (model_args.model_name_or_path is not None and os.path.isdir(model_args.model_name_or_path)) else None ) - trainer.train(model_path=model_path) + train_result = trainer.train(model_path=model_path) trainer.save_model() # Saves the tokenizer too for easy upload + output_train_file = os.path.join(training_args.output_dir, "train_results.txt") + if trainer.is_world_process_zero(): + with open(output_train_file, "w") as writer: + logger.info("***** Train results *****") + for key, value in sorted(train_result.metrics.items()): + logger.info(f" {key} = {value}") + writer.write(f"{key} = {value}\n") + + # Need to save the state, since Trainer.save_model saves only the tokenizer with the model + trainer.state.save_to_json(os.path.join(training_args.output_dir, "trainer_state.json")) + # Evaluation results = {} if training_args.do_eval: @@ -393,7 +404,7 @@ def main(): if trainer.is_world_process_zero(): with open(output_eval_file, "w") as writer: logger.info("***** Eval results *****") - for key, value in results.items(): + for key, value in sorted(results.items()): logger.info(f" {key} = {value}") writer.write(f"{key} = {value}\n") diff --git a/examples/language-modeling/run_mlm_wwm.py b/examples/language-modeling/run_mlm_wwm.py index 228205ec9a..0a54a67cb0 100644 --- a/examples/language-modeling/run_mlm_wwm.py +++ b/examples/language-modeling/run_mlm_wwm.py @@ -334,9 +334,20 @@ def main(): if (model_args.model_name_or_path is not None and os.path.isdir(model_args.model_name_or_path)) else None ) - trainer.train(model_path=model_path) + train_result = trainer.train(model_path=model_path) trainer.save_model() # Saves the tokenizer too for easy upload + output_train_file = os.path.join(training_args.output_dir, "train_results.txt") + if trainer.is_world_process_zero(): + with open(output_train_file, "w") as writer: + logger.info("***** Train results *****") + for key, value in sorted(train_result.metrics.items()): + logger.info(f" {key} = {value}") + writer.write(f"{key} = {value}\n") + + # Need to save the state, since Trainer.save_model saves only the tokenizer with the model + trainer.state.save_to_json(os.path.join(training_args.output_dir, "trainer_state.json")) + # Evaluation results = {} if training_args.do_eval: @@ -351,7 +362,7 @@ def main(): if trainer.is_world_process_zero(): with open(output_eval_file, "w") as writer: logger.info("***** Eval results *****") - for key, value in results.items(): + for key, value in sorted(results.items()): logger.info(f" {key} = {value}") writer.write(f"{key} = {value}\n") diff --git a/examples/language-modeling/run_plm.py b/examples/language-modeling/run_plm.py index 4b603973bd..f86bdb6ca0 100644 --- a/examples/language-modeling/run_plm.py +++ b/examples/language-modeling/run_plm.py @@ -363,9 +363,20 @@ def main(): if (model_args.model_name_or_path is not None and os.path.isdir(model_args.model_name_or_path)) else None ) - trainer.train(model_path=model_path) + train_result = trainer.train(model_path=model_path) trainer.save_model() # Saves the tokenizer too for easy upload + output_train_file = os.path.join(training_args.output_dir, "train_results.txt") + if trainer.is_world_process_zero(): + with open(output_train_file, "w") as writer: + logger.info("***** Train results *****") + for key, value in sorted(train_result.metrics.items()): + logger.info(f" {key} = {value}") + writer.write(f"{key} = {value}\n") + + # Need to save the state, since Trainer.save_model saves only the tokenizer with the model + trainer.state.save_to_json(os.path.join(training_args.output_dir, "trainer_state.json")) + # Evaluation results = {} if training_args.do_eval: @@ -380,7 +391,7 @@ def main(): if trainer.is_world_process_zero(): with open(output_eval_file, "w") as writer: logger.info("***** Eval results *****") - for key, value in results.items(): + for key, value in sorted(results.items()): logger.info(f" {key} = {value}") writer.write(f"{key} = {value}\n") diff --git a/examples/multiple-choice/run_swag.py b/examples/multiple-choice/run_swag.py index a8e232d90e..0cb1d72759 100644 --- a/examples/multiple-choice/run_swag.py +++ b/examples/multiple-choice/run_swag.py @@ -317,11 +317,22 @@ def main(): # Training if training_args.do_train: - trainer.train( + train_result = trainer.train( model_path=model_args.model_name_or_path if os.path.isdir(model_args.model_name_or_path) else None ) trainer.save_model() # Saves the tokenizer too for easy upload + output_train_file = os.path.join(training_args.output_dir, "train_results.txt") + if trainer.is_world_process_zero(): + with open(output_train_file, "w") as writer: + logger.info("***** Train results *****") + for key, value in sorted(train_result.metrics.items()): + logger.info(f" {key} = {value}") + writer.write(f"{key} = {value}\n") + + # Need to save the state, since Trainer.save_model saves only the tokenizer with the model + trainer.state.save_to_json(os.path.join(training_args.output_dir, "trainer_state.json")) + # Evaluation results = {} if training_args.do_eval: @@ -333,7 +344,7 @@ def main(): if trainer.is_world_process_zero(): with open(output_eval_file, "w") as writer: logger.info("***** Eval results *****") - for key, value in results.items(): + for key, value in sorted(results.items()): logger.info(f" {key} = {value}") writer.write(f"{key} = {value}\n") diff --git a/examples/question-answering/run_qa.py b/examples/question-answering/run_qa.py index 4e63a839b3..9d9b9aaad5 100644 --- a/examples/question-answering/run_qa.py +++ b/examples/question-answering/run_qa.py @@ -438,11 +438,22 @@ def main(): # Training if training_args.do_train: - trainer.train( + train_result = trainer.train( model_path=model_args.model_name_or_path if os.path.isdir(model_args.model_name_or_path) else None ) trainer.save_model() # Saves the tokenizer too for easy upload + output_train_file = os.path.join(training_args.output_dir, "train_results.txt") + if trainer.is_world_process_zero(): + with open(output_train_file, "w") as writer: + logger.info("***** Train results *****") + for key, value in sorted(train_result.metrics.items()): + logger.info(f" {key} = {value}") + writer.write(f"{key} = {value}\n") + + # Need to save the state, since Trainer.save_model saves only the tokenizer with the model + trainer.state.save_to_json(os.path.join(training_args.output_dir, "trainer_state.json")) + # Evaluation results = {} if training_args.do_eval: @@ -453,7 +464,7 @@ def main(): if trainer.is_world_process_zero(): with open(output_eval_file, "w") as writer: logger.info("***** Eval results *****") - for key, value in results.items(): + for key, value in sorted(results.items()): logger.info(f" {key} = {value}") writer.write(f"{key} = {value}\n") diff --git a/examples/question-answering/run_qa_beam_search.py b/examples/question-answering/run_qa_beam_search.py index be70dc3cd6..477dc4cd2b 100644 --- a/examples/question-answering/run_qa_beam_search.py +++ b/examples/question-answering/run_qa_beam_search.py @@ -481,11 +481,22 @@ def main(): # Training if training_args.do_train: - trainer.train( + train_result = trainer.train( model_path=model_args.model_name_or_path if os.path.isdir(model_args.model_name_or_path) else None ) trainer.save_model() # Saves the tokenizer too for easy upload + output_train_file = os.path.join(training_args.output_dir, "train_results.txt") + if trainer.is_world_process_zero(): + with open(output_train_file, "w") as writer: + logger.info("***** Train results *****") + for key, value in sorted(train_result.metrics.items()): + logger.info(f" {key} = {value}") + writer.write(f"{key} = {value}\n") + + # Need to save the state, since Trainer.save_model saves only the tokenizer with the model + trainer.state.save_to_json(os.path.join(training_args.output_dir, "trainer_state.json")) + # Evaluation results = {} if training_args.do_eval: @@ -496,7 +507,7 @@ def main(): if trainer.is_world_process_zero(): with open(output_eval_file, "w") as writer: logger.info("***** Eval results *****") - for key, value in results.items(): + for key, value in sorted(results.items()): logger.info(f" {key} = {value}") writer.write(f"{key} = {value}\n") diff --git a/examples/token-classification/run_ner.py b/examples/token-classification/run_ner.py index 050e9ae5da..0c72a96630 100644 --- a/examples/token-classification/run_ner.py +++ b/examples/token-classification/run_ner.py @@ -340,11 +340,22 @@ def main(): # Training if training_args.do_train: - trainer.train( + train_result = trainer.train( model_path=model_args.model_name_or_path if os.path.isdir(model_args.model_name_or_path) else None ) trainer.save_model() # Saves the tokenizer too for easy upload + output_train_file = os.path.join(training_args.output_dir, "train_results.txt") + if trainer.is_world_process_zero(): + with open(output_train_file, "w") as writer: + logger.info("***** Train results *****") + for key, value in sorted(train_result.metrics.items()): + logger.info(f" {key} = {value}") + writer.write(f"{key} = {value}\n") + + # Need to save the state, since Trainer.save_model saves only the tokenizer with the model + trainer.state.save_to_json(os.path.join(training_args.output_dir, "trainer_state.json")) + # Evaluation results = {} if training_args.do_eval: @@ -377,7 +388,7 @@ def main(): output_test_results_file = os.path.join(training_args.output_dir, "test_results.txt") if trainer.is_world_process_zero(): with open(output_test_results_file, "w") as writer: - for key, value in metrics.items(): + for key, value in sorted(metrics.items()): logger.info(f" {key} = {value}") writer.write(f"{key} = {value}\n") diff --git a/templates/adding_a_new_example_script/{{cookiecutter.directory_name}}/run_{{cookiecutter.example_shortcut}}.py b/templates/adding_a_new_example_script/{{cookiecutter.directory_name}}/run_{{cookiecutter.example_shortcut}}.py index 1fed0ef2aa..aa5d3c8462 100644 --- a/templates/adding_a_new_example_script/{{cookiecutter.directory_name}}/run_{{cookiecutter.example_shortcut}}.py +++ b/templates/adding_a_new_example_script/{{cookiecutter.directory_name}}/run_{{cookiecutter.example_shortcut}}.py @@ -308,7 +308,7 @@ def main(): # Training if training_args.do_train: {%- if cookiecutter.can_train_from_scratch == "False" %} - trainer.train( + train_result = trainer.train( model_path=model_args.model_name_or_path if os.path.isdir(model_args.model_name_or_path) else None ) {%- elif cookiecutter.can_train_from_scratch == "True" %} @@ -317,10 +317,21 @@ def main(): if (model_args.model_name_or_path is not None and os.path.isdir(model_args.model_name_or_path)) else None ) - trainer.train(model_path=model_path) + train_result = trainer.train(model_path=model_path) {% endif %} trainer.save_model() # Saves the tokenizer too for easy upload + output_train_file = os.path.join(training_args.output_dir, "train_results.txt") + if trainer.is_world_process_zero(): + with open(output_train_file, "w") as writer: + logger.info("***** Train results *****") + for key, value in sorted(train_result.metrics.items()): + logger.info(f" {key} = {value}") + writer.write(f"{key} = {value}\n") + + # Need to save the state, since Trainer.save_model saves only the tokenizer with the model + trainer.state.save_to_json(os.path.join(training_args.output_dir, "trainer_state.json")) + # Evaluation results = {} if training_args.do_eval: @@ -332,7 +343,7 @@ def main(): if trainer.is_world_process_zero(): with open(output_eval_file, "w") as writer: logger.info("***** Eval results *****") - for key, value in results.items(): + for key, value in sorted(results.items()): logger.info(f" {key} = {value}") writer.write(f"{key} = {value}\n")