From 38580455dea435acd4a261e788d237d3421d65b2 Mon Sep 17 00:00:00 2001 From: Gunjan Chhablani Date: Fri, 24 Sep 2021 19:21:46 +0530 Subject: [PATCH] Add model card creation snippet to example scripts (#13730) * Update run_glue.py * Update run_glue.py * Add model creation snippet to other scripts * Fix style --- examples/pytorch/language-modeling/run_clm.py | 20 +++++++------ examples/pytorch/language-modeling/run_mlm.py | 20 +++++++------ examples/pytorch/language-modeling/run_plm.py | 20 +++++++------ examples/pytorch/multiple-choice/run_swag.py | 20 +++++++------ examples/pytorch/question-answering/run_qa.py | 20 +++++++------ .../question-answering/run_qa_beam_search.py | 20 +++++++------ .../summarization/run_summarization.py | 20 +++++++------ .../pytorch/text-classification/run_glue.py | 16 ++++++----- .../pytorch/token-classification/run_ner.py | 20 +++++++------ .../pytorch/translation/run_translation.py | 28 ++++++++++--------- 10 files changed, 113 insertions(+), 91 deletions(-) diff --git a/examples/pytorch/language-modeling/run_clm.py b/examples/pytorch/language-modeling/run_clm.py index de40d4680b..4a9a52c66b 100755 --- a/examples/pytorch/language-modeling/run_clm.py +++ b/examples/pytorch/language-modeling/run_clm.py @@ -500,17 +500,19 @@ def main(): trainer.log_metrics("eval", metrics) trainer.save_metrics("eval", metrics) - if training_args.push_to_hub: - kwargs = {"finetuned_from": model_args.model_name_or_path, "tasks": "text-generation"} - if data_args.dataset_name is not None: - kwargs["dataset_tags"] = data_args.dataset_name - if data_args.dataset_config_name is not None: - kwargs["dataset_args"] = data_args.dataset_config_name - kwargs["dataset"] = f"{data_args.dataset_name} {data_args.dataset_config_name}" - else: - kwargs["dataset"] = data_args.dataset_name + kwargs = {"finetuned_from": model_args.model_name_or_path, "tasks": "text-generation"} + if data_args.dataset_name is not None: + kwargs["dataset_tags"] = data_args.dataset_name + if data_args.dataset_config_name is not None: + kwargs["dataset_args"] = data_args.dataset_config_name + kwargs["dataset"] = f"{data_args.dataset_name} {data_args.dataset_config_name}" + else: + kwargs["dataset"] = data_args.dataset_name + if training_args.push_to_hub: trainer.push_to_hub(**kwargs) + else: + trainer.create_model_card(**kwargs) def _mp_fn(index): diff --git a/examples/pytorch/language-modeling/run_mlm.py b/examples/pytorch/language-modeling/run_mlm.py index 3eea2a384f..60e21ffe11 100755 --- a/examples/pytorch/language-modeling/run_mlm.py +++ b/examples/pytorch/language-modeling/run_mlm.py @@ -528,17 +528,19 @@ def main(): trainer.log_metrics("eval", metrics) trainer.save_metrics("eval", metrics) - if training_args.push_to_hub: - kwargs = {"finetuned_from": model_args.model_name_or_path, "tasks": "fill-mask"} - if data_args.dataset_name is not None: - kwargs["dataset_tags"] = data_args.dataset_name - if data_args.dataset_config_name is not None: - kwargs["dataset_args"] = data_args.dataset_config_name - kwargs["dataset"] = f"{data_args.dataset_name} {data_args.dataset_config_name}" - else: - kwargs["dataset"] = data_args.dataset_name + kwargs = {"finetuned_from": model_args.model_name_or_path, "tasks": "fill-mask"} + if data_args.dataset_name is not None: + kwargs["dataset_tags"] = data_args.dataset_name + if data_args.dataset_config_name is not None: + kwargs["dataset_args"] = data_args.dataset_config_name + kwargs["dataset"] = f"{data_args.dataset_name} {data_args.dataset_config_name}" + else: + kwargs["dataset"] = data_args.dataset_name + if training_args.push_to_hub: trainer.push_to_hub(**kwargs) + else: + trainer.create_model_card(**kwargs) def _mp_fn(index): diff --git a/examples/pytorch/language-modeling/run_plm.py b/examples/pytorch/language-modeling/run_plm.py index bfda65d47d..6314bab240 100755 --- a/examples/pytorch/language-modeling/run_plm.py +++ b/examples/pytorch/language-modeling/run_plm.py @@ -499,17 +499,19 @@ def main(): trainer.log_metrics("eval", metrics) trainer.save_metrics("eval", metrics) - if training_args.push_to_hub: - kwargs = {"finetuned_from": model_args.model_name_or_path, "tasks": "language-modeling"} - if data_args.dataset_name is not None: - kwargs["dataset_tags"] = data_args.dataset_name - if data_args.dataset_config_name is not None: - kwargs["dataset_args"] = data_args.dataset_config_name - kwargs["dataset"] = f"{data_args.dataset_name} {data_args.dataset_config_name}" - else: - kwargs["dataset"] = data_args.dataset_name + kwargs = {"finetuned_from": model_args.model_name_or_path, "tasks": "language-modeling"} + if data_args.dataset_name is not None: + kwargs["dataset_tags"] = data_args.dataset_name + if data_args.dataset_config_name is not None: + kwargs["dataset_args"] = data_args.dataset_config_name + kwargs["dataset"] = f"{data_args.dataset_name} {data_args.dataset_config_name}" + else: + kwargs["dataset"] = data_args.dataset_name + if training_args.push_to_hub: trainer.push_to_hub(**kwargs) + else: + trainer.create_model_card(**kwargs) def _mp_fn(index): diff --git a/examples/pytorch/multiple-choice/run_swag.py b/examples/pytorch/multiple-choice/run_swag.py index 0d958008e1..7d5494cacc 100755 --- a/examples/pytorch/multiple-choice/run_swag.py +++ b/examples/pytorch/multiple-choice/run_swag.py @@ -430,15 +430,19 @@ def main(): trainer.log_metrics("eval", metrics) trainer.save_metrics("eval", metrics) + kwargs = dict( + finetuned_from=model_args.model_name_or_path, + tasks="multiple-choice", + dataset_tags="swag", + dataset_args="regular", + dataset="SWAG", + language="en", + ) + if training_args.push_to_hub: - trainer.push_to_hub( - finetuned_from=model_args.model_name_or_path, - tasks="multiple-choice", - dataset_tags="swag", - dataset_args="regular", - dataset="SWAG", - language="en", - ) + trainer.push_to_hub(**kwargs) + else: + trainer.create_model_card(**kwargs) def _mp_fn(index): diff --git a/examples/pytorch/question-answering/run_qa.py b/examples/pytorch/question-answering/run_qa.py index 4eefda6618..8bb913aad9 100755 --- a/examples/pytorch/question-answering/run_qa.py +++ b/examples/pytorch/question-answering/run_qa.py @@ -623,17 +623,19 @@ def main(): trainer.log_metrics("predict", metrics) trainer.save_metrics("predict", metrics) - if training_args.push_to_hub: - kwargs = {"finetuned_from": model_args.model_name_or_path, "tasks": "question-answering"} - if data_args.dataset_name is not None: - kwargs["dataset_tags"] = data_args.dataset_name - if data_args.dataset_config_name is not None: - kwargs["dataset_args"] = data_args.dataset_config_name - kwargs["dataset"] = f"{data_args.dataset_name} {data_args.dataset_config_name}" - else: - kwargs["dataset"] = data_args.dataset_name + kwargs = {"finetuned_from": model_args.model_name_or_path, "tasks": "question-answering"} + if data_args.dataset_name is not None: + kwargs["dataset_tags"] = data_args.dataset_name + if data_args.dataset_config_name is not None: + kwargs["dataset_args"] = data_args.dataset_config_name + kwargs["dataset"] = f"{data_args.dataset_name} {data_args.dataset_config_name}" + else: + kwargs["dataset"] = data_args.dataset_name + if training_args.push_to_hub: trainer.push_to_hub(**kwargs) + else: + trainer.create_model_card(**kwargs) def _mp_fn(index): diff --git a/examples/pytorch/question-answering/run_qa_beam_search.py b/examples/pytorch/question-answering/run_qa_beam_search.py index fc596848cd..c5b07858a5 100755 --- a/examples/pytorch/question-answering/run_qa_beam_search.py +++ b/examples/pytorch/question-answering/run_qa_beam_search.py @@ -656,17 +656,19 @@ def main(): trainer.log_metrics("predict", metrics) trainer.save_metrics("predict", metrics) - if training_args.push_to_hub: - kwargs = {"finetuned_from": model_args.model_name_or_path, "tasks": "question-answering"} - if data_args.dataset_name is not None: - kwargs["dataset_tags"] = data_args.dataset_name - if data_args.dataset_config_name is not None: - kwargs["dataset_args"] = data_args.dataset_config_name - kwargs["dataset"] = f"{data_args.dataset_name} {data_args.dataset_config_name}" - else: - kwargs["dataset"] = data_args.dataset_name + kwargs = {"finetuned_from": model_args.model_name_or_path, "tasks": "question-answering"} + if data_args.dataset_name is not None: + kwargs["dataset_tags"] = data_args.dataset_name + if data_args.dataset_config_name is not None: + kwargs["dataset_args"] = data_args.dataset_config_name + kwargs["dataset"] = f"{data_args.dataset_name} {data_args.dataset_config_name}" + else: + kwargs["dataset"] = data_args.dataset_name + if training_args.push_to_hub: trainer.push_to_hub(**kwargs) + else: + trainer.create_model_card(**kwargs) def _mp_fn(index): diff --git a/examples/pytorch/summarization/run_summarization.py b/examples/pytorch/summarization/run_summarization.py index 77cde66f2b..7eedd57c24 100755 --- a/examples/pytorch/summarization/run_summarization.py +++ b/examples/pytorch/summarization/run_summarization.py @@ -622,17 +622,19 @@ def main(): with open(output_prediction_file, "w") as writer: writer.write("\n".join(predictions)) - if training_args.push_to_hub: - kwargs = {"finetuned_from": model_args.model_name_or_path, "tasks": "summarization"} - if data_args.dataset_name is not None: - kwargs["dataset_tags"] = data_args.dataset_name - if data_args.dataset_config_name is not None: - kwargs["dataset_args"] = data_args.dataset_config_name - kwargs["dataset"] = f"{data_args.dataset_name} {data_args.dataset_config_name}" - else: - kwargs["dataset"] = data_args.dataset_name + kwargs = {"finetuned_from": model_args.model_name_or_path, "tasks": "summarization"} + if data_args.dataset_name is not None: + kwargs["dataset_tags"] = data_args.dataset_name + if data_args.dataset_config_name is not None: + kwargs["dataset_args"] = data_args.dataset_config_name + kwargs["dataset"] = f"{data_args.dataset_name} {data_args.dataset_config_name}" + else: + kwargs["dataset"] = data_args.dataset_name + if training_args.push_to_hub: trainer.push_to_hub(**kwargs) + else: + trainer.create_model_card(**kwargs) return results diff --git a/examples/pytorch/text-classification/run_glue.py b/examples/pytorch/text-classification/run_glue.py index 526a005d50..183a5a61f8 100755 --- a/examples/pytorch/text-classification/run_glue.py +++ b/examples/pytorch/text-classification/run_glue.py @@ -546,15 +546,17 @@ def main(): item = label_list[item] writer.write(f"{index}\t{item}\n") - if training_args.push_to_hub: - kwargs = {"finetuned_from": model_args.model_name_or_path, "tasks": "text-classification"} - if data_args.task_name is not None: - kwargs["language"] = "en" - kwargs["dataset_tags"] = "glue" - kwargs["dataset_args"] = data_args.task_name - kwargs["dataset"] = f"GLUE {data_args.task_name.upper()}" + kwargs = {"finetuned_from": model_args.model_name_or_path, "tasks": "text-classification"} + if data_args.task_name is not None: + kwargs["language"] = "en" + kwargs["dataset_tags"] = "glue" + kwargs["dataset_args"] = data_args.task_name + kwargs["dataset"] = f"GLUE {data_args.task_name.upper()}" + if training_args.push_to_hub: trainer.push_to_hub(**kwargs) + else: + trainer.create_model_card(**kwargs) def _mp_fn(index): diff --git a/examples/pytorch/token-classification/run_ner.py b/examples/pytorch/token-classification/run_ner.py index 7bf642d95b..7aa506d03b 100755 --- a/examples/pytorch/token-classification/run_ner.py +++ b/examples/pytorch/token-classification/run_ner.py @@ -542,17 +542,19 @@ def main(): for prediction in true_predictions: writer.write(" ".join(prediction) + "\n") - if training_args.push_to_hub: - kwargs = {"finetuned_from": model_args.model_name_or_path, "tasks": "token-classification"} - if data_args.dataset_name is not None: - kwargs["dataset_tags"] = data_args.dataset_name - if data_args.dataset_config_name is not None: - kwargs["dataset_args"] = data_args.dataset_config_name - kwargs["dataset"] = f"{data_args.dataset_name} {data_args.dataset_config_name}" - else: - kwargs["dataset"] = data_args.dataset_name + kwargs = {"finetuned_from": model_args.model_name_or_path, "tasks": "token-classification"} + if data_args.dataset_name is not None: + kwargs["dataset_tags"] = data_args.dataset_name + if data_args.dataset_config_name is not None: + kwargs["dataset_args"] = data_args.dataset_config_name + kwargs["dataset"] = f"{data_args.dataset_name} {data_args.dataset_config_name}" + else: + kwargs["dataset"] = data_args.dataset_name + if training_args.push_to_hub: trainer.push_to_hub(**kwargs) + else: + trainer.create_model_card(**kwargs) def _mp_fn(index): diff --git a/examples/pytorch/translation/run_translation.py b/examples/pytorch/translation/run_translation.py index 39c8ac6e94..0742a50f51 100755 --- a/examples/pytorch/translation/run_translation.py +++ b/examples/pytorch/translation/run_translation.py @@ -590,21 +590,23 @@ def main(): with open(output_prediction_file, "w", encoding="utf-8") as writer: writer.write("\n".join(predictions)) + kwargs = {"finetuned_from": model_args.model_name_or_path, "tasks": "translation"} + if data_args.dataset_name is not None: + kwargs["dataset_tags"] = data_args.dataset_name + if data_args.dataset_config_name is not None: + kwargs["dataset_args"] = data_args.dataset_config_name + kwargs["dataset"] = f"{data_args.dataset_name} {data_args.dataset_config_name}" + else: + kwargs["dataset"] = data_args.dataset_name + + languages = [l for l in [data_args.source_lang, data_args.target_lang] if l is not None] + if len(languages) > 0: + kwargs["language"] = languages + if training_args.push_to_hub: - kwargs = {"finetuned_from": model_args.model_name_or_path, "tasks": "translation"} - if data_args.dataset_name is not None: - kwargs["dataset_tags"] = data_args.dataset_name - if data_args.dataset_config_name is not None: - kwargs["dataset_args"] = data_args.dataset_config_name - kwargs["dataset"] = f"{data_args.dataset_name} {data_args.dataset_config_name}" - else: - kwargs["dataset"] = data_args.dataset_name - - languages = [l for l in [data_args.source_lang, data_args.target_lang] if l is not None] - if len(languages) > 0: - kwargs["language"] = languages - trainer.push_to_hub(**kwargs) + else: + trainer.create_model_card(**kwargs) return results