From deb2b59073fc42a9b8dda119177d45778a5d3c32 Mon Sep 17 00:00:00 2001 From: bofeng huang Date: Mon, 22 Jan 2024 15:22:18 +0100 Subject: [PATCH] Fix lr_scheduler in no_trainer training scripts (#27872) * Fix lr_scheduler * Fix lr scheduler --- .../run_image_classification_no_trainer.py | 4 ++-- examples/pytorch/image-pretraining/run_mim_no_trainer.py | 4 ++-- examples/pytorch/language-modeling/run_clm_no_trainer.py | 4 ++-- examples/pytorch/language-modeling/run_mlm_no_trainer.py | 4 ++-- examples/pytorch/multiple-choice/run_swag_no_trainer.py | 4 ++-- .../question-answering/run_qa_beam_search_no_trainer.py | 4 ++-- examples/pytorch/question-answering/run_qa_no_trainer.py | 4 ++-- .../run_semantic_segmentation_no_trainer.py | 4 ++-- .../pytorch/summarization/run_summarization_no_trainer.py | 4 ++-- 9 files changed, 18 insertions(+), 18 deletions(-) diff --git a/examples/pytorch/image-classification/run_image_classification_no_trainer.py b/examples/pytorch/image-classification/run_image_classification_no_trainer.py index 9d4faa19d6..7c3aa725ea 100644 --- a/examples/pytorch/image-classification/run_image_classification_no_trainer.py +++ b/examples/pytorch/image-classification/run_image_classification_no_trainer.py @@ -438,8 +438,8 @@ def main(): lr_scheduler = get_scheduler( name=args.lr_scheduler_type, optimizer=optimizer, - num_warmup_steps=args.num_warmup_steps * args.gradient_accumulation_steps, - num_training_steps=args.max_train_steps * args.gradient_accumulation_steps, + num_warmup_steps=args.num_warmup_steps * accelerator.num_processes, + num_training_steps=args.max_train_steps if overrode_max_train_steps else args.max_train_steps * accelerator.num_processes, ) # Prepare everything with our `accelerator`. diff --git a/examples/pytorch/image-pretraining/run_mim_no_trainer.py b/examples/pytorch/image-pretraining/run_mim_no_trainer.py index 8e3cc6cddc..6d5c1849e5 100644 --- a/examples/pytorch/image-pretraining/run_mim_no_trainer.py +++ b/examples/pytorch/image-pretraining/run_mim_no_trainer.py @@ -626,8 +626,8 @@ def main(): lr_scheduler = get_scheduler( name=args.lr_scheduler_type, optimizer=optimizer, - num_warmup_steps=args.num_warmup_steps * args.gradient_accumulation_steps, - num_training_steps=args.max_train_steps * args.gradient_accumulation_steps, + num_warmup_steps=args.num_warmup_steps * accelerator.num_processes, + num_training_steps=args.max_train_steps if overrode_max_train_steps else args.max_train_steps * accelerator.num_processes, ) # Prepare everything with our `accelerator`. diff --git a/examples/pytorch/language-modeling/run_clm_no_trainer.py b/examples/pytorch/language-modeling/run_clm_no_trainer.py index 7c3a3d15b6..a8e9b608e4 100755 --- a/examples/pytorch/language-modeling/run_clm_no_trainer.py +++ b/examples/pytorch/language-modeling/run_clm_no_trainer.py @@ -526,8 +526,8 @@ def main(): lr_scheduler = get_scheduler( name=args.lr_scheduler_type, optimizer=optimizer, - num_warmup_steps=args.num_warmup_steps * args.gradient_accumulation_steps, - num_training_steps=args.max_train_steps * args.gradient_accumulation_steps, + num_warmup_steps=args.num_warmup_steps * accelerator.num_processes, + num_training_steps=args.max_train_steps if overrode_max_train_steps else args.max_train_steps * accelerator.num_processes, ) # Prepare everything with our `accelerator`. diff --git a/examples/pytorch/language-modeling/run_mlm_no_trainer.py b/examples/pytorch/language-modeling/run_mlm_no_trainer.py index a639944814..97860cd266 100755 --- a/examples/pytorch/language-modeling/run_mlm_no_trainer.py +++ b/examples/pytorch/language-modeling/run_mlm_no_trainer.py @@ -563,8 +563,8 @@ def main(): lr_scheduler = get_scheduler( name=args.lr_scheduler_type, optimizer=optimizer, - num_warmup_steps=args.num_warmup_steps * args.gradient_accumulation_steps, - num_training_steps=args.max_train_steps * args.gradient_accumulation_steps, + num_warmup_steps=args.num_warmup_steps * accelerator.num_processes, + num_training_steps=args.max_train_steps if overrode_max_train_steps else args.max_train_steps * accelerator.num_processes, ) # Prepare everything with our `accelerator`. diff --git a/examples/pytorch/multiple-choice/run_swag_no_trainer.py b/examples/pytorch/multiple-choice/run_swag_no_trainer.py index 0f88c764e9..533072fc0a 100755 --- a/examples/pytorch/multiple-choice/run_swag_no_trainer.py +++ b/examples/pytorch/multiple-choice/run_swag_no_trainer.py @@ -510,8 +510,8 @@ def main(): lr_scheduler = get_scheduler( name=args.lr_scheduler_type, optimizer=optimizer, - num_warmup_steps=args.num_warmup_steps * args.gradient_accumulation_steps, - num_training_steps=args.max_train_steps * args.gradient_accumulation_steps, + num_warmup_steps=args.num_warmup_steps * accelerator.num_processes, + num_training_steps=args.max_train_steps if overrode_max_train_steps else args.max_train_steps * accelerator.num_processes, ) # Prepare everything with our `accelerator`. diff --git a/examples/pytorch/question-answering/run_qa_beam_search_no_trainer.py b/examples/pytorch/question-answering/run_qa_beam_search_no_trainer.py index 5e328dffc6..65c38edca2 100644 --- a/examples/pytorch/question-answering/run_qa_beam_search_no_trainer.py +++ b/examples/pytorch/question-answering/run_qa_beam_search_no_trainer.py @@ -750,8 +750,8 @@ def main(): lr_scheduler = get_scheduler( name=args.lr_scheduler_type, optimizer=optimizer, - num_warmup_steps=args.num_warmup_steps * args.gradient_accumulation_steps, - num_training_steps=args.max_train_steps * args.gradient_accumulation_steps, + num_warmup_steps=args.num_warmup_steps * accelerator.num_processes, + num_training_steps=args.max_train_steps if overrode_max_train_steps else args.max_train_steps * accelerator.num_processes, ) # Prepare everything with our `accelerator`. diff --git a/examples/pytorch/question-answering/run_qa_no_trainer.py b/examples/pytorch/question-answering/run_qa_no_trainer.py index 846971317c..5cb00d5225 100755 --- a/examples/pytorch/question-answering/run_qa_no_trainer.py +++ b/examples/pytorch/question-answering/run_qa_no_trainer.py @@ -780,8 +780,8 @@ def main(): lr_scheduler = get_scheduler( name=args.lr_scheduler_type, optimizer=optimizer, - num_warmup_steps=args.num_warmup_steps * args.gradient_accumulation_steps, - num_training_steps=args.max_train_steps * args.gradient_accumulation_steps, + num_warmup_steps=args.num_warmup_steps * accelerator.num_processes, + num_training_steps=args.max_train_steps if overrode_max_train_steps else args.max_train_steps * accelerator.num_processes, ) # Prepare everything with our `accelerator`. diff --git a/examples/pytorch/semantic-segmentation/run_semantic_segmentation_no_trainer.py b/examples/pytorch/semantic-segmentation/run_semantic_segmentation_no_trainer.py index 6d16f127d1..315c0ce461 100644 --- a/examples/pytorch/semantic-segmentation/run_semantic_segmentation_no_trainer.py +++ b/examples/pytorch/semantic-segmentation/run_semantic_segmentation_no_trainer.py @@ -513,8 +513,8 @@ def main(): lr_scheduler = get_scheduler( name=args.lr_scheduler_type, optimizer=optimizer, - num_warmup_steps=args.num_warmup_steps * args.gradient_accumulation_steps, - num_training_steps=args.max_train_steps * args.gradient_accumulation_steps, + num_warmup_steps=args.num_warmup_steps * accelerator.num_processes, + num_training_steps=args.max_train_steps if overrode_max_train_steps else args.max_train_steps * accelerator.num_processes, ) # Prepare everything with our `accelerator`. diff --git a/examples/pytorch/summarization/run_summarization_no_trainer.py b/examples/pytorch/summarization/run_summarization_no_trainer.py index c998acbfe9..3212ef1af5 100644 --- a/examples/pytorch/summarization/run_summarization_no_trainer.py +++ b/examples/pytorch/summarization/run_summarization_no_trainer.py @@ -580,8 +580,8 @@ def main(): lr_scheduler = get_scheduler( name=args.lr_scheduler_type, optimizer=optimizer, - num_warmup_steps=args.num_warmup_steps * args.gradient_accumulation_steps, - num_training_steps=args.max_train_steps * args.gradient_accumulation_steps, + num_warmup_steps=args.num_warmup_steps * accelerator.num_processes, + num_training_steps=args.max_train_steps if overrode_max_train_steps else args.max_train_steps * accelerator.num_processes, ) # Prepare everything with our `accelerator`.