Fix lr_scheduler in no_trainer training scripts (#27872)
* Fix lr_scheduler
* Fix lr scheduler
This commit is contained in:
@@ -438,8 +438,8 @@ def main():
|
|||||||
lr_scheduler = get_scheduler(
|
lr_scheduler = get_scheduler(
|
||||||
name=args.lr_scheduler_type,
|
name=args.lr_scheduler_type,
|
||||||
optimizer=optimizer,
|
optimizer=optimizer,
|
||||||
num_warmup_steps=args.num_warmup_steps * args.gradient_accumulation_steps,
|
num_warmup_steps=args.num_warmup_steps * accelerator.num_processes,
|
||||||
num_training_steps=args.max_train_steps * args.gradient_accumulation_steps,
|
num_training_steps=args.max_train_steps if overrode_max_train_steps else args.max_train_steps * accelerator.num_processes,
|
||||||
)
|
)
|
||||||
|
|
||||||
# Prepare everything with our `accelerator`.
|
# Prepare everything with our `accelerator`.
|
||||||
|
|||||||
@@ -626,8 +626,8 @@ def main():
|
|||||||
lr_scheduler = get_scheduler(
|
lr_scheduler = get_scheduler(
|
||||||
name=args.lr_scheduler_type,
|
name=args.lr_scheduler_type,
|
||||||
optimizer=optimizer,
|
optimizer=optimizer,
|
||||||
num_warmup_steps=args.num_warmup_steps * args.gradient_accumulation_steps,
|
num_warmup_steps=args.num_warmup_steps * accelerator.num_processes,
|
||||||
num_training_steps=args.max_train_steps * args.gradient_accumulation_steps,
|
num_training_steps=args.max_train_steps if overrode_max_train_steps else args.max_train_steps * accelerator.num_processes,
|
||||||
)
|
)
|
||||||
|
|
||||||
# Prepare everything with our `accelerator`.
|
# Prepare everything with our `accelerator`.
|
||||||
|
|||||||
@@ -526,8 +526,8 @@ def main():
|
|||||||
lr_scheduler = get_scheduler(
|
lr_scheduler = get_scheduler(
|
||||||
name=args.lr_scheduler_type,
|
name=args.lr_scheduler_type,
|
||||||
optimizer=optimizer,
|
optimizer=optimizer,
|
||||||
num_warmup_steps=args.num_warmup_steps * args.gradient_accumulation_steps,
|
num_warmup_steps=args.num_warmup_steps * accelerator.num_processes,
|
||||||
num_training_steps=args.max_train_steps * args.gradient_accumulation_steps,
|
num_training_steps=args.max_train_steps if overrode_max_train_steps else args.max_train_steps * accelerator.num_processes,
|
||||||
)
|
)
|
||||||
|
|
||||||
# Prepare everything with our `accelerator`.
|
# Prepare everything with our `accelerator`.
|
||||||
|
|||||||
@@ -563,8 +563,8 @@ def main():
|
|||||||
lr_scheduler = get_scheduler(
|
lr_scheduler = get_scheduler(
|
||||||
name=args.lr_scheduler_type,
|
name=args.lr_scheduler_type,
|
||||||
optimizer=optimizer,
|
optimizer=optimizer,
|
||||||
num_warmup_steps=args.num_warmup_steps * args.gradient_accumulation_steps,
|
num_warmup_steps=args.num_warmup_steps * accelerator.num_processes,
|
||||||
num_training_steps=args.max_train_steps * args.gradient_accumulation_steps,
|
num_training_steps=args.max_train_steps if overrode_max_train_steps else args.max_train_steps * accelerator.num_processes,
|
||||||
)
|
)
|
||||||
|
|
||||||
# Prepare everything with our `accelerator`.
|
# Prepare everything with our `accelerator`.
|
||||||
|
|||||||
@@ -510,8 +510,8 @@ def main():
|
|||||||
lr_scheduler = get_scheduler(
|
lr_scheduler = get_scheduler(
|
||||||
name=args.lr_scheduler_type,
|
name=args.lr_scheduler_type,
|
||||||
optimizer=optimizer,
|
optimizer=optimizer,
|
||||||
num_warmup_steps=args.num_warmup_steps * args.gradient_accumulation_steps,
|
num_warmup_steps=args.num_warmup_steps * accelerator.num_processes,
|
||||||
num_training_steps=args.max_train_steps * args.gradient_accumulation_steps,
|
num_training_steps=args.max_train_steps if overrode_max_train_steps else args.max_train_steps * accelerator.num_processes,
|
||||||
)
|
)
|
||||||
|
|
||||||
# Prepare everything with our `accelerator`.
|
# Prepare everything with our `accelerator`.
|
||||||
|
|||||||
@@ -750,8 +750,8 @@ def main():
|
|||||||
lr_scheduler = get_scheduler(
|
lr_scheduler = get_scheduler(
|
||||||
name=args.lr_scheduler_type,
|
name=args.lr_scheduler_type,
|
||||||
optimizer=optimizer,
|
optimizer=optimizer,
|
||||||
num_warmup_steps=args.num_warmup_steps * args.gradient_accumulation_steps,
|
num_warmup_steps=args.num_warmup_steps * accelerator.num_processes,
|
||||||
num_training_steps=args.max_train_steps * args.gradient_accumulation_steps,
|
num_training_steps=args.max_train_steps if overrode_max_train_steps else args.max_train_steps * accelerator.num_processes,
|
||||||
)
|
)
|
||||||
|
|
||||||
# Prepare everything with our `accelerator`.
|
# Prepare everything with our `accelerator`.
|
||||||
|
|||||||
@@ -780,8 +780,8 @@ def main():
|
|||||||
lr_scheduler = get_scheduler(
|
lr_scheduler = get_scheduler(
|
||||||
name=args.lr_scheduler_type,
|
name=args.lr_scheduler_type,
|
||||||
optimizer=optimizer,
|
optimizer=optimizer,
|
||||||
num_warmup_steps=args.num_warmup_steps * args.gradient_accumulation_steps,
|
num_warmup_steps=args.num_warmup_steps * accelerator.num_processes,
|
||||||
num_training_steps=args.max_train_steps * args.gradient_accumulation_steps,
|
num_training_steps=args.max_train_steps if overrode_max_train_steps else args.max_train_steps * accelerator.num_processes,
|
||||||
)
|
)
|
||||||
|
|
||||||
# Prepare everything with our `accelerator`.
|
# Prepare everything with our `accelerator`.
|
||||||
|
|||||||
@@ -513,8 +513,8 @@ def main():
|
|||||||
lr_scheduler = get_scheduler(
|
lr_scheduler = get_scheduler(
|
||||||
name=args.lr_scheduler_type,
|
name=args.lr_scheduler_type,
|
||||||
optimizer=optimizer,
|
optimizer=optimizer,
|
||||||
num_warmup_steps=args.num_warmup_steps * args.gradient_accumulation_steps,
|
num_warmup_steps=args.num_warmup_steps * accelerator.num_processes,
|
||||||
num_training_steps=args.max_train_steps * args.gradient_accumulation_steps,
|
num_training_steps=args.max_train_steps if overrode_max_train_steps else args.max_train_steps * accelerator.num_processes,
|
||||||
)
|
)
|
||||||
|
|
||||||
# Prepare everything with our `accelerator`.
|
# Prepare everything with our `accelerator`.
|
||||||
|
|||||||
@@ -580,8 +580,8 @@ def main():
|
|||||||
lr_scheduler = get_scheduler(
|
lr_scheduler = get_scheduler(
|
||||||
name=args.lr_scheduler_type,
|
name=args.lr_scheduler_type,
|
||||||
optimizer=optimizer,
|
optimizer=optimizer,
|
||||||
num_warmup_steps=args.num_warmup_steps * args.gradient_accumulation_steps,
|
num_warmup_steps=args.num_warmup_steps * accelerator.num_processes,
|
||||||
num_training_steps=args.max_train_steps * args.gradient_accumulation_steps,
|
num_training_steps=args.max_train_steps if overrode_max_train_steps else args.max_train_steps * accelerator.num_processes,
|
||||||
)
|
)
|
||||||
|
|
||||||
# Prepare everything with our `accelerator`.
|
# Prepare everything with our `accelerator`.
|
||||||
|
|||||||
Reference in New Issue
Block a user