Properly calculate the total train iterations and recalculate num epochs in no_trainer scripts (#17856)
This commit is contained in:
@@ -546,8 +546,6 @@ def main():
|
||||
|
||||
if args.max_train_steps is None:
|
||||
args.max_train_steps = args.num_train_epochs * num_update_steps_per_epoch
|
||||
else:
|
||||
args.num_train_epochs = math.ceil(args.max_train_steps / num_update_steps_per_epoch)
|
||||
|
||||
lr_scheduler = get_scheduler(
|
||||
name=args.lr_scheduler_type,
|
||||
@@ -556,6 +554,9 @@ def main():
|
||||
num_training_steps=args.max_train_steps,
|
||||
)
|
||||
|
||||
# Afterwards we recalculate our number of training epochs
|
||||
args.num_train_epochs = math.ceil(args.max_train_steps / num_update_steps_per_epoch)
|
||||
|
||||
# 5. Train
|
||||
total_batch_size = args.per_device_train_batch_size * accelerator.num_processes * args.gradient_accumulation_steps
|
||||
|
||||
|
||||
Reference in New Issue
Block a user