chore: correct update_step and correct gradient_accumulation_steps (#26068)
This commit is contained in:
@@ -477,8 +477,8 @@ def main():
|
|||||||
# need to multiply `gradient_accumulation_steps` to reflect real steps
|
# need to multiply `gradient_accumulation_steps` to reflect real steps
|
||||||
resume_step = int(training_difference.replace("step_", "")) * args.gradient_accumulation_steps
|
resume_step = int(training_difference.replace("step_", "")) * args.gradient_accumulation_steps
|
||||||
starting_epoch = resume_step // len(train_dataloader)
|
starting_epoch = resume_step // len(train_dataloader)
|
||||||
|
completed_steps = resume_step // args.gradient_accumulation_steps
|
||||||
resume_step -= starting_epoch * len(train_dataloader)
|
resume_step -= starting_epoch * len(train_dataloader)
|
||||||
completed_steps = resume_step // args.gradient_accumulation_step
|
|
||||||
|
|
||||||
# update the progress_bar if load from checkpoint
|
# update the progress_bar if load from checkpoint
|
||||||
progress_bar.update(completed_steps)
|
progress_bar.update(completed_steps)
|
||||||
|
|||||||
@@ -701,8 +701,8 @@ def main():
|
|||||||
# need to multiply `gradient_accumulation_steps` to reflect real steps
|
# need to multiply `gradient_accumulation_steps` to reflect real steps
|
||||||
resume_step = int(training_difference.replace("step_", "")) * args.gradient_accumulation_steps
|
resume_step = int(training_difference.replace("step_", "")) * args.gradient_accumulation_steps
|
||||||
starting_epoch = resume_step // len(train_dataloader)
|
starting_epoch = resume_step // len(train_dataloader)
|
||||||
resume_step -= starting_epoch * len(train_dataloader)
|
|
||||||
completed_steps = resume_step // args.gradient_accumulation_steps
|
completed_steps = resume_step // args.gradient_accumulation_steps
|
||||||
|
resume_step -= starting_epoch * len(train_dataloader)
|
||||||
|
|
||||||
# update the progress_bar if load from checkpoint
|
# update the progress_bar if load from checkpoint
|
||||||
progress_bar.update(completed_steps)
|
progress_bar.update(completed_steps)
|
||||||
|
|||||||
@@ -636,8 +636,8 @@ def main():
|
|||||||
# need to multiply `gradient_accumulation_steps` to reflect real steps
|
# need to multiply `gradient_accumulation_steps` to reflect real steps
|
||||||
resume_step = int(training_difference.replace("step_", "")) * args.gradient_accumulation_steps
|
resume_step = int(training_difference.replace("step_", "")) * args.gradient_accumulation_steps
|
||||||
starting_epoch = resume_step // len(train_dataloader)
|
starting_epoch = resume_step // len(train_dataloader)
|
||||||
resume_step -= starting_epoch * len(train_dataloader)
|
|
||||||
completed_steps = resume_step // args.gradient_accumulation_steps
|
completed_steps = resume_step // args.gradient_accumulation_steps
|
||||||
|
resume_step -= starting_epoch * len(train_dataloader)
|
||||||
|
|
||||||
# update the progress_bar if load from checkpoint
|
# update the progress_bar if load from checkpoint
|
||||||
progress_bar.update(completed_steps)
|
progress_bar.update(completed_steps)
|
||||||
|
|||||||
@@ -583,8 +583,8 @@ def main():
|
|||||||
# need to multiply `gradient_accumulation_steps` to reflect real steps
|
# need to multiply `gradient_accumulation_steps` to reflect real steps
|
||||||
resume_step = int(training_difference.replace("step_", "")) * args.gradient_accumulation_steps
|
resume_step = int(training_difference.replace("step_", "")) * args.gradient_accumulation_steps
|
||||||
starting_epoch = resume_step // len(train_dataloader)
|
starting_epoch = resume_step // len(train_dataloader)
|
||||||
|
completed_steps = resume_step // args.gradient_accumulation_steps
|
||||||
resume_step -= starting_epoch * len(train_dataloader)
|
resume_step -= starting_epoch * len(train_dataloader)
|
||||||
completed_steps = resume_step // args.gradient_accumulation_stepp
|
|
||||||
|
|
||||||
# update the progress_bar if load from checkpoint
|
# update the progress_bar if load from checkpoint
|
||||||
progress_bar.update(completed_steps)
|
progress_bar.update(completed_steps)
|
||||||
|
|||||||
@@ -820,8 +820,8 @@ def main():
|
|||||||
# need to multiply `gradient_accumulation_steps` to reflect real steps
|
# need to multiply `gradient_accumulation_steps` to reflect real steps
|
||||||
resume_step = int(training_difference.replace("step_", "")) * args.gradient_accumulation_steps
|
resume_step = int(training_difference.replace("step_", "")) * args.gradient_accumulation_steps
|
||||||
starting_epoch = resume_step // len(train_dataloader)
|
starting_epoch = resume_step // len(train_dataloader)
|
||||||
|
completed_steps = resume_step // args.gradient_accumulation_steps
|
||||||
resume_step -= starting_epoch * len(train_dataloader)
|
resume_step -= starting_epoch * len(train_dataloader)
|
||||||
completed_steps = resume_step // args.gradient_accumulation_stepp
|
|
||||||
|
|
||||||
# update the progress_bar if load from checkpoint
|
# update the progress_bar if load from checkpoint
|
||||||
progress_bar.update(completed_steps)
|
progress_bar.update(completed_steps)
|
||||||
|
|||||||
@@ -848,10 +848,11 @@ def main():
|
|||||||
resume_step = None
|
resume_step = None
|
||||||
completed_steps = starting_epoch * num_update_steps_per_epoch
|
completed_steps = starting_epoch * num_update_steps_per_epoch
|
||||||
else:
|
else:
|
||||||
resume_step = int(training_difference.replace("step_", ""))
|
# need to multiply `gradient_accumulation_steps` to reflect real steps
|
||||||
|
resume_step = int(training_difference.replace("step_", "")) * args.gradient_accumulation_steps
|
||||||
starting_epoch = resume_step // len(train_dataloader)
|
starting_epoch = resume_step // len(train_dataloader)
|
||||||
|
completed_steps = resume_step // args.gradient_accumulation_steps
|
||||||
resume_step -= starting_epoch * len(train_dataloader)
|
resume_step -= starting_epoch * len(train_dataloader)
|
||||||
completed_steps = resume_step // args.gradient_accumulation_stepp
|
|
||||||
|
|
||||||
# update the progress_bar if load from checkpoint
|
# update the progress_bar if load from checkpoint
|
||||||
progress_bar.update(completed_steps)
|
progress_bar.update(completed_steps)
|
||||||
|
|||||||
@@ -581,8 +581,8 @@ def main():
|
|||||||
# need to multiply `gradient_accumulation_steps` to reflect real steps
|
# need to multiply `gradient_accumulation_steps` to reflect real steps
|
||||||
resume_step = int(training_difference.replace("step_", "")) * args.gradient_accumulation_steps
|
resume_step = int(training_difference.replace("step_", "")) * args.gradient_accumulation_steps
|
||||||
starting_epoch = resume_step // len(train_dataloader)
|
starting_epoch = resume_step // len(train_dataloader)
|
||||||
|
completed_steps = resume_step // args.gradient_accumulation_steps
|
||||||
resume_step -= starting_epoch * len(train_dataloader)
|
resume_step -= starting_epoch * len(train_dataloader)
|
||||||
completed_steps = resume_step // args.gradient_accumulation_stepp
|
|
||||||
|
|
||||||
# update the progress_bar if load from checkpoint
|
# update the progress_bar if load from checkpoint
|
||||||
progress_bar.update(completed_steps)
|
progress_bar.update(completed_steps)
|
||||||
|
|||||||
@@ -652,8 +652,8 @@ def main():
|
|||||||
# need to multiply `gradient_accumulation_steps` to reflect real steps
|
# need to multiply `gradient_accumulation_steps` to reflect real steps
|
||||||
resume_step = int(training_difference.replace("step_", "")) * args.gradient_accumulation_steps
|
resume_step = int(training_difference.replace("step_", "")) * args.gradient_accumulation_steps
|
||||||
starting_epoch = resume_step // len(train_dataloader)
|
starting_epoch = resume_step // len(train_dataloader)
|
||||||
|
completed_steps = resume_step // args.gradient_accumulation_steps
|
||||||
resume_step -= starting_epoch * len(train_dataloader)
|
resume_step -= starting_epoch * len(train_dataloader)
|
||||||
completed_steps = resume_step // args.gradient_accumulation_stepp
|
|
||||||
|
|
||||||
# update the progress_bar if load from checkpoint
|
# update the progress_bar if load from checkpoint
|
||||||
progress_bar.update(completed_steps)
|
progress_bar.update(completed_steps)
|
||||||
|
|||||||
@@ -530,8 +530,8 @@ def main():
|
|||||||
# need to multiply `gradient_accumulation_steps` to reflect real steps
|
# need to multiply `gradient_accumulation_steps` to reflect real steps
|
||||||
resume_step = int(training_difference.replace("step_", "")) * args.gradient_accumulation_steps
|
resume_step = int(training_difference.replace("step_", "")) * args.gradient_accumulation_steps
|
||||||
starting_epoch = resume_step // len(train_dataloader)
|
starting_epoch = resume_step // len(train_dataloader)
|
||||||
|
completed_steps = resume_step // args.gradient_accumulation_steps
|
||||||
resume_step -= starting_epoch * len(train_dataloader)
|
resume_step -= starting_epoch * len(train_dataloader)
|
||||||
completed_steps = resume_step // args.gradient_accumulation_step
|
|
||||||
|
|
||||||
# update the progress_bar if load from checkpoint
|
# update the progress_bar if load from checkpoint
|
||||||
progress_bar.update(completed_steps)
|
progress_bar.update(completed_steps)
|
||||||
|
|||||||
@@ -690,8 +690,8 @@ def main():
|
|||||||
# need to multiply `gradient_accumulation_steps` to reflect real steps
|
# need to multiply `gradient_accumulation_steps` to reflect real steps
|
||||||
resume_step = int(training_difference.replace("step_", "")) * args.gradient_accumulation_steps
|
resume_step = int(training_difference.replace("step_", "")) * args.gradient_accumulation_steps
|
||||||
starting_epoch = resume_step // len(train_dataloader)
|
starting_epoch = resume_step // len(train_dataloader)
|
||||||
|
completed_steps = resume_step // args.gradient_accumulation_steps
|
||||||
resume_step -= starting_epoch * len(train_dataloader)
|
resume_step -= starting_epoch * len(train_dataloader)
|
||||||
completed_steps = resume_step // args.gradient_accumulation_stepp
|
|
||||||
|
|
||||||
# update the progress_bar if load from checkpoint
|
# update the progress_bar if load from checkpoint
|
||||||
progress_bar.update(completed_steps)
|
progress_bar.update(completed_steps)
|
||||||
|
|||||||
@@ -633,8 +633,8 @@ def main():
|
|||||||
# need to multiply `gradient_accumulation_steps` to reflect real steps
|
# need to multiply `gradient_accumulation_steps` to reflect real steps
|
||||||
resume_step = int(training_difference.replace("step_", "")) * args.gradient_accumulation_steps
|
resume_step = int(training_difference.replace("step_", "")) * args.gradient_accumulation_steps
|
||||||
starting_epoch = resume_step // len(train_dataloader)
|
starting_epoch = resume_step // len(train_dataloader)
|
||||||
|
completed_steps = resume_step // args.gradient_accumulation_steps
|
||||||
resume_step -= starting_epoch * len(train_dataloader)
|
resume_step -= starting_epoch * len(train_dataloader)
|
||||||
completed_steps = resume_step // args.gradient_accumulation_stepp
|
|
||||||
|
|
||||||
# update the progress_bar if load from checkpoint
|
# update the progress_bar if load from checkpoint
|
||||||
progress_bar.update(completed_steps)
|
progress_bar.update(completed_steps)
|
||||||
|
|||||||
Reference in New Issue
Block a user