Fix steps bugs in no trainer examples (#24197)
Fix step bugs in no trainer + load checkpoint + grad acc
This commit is contained in:
@@ -559,10 +559,11 @@ def main():
|
||||
resume_step = None
|
||||
completed_steps = starting_epoch * num_update_steps_per_epoch
|
||||
else:
|
||||
resume_step = int(training_difference.replace("step_", ""))
|
||||
# need to multiply `gradient_accumulation_steps` to reflect real steps
|
||||
resume_step = int(training_difference.replace("step_", "")) * args.gradient_accumulation_steps
|
||||
starting_epoch = resume_step // len(train_dataloader)
|
||||
resume_step -= starting_epoch * len(train_dataloader)
|
||||
completed_steps = resume_step
|
||||
completed_steps = resume_step // args.gradient_accumulation_steps
|
||||
|
||||
# update the progress_bar if load from checkpoint
|
||||
progress_bar.update(completed_steps)
|
||||
|
||||
Reference in New Issue
Block a user