[FIX] not training when epoch is small (#3006)
* fix bug where, for small epochs and large gradient_accumulation_steps, the model never trains * black formatting * no need to change these files
This commit is contained in:
@@ -233,7 +233,11 @@ def train(args, train_dataset, model, tokenizer):
|
|||||||
loss.backward()
|
loss.backward()
|
||||||
|
|
||||||
tr_loss += loss.item()
|
tr_loss += loss.item()
|
||||||
if (step + 1) % args.gradient_accumulation_steps == 0:
|
if (step + 1) % args.gradient_accumulation_steps == 0 or (
|
||||||
|
# last step in epoch but step is always smaller than gradient_accumulation_steps
|
||||||
|
len(epoch_iterator) <= args.gradient_accumulation_steps
|
||||||
|
and (step + 1) == len(epoch_iterator)
|
||||||
|
):
|
||||||
if args.fp16:
|
if args.fp16:
|
||||||
torch.nn.utils.clip_grad_norm_(amp.master_params(optimizer), args.max_grad_norm)
|
torch.nn.utils.clip_grad_norm_(amp.master_params(optimizer), args.max_grad_norm)
|
||||||
else:
|
else:
|
||||||
|
|||||||
Reference in New Issue
Block a user