Removed the division of global_step by num_train_optimizer in the lr_this_step computation.
This commit is contained in:
@@ -315,8 +315,7 @@ def main():
|
|||||||
if args.fp16:
|
if args.fp16:
|
||||||
# modify learning rate with special warm up BERT uses
|
# modify learning rate with special warm up BERT uses
|
||||||
# if args.fp16 is False, BertAdam is used that handles this automatically
|
# if args.fp16 is False, BertAdam is used that handles this automatically
|
||||||
lr_this_step = args.learning_rate * warmup_linear.get_lr(global_step,
|
lr_this_step = args.learning_rate * warmup_linear.get_lr(global_step, args.warmup_proportion)
|
||||||
args.warmup_proportion)
|
|
||||||
for param_group in optimizer.param_groups:
|
for param_group in optimizer.param_groups:
|
||||||
param_group['lr'] = lr_this_step
|
param_group['lr'] = lr_this_step
|
||||||
optimizer.step()
|
optimizer.step()
|
||||||
|
|||||||
@@ -604,8 +604,7 @@ def main():
|
|||||||
if args.fp16:
|
if args.fp16:
|
||||||
# modify learning rate with special warm up BERT uses
|
# modify learning rate with special warm up BERT uses
|
||||||
# if args.fp16 is False, BertAdam is used that handles this automatically
|
# if args.fp16 is False, BertAdam is used that handles this automatically
|
||||||
lr_this_step = args.learning_rate * warmup_linear.get_lr(global_step,
|
lr_this_step = args.learning_rate * warmup_linear.get_lr(global_step, args.warmup_proportion)
|
||||||
args.warmup_proportion)
|
|
||||||
for param_group in optimizer.param_groups:
|
for param_group in optimizer.param_groups:
|
||||||
param_group['lr'] = lr_this_step
|
param_group['lr'] = lr_this_step
|
||||||
optimizer.step()
|
optimizer.step()
|
||||||
|
|||||||
@@ -855,8 +855,7 @@ def main():
|
|||||||
if args.fp16:
|
if args.fp16:
|
||||||
# modify learning rate with special warm up BERT uses
|
# modify learning rate with special warm up BERT uses
|
||||||
# if args.fp16 is False, BertAdam is used that handles this automatically
|
# if args.fp16 is False, BertAdam is used that handles this automatically
|
||||||
lr_this_step = args.learning_rate * warmup_linear.get_lr(global_step,
|
lr_this_step = args.learning_rate * warmup_linear.get_lr(global_step, args.warmup_proportion)
|
||||||
args.warmup_proportion)
|
|
||||||
for param_group in optimizer.param_groups:
|
for param_group in optimizer.param_groups:
|
||||||
param_group['lr'] = lr_this_step
|
param_group['lr'] = lr_this_step
|
||||||
optimizer.step()
|
optimizer.step()
|
||||||
|
|||||||
@@ -1016,8 +1016,7 @@ def main():
|
|||||||
if args.fp16:
|
if args.fp16:
|
||||||
# modify learning rate with special warm up BERT uses
|
# modify learning rate with special warm up BERT uses
|
||||||
# if args.fp16 is False, BertAdam is used and handles this automatically
|
# if args.fp16 is False, BertAdam is used and handles this automatically
|
||||||
lr_this_step = args.learning_rate * warmup_linear.get_lr(global_step,
|
lr_this_step = args.learning_rate * warmup_linear.get_lr(global_step, args.warmup_proportion)
|
||||||
args.warmup_proportion)
|
|
||||||
for param_group in optimizer.param_groups:
|
for param_group in optimizer.param_groups:
|
||||||
param_group['lr'] = lr_this_step
|
param_group['lr'] = lr_this_step
|
||||||
optimizer.step()
|
optimizer.step()
|
||||||
|
|||||||
@@ -467,8 +467,7 @@ def main():
|
|||||||
if args.fp16:
|
if args.fp16:
|
||||||
# modify learning rate with special warm up BERT uses
|
# modify learning rate with special warm up BERT uses
|
||||||
# if args.fp16 is False, BertAdam is used that handles this automatically
|
# if args.fp16 is False, BertAdam is used that handles this automatically
|
||||||
lr_this_step = args.learning_rate * warmup_linear.get_lr(global_step,
|
lr_this_step = args.learning_rate * warmup_linear.get_lr(global_step, args.warmup_proportion)
|
||||||
args.warmup_proportion)
|
|
||||||
for param_group in optimizer.param_groups:
|
for param_group in optimizer.param_groups:
|
||||||
param_group['lr'] = lr_this_step
|
param_group['lr'] = lr_this_step
|
||||||
optimizer.step()
|
optimizer.step()
|
||||||
|
|||||||
Reference in New Issue
Block a user