update optimizer documentation

This commit is contained in:
thomwolf
2018-11-05 16:09:21 +01:00
parent 7394eb47a5
commit bab5d13077

View File

@@ -42,17 +42,18 @@ SCHEDULES = {
class BERTAdam(Optimizer):
"""Implements Open AI version of Adam algorithm with weight decay fix.
"""Implements BERT version of Adam algorithm with weight decay fix (and no ).
Params:
lr,
warmup=-1,
t_total=-1,
schedule='warmup_linear',
b1=0.9,
b2=0.999,
e=1e-6,
weight_decay_rate=0.01,
max_grad_norm=1.0
lr: learning rate
warmup: portion of t_total for the warmup, -1 means no warmup. Default: -1
t_total: total number of training steps for the learning
rate schedule, -1 means constant learning rate. Default: -1
schedule: schedule to use for the warmup (see above). Default: 'warmup_linear'
b1: Adams b1. Default: 0.9
b2: Adams b2. Default: 0.999
e: Adams epsilon. Default: 1e-6
weight_decay_rate: Weight decay. Default: 0.01
max_grad_norm: Maximum norm for the gradients (-1 means no clipping). Default: 1.0
"""
def __init__(self, params, lr, warmup=-1, t_total=-1, schedule='warmup_linear',
b1=0.9, b2=0.999, e=1e-6, weight_decay_rate=0.01,