Fix negative learning rate with warmup_linear in BertAdam (happens when t_total is specified incorrectly).
Also copy the BERT optimization warmup functions to the OpenAI optimization file and add comments.
This commit is contained in:
@@ -37,7 +37,7 @@ def warmup_linear(x, warmup=0.002):
|
|||||||
After `t_total`-th training step, learning rate is zero. """
|
After `t_total`-th training step, learning rate is zero. """
|
||||||
if x < warmup:
|
if x < warmup:
|
||||||
return x/warmup
|
return x/warmup
|
||||||
return max(1.0 - x, 0)
|
return max((x-1.)/(warmup-1.), 0)
|
||||||
|
|
||||||
SCHEDULES = {
|
SCHEDULES = {
|
||||||
'warmup_cosine':warmup_cosine,
|
'warmup_cosine':warmup_cosine,
|
||||||
|
|||||||
@@ -37,7 +37,7 @@ def warmup_linear(x, warmup=0.002):
|
|||||||
After `t_total`-th training step, learning rate is zero. """
|
After `t_total`-th training step, learning rate is zero. """
|
||||||
if x < warmup:
|
if x < warmup:
|
||||||
return x/warmup
|
return x/warmup
|
||||||
return max(1.0 - x, 0)
|
return max((x-1.)/(warmup-1.), 0)
|
||||||
|
|
||||||
SCHEDULES = {
|
SCHEDULES = {
|
||||||
'warmup_cosine':warmup_cosine,
|
'warmup_cosine':warmup_cosine,
|
||||||
|
|||||||
Reference in New Issue
Block a user