From da2d8ca2652d9256fdd3edbdfb7253a3f21fb833 Mon Sep 17 00:00:00 2001 From: lukovnikov Date: Tue, 26 Feb 2019 17:16:06 +0100 Subject: [PATCH] fix for negative learning rate with warmup_linear in BertAdam (happens when t_total is specified incorrectly) + copied BERT optimization warmup functions to OpenAI optimization file + added comments --- pytorch_pretrained_bert/optimization.py | 2 +- pytorch_pretrained_bert/optimization_openai.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pytorch_pretrained_bert/optimization.py b/pytorch_pretrained_bert/optimization.py index 2b20cd87b7..7cde422453 100644 --- a/pytorch_pretrained_bert/optimization.py +++ b/pytorch_pretrained_bert/optimization.py @@ -37,7 +37,7 @@ def warmup_linear(x, warmup=0.002): After `t_total`-th training step, learning rate is zero. """ if x < warmup: return x/warmup - return max(1.0 - x, 0) + return max((x-1.)/(warmup-1.), 0) SCHEDULES = { 'warmup_cosine':warmup_cosine, diff --git a/pytorch_pretrained_bert/optimization_openai.py b/pytorch_pretrained_bert/optimization_openai.py index 5950865a17..6cdc6c8f0c 100644 --- a/pytorch_pretrained_bert/optimization_openai.py +++ b/pytorch_pretrained_bert/optimization_openai.py @@ -37,7 +37,7 @@ def warmup_linear(x, warmup=0.002): After `t_total`-th training step, learning rate is zero. """ if x < warmup: return x/warmup - return max(1.0 - x, 0) + return max((x-1.)/(warmup-1.), 0) SCHEDULES = { 'warmup_cosine':warmup_cosine,