From 81e1e2489f71e9f0250034492b0d52616a450c77 Mon Sep 17 00:00:00 2001
From: Li Li
Date: Mon, 10 Dec 2018 02:08:38 -0800
Subject: [PATCH] Fix optimizer to work with horovod

---
 pytorch_pretrained_bert/optimization.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/pytorch_pretrained_bert/optimization.py b/pytorch_pretrained_bert/optimization.py
index 4266a8f83b..4314c84144 100644
--- a/pytorch_pretrained_bert/optimization.py
+++ b/pytorch_pretrained_bert/optimization.py
@@ -17,6 +17,7 @@
 import math
 import torch
 from torch.optim import Optimizer
+from torch.optim.optimizer import required
 from torch.nn.utils import clip_grad_norm_
 
 def warmup_cosine(x, warmup=0.002):
@@ -55,10 +56,10 @@ class BertAdam(Optimizer):
         weight_decay_rate: Weight decay. Default: 0.01
         max_grad_norm: Maximum norm for the gradients (-1 means no clipping). Default: 1.0
     """
-    def __init__(self, params, lr, warmup=-1, t_total=-1, schedule='warmup_linear',
+    def __init__(self, params, lr=required, warmup=-1, t_total=-1, schedule='warmup_linear',
                  b1=0.9, b2=0.999, e=1e-6, weight_decay_rate=0.01,
                  max_grad_norm=1.0):
-        if not lr >= 0.0:
+        if lr is not required and lr < 0.0:
             raise ValueError("Invalid learning rate: {} - should be >= 0.0".format(lr))
         if schedule not in SCHEDULES:
             raise ValueError("Invalid schedule parameter: {}".format(schedule))