From eacea530c1ed63b182433a1b462d33881bd20409 Mon Sep 17 00:00:00 2001 From: Cola <43774355+Colanim@users.noreply.github.com> Date: Thu, 21 May 2020 05:48:29 +0900 Subject: [PATCH] :rotating_light: Remove warning of deprecation (#4477) Remove warning of deprecated overload of addcdiv_ Fix #4451 --- src/transformers/optimization.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/transformers/optimization.py b/src/transformers/optimization.py index 5af1120c81..aaabd1c470 100644 --- a/src/transformers/optimization.py +++ b/src/transformers/optimization.py @@ -162,7 +162,7 @@ class AdamW(Optimizer): bias_correction2 = 1.0 - beta2 ** state["step"] step_size = step_size * math.sqrt(bias_correction2) / bias_correction1 - p.data.addcdiv_(-step_size, exp_avg, denom) + p.data.addcdiv_(exp_avg, denom, value=-step_size) # Just adding the square of the weights to the loss function is *not* # the correct way of using L2 regularization/weight decay with Adam,