From d0d9b384f2578869066d2d028ee7339bbb661e75 Mon Sep 17 00:00:00 2001 From: Li Dong Date: Mon, 7 Jan 2019 15:51:33 +0800 Subject: [PATCH] LayerNorm initialization The LayerNorm gamma (weight) and beta (bias) should be initialized with .fill_(1.0) and .zero_(), respectively. Reference links: https://github.com/tensorflow/tensorflow/blob/989e78c412a7e0f5361d4d7dfdfb230c8136e749/tensorflow/contrib/layers/python/layers/layers.py#L2298 https://github.com/tensorflow/tensorflow/blob/989e78c412a7e0f5361d4d7dfdfb230c8136e749/tensorflow/contrib/layers/python/layers/layers.py#L2308 --- pytorch_pretrained_bert/modeling.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pytorch_pretrained_bert/modeling.py b/pytorch_pretrained_bert/modeling.py index acdc741f6d..83562ea573 100644 --- a/pytorch_pretrained_bert/modeling.py +++ b/pytorch_pretrained_bert/modeling.py @@ -439,8 +439,8 @@ class PreTrainedBertModel(nn.Module): # cf https://github.com/pytorch/pytorch/pull/5617 module.weight.data.normal_(mean=0.0, std=self.config.initializer_range) elif isinstance(module, BertLayerNorm): - module.bias.data.normal_(mean=0.0, std=self.config.initializer_range) - module.weight.data.normal_(mean=0.0, std=self.config.initializer_range) + module.bias.data.zero_() + module.weight.data.fill_(1.0) if isinstance(module, nn.Linear) and module.bias is not None: module.bias.data.zero_()