LayerNorm initialization

The LayerNorm gamma (weight) and beta (bias) should be initialized with .fill_(1.0) and .zero_() respectively, instead of being sampled from a normal distribution.

reference links:

https://github.com/tensorflow/tensorflow/blob/989e78c412/tensorflow/contrib/layers/python/layers/layers.py#L2298

https://github.com/tensorflow/tensorflow/blob/989e78c412/tensorflow/contrib/layers/python/layers/layers.py#L2308
This commit is contained in:
Li Dong
2019-01-07 15:51:33 +08:00
committed by GitHub
parent 8da280ebbe
commit d0d9b384f2

View File

@@ -439,8 +439,8 @@ class PreTrainedBertModel(nn.Module):
# cf https://github.com/pytorch/pytorch/pull/5617 # cf https://github.com/pytorch/pytorch/pull/5617
module.weight.data.normal_(mean=0.0, std=self.config.initializer_range) module.weight.data.normal_(mean=0.0, std=self.config.initializer_range)
elif isinstance(module, BertLayerNorm): elif isinstance(module, BertLayerNorm):
module.bias.data.normal_(mean=0.0, std=self.config.initializer_range) module.bias.data.zero_()
module.weight.data.normal_(mean=0.0, std=self.config.initializer_range) module.weight.data.fill_(1.0)
if isinstance(module, nn.Linear) and module.bias is not None: if isinstance(module, nn.Linear) and module.bias is not None:
module.bias.data.zero_() module.bias.data.zero_()