fix layer norm epsilon in OpenAI GPT
This commit is contained in:
@@ -141,6 +141,7 @@ class OpenAIGPTConfig(object):
|
|||||||
resid_pdrop=0.1,
|
resid_pdrop=0.1,
|
||||||
embd_pdrop=0.1,
|
embd_pdrop=0.1,
|
||||||
attn_pdrop=0.1,
|
attn_pdrop=0.1,
|
||||||
|
layer_norm_epsilon=1e-5,
|
||||||
initializer_range=0.02,
|
initializer_range=0.02,
|
||||||
):
|
):
|
||||||
"""Constructs OpenAIGPTConfig.
|
"""Constructs OpenAIGPTConfig.
|
||||||
@@ -161,6 +162,7 @@ class OpenAIGPTConfig(object):
|
|||||||
attn_pdrop: The dropout ratio for the attention
|
attn_pdrop: The dropout ratio for the attention
|
||||||
probabilities.
|
probabilities.
|
||||||
embd_pdrop: The dropout ratio for the embeddings.
|
embd_pdrop: The dropout ratio for the embeddings.
|
||||||
|
layer_norm_epsilon: The epsilon to use in the layer normalization layers.
|
||||||
initializer_range: The stddev of the truncated_normal_initializer for
|
initializer_range: The stddev of the truncated_normal_initializer for
|
||||||
initializing all weight matrices.
|
initializing all weight matrices.
|
||||||
"""
|
"""
|
||||||
@@ -182,6 +184,7 @@ class OpenAIGPTConfig(object):
|
|||||||
self.resid_pdrop = resid_pdrop
|
self.resid_pdrop = resid_pdrop
|
||||||
self.embd_pdrop = embd_pdrop
|
self.embd_pdrop = embd_pdrop
|
||||||
self.attn_pdrop = attn_pdrop
|
self.attn_pdrop = attn_pdrop
|
||||||
|
self.layer_norm_epsilon = layer_norm_epsilon
|
||||||
self.initializer_range = initializer_range
|
self.initializer_range = initializer_range
|
||||||
else:
|
else:
|
||||||
raise ValueError(
|
raise ValueError(
|
||||||
@@ -318,9 +321,9 @@ class Block(nn.Module):
|
|||||||
super(Block, self).__init__()
|
super(Block, self).__init__()
|
||||||
nx = config.n_embd
|
nx = config.n_embd
|
||||||
self.attn = Attention(nx, n_ctx, config, scale)
|
self.attn = Attention(nx, n_ctx, config, scale)
|
||||||
self.ln_1 = LayerNorm(nx)
|
self.ln_1 = LayerNorm(nx, eps=config.layer_norm_epsilon)
|
||||||
self.mlp = MLP(4 * nx, config)
|
self.mlp = MLP(4 * nx, config)
|
||||||
self.ln_2 = LayerNorm(nx)
|
self.ln_2 = LayerNorm(nx, eps=config.layer_norm_epsilon)
|
||||||
|
|
||||||
def forward(self, x):
|
def forward(self, x):
|
||||||
a = self.attn(x)
|
a = self.attn(x)
|
||||||
|
|||||||
Reference in New Issue
Block a user