initialy -> initially
This commit is contained in:
committed by
Lysandre Debut
parent
391db836ab
commit
63ed224b7c
@@ -118,7 +118,7 @@ def load_tf_weights_in_bert(model, config, tf_checkpoint_path):
|
||||
|
||||
|
||||
def gelu(x):
|
||||
""" Original Implementation of the gelu activation function in Google Bert repo when initialy created.
|
||||
""" Original Implementation of the gelu activation function in Google Bert repo when initially created.
|
||||
For information: OpenAI GPT's gelu is slightly different (and gives slightly different results):
|
||||
0.5 * x * (1 + torch.tanh(math.sqrt(2 / math.pi) * (x + 0.044715 * torch.pow(x, 3))))
|
||||
Also see https://arxiv.org/abs/1606.08415
|
||||
|
||||
@@ -62,7 +62,7 @@ def load_bert_pt_weights_in_tf2(tf_model, pytorch_checkpoint_path):
|
||||
|
||||
def gelu(x):
|
||||
""" Gaussian Error Linear Unit.
|
||||
Original Implementation of the gelu activation function in Google Bert repo when initialy created.
|
||||
Original Implementation of the gelu activation function in Google Bert repo when initially created.
|
||||
For information: OpenAI GPT's gelu is slightly different (and gives slightly different results):
|
||||
0.5 * x * (1 + torch.tanh(math.sqrt(2 / math.pi) * (x + 0.044715 * torch.pow(x, 3))))
|
||||
Also see https://arxiv.org/abs/1606.08415
|
||||
|
||||
@@ -45,7 +45,7 @@ TF_DISTILBERT_PRETRAINED_MODEL_ARCHIVE_MAP = {
|
||||
### UTILS AND BUILDING BLOCKS OF THE ARCHITECTURE ###
|
||||
def gelu(x):
|
||||
""" Gaussian Error Linear Unit.
|
||||
Original Implementation of the gelu activation function in Google Bert repo when initialy created.
|
||||
Original Implementation of the gelu activation function in Google Bert repo when initially created.
|
||||
For information: OpenAI GPT's gelu is slightly different (and gives slightly different results):
|
||||
0.5 * x * (1 + torch.tanh(math.sqrt(2 / math.pi) * (x + 0.044715 * torch.pow(x, 3))))
|
||||
Also see https://arxiv.org/abs/1606.08415
|
||||
|
||||
@@ -69,7 +69,7 @@ def create_sinusoidal_embeddings(n_pos, dim, out):
|
||||
|
||||
def gelu(x):
|
||||
""" Gaussian Error Linear Unit.
|
||||
Original Implementation of the gelu activation function in Google Bert repo when initialy created.
|
||||
Original Implementation of the gelu activation function in Google Bert repo when initially created.
|
||||
For information: OpenAI GPT's gelu is slightly different (and gives slightly different results):
|
||||
0.5 * x * (1 + torch.tanh(math.sqrt(2 / math.pi) * (x + 0.044715 * torch.pow(x, 3))))
|
||||
Also see https://arxiv.org/abs/1606.08415
|
||||
|
||||
Reference in New Issue
Block a user