From 63ed224b7c550ead5f9599187e665ded57ce80d4 Mon Sep 17 00:00:00 2001 From: Santiago Castro Date: Wed, 2 Oct 2019 11:02:08 -0400 Subject: [PATCH] initialy -> initially --- transformers/modeling_bert.py | 2 +- transformers/modeling_tf_bert.py | 2 +- transformers/modeling_tf_distilbert.py | 2 +- transformers/modeling_tf_xlm.py | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/transformers/modeling_bert.py b/transformers/modeling_bert.py index 51e407d0a6..fc448fa366 100644 --- a/transformers/modeling_bert.py +++ b/transformers/modeling_bert.py @@ -118,7 +118,7 @@ def load_tf_weights_in_bert(model, config, tf_checkpoint_path): def gelu(x): - """ Original Implementation of the gelu activation function in Google Bert repo when initialy created. + """ Original Implementation of the gelu activation function in Google Bert repo when initially created. For information: OpenAI GPT's gelu is slightly different (and gives slightly different results): 0.5 * x * (1 + torch.tanh(math.sqrt(2 / math.pi) * (x + 0.044715 * torch.pow(x, 3)))) Also see https://arxiv.org/abs/1606.08415 diff --git a/transformers/modeling_tf_bert.py b/transformers/modeling_tf_bert.py index d763ca991e..4de94751f8 100644 --- a/transformers/modeling_tf_bert.py +++ b/transformers/modeling_tf_bert.py @@ -62,7 +62,7 @@ def load_bert_pt_weights_in_tf2(tf_model, pytorch_checkpoint_path): def gelu(x): """ Gaussian Error Linear Unit. - Original Implementation of the gelu activation function in Google Bert repo when initialy created. + Original Implementation of the gelu activation function in Google Bert repo when initially created. For information: OpenAI GPT's gelu is slightly different (and gives slightly different results): 0.5 * x * (1 + torch.tanh(math.sqrt(2 / math.pi) * (x + 0.044715 * torch.pow(x, 3)))) Also see https://arxiv.org/abs/1606.08415 diff --git a/transformers/modeling_tf_distilbert.py b/transformers/modeling_tf_distilbert.py index 2a917a30a4..5ce1616bcc 100644 --- a/transformers/modeling_tf_distilbert.py +++ b/transformers/modeling_tf_distilbert.py @@ -45,7 +45,7 @@ TF_DISTILBERT_PRETRAINED_MODEL_ARCHIVE_MAP = { ### UTILS AND BUILDING BLOCKS OF THE ARCHITECTURE ### def gelu(x): """ Gaussian Error Linear Unit. - Original Implementation of the gelu activation function in Google Bert repo when initialy created. + Original Implementation of the gelu activation function in Google Bert repo when initially created. For information: OpenAI GPT's gelu is slightly different (and gives slightly different results): 0.5 * x * (1 + torch.tanh(math.sqrt(2 / math.pi) * (x + 0.044715 * torch.pow(x, 3)))) Also see https://arxiv.org/abs/1606.08415 diff --git a/transformers/modeling_tf_xlm.py b/transformers/modeling_tf_xlm.py index f8f199bbe6..83cc37c6a7 100644 --- a/transformers/modeling_tf_xlm.py +++ b/transformers/modeling_tf_xlm.py @@ -69,7 +69,7 @@ def create_sinusoidal_embeddings(n_pos, dim, out): def gelu(x): """ Gaussian Error Linear Unit. - Original Implementation of the gelu activation function in Google Bert repo when initialy created. + Original Implementation of the gelu activation function in Google Bert repo when initially created. For information: OpenAI GPT's gelu is slightly different (and gives slightly different results): 0.5 * x * (1 + torch.tanh(math.sqrt(2 / math.pi) * (x + 0.044715 * torch.pow(x, 3)))) Also see https://arxiv.org/abs/1606.08415