Merge branch 'master' into attention
This commit is contained in:
@@ -22,6 +22,15 @@ import requests
|
||||
from botocore.exceptions import ClientError
|
||||
from tqdm import tqdm
|
||||
|
||||
try:
|
||||
from torch.hub import _get_torch_home
|
||||
torch_cache_home = _get_torch_home()
|
||||
except ImportError:
|
||||
torch_cache_home = os.path.expanduser(
|
||||
os.getenv('TORCH_HOME', os.path.join(
|
||||
os.getenv('XDG_CACHE_HOME', '~/.cache'), 'torch')))
|
||||
default_cache_path = os.path.join(torch_cache_home, 'pytorch_pretrained_bert')
|
||||
|
||||
try:
|
||||
from urllib.parse import urlparse
|
||||
except ImportError:
|
||||
@@ -29,11 +38,11 @@ except ImportError:
|
||||
|
||||
try:
|
||||
from pathlib import Path
|
||||
PYTORCH_PRETRAINED_BERT_CACHE = Path(os.getenv('PYTORCH_PRETRAINED_BERT_CACHE',
|
||||
Path.home() / '.pytorch_pretrained_bert'))
|
||||
PYTORCH_PRETRAINED_BERT_CACHE = Path(
|
||||
os.getenv('PYTORCH_PRETRAINED_BERT_CACHE', default_cache_path))
|
||||
except (AttributeError, ImportError):
|
||||
PYTORCH_PRETRAINED_BERT_CACHE = os.getenv('PYTORCH_PRETRAINED_BERT_CACHE',
|
||||
os.path.join(os.path.expanduser("~"), '.pytorch_pretrained_bert'))
|
||||
default_cache_path)
|
||||
|
||||
CONFIG_NAME = "config.json"
|
||||
WEIGHTS_NAME = "pytorch_model.bin"
|
||||
|
||||
@@ -145,7 +145,8 @@ class BertConfig(object):
|
||||
attention_probs_dropout_prob=0.1,
|
||||
max_position_embeddings=512,
|
||||
type_vocab_size=2,
|
||||
initializer_range=0.02):
|
||||
initializer_range=0.02,
|
||||
layer_norm_eps=1e-12):
|
||||
"""Constructs BertConfig.
|
||||
|
||||
Args:
|
||||
@@ -169,6 +170,7 @@ class BertConfig(object):
|
||||
`BertModel`.
|
||||
initializer_range: The sttdev of the truncated_normal_initializer for
|
||||
initializing all weight matrices.
|
||||
layer_norm_eps: The epsilon used by LayerNorm.
|
||||
"""
|
||||
if isinstance(vocab_size_or_config_json_file, str) or (sys.version_info[0] == 2
|
||||
and isinstance(vocab_size_or_config_json_file, unicode)):
|
||||
@@ -188,6 +190,7 @@ class BertConfig(object):
|
||||
self.max_position_embeddings = max_position_embeddings
|
||||
self.type_vocab_size = type_vocab_size
|
||||
self.initializer_range = initializer_range
|
||||
self.layer_norm_eps = layer_norm_eps
|
||||
else:
|
||||
raise ValueError("First argument must be either a vocabulary size (int)"
|
||||
"or the path to a pretrained model config file (str)")
|
||||
@@ -254,7 +257,7 @@ class BertEmbeddings(nn.Module):
|
||||
|
||||
# self.LayerNorm is not snake-cased to stick with TensorFlow model variable name and be able to load
|
||||
# any TensorFlow checkpoint file
|
||||
self.LayerNorm = BertLayerNorm(config.hidden_size, eps=1e-12)
|
||||
self.LayerNorm = BertLayerNorm(config.hidden_size, eps=config.layer_norm_eps)
|
||||
self.dropout = nn.Dropout(config.hidden_dropout_prob)
|
||||
|
||||
def forward(self, input_ids, token_type_ids=None):
|
||||
@@ -332,7 +335,7 @@ class BertSelfOutput(nn.Module):
|
||||
def __init__(self, config):
|
||||
super(BertSelfOutput, self).__init__()
|
||||
self.dense = nn.Linear(config.hidden_size, config.hidden_size)
|
||||
self.LayerNorm = BertLayerNorm(config.hidden_size, eps=1e-12)
|
||||
self.LayerNorm = BertLayerNorm(config.hidden_size, eps=config.layer_norm_eps)
|
||||
self.dropout = nn.Dropout(config.hidden_dropout_prob)
|
||||
|
||||
def forward(self, hidden_states, input_tensor):
|
||||
@@ -378,7 +381,7 @@ class BertOutput(nn.Module):
|
||||
def __init__(self, config):
|
||||
super(BertOutput, self).__init__()
|
||||
self.dense = nn.Linear(config.intermediate_size, config.hidden_size)
|
||||
self.LayerNorm = BertLayerNorm(config.hidden_size, eps=1e-12)
|
||||
self.LayerNorm = BertLayerNorm(config.hidden_size, eps=config.layer_norm_eps)
|
||||
self.dropout = nn.Dropout(config.hidden_dropout_prob)
|
||||
|
||||
def forward(self, hidden_states, input_tensor):
|
||||
@@ -454,7 +457,7 @@ class BertPredictionHeadTransform(nn.Module):
|
||||
self.transform_act_fn = ACT2FN[config.hidden_act]
|
||||
else:
|
||||
self.transform_act_fn = config.hidden_act
|
||||
self.LayerNorm = BertLayerNorm(config.hidden_size, eps=1e-12)
|
||||
self.LayerNorm = BertLayerNorm(config.hidden_size, eps=config.layer_norm_eps)
|
||||
|
||||
def forward(self, hidden_states):
|
||||
hidden_states = self.dense(hidden_states)
|
||||
@@ -1020,7 +1023,7 @@ class BertForSequenceClassification(BertPreTrainedModel):
|
||||
logits = model(input_ids, token_type_ids, input_mask)
|
||||
```
|
||||
"""
|
||||
def __init__(self, config, num_labels, output_attentions=False):
|
||||
def __init__(self, config, num_labels=2, output_attentions=False):
|
||||
super(BertForSequenceClassification, self).__init__(config)
|
||||
self.output_attentions = output_attentions
|
||||
self.num_labels = num_labels
|
||||
@@ -1091,7 +1094,7 @@ class BertForMultipleChoice(BertPreTrainedModel):
|
||||
logits = model(input_ids, token_type_ids, input_mask)
|
||||
```
|
||||
"""
|
||||
def __init__(self, config, num_choices, output_attentions=False):
|
||||
def __init__(self, config, num_choices=2, output_attentions=False):
|
||||
super(BertForMultipleChoice, self).__init__(config)
|
||||
self.output_attentions = output_attentions
|
||||
self.num_choices = num_choices
|
||||
@@ -1167,7 +1170,7 @@ class BertForTokenClassification(BertPreTrainedModel):
|
||||
logits = model(input_ids, token_type_ids, input_mask)
|
||||
```
|
||||
"""
|
||||
def __init__(self, config, num_labels, output_attentions=False):
|
||||
def __init__(self, config, num_labels=2, output_attentions=False):
|
||||
super(BertForTokenClassification, self).__init__(config)
|
||||
self.output_attentions = output_attentions
|
||||
self.num_labels = num_labels
|
||||
|
||||
@@ -434,9 +434,7 @@ class OpenAIGPTPreTrainedModel(nn.Module):
|
||||
module.bias.data.zero_()
|
||||
|
||||
@classmethod
|
||||
def from_pretrained(
|
||||
cls, pretrained_model_name_or_path, num_special_tokens=None, state_dict=None, cache_dir=None, from_tf=False, *inputs, **kwargs
|
||||
):
|
||||
def from_pretrained(cls, pretrained_model_name_or_path, num_special_tokens=None, *inputs, **kwargs):
|
||||
"""
|
||||
Instantiate a OpenAIGPTPreTrainedModel from a pre-trained model file or a pytorch state dict.
|
||||
Download and cache the pre-trained model file if needed.
|
||||
@@ -449,14 +447,20 @@ class OpenAIGPTPreTrainedModel(nn.Module):
|
||||
. `openai_gpt_config.json` a configuration file for the model
|
||||
. `pytorch_model.bin` a PyTorch dump of a OpenAIGPTModel instance
|
||||
- a path or url to a pretrained model archive containing:
|
||||
. `bert_config.json` a configuration file for the model
|
||||
. `openai-gpt-config.json` a configuration file for the model
|
||||
. a series of NumPy files containing OpenAI TensorFlow trained weights
|
||||
from_tf: should we load the weights from a locally saved TensorFlow checkpoint
|
||||
cache_dir: an optional path to a folder in which the pre-trained models will be cached.
|
||||
state_dict: an optional state dictionnary (collections.OrderedDict object) to use instead of pre-trained models
|
||||
*inputs, **kwargs: additional input for the specific Bert class
|
||||
(ex: num_labels for BertForSequenceClassification)
|
||||
*inputs, **kwargs: additional input for the specific OpenAI-GPT class
|
||||
"""
|
||||
state_dict = kwargs.get('state_dict', None)
|
||||
kwargs.pop('state_dict', None)
|
||||
cache_dir = kwargs.get('cache_dir', None)
|
||||
kwargs.pop('cache_dir', None)
|
||||
from_tf = kwargs.get('from_tf', False)
|
||||
kwargs.pop('from_tf', None)
|
||||
|
||||
if pretrained_model_name_or_path in PRETRAINED_MODEL_ARCHIVE_MAP:
|
||||
archive_file = PRETRAINED_MODEL_ARCHIVE_MAP[pretrained_model_name_or_path]
|
||||
config_file = PRETRAINED_CONFIG_ARCHIVE_MAP[pretrained_model_name_or_path]
|
||||
|
||||
Reference in New Issue
Block a user