ALBERT can load pre-trained models. Doesn't inherit from BERT anymore.
This commit is contained in:
@@ -107,7 +107,7 @@ if is_torch_available():
|
|||||||
CAMEMBERT_PRETRAINED_MODEL_ARCHIVE_MAP)
|
CAMEMBERT_PRETRAINED_MODEL_ARCHIVE_MAP)
|
||||||
from .modeling_encoder_decoder import PreTrainedEncoderDecoder, Model2Model
|
from .modeling_encoder_decoder import PreTrainedEncoderDecoder, Model2Model
|
||||||
|
|
||||||
from .modeling_albert import (AlbertModel, AlbertForMaskedLM)
|
from .modeling_albert import (AlbertModel, AlbertForMaskedLM, ALBERT_PRETRAINED_MODEL_ARCHIVE_MAP)
|
||||||
|
|
||||||
# Optimization
|
# Optimization
|
||||||
from .optimization import (AdamW, get_constant_schedule, get_constant_schedule_with_warmup, get_cosine_schedule_with_warmup,
|
from .optimization import (AdamW, get_constant_schedule, get_constant_schedule_with_warmup, get_cosine_schedule_with_warmup,
|
||||||
|
|||||||
@@ -17,12 +17,21 @@
|
|||||||
|
|
||||||
from .configuration_utils import PretrainedConfig
|
from .configuration_utils import PretrainedConfig
|
||||||
|
|
||||||
|
ALBERT_PRETRAINED_CONFIG_ARCHIVE_MAP = {
|
||||||
|
'albert-base': "https://s3.amazonaws.com/models.huggingface.co/bert/albert-base-config.json",
|
||||||
|
'albert-large': "https://s3.amazonaws.com/models.huggingface.co/bert/albert-large-config.json",
|
||||||
|
'albert-xlarge': "https://s3.amazonaws.com/models.huggingface.co/bert/albert-xlarge-config.json",
|
||||||
|
'albert-xxlarge': "https://s3.amazonaws.com/models.huggingface.co/bert/albert-xxlarge-config.json",
|
||||||
|
}
|
||||||
|
|
||||||
class AlbertConfig(PretrainedConfig):
|
class AlbertConfig(PretrainedConfig):
|
||||||
"""Configuration for `AlbertModel`.
|
"""Configuration for `AlbertModel`.
|
||||||
|
|
||||||
The default settings match the configuration of model `albert_xxlarge`.
|
The default settings match the configuration of model `albert_xxlarge`.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
pretrained_config_archive_map = ALBERT_PRETRAINED_CONFIG_ARCHIVE_MAP
|
||||||
|
|
||||||
def __init__(self,
|
def __init__(self,
|
||||||
vocab_size_or_config_json_file=30000,
|
vocab_size_or_config_json_file=30000,
|
||||||
embedding_size=128,
|
embedding_size=128,
|
||||||
|
|||||||
@@ -21,6 +21,7 @@ import logging
|
|||||||
import torch
|
import torch
|
||||||
import torch.nn as nn
|
import torch.nn as nn
|
||||||
from torch.nn import CrossEntropyLoss
|
from torch.nn import CrossEntropyLoss
|
||||||
|
from transformers.modeling_utils import PreTrainedModel
|
||||||
from transformers.configuration_albert import AlbertConfig
|
from transformers.configuration_albert import AlbertConfig
|
||||||
from transformers.modeling_bert import BertEmbeddings, BertPreTrainedModel, BertModel, BertSelfAttention, prune_linear_layer, ACT2FN
|
from transformers.modeling_bert import BertEmbeddings, BertPreTrainedModel, BertModel, BertSelfAttention, prune_linear_layer, ACT2FN
|
||||||
from .file_utils import add_start_docstrings
|
from .file_utils import add_start_docstrings
|
||||||
@@ -274,6 +275,29 @@ class AlbertTransformer(nn.Module):
|
|||||||
return outputs # last-layer hidden state, (all hidden states), (all attentions)
|
return outputs # last-layer hidden state, (all hidden states), (all attentions)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class AlbertPreTrainedModel(PreTrainedModel):
|
||||||
|
""" An abstract class to handle weights initialization and
|
||||||
|
a simple interface for downloading and loading pretrained models.
|
||||||
|
"""
|
||||||
|
config_class = AlbertConfig
|
||||||
|
pretrained_model_archive_map = ALBERT_PRETRAINED_MODEL_ARCHIVE_MAP
|
||||||
|
base_model_prefix = "albert"
|
||||||
|
|
||||||
|
def _init_weights(self, module):
|
||||||
|
""" Initialize the weights.
|
||||||
|
"""
|
||||||
|
if isinstance(module, (nn.Linear, nn.Embedding)):
|
||||||
|
# Slightly different from the TF version which uses truncated_normal for initialization
|
||||||
|
# cf https://github.com/pytorch/pytorch/pull/5617
|
||||||
|
module.weight.data.normal_(mean=0.0, std=self.config.initializer_range)
|
||||||
|
if isinstance(module, (nn.Linear)) and module.bias is not None:
|
||||||
|
module.bias.data.zero_()
|
||||||
|
elif isinstance(module, nn.LayerNorm):
|
||||||
|
module.bias.data.zero_()
|
||||||
|
module.weight.data.fill_(1.0)
|
||||||
|
|
||||||
|
|
||||||
ALBERT_START_DOCSTRING = r""" The ALBERT model was proposed in
|
ALBERT_START_DOCSTRING = r""" The ALBERT model was proposed in
|
||||||
`ALBERT: A Lite BERT for Self-supervised Learning of Language Representations`_
|
`ALBERT: A Lite BERT for Self-supervised Learning of Language Representations`_
|
||||||
by Zhenzhong Lan, Mingda Chen, Sebastian Goodman, Kevin Gimpel, Piyush Sharma, Radu Soricut. It presents
|
by Zhenzhong Lan, Mingda Chen, Sebastian Goodman, Kevin Gimpel, Piyush Sharma, Radu Soricut. It presents
|
||||||
@@ -338,7 +362,7 @@ ALBERT_INPUTS_DOCSTRING = r"""
|
|||||||
|
|
||||||
@add_start_docstrings("The bare ALBERT Model transformer outputting raw hidden-states without any specific head on top.",
|
@add_start_docstrings("The bare ALBERT Model transformer outputting raw hidden-states without any specific head on top.",
|
||||||
ALBERT_START_DOCSTRING, ALBERT_INPUTS_DOCSTRING)
|
ALBERT_START_DOCSTRING, ALBERT_INPUTS_DOCSTRING)
|
||||||
class AlbertModel(BertModel):
|
class AlbertModel(AlbertPreTrainedModel):
|
||||||
r"""
|
r"""
|
||||||
Outputs: `Tuple` comprising various elements depending on the configuration (config) and inputs:
|
Outputs: `Tuple` comprising various elements depending on the configuration (config) and inputs:
|
||||||
**last_hidden_state**: ``torch.FloatTensor`` of shape ``(batch_size, sequence_length, hidden_size)``
|
**last_hidden_state**: ``torch.FloatTensor`` of shape ``(batch_size, sequence_length, hidden_size)``
|
||||||
@@ -358,6 +382,12 @@ class AlbertModel(BertModel):
|
|||||||
list of ``torch.FloatTensor`` (one for each layer) of shape ``(batch_size, num_heads, sequence_length, sequence_length)``:
|
list of ``torch.FloatTensor`` (one for each layer) of shape ``(batch_size, num_heads, sequence_length, sequence_length)``:
|
||||||
Attention weights after the attention softmax, used to compute the weighted average in the self-attention heads.
|
Attention weights after the attention softmax, used to compute the weighted average in the self-attention heads.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
config_class = AlbertConfig
|
||||||
|
pretrained_model_archive_map = ALBERT_PRETRAINED_MODEL_ARCHIVE_MAP
|
||||||
|
load_tf_weights = load_tf_weights_in_albert
|
||||||
|
base_model_prefix = "albert"
|
||||||
|
|
||||||
def __init__(self, config):
|
def __init__(self, config):
|
||||||
super(AlbertModel, self).__init__(config)
|
super(AlbertModel, self).__init__(config)
|
||||||
|
|
||||||
@@ -369,6 +399,11 @@ class AlbertModel(BertModel):
|
|||||||
|
|
||||||
self.init_weights()
|
self.init_weights()
|
||||||
|
|
||||||
|
def _resize_token_embeddings(self, new_num_tokens):
|
||||||
|
old_embeddings = self.embeddings.word_embeddings
|
||||||
|
new_embeddings = self._get_resized_embeddings(old_embeddings, new_num_tokens)
|
||||||
|
self.embeddings.word_embeddings = new_embeddings
|
||||||
|
return self.embeddings.word_embeddings
|
||||||
|
|
||||||
def forward(self, input_ids, attention_mask=None, token_type_ids=None, position_ids=None, head_mask=None):
|
def forward(self, input_ids, attention_mask=None, token_type_ids=None, position_ids=None, head_mask=None):
|
||||||
if attention_mask is None:
|
if attention_mask is None:
|
||||||
@@ -423,7 +458,7 @@ class AlbertMLMHead(nn.Module):
|
|||||||
|
|
||||||
|
|
||||||
@add_start_docstrings("Bert Model with a `language modeling` head on top.", ALBERT_START_DOCSTRING, ALBERT_INPUTS_DOCSTRING)
|
@add_start_docstrings("Bert Model with a `language modeling` head on top.", ALBERT_START_DOCSTRING, ALBERT_INPUTS_DOCSTRING)
|
||||||
class AlbertForMaskedLM(BertPreTrainedModel):
|
class AlbertForMaskedLM(AlbertPreTrainedModel):
|
||||||
r"""
|
r"""
|
||||||
**masked_lm_labels**: (`optional`) ``torch.LongTensor`` of shape ``(batch_size, sequence_length)``:
|
**masked_lm_labels**: (`optional`) ``torch.LongTensor`` of shape ``(batch_size, sequence_length)``:
|
||||||
Labels for computing the masked language modeling loss.
|
Labels for computing the masked language modeling loss.
|
||||||
@@ -445,11 +480,6 @@ class AlbertForMaskedLM(BertPreTrainedModel):
|
|||||||
Attention weights after the attention softmax, used to compute the weighted average in the self-attention heads.
|
Attention weights after the attention softmax, used to compute the weighted average in the self-attention heads.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
config_class = AlbertConfig
|
|
||||||
pretrained_model_archive_map = ALBERT_PRETRAINED_MODEL_ARCHIVE_MAP
|
|
||||||
load_tf_weights = load_tf_weights_in_albert
|
|
||||||
base_model_prefix = "albert"
|
|
||||||
|
|
||||||
def __init__(self, config):
|
def __init__(self, config):
|
||||||
super(AlbertForMaskedLM, self).__init__(config)
|
super(AlbertForMaskedLM, self).__init__(config)
|
||||||
|
|
||||||
|
|||||||
@@ -15,7 +15,7 @@
|
|||||||
""" Tokenization classes for ALBERT model."""
|
""" Tokenization classes for ALBERT model."""
|
||||||
from __future__ import (absolute_import, division, print_function,
|
from __future__ import (absolute_import, division, print_function,
|
||||||
unicode_literals)
|
unicode_literals)
|
||||||
|
|
||||||
from .tokenization_utils import PreTrainedTokenizer
|
from .tokenization_utils import PreTrainedTokenizer
|
||||||
import logging
|
import logging
|
||||||
import unicodedata
|
import unicodedata
|
||||||
@@ -24,8 +24,25 @@ import os
|
|||||||
from shutil import copyfile
|
from shutil import copyfile
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
VOCAB_FILES_NAMES = {'vocab_file': 'spiece.model'}
|
VOCAB_FILES_NAMES = {'vocab_file': 'spiece.model'}
|
||||||
|
|
||||||
|
PRETRAINED_VOCAB_FILES_MAP = {
|
||||||
|
'vocab_file':
|
||||||
|
{
|
||||||
|
'albert-base': "https://s3.amazonaws.com/models.huggingface.co/bert/albert-base-spiece.model",
|
||||||
|
'albert-large': "https://s3.amazonaws.com/models.huggingface.co/bert/albert-large-spiece.model",
|
||||||
|
'albert-xlarge': "https://s3.amazonaws.com/models.huggingface.co/bert/albert-xlarge-spiece.model",
|
||||||
|
'albert-xxlarge': "https://s3.amazonaws.com/models.huggingface.co/bert/albert-xxlarge-spiece.model",
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES = {
|
||||||
|
'albert-base': 512,
|
||||||
|
'albert-large': 512,
|
||||||
|
'albert-xlarge': 512,
|
||||||
|
'albert-xxlarge': 512,
|
||||||
|
}
|
||||||
|
|
||||||
SPIECE_UNDERLINE = u'▁'
|
SPIECE_UNDERLINE = u'▁'
|
||||||
|
|
||||||
class AlbertTokenizer(PreTrainedTokenizer):
|
class AlbertTokenizer(PreTrainedTokenizer):
|
||||||
@@ -35,8 +52,8 @@ class AlbertTokenizer(PreTrainedTokenizer):
|
|||||||
- requires `SentencePiece <https://github.com/google/sentencepiece>`_
|
- requires `SentencePiece <https://github.com/google/sentencepiece>`_
|
||||||
"""
|
"""
|
||||||
vocab_files_names = VOCAB_FILES_NAMES
|
vocab_files_names = VOCAB_FILES_NAMES
|
||||||
# pretrained_vocab_files_map = PRETRAINED_VOCAB_FILES_MAP
|
pretrained_vocab_files_map = PRETRAINED_VOCAB_FILES_MAP
|
||||||
# max_model_input_sizes = PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES
|
max_model_input_sizes = PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES
|
||||||
|
|
||||||
def __init__(self, vocab_file,
|
def __init__(self, vocab_file,
|
||||||
do_lower_case=True, remove_space=True, keep_accents=False,
|
do_lower_case=True, remove_space=True, keep_accents=False,
|
||||||
|
|||||||
Reference in New Issue
Block a user