Adding the LXMERT pretraining model (MultiModal languageXvision) to HuggingFace's suite of models (#5793)
* added template files for LXMERT and competed the configuration_lxmert.py * added modeling, tokization, testing, and finishing touched for lxmert [yet to be tested] * added model card for lxmert * cleaning up lxmert code * Update src/transformers/modeling_lxmert.py Co-authored-by: Lysandre Debut <lysandre@huggingface.co> * Update src/transformers/modeling_tf_lxmert.py Co-authored-by: Lysandre Debut <lysandre@huggingface.co> * Update src/transformers/modeling_tf_lxmert.py Co-authored-by: Lysandre Debut <lysandre@huggingface.co> * Update src/transformers/modeling_lxmert.py Co-authored-by: Lysandre Debut <lysandre@huggingface.co> * tested torch lxmert, changed documtention, updated outputs, and other small fixes * Update src/transformers/convert_pytorch_checkpoint_to_tf2.py Co-authored-by: Lysandre Debut <lysandre@huggingface.co> * Update src/transformers/convert_pytorch_checkpoint_to_tf2.py Co-authored-by: Lysandre Debut <lysandre@huggingface.co> * Update src/transformers/convert_pytorch_checkpoint_to_tf2.py Co-authored-by: Lysandre Debut <lysandre@huggingface.co> * renaming, other small issues, did not change TF code in this commit * added lxmert question answering model in pytorch * added capability to edit number of qa labels for lxmert * made answer optional for lxmert question answering * add option to return hidden_states for lxmert * changed default qa labels for lxmert * changed config archive path * squshing 3 commits: merged UI + testing improvments + more UI and testing * changed some variable names for lxmert * TF LXMERT * Various fixes to LXMERT * Final touches to LXMERT * AutoTokenizer order * Add LXMERT to index.rst and README.md * Merge commit test fixes + Style update * TensorFlow 2.3.0 sequential model changes variable names Remove inherited test * Update src/transformers/modeling_tf_pytorch_utils.py * Update docs/source/model_doc/lxmert.rst Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com> * Update 
docs/source/model_doc/lxmert.rst Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com> * Update src/transformers/modeling_tf_lxmert.py Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com> * added suggestions * Fixes * Final fixes for TF model * Fix docs Co-authored-by: Lysandre Debut <lysandre@huggingface.co> Co-authored-by: Lysandre <lysandre.debut@reseau.eseo.fr> Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com>
This commit is contained in:
committed by
GitHub
parent
4ebb52afdb
commit
ea2c6f1afc
@@ -31,6 +31,7 @@ from .configuration_encoder_decoder import EncoderDecoderConfig
|
||||
from .configuration_flaubert import FLAUBERT_PRETRAINED_CONFIG_ARCHIVE_MAP, FlaubertConfig
|
||||
from .configuration_gpt2 import GPT2_PRETRAINED_CONFIG_ARCHIVE_MAP, GPT2Config
|
||||
from .configuration_longformer import LONGFORMER_PRETRAINED_CONFIG_ARCHIVE_MAP, LongformerConfig
|
||||
from .configuration_lxmert import LXMERT_PRETRAINED_CONFIG_ARCHIVE_MAP, LxmertConfig
|
||||
from .configuration_marian import MarianConfig
|
||||
from .configuration_mbart import MBartConfig
|
||||
from .configuration_mmbt import MMBTConfig
|
||||
@@ -156,6 +157,7 @@ from .tokenization_electra import ElectraTokenizer, ElectraTokenizerFast
|
||||
from .tokenization_flaubert import FlaubertTokenizer
|
||||
from .tokenization_gpt2 import GPT2Tokenizer, GPT2TokenizerFast
|
||||
from .tokenization_longformer import LongformerTokenizer, LongformerTokenizerFast
|
||||
from .tokenization_lxmert import LxmertTokenizer, LxmertTokenizerFast
|
||||
from .tokenization_mbart import MBartTokenizer
|
||||
from .tokenization_mobilebert import MobileBertTokenizer, MobileBertTokenizerFast
|
||||
from .tokenization_openai import OpenAIGPTTokenizer, OpenAIGPTTokenizerFast
|
||||
@@ -343,6 +345,15 @@ if is_torch_available():
|
||||
LongformerModel,
|
||||
LongformerSelfAttention,
|
||||
)
|
||||
from .modeling_lxmert import (
|
||||
LxmertEncoder,
|
||||
LxmertForPreTraining,
|
||||
LxmertForQuestionAnswering,
|
||||
LxmertModel,
|
||||
LxmertPreTrainedModel,
|
||||
LxmertVisualFeatureEncoder,
|
||||
LxmertXLayer,
|
||||
)
|
||||
from .modeling_marian import MarianMTModel
|
||||
from .modeling_mbart import MBartForConditionalGeneration
|
||||
from .modeling_mmbt import MMBTForClassification, MMBTModel, ModalEmbeddings
|
||||
@@ -573,6 +584,14 @@ if is_tf_available():
|
||||
TFLongformerModel,
|
||||
TFLongformerSelfAttention,
|
||||
)
|
||||
from .modeling_tf_lxmert import (
|
||||
TF_LXMERT_PRETRAINED_MODEL_ARCHIVE_LIST,
|
||||
TFLxmertForPreTraining,
|
||||
TFLxmertMainLayer,
|
||||
TFLxmertModel,
|
||||
TFLxmertPreTrainedModel,
|
||||
TFLxmertVisualFeatureEncoder,
|
||||
)
|
||||
from .modeling_tf_mobilebert import (
|
||||
TF_MOBILEBERT_PRETRAINED_MODEL_ARCHIVE_LIST,
|
||||
TFMobileBertForMaskedLM,
|
||||
|
||||
@@ -155,5 +155,13 @@ class ConvertCommand(BaseTransformersCLICommand):
|
||||
)
|
||||
|
||||
convert_xlm_checkpoint_to_pytorch(self._tf_checkpoint, self._pytorch_dump_output)
|
||||
elif self._model_type == "lxmert":
|
||||
from transformers.convert_lxmert_original_pytorch_checkpoint_to_pytorch import (
|
||||
convert_lxmert_checkpoint_to_pytorch,
|
||||
)
|
||||
|
||||
convert_lxmert_checkpoint_to_pytorch(self._tf_checkpoint, self._pytorch_dump_output)
|
||||
else:
|
||||
raise ValueError("--model_type should be selected in the list [bert, gpt, gpt2, transfo_xl, xlnet, xlm]")
|
||||
raise ValueError(
|
||||
"--model_type should be selected in the list [bert, gpt, gpt2, transfo_xl, xlnet, xlm, lxmert]"
|
||||
)
|
||||
|
||||
@@ -28,6 +28,7 @@ from .configuration_encoder_decoder import EncoderDecoderConfig
|
||||
from .configuration_flaubert import FLAUBERT_PRETRAINED_CONFIG_ARCHIVE_MAP, FlaubertConfig
|
||||
from .configuration_gpt2 import GPT2_PRETRAINED_CONFIG_ARCHIVE_MAP, GPT2Config
|
||||
from .configuration_longformer import LONGFORMER_PRETRAINED_CONFIG_ARCHIVE_MAP, LongformerConfig
|
||||
from .configuration_lxmert import LXMERT_PRETRAINED_CONFIG_ARCHIVE_MAP, LxmertConfig
|
||||
from .configuration_marian import MarianConfig
|
||||
from .configuration_mbart import MBART_PRETRAINED_CONFIG_ARCHIVE_MAP, MBartConfig
|
||||
from .configuration_mobilebert import MobileBertConfig
|
||||
@@ -66,6 +67,7 @@ ALL_PRETRAINED_CONFIG_ARCHIVE_MAP = dict(
|
||||
ELECTRA_PRETRAINED_CONFIG_ARCHIVE_MAP,
|
||||
LONGFORMER_PRETRAINED_CONFIG_ARCHIVE_MAP,
|
||||
RETRIBERT_PRETRAINED_CONFIG_ARCHIVE_MAP,
|
||||
LXMERT_PRETRAINED_CONFIG_ARCHIVE_MAP,
|
||||
]
|
||||
for key, value, in pretrained_map.items()
|
||||
)
|
||||
@@ -166,6 +168,10 @@ CONFIG_MAPPING = OrderedDict(
|
||||
"encoder-decoder",
|
||||
EncoderDecoderConfig,
|
||||
),
|
||||
(
|
||||
"lxmert",
|
||||
LxmertConfig,
|
||||
),
|
||||
]
|
||||
)
|
||||
|
||||
|
||||
179
src/transformers/configuration_lxmert.py
Normal file
179
src/transformers/configuration_lxmert.py
Normal file
@@ -0,0 +1,179 @@
|
||||
# coding=utf-8
|
||||
# Copyright 2018, Hao Tan, Mohit Bansal
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
""" LXMERT model configuration """
|
||||
|
||||
|
||||
import logging
|
||||
|
||||
from .configuration_utils import PretrainedConfig
|
||||
|
||||
|
||||
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Maps each pretrained LXMERT shortcut name to the location of its configuration file.
# NOTE(review): the value is an empty string here — confirm whether a config URL is expected.
LXMERT_PRETRAINED_CONFIG_ARCHIVE_MAP = {"unc-nlp/lxmert-base-uncased": ""}
|
||||
|
||||
|
||||
class LxmertConfig(PretrainedConfig):
    r"""
    This is the configuration class to store the configuration of a :class:`~transformers.LxmertModel`.
    It is used to instantiate an Lxmert model according to the specified arguments, defining the model
    architecture.


    Args:
        vocab_size (:obj:`int`, optional, defaults to 30522):
            Vocabulary size of the LXMERT model. Defines the different tokens that
            can be represented by the `inputs_ids` passed to the forward method of :class:`~transformers.LxmertModel`.
        hidden_size (:obj:`int`, optional, defaults to 768):
            Dimensionality of the encoder layers and the pooler layer.
        r_layers (:obj:`int`, optional, defaults to 5):
            Number of hidden layers in the Transformer visual encoder.
        l_layers (:obj:`int`, optional, defaults to 9):
            Number of hidden layers in the Transformer language encoder.
        x_layers (:obj:`int`, optional, defaults to 5):
            Number of hidden layers in the Transformer cross modality encoder.
        num_attention_heads (:obj:`int`, optional, defaults to 12):
            Number of attention heads for each attention layer in the Transformer encoder.
        num_labels (:obj:`int`, optional, defaults to 2):
            Number of labels stored on the configuration as ``num_labels``.
        intermediate_size (:obj:`int`, optional, defaults to 3072):
            Dimensionality of the "intermediate" (i.e., feed-forward) layer in the Transformer encoder.
        hidden_act (:obj:`str` or :obj:`function`, optional, defaults to "gelu"):
            The non-linear activation function (function or string) in the encoder and pooler.
            If string, "gelu", "relu", "swish" and "gelu_new" are supported.
        hidden_dropout_prob (:obj:`float`, optional, defaults to 0.1):
            The dropout probability for all fully connected layers in the embeddings, encoder, and pooler.
        attention_probs_dropout_prob (:obj:`float`, optional, defaults to 0.1):
            The dropout ratio for the attention probabilities.
        max_position_embeddings (:obj:`int`, optional, defaults to 512):
            The maximum sequence length that this model might ever be used with.
            Typically set this to something large just in case (e.g., 512 or 1024 or 2048).
        type_vocab_size (:obj:`int`, optional, defaults to 2):
            The vocabulary size of the `token_type_ids` passed into :class:`~transformers.LxmertModel`.
        initializer_range (:obj:`float`, optional, defaults to 0.02):
            The standard deviation of the truncated_normal_initializer for initializing all weight matrices.
        layer_norm_eps (:obj:`float`, optional, defaults to 1e-12):
            The epsilon used by the layer normalization layers.
        pad_token_id (:obj:`int`, optional, defaults to 0):
            Accepted by the constructor; this class neither stores it nor forwards it to the parent constructor.
        visual_feat_dim (:obj:`int`, optional, defaults to 2048):
            This represents the last dimension of the pooled-object features used as input for the model,
            representing the size of each object feature itself.
        visual_pos_dim (:obj:`int`, optional, defaults to 4):
            This represents the number of spatial features that are mixed into the visual features.
            The default is set to 4 because most commonly this will represent the location of a bounding box.
            i.e. (x, y, width, height)
        visual_loss_normalizer (:obj:`float`, optional, defaults to 6.67):
            This represents the scaling factor in which each visual loss is multiplied by if during pretraining,
            one decided to train with multiple vision-based loss objectives.
        num_qa_labels (:obj:`int`, optional, defaults to 9500):
            This represents the total number of different question answering (QA) labels there are. If using more than one dataset with QA,
            the user will need to account for the total number of labels that all of the datasets have in total.
        num_object_labels (:obj:`int`, optional, defaults to 1600):
            This represents the total number of semantically unique objects that lxmert will be able to classify a pooled-object feature
            as belonging to.
        num_attr_labels (:obj:`int`, optional, defaults to 400):
            This represents the total number of semantically unique attributes that lxmert will be able to classify a pooled-object feature
            as possessing.
        task_matched (:obj:`bool`, optional, defaults to True):
            This task is used for sentence-image matching. If the sentence correctly describes the image the label will be 1.
            If the sentence does not correctly describe the image, the label will be 0.
        task_mask_lm (:obj:`bool`, optional, defaults to True):
            This task is the de facto masked language modeling used in pretraining models such as BERT.
        task_obj_predict (:obj:`bool`, optional, defaults to True):
            This task is set to true if the user would like to perform one of the following loss objectives:
            object prediction, attribute prediction, feature regression
        task_qa (:obj:`bool`, optional, defaults to True):
            This task specifies whether or not Lxmert will calculate the question-answering loss objective
        visual_obj_loss (:obj:`bool`, optional, defaults to True):
            This task specifies whether or not Lxmert will calculate the object-prediction loss objective
        visual_attr_loss (:obj:`bool`, optional, defaults to True):
            This task specifies whether or not Lxmert will calculate the attribute-prediction loss objective
        visual_feat_loss (:obj:`bool`, optional, defaults to True):
            This task specifies whether or not Lxmert will calculate the feature-regression loss objective
        output_attentions (:obj:`bool`, optional, defaults to False):
            if True, the vision, language, and cross-modality layers will be returned
        output_hidden_states (:obj:`bool`, optional, defaults to False):
            if True, final cross-modality hidden states for language and vision features will be returned

    """

    model_type = "lxmert"

    def __init__(
        self,
        vocab_size=30522,
        hidden_size=768,
        num_attention_heads=12,
        num_labels=2,
        num_qa_labels=9500,
        num_object_labels=1600,
        num_attr_labels=400,
        intermediate_size=3072,
        hidden_act="gelu",
        hidden_dropout_prob=0.1,
        attention_probs_dropout_prob=0.1,
        max_position_embeddings=512,
        type_vocab_size=2,
        initializer_range=0.02,
        layer_norm_eps=1e-12,
        pad_token_id=0,
        l_layers=9,
        x_layers=5,
        r_layers=5,
        visual_feat_dim=2048,
        visual_pos_dim=4,
        visual_loss_normalizer=6.67,
        task_matched=True,
        task_mask_lm=True,
        task_obj_predict=True,
        task_qa=True,
        visual_obj_loss=True,
        visual_attr_loss=True,
        visual_feat_loss=True,
        output_attentions=False,
        output_hidden_states=False,
        **kwargs,
    ):
        # NOTE(review): `pad_token_id` is accepted above but is neither stored here nor
        # forwarded to the parent constructor — confirm whether that is intended.
        super().__init__(**kwargs)
        self.vocab_size = vocab_size
        self.hidden_size = hidden_size
        self.num_attention_heads = num_attention_heads
        self.num_labels = num_labels
        self.hidden_act = hidden_act
        self.intermediate_size = intermediate_size
        self.hidden_dropout_prob = hidden_dropout_prob
        self.attention_probs_dropout_prob = attention_probs_dropout_prob
        self.max_position_embeddings = max_position_embeddings
        self.type_vocab_size = type_vocab_size
        self.initializer_range = initializer_range
        self.layer_norm_eps = layer_norm_eps
        self.num_qa_labels = num_qa_labels
        self.num_object_labels = num_object_labels
        self.num_attr_labels = num_attr_labels
        self.l_layers = l_layers
        self.x_layers = x_layers
        self.r_layers = r_layers
        self.visual_feat_dim = visual_feat_dim
        self.visual_pos_dim = visual_pos_dim
        self.visual_loss_normalizer = visual_loss_normalizer
        self.task_matched = task_matched
        self.task_mask_lm = task_mask_lm
        self.task_obj_predict = task_obj_predict
        self.task_qa = task_qa
        self.visual_obj_loss = visual_obj_loss
        self.visual_attr_loss = visual_attr_loss
        self.visual_feat_loss = visual_feat_loss
        self.output_hidden_states = output_hidden_states
        # Store the caller's value. This used to be `self.output_attentions = self.output_attentions`,
        # a self-assignment that silently discarded the `output_attentions` argument.
        self.output_attentions = output_attentions
        # Per-modality layer counts gathered in one mapping.
        self.num_hidden_layers = {"vision": r_layers, "cross_encoder": x_layers, "language": l_layers}
|
||||
61
src/transformers/convert_lxmert_original_tf_checkpoint_to_pytorch.py
Executable file
61
src/transformers/convert_lxmert_original_tf_checkpoint_to_pytorch.py
Executable file
@@ -0,0 +1,61 @@
|
||||
# coding=utf-8
|
||||
# Copyright 2018 The HuggingFace Inc. team.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
"""Convert LXMERT checkpoint."""
|
||||
|
||||
|
||||
import argparse
|
||||
import logging
|
||||
|
||||
import torch
|
||||
|
||||
from transformers import LxmertConfig, LxmertForPreTraining, load_tf_weights_in_lxmert
|
||||
|
||||
|
||||
logging.basicConfig(level=logging.INFO)
|
||||
|
||||
|
||||
def convert_tf_checkpoint_to_pytorch(tf_checkpoint_path, config_file, pytorch_dump_path):
    """Convert a TensorFlow LXMERT checkpoint into a saved PyTorch state dict.

    Args:
        tf_checkpoint_path: Path of the TensorFlow checkpoint to read weights from.
        config_file: Path of the JSON file the :class:`LxmertConfig` is loaded from.
        pytorch_dump_path: Destination path handed to ``torch.save``.
    """
    # Build an untrained PyTorch model from the JSON configuration.
    lxmert_config = LxmertConfig.from_json_file(config_file)
    print("Building PyTorch model from configuration: {}".format(str(lxmert_config)))
    pt_model = LxmertForPreTraining(lxmert_config)

    # Copy the TensorFlow checkpoint weights into the PyTorch model.
    load_tf_weights_in_lxmert(pt_model, lxmert_config, tf_checkpoint_path)

    # Only the state dict (not the whole module object) is written to disk.
    print("Save PyTorch model to {}".format(pytorch_dump_path))
    weights = pt_model.state_dict()
    torch.save(weights, pytorch_dump_path)
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Command-line entry point: every option below is a mandatory string argument.
    arg_parser = argparse.ArgumentParser()
    # Required parameters, as (flag, help text) pairs in registration order.
    required_options = (
        ("--tf_checkpoint_path", "Path to the TensorFlow checkpoint path."),
        (
            "--config_file",
            "The config json file corresponding to the pre-trained model. \n"
            "This specifies the model architecture.",
        ),
        ("--pytorch_dump_path", "Path to the output PyTorch model."),
    )
    for option_name, option_help in required_options:
        arg_parser.add_argument(option_name, default=None, type=str, required=True, help=option_help)
    cli_args = arg_parser.parse_args()
    convert_tf_checkpoint_to_pytorch(
        cli_args.tf_checkpoint_path,
        cli_args.config_file,
        cli_args.pytorch_dump_path,
    )
|
||||
@@ -27,6 +27,7 @@ from transformers import (
|
||||
ELECTRA_PRETRAINED_CONFIG_ARCHIVE_MAP,
|
||||
FLAUBERT_PRETRAINED_CONFIG_ARCHIVE_MAP,
|
||||
GPT2_PRETRAINED_CONFIG_ARCHIVE_MAP,
|
||||
LXMERT_PRETRAINED_CONFIG_ARCHIVE_MAP,
|
||||
OPENAI_GPT_PRETRAINED_CONFIG_ARCHIVE_MAP,
|
||||
ROBERTA_PRETRAINED_CONFIG_ARCHIVE_MAP,
|
||||
T5_PRETRAINED_CONFIG_ARCHIVE_MAP,
|
||||
@@ -43,6 +44,7 @@ from transformers import (
|
||||
ElectraConfig,
|
||||
FlaubertConfig,
|
||||
GPT2Config,
|
||||
LxmertConfig,
|
||||
OpenAIGPTConfig,
|
||||
RobertaConfig,
|
||||
T5Config,
|
||||
@@ -57,6 +59,8 @@ from transformers import (
|
||||
TFElectraForPreTraining,
|
||||
TFFlaubertWithLMHeadModel,
|
||||
TFGPT2LMHeadModel,
|
||||
TFLxmertForPreTraining,
|
||||
TFLxmertVisualFeatureEncoder,
|
||||
TFOpenAIGPTLMHeadModel,
|
||||
TFRobertaForMaskedLM,
|
||||
TFRobertaForSequenceClassification,
|
||||
@@ -94,6 +98,8 @@ if is_torch_available():
|
||||
ElectraForPreTraining,
|
||||
FlaubertWithLMHeadModel,
|
||||
GPT2LMHeadModel,
|
||||
LxmertForPreTraining,
|
||||
LxmertVisualFeatureEncoder,
|
||||
OpenAIGPTLMHeadModel,
|
||||
RobertaForMaskedLM,
|
||||
RobertaForSequenceClassification,
|
||||
@@ -204,6 +210,18 @@ MODEL_CLASSES = {
|
||||
DistilBertForQuestionAnswering,
|
||||
DISTILBERT_PRETRAINED_CONFIG_ARCHIVE_MAP,
|
||||
),
|
||||
"lxmert": (
|
||||
LxmertConfig,
|
||||
TFLxmertForPreTraining,
|
||||
LxmertForPreTraining,
|
||||
LXMERT_PRETRAINED_CONFIG_ARCHIVE_MAP,
|
||||
),
|
||||
"lxmert-visual-feature-encoder": (
|
||||
LxmertConfig,
|
||||
TFLxmertVisualFeatureEncoder,
|
||||
LxmertVisualFeatureEncoder,
|
||||
LXMERT_PRETRAINED_CONFIG_ARCHIVE_MAP,
|
||||
),
|
||||
"ctrl": (
|
||||
CTRLConfig,
|
||||
TFCTRLLMHeadModel,
|
||||
|
||||
@@ -31,6 +31,7 @@ from .configuration_auto import (
|
||||
FlaubertConfig,
|
||||
GPT2Config,
|
||||
LongformerConfig,
|
||||
LxmertConfig,
|
||||
MBartConfig,
|
||||
MobileBertConfig,
|
||||
OpenAIGPTConfig,
|
||||
@@ -116,6 +117,7 @@ from .modeling_longformer import (
|
||||
LongformerForTokenClassification,
|
||||
LongformerModel,
|
||||
)
|
||||
from .modeling_lxmert import LxmertForPreTraining, LxmertModel
|
||||
from .modeling_marian import MarianMTModel
|
||||
from .modeling_mbart import MBartForConditionalGeneration
|
||||
from .modeling_mobilebert import (
|
||||
@@ -200,6 +202,7 @@ MODEL_MAPPING = OrderedDict(
|
||||
(CTRLConfig, CTRLModel),
|
||||
(ElectraConfig, ElectraModel),
|
||||
(ReformerConfig, ReformerModel),
|
||||
(LxmertConfig, LxmertModel),
|
||||
]
|
||||
)
|
||||
|
||||
@@ -224,6 +227,7 @@ MODEL_FOR_PRETRAINING_MAPPING = OrderedDict(
|
||||
(XLMConfig, XLMWithLMHeadModel),
|
||||
(CTRLConfig, CTRLLMHeadModel),
|
||||
(ElectraConfig, ElectraForPreTraining),
|
||||
(LxmertConfig, LxmertForPreTraining),
|
||||
]
|
||||
)
|
||||
|
||||
|
||||
1426
src/transformers/modeling_lxmert.py
Normal file
1426
src/transformers/modeling_lxmert.py
Normal file
File diff suppressed because it is too large
Load Diff
1378
src/transformers/modeling_tf_lxmert.py
Normal file
1378
src/transformers/modeling_tf_lxmert.py
Normal file
File diff suppressed because it is too large
Load Diff
@@ -883,7 +883,7 @@ MOBILEBERT_START_DOCSTRING = r"""
|
||||
|
||||
MOBILEBERT_INPUTS_DOCSTRING = r"""
|
||||
Args:
|
||||
input_ids (:obj:`Numpy array` or :obj:`tf.Tensor` of shape :obj:`{0}`):
|
||||
input_ids (:obj:`np.ndarray` or :obj:`tf.Tensor` of shape :obj:`{0}`):
|
||||
Indices of input sequence tokens in the vocabulary.
|
||||
|
||||
Indices can be obtained using :class:`transformers.MobileBertTokenizer`.
|
||||
@@ -891,28 +891,28 @@ MOBILEBERT_INPUTS_DOCSTRING = r"""
|
||||
:func:`transformers.PreTrainedTokenizer.__call__` for details.
|
||||
|
||||
`What are input IDs? <../glossary.html#input-ids>`__
|
||||
attention_mask (:obj:`Numpy array` or :obj:`tf.Tensor` of shape :obj:`{0}`, `optional`, defaults to :obj:`None`):
|
||||
attention_mask (:obj:`np.ndarray` or :obj:`tf.Tensor` of shape :obj:`{0}`, `optional`, defaults to :obj:`None`):
|
||||
Mask to avoid performing attention on padding token indices.
|
||||
Mask values selected in ``[0, 1]``:
|
||||
``1`` for tokens that are NOT MASKED, ``0`` for MASKED tokens.
|
||||
|
||||
`What are attention masks? <../glossary.html#attention-mask>`__
|
||||
token_type_ids (:obj:`Numpy array` or :obj:`tf.Tensor` of shape :obj:`{0}`, `optional`, defaults to :obj:`None`):
|
||||
token_type_ids (:obj:`np.ndarray` or :obj:`tf.Tensor` of shape :obj:`{0}`, `optional`, defaults to :obj:`None`):
|
||||
Segment token indices to indicate first and second portions of the inputs.
|
||||
Indices are selected in ``[0, 1]``: ``0`` corresponds to a `sentence A` token, ``1``
|
||||
corresponds to a `sentence B` token
|
||||
|
||||
`What are token type IDs? <../glossary.html#token-type-ids>`__
|
||||
position_ids (:obj:`Numpy array` or :obj:`tf.Tensor` of shape :obj:`{0}`, `optional`, defaults to :obj:`None`):
|
||||
position_ids (:obj:`np.ndarray` or :obj:`tf.Tensor` of shape :obj:`{0}`, `optional`, defaults to :obj:`None`):
|
||||
Indices of positions of each input sequence tokens in the position embeddings.
|
||||
Selected in the range ``[0, config.max_position_embeddings - 1]``.
|
||||
|
||||
`What are position IDs? <../glossary.html#position-ids>`__
|
||||
head_mask (:obj:`Numpy array` or :obj:`tf.Tensor` of shape :obj:`(num_heads,)` or :obj:`(num_layers, num_heads)`, `optional`, defaults to :obj:`None`):
|
||||
head_mask (:obj:`np.ndarray` or :obj:`tf.Tensor` of shape :obj:`(num_heads,)` or :obj:`(num_layers, num_heads)`, `optional`, defaults to :obj:`None`):
|
||||
Mask to nullify selected heads of the self-attention modules.
|
||||
Mask values selected in ``[0, 1]``:
|
||||
:obj:`1` indicates the head is **not masked**, :obj:`0` indicates the head is **masked**.
|
||||
inputs_embeds (:obj:`Numpy array` or :obj:`tf.Tensor` of shape :obj:`(batch_size, sequence_length, embedding_dim)`, `optional`, defaults to :obj:`None`):
|
||||
inputs_embeds (:obj:`np.ndarray` or :obj:`tf.Tensor` of shape :obj:`(batch_size, sequence_length, embedding_dim)`, `optional`, defaults to :obj:`None`):
|
||||
Optionally, instead of passing :obj:`input_ids` you can to directly pass an embedded representation.
|
||||
This is useful if you want more control over how to convert `input_ids` indices into associated vectors
|
||||
than the model's internal embedding lookup matrix.
|
||||
|
||||
@@ -191,7 +191,7 @@ class TFSequenceClassificationLoss:
|
||||
"""
|
||||
|
||||
def compute_loss(self, labels, logits):
|
||||
if shape_list(logits)[1] == 1:
|
||||
if len(shape_list(logits)) == 1 or shape_list(logits)[1] == 1:
|
||||
loss_fn = tf.keras.losses.MeanSquaredError(reduction=tf.keras.losses.Reduction.NONE)
|
||||
else:
|
||||
loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(
|
||||
|
||||
@@ -29,6 +29,7 @@ from .configuration_auto import (
|
||||
FlaubertConfig,
|
||||
GPT2Config,
|
||||
LongformerConfig,
|
||||
LxmertConfig,
|
||||
MarianConfig,
|
||||
MBartConfig,
|
||||
MobileBertConfig,
|
||||
@@ -55,6 +56,7 @@ from .tokenization_electra import ElectraTokenizer, ElectraTokenizerFast
|
||||
from .tokenization_flaubert import FlaubertTokenizer
|
||||
from .tokenization_gpt2 import GPT2Tokenizer, GPT2TokenizerFast
|
||||
from .tokenization_longformer import LongformerTokenizer, LongformerTokenizerFast
|
||||
from .tokenization_lxmert import LxmertTokenizer, LxmertTokenizerFast
|
||||
from .tokenization_marian import MarianTokenizer
|
||||
from .tokenization_mbart import MBartTokenizer
|
||||
from .tokenization_mobilebert import MobileBertTokenizer, MobileBertTokenizerFast
|
||||
@@ -91,6 +93,7 @@ TOKENIZER_MAPPING = OrderedDict(
|
||||
(RobertaConfig, (RobertaTokenizer, RobertaTokenizerFast)),
|
||||
(ReformerConfig, (ReformerTokenizer, None)),
|
||||
(ElectraConfig, (ElectraTokenizer, ElectraTokenizerFast)),
|
||||
(LxmertConfig, (LxmertTokenizer, LxmertTokenizerFast)),
|
||||
(BertConfig, (BertTokenizer, BertTokenizerFast)),
|
||||
(OpenAIGPTConfig, (OpenAIGPTTokenizer, OpenAIGPTTokenizerFast)),
|
||||
(GPT2Config, (GPT2Tokenizer, GPT2TokenizerFast)),
|
||||
@@ -128,6 +131,7 @@ class AutoTokenizer:
|
||||
- `xlm`: XLMTokenizer (XLM model)
|
||||
- `ctrl`: CTRLTokenizer (Salesforce CTRL model)
|
||||
- `electra`: ElectraTokenizer (Google ELECTRA model)
|
||||
- `lxmert`: LxmertTokenizer (Lxmert model)
|
||||
|
||||
This class cannot be instantiated using `__init__()` (throw an error).
|
||||
"""
|
||||
@@ -163,6 +167,7 @@ class AutoTokenizer:
|
||||
- `xlm`: XLMTokenizer (XLM model)
|
||||
- `ctrl`: CTRLTokenizer (Salesforce CTRL model)
|
||||
- `electra`: ElectraTokenizer (Google ELECTRA model)
|
||||
- `lxmert`: LxmertTokenizer (Lxmert model)
|
||||
|
||||
Params:
|
||||
pretrained_model_name_or_path: either:
|
||||
|
||||
80
src/transformers/tokenization_lxmert.py
Normal file
80
src/transformers/tokenization_lxmert.py
Normal file
@@ -0,0 +1,80 @@
|
||||
# coding=utf-8
|
||||
# Copyright 2020 The Google AI Team, Stanford University and The HuggingFace Inc. team.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
from .tokenization_bert import BertTokenizer, BertTokenizerFast
|
||||
|
||||
|
||||
# Keyword-argument names of the tokenizer `__init__` mapped to the file names
# used when serializing tokenizer instances.
VOCAB_FILES_NAMES = {
    "vocab_file": "vocab.txt",
}

# Keyword-argument names of the tokenizer `__init__` mapped to the pretrained
# vocabulary URL of every model shortcut name.
PRETRAINED_VOCAB_FILES_MAP = {
    "vocab_file": {
        "unc-nlp/lxmert-base-uncased": "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased-vocab.txt",
    },
}

# Model shortcut names mapped to the maximum input length.
PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES = {"unc-nlp/lxmert-base-uncased": 512}

# Model shortcut names mapped to extra keyword arguments for the tokenizer
# `__init__`, for checkpoint-specific configuration.
PRETRAINED_INIT_CONFIGURATION = {
    "unc-nlp/lxmert-base-uncased": {
        "do_lower_case": True,
    },
}
|
||||
|
||||
|
||||
class LxmertTokenizer(BertTokenizer):
    r"""
    Lxmert tokenizer.

    :class:`~transformers.LxmertTokenizer` subclasses :class:`~transformers.BertTokenizer` without overriding any
    method, so it performs the same end-to-end tokenization (punctuation splitting + wordpiece). Only the
    class-level vocabulary and configuration mappings below are Lxmert-specific.

    See the superclass :class:`~transformers.BertTokenizer` for usage examples and parameter documentation.
    """

    # Class-level lookup tables, taken from this module's constants.
    pretrained_init_configuration = PRETRAINED_INIT_CONFIGURATION
    max_model_input_sizes = PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES
    pretrained_vocab_files_map = PRETRAINED_VOCAB_FILES_MAP
    vocab_files_names = VOCAB_FILES_NAMES
|
||||
|
||||
|
||||
class LxmertTokenizerFast(BertTokenizerFast):
    r"""
    "Fast" Lxmert tokenizer (backed by HuggingFace's `tokenizers` library).

    :class:`~transformers.LxmertTokenizerFast` subclasses :class:`~transformers.BertTokenizerFast` without
    overriding any method, so it performs the same end-to-end tokenization (punctuation splitting + wordpiece).
    Only the class-level vocabulary and configuration mappings below are Lxmert-specific.

    See the superclass :class:`~transformers.BertTokenizerFast` for usage examples and parameter documentation.
    """

    # Class-level lookup tables, taken from this module's constants.
    pretrained_init_configuration = PRETRAINED_INIT_CONFIGURATION
    max_model_input_sizes = PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES
    pretrained_vocab_files_map = PRETRAINED_VOCAB_FILES_MAP
    vocab_files_names = VOCAB_FILES_NAMES
|
||||
Reference in New Issue
Block a user