ConvBERT Model (#9717)

* finalize convbert

* finalize convbert

* fix

* fix

* fix

* push

* fix

* tf image patches

* fix torch model

* tf tests

* conversion

* everything aligned

* remove print

* tf tests

* fix tf

* make tf tests pass

* everything works

* fix init

* fix

* special treatment for sepconv1d

* style

* 🙏🏽

* add doc and cleanup

* add electra test again

* fix doc

* fix doc again

* fix doc again

* Update src/transformers/modeling_tf_pytorch_utils.py

Co-authored-by: Lysandre Debut <lysandre@huggingface.co>

* Update src/transformers/models/conv_bert/configuration_conv_bert.py

Co-authored-by: Lysandre Debut <lysandre@huggingface.co>

* Update docs/source/model_doc/conv_bert.rst

Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com>

* Update src/transformers/models/auto/configuration_auto.py

Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com>

* Update src/transformers/models/conv_bert/configuration_conv_bert.py

Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com>

* conv_bert -> convbert

* more fixes from review

* add conversion script

* dont use pretrained embed

* unused config

* suggestions from julien

* some more fixes

* p -> param

* fix copyright

* fix doc

* Update src/transformers/models/convbert/configuration_convbert.py

Co-authored-by: Patrick von Platen <patrick.v.platen@gmail.com>

* comments from reviews

* fix-copies

* fix style

* revert shape_list

Co-authored-by: Lysandre Debut <lysandre@huggingface.co>
Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com>
Co-authored-by: Patrick von Platen <patrick.v.platen@gmail.com>
This commit is contained in:
abhishek thakur
2021-01-27 09:20:09 +01:00
committed by GitHub
parent e575e06287
commit f617490e71
25 changed files with 4538 additions and 33 deletions

View File

@@ -125,6 +125,7 @@ _import_structure = {
],
"models": [],
# Models
"models.convbert": ["CONVBERT_PRETRAINED_CONFIG_ARCHIVE_MAP", "ConvBertConfig", "ConvBertTokenizer"],
"models.albert": ["ALBERT_PRETRAINED_CONFIG_ARCHIVE_MAP", "AlbertConfig"],
"models.auto": [
"ALL_PRETRAINED_CONFIG_ARCHIVE_MAP",
@@ -275,6 +276,7 @@ else:
# tokenziers-backed objects
if is_tokenizers_available():
# Fast tokenizers
_import_structure["models.convbert"].append("ConvBertTokenizerFast")
_import_structure["models.albert"].append("AlbertTokenizerFast")
_import_structure["models.bart"].append("BartTokenizerFast")
_import_structure["models.barthez"].append("BarthezTokenizerFast")
@@ -360,6 +362,21 @@ if is_torch_available():
_import_structure["generation_utils"] = ["top_k_top_p_filtering"]
_import_structure["modeling_utils"] = ["Conv1D", "PreTrainedModel", "apply_chunking_to_forward", "prune_layer"]
# PyTorch models structure
_import_structure["models.convbert"].extend(
[
"CONVBERT_PRETRAINED_MODEL_ARCHIVE_LIST",
"ConvBertForMaskedLM",
"ConvBertForMultipleChoice",
"ConvBertForQuestionAnswering",
"ConvBertForSequenceClassification",
"ConvBertForTokenClassification",
"ConvBertLayer",
"ConvBertModel",
"ConvBertPreTrainedModel",
"load_tf_weights_in_convbert",
]
)
_import_structure["models.albert"].extend(
[
"ALBERT_PRETRAINED_MODEL_ARCHIVE_LIST",
@@ -816,6 +833,20 @@ if is_tf_available():
"shape_list",
]
# TensorFlow models structure
_import_structure["models.convbert"].extend(
[
"TF_CONVBERT_PRETRAINED_MODEL_ARCHIVE_LIST",
"TFConvBertForMaskedLM",
"TFConvBertForMultipleChoice",
"TFConvBertForQuestionAnswering",
"TFConvBertForSequenceClassification",
"TFConvBertForTokenClassification",
"TFConvBertLayer",
"TFConvBertModel",
"TFConvBertPreTrainedModel",
]
)
_import_structure["models.albert"].extend(
[
"TF_ALBERT_PRETRAINED_MODEL_ARCHIVE_LIST",
@@ -1235,6 +1266,7 @@ if TYPE_CHECKING:
BlenderbotSmallTokenizer,
)
from .models.camembert import CAMEMBERT_PRETRAINED_CONFIG_ARCHIVE_MAP, CamembertConfig
from .models.convbert import CONVBERT_PRETRAINED_CONFIG_ARCHIVE_MAP, ConvBertConfig, ConvBertTokenizer
from .models.ctrl import CTRL_PRETRAINED_CONFIG_ARCHIVE_MAP, CTRLConfig, CTRLTokenizer
from .models.deberta import DEBERTA_PRETRAINED_CONFIG_ARCHIVE_MAP, DebertaConfig, DebertaTokenizer
from .models.distilbert import DISTILBERT_PRETRAINED_CONFIG_ARCHIVE_MAP, DistilBertConfig, DistilBertTokenizer
@@ -1359,6 +1391,7 @@ if TYPE_CHECKING:
from .models.barthez import BarthezTokenizerFast
from .models.bert import BertTokenizerFast
from .models.camembert import CamembertTokenizerFast
from .models.convbert import ConvBertTokenizerFast
from .models.distilbert import DistilBertTokenizerFast
from .models.dpr import DPRContextEncoderTokenizerFast, DPRQuestionEncoderTokenizerFast, DPRReaderTokenizerFast
from .models.electra import ElectraTokenizerFast
@@ -1521,6 +1554,18 @@ if TYPE_CHECKING:
CamembertForTokenClassification,
CamembertModel,
)
from .models.convbert import (
CONVBERT_PRETRAINED_MODEL_ARCHIVE_LIST,
ConvBertForMaskedLM,
ConvBertForMultipleChoice,
ConvBertForQuestionAnswering,
ConvBertForSequenceClassification,
ConvBertForTokenClassification,
ConvBertLayer,
ConvBertModel,
ConvBertPreTrainedModel,
load_tf_weights_in_convbert,
)
from .models.ctrl import (
CTRL_PRETRAINED_MODEL_ARCHIVE_LIST,
CTRLForSequenceClassification,
@@ -1879,6 +1924,17 @@ if TYPE_CHECKING:
TFCamembertForTokenClassification,
TFCamembertModel,
)
from .models.convbert import (
TF_CONVBERT_PRETRAINED_MODEL_ARCHIVE_LIST,
TFConvBertForMaskedLM,
TFConvBertForMultipleChoice,
TFConvBertForQuestionAnswering,
TFConvBertForSequenceClassification,
TFConvBertForTokenClassification,
TFConvBertLayer,
TFConvBertModel,
TFConvBertPreTrainedModel,
)
from .models.ctrl import (
TF_CTRL_PRETRAINED_MODEL_ARCHIVE_LIST,
TFCTRLForSequenceClassification,

View File

@@ -605,6 +605,7 @@ SLOW_TO_FAST_CONVERTERS = {
"BarthezTokenizer": BarthezConverter,
"BertTokenizer": BertConverter,
"CamembertTokenizer": CamembertConverter,
"ConvBertTokenizer": BertConverter,
"DistilBertTokenizer": BertConverter,
"DPRReaderTokenizer": BertConverter,
"DPRQuestionEncoderTokenizer": BertConverter,

View File

@@ -64,6 +64,10 @@ def convert_tf_weight_name_to_pt_weight_name(tf_name, start_prefix_to_remove="")
if tf_name[-1] == "beta":
tf_name[-1] = "bias"
# The SeparableConv1D TF layer contains two weights that are translated to PyTorch Conv1D here
if tf_name[-1] == "pointwise_kernel" or tf_name[-1] == "depthwise_kernel":
tf_name[-1] = tf_name[-1].replace("_kernel", ".weight")
# Remove prefix if needed
tf_name = ".".join(tf_name)
if start_prefix_to_remove:
@@ -127,7 +131,6 @@ def load_pytorch_weights_in_tf2_model(tf_model, pt_state_dict, tf_inputs=None, a
if tf_inputs is not None:
tf_model(tf_inputs, training=False) # Make sure model is built
# Adapt state dict - TODO remove this and update the AWS weights files instead
# Convert old format to new format if needed from a PyTorch state_dict
old_keys = []

View File

@@ -28,6 +28,7 @@ from . import (
blenderbot,
blenderbot_small,
camembert,
convbert,
ctrl,
deberta,
dialogpt,

View File

@@ -28,6 +28,7 @@ from ..blenderbot_small.configuration_blenderbot_small import (
BlenderbotSmallConfig,
)
from ..camembert.configuration_camembert import CAMEMBERT_PRETRAINED_CONFIG_ARCHIVE_MAP, CamembertConfig
from ..convbert.configuration_convbert import CONVBERT_PRETRAINED_CONFIG_ARCHIVE_MAP, ConvBertConfig
from ..ctrl.configuration_ctrl import CTRL_PRETRAINED_CONFIG_ARCHIVE_MAP, CTRLConfig
from ..deberta.configuration_deberta import DEBERTA_PRETRAINED_CONFIG_ARCHIVE_MAP, DebertaConfig
from ..distilbert.configuration_distilbert import DISTILBERT_PRETRAINED_CONFIG_ARCHIVE_MAP, DistilBertConfig
@@ -71,6 +72,7 @@ ALL_PRETRAINED_CONFIG_ARCHIVE_MAP = dict(
(key, value)
for pretrained_map in [
# Add archive maps here
CONVBERT_PRETRAINED_CONFIG_ARCHIVE_MAP,
LED_PRETRAINED_CONFIG_ARCHIVE_MAP,
BLENDERBOT_SMALL_PRETRAINED_CONFIG_ARCHIVE_MAP,
BERT_PRETRAINED_CONFIG_ARCHIVE_MAP,
@@ -112,6 +114,7 @@ ALL_PRETRAINED_CONFIG_ARCHIVE_MAP = dict(
CONFIG_MAPPING = OrderedDict(
[
# Add configs here
("convbert", ConvBertConfig),
("led", LEDConfig),
("blenderbot-small", BlenderbotSmallConfig),
("retribert", RetriBertConfig),
@@ -159,6 +162,7 @@ CONFIG_MAPPING = OrderedDict(
MODEL_NAMES_MAPPING = OrderedDict(
[
# Add full (and cased) model names here
("convbert", "ConvBERT"),
("led", "LED"),
("blenderbot-small", "BlenderbotSmall"),
("retribert", "RetriBERT"),

View File

@@ -61,6 +61,16 @@ from ..camembert.modeling_camembert import (
CamembertForTokenClassification,
CamembertModel,
)
# Add modeling imports here
from ..convbert.modeling_convbert import (
ConvBertForMaskedLM,
ConvBertForMultipleChoice,
ConvBertForQuestionAnswering,
ConvBertForSequenceClassification,
ConvBertForTokenClassification,
ConvBertModel,
)
from ..ctrl.modeling_ctrl import CTRLForSequenceClassification, CTRLLMHeadModel, CTRLModel
from ..deberta.modeling_deberta import (
DebertaForMaskedLM,
@@ -234,6 +244,7 @@ from .configuration_auto import (
BlenderbotConfig,
BlenderbotSmallConfig,
CamembertConfig,
ConvBertConfig,
CTRLConfig,
DebertaConfig,
DistilBertConfig,
@@ -277,6 +288,7 @@ logger = logging.get_logger(__name__)
MODEL_MAPPING = OrderedDict(
[
# Base model mapping
(ConvBertConfig, ConvBertModel),
(LEDConfig, LEDModel),
(BlenderbotSmallConfig, BlenderbotSmallModel),
(RetriBertConfig, RetriBertModel),
@@ -355,6 +367,7 @@ MODEL_FOR_PRETRAINING_MAPPING = OrderedDict(
MODEL_WITH_LM_HEAD_MAPPING = OrderedDict(
[
# Model with LM heads mapping
(ConvBertConfig, ConvBertForMaskedLM),
(LEDConfig, LEDForConditionalGeneration),
(BlenderbotSmallConfig, BlenderbotSmallForConditionalGeneration),
(LayoutLMConfig, LayoutLMForMaskedLM),
@@ -414,6 +427,7 @@ MODEL_FOR_CAUSAL_LM_MAPPING = OrderedDict(
MODEL_FOR_MASKED_LM_MAPPING = OrderedDict(
[
# Model for Masked LM mapping
(ConvBertConfig, ConvBertForMaskedLM),
(LayoutLMConfig, LayoutLMForMaskedLM),
(DistilBertConfig, DistilBertForMaskedLM),
(AlbertConfig, AlbertForMaskedLM),
@@ -459,6 +473,7 @@ MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING = OrderedDict(
MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING = OrderedDict(
[
# Model for Sequence Classification mapping
(ConvBertConfig, ConvBertForSequenceClassification),
(LEDConfig, LEDForSequenceClassification),
(DistilBertConfig, DistilBertForSequenceClassification),
(AlbertConfig, AlbertForSequenceClassification),
@@ -491,6 +506,7 @@ MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING = OrderedDict(
MODEL_FOR_QUESTION_ANSWERING_MAPPING = OrderedDict(
[
# Model for Question Answering mapping
(ConvBertConfig, ConvBertForQuestionAnswering),
(LEDConfig, LEDForQuestionAnswering),
(DistilBertConfig, DistilBertForQuestionAnswering),
(AlbertConfig, AlbertForQuestionAnswering),
@@ -525,6 +541,7 @@ MODEL_FOR_TABLE_QUESTION_ANSWERING_MAPPING = OrderedDict(
MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING = OrderedDict(
[
# Model for Token Classification mapping
(ConvBertConfig, ConvBertForTokenClassification),
(LayoutLMConfig, LayoutLMForTokenClassification),
(DistilBertConfig, DistilBertForTokenClassification),
(CamembertConfig, CamembertForTokenClassification),
@@ -549,6 +566,7 @@ MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING = OrderedDict(
MODEL_FOR_MULTIPLE_CHOICE_MAPPING = OrderedDict(
[
# Model for Multiple Choice mapping
(ConvBertConfig, ConvBertForMultipleChoice),
(CamembertConfig, CamembertForMultipleChoice),
(ElectraConfig, ElectraForMultipleChoice),
(XLMRobertaConfig, XLMRobertaForMultipleChoice),

View File

@@ -57,6 +57,14 @@ from ..camembert.modeling_tf_camembert import (
TFCamembertForTokenClassification,
TFCamembertModel,
)
from ..convbert.modeling_tf_convbert import (
TFConvBertForMaskedLM,
TFConvBertForMultipleChoice,
TFConvBertForQuestionAnswering,
TFConvBertForSequenceClassification,
TFConvBertForTokenClassification,
TFConvBertModel,
)
from ..ctrl.modeling_tf_ctrl import TFCTRLForSequenceClassification, TFCTRLLMHeadModel, TFCTRLModel
from ..distilbert.modeling_tf_distilbert import (
TFDistilBertForMaskedLM,
@@ -173,6 +181,7 @@ from .configuration_auto import (
BlenderbotConfig,
BlenderbotSmallConfig,
CamembertConfig,
ConvBertConfig,
CTRLConfig,
DistilBertConfig,
DPRConfig,
@@ -206,6 +215,7 @@ logger = logging.get_logger(__name__)
TF_MODEL_MAPPING = OrderedDict(
[
# Base model mapping
(ConvBertConfig, TFConvBertModel),
(LEDConfig, TFLEDModel),
(LxmertConfig, TFLxmertModel),
(MT5Config, TFMT5Model),
@@ -268,6 +278,7 @@ TF_MODEL_FOR_PRETRAINING_MAPPING = OrderedDict(
TF_MODEL_WITH_LM_HEAD_MAPPING = OrderedDict(
[
# Model with LM heads mapping
(ConvBertConfig, TFConvBertForMaskedLM),
(LEDConfig, TFLEDForConditionalGeneration),
(T5Config, TFT5ForConditionalGeneration),
(DistilBertConfig, TFDistilBertForMaskedLM),
@@ -312,6 +323,7 @@ TF_MODEL_FOR_CAUSAL_LM_MAPPING = OrderedDict(
TF_MODEL_FOR_MASKED_LM_MAPPING = OrderedDict(
[
# Model for Masked LM mapping
(ConvBertConfig, TFConvBertForMaskedLM),
(DistilBertConfig, TFDistilBertForMaskedLM),
(AlbertConfig, TFAlbertForMaskedLM),
(CamembertConfig, TFCamembertForMaskedLM),
@@ -347,6 +359,7 @@ TF_MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING = OrderedDict(
TF_MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING = OrderedDict(
[
# Model for Sequence Classification mapping
(ConvBertConfig, TFConvBertForSequenceClassification),
(DistilBertConfig, TFDistilBertForSequenceClassification),
(AlbertConfig, TFAlbertForSequenceClassification),
(CamembertConfig, TFCamembertForSequenceClassification),
@@ -371,6 +384,7 @@ TF_MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING = OrderedDict(
TF_MODEL_FOR_QUESTION_ANSWERING_MAPPING = OrderedDict(
[
# Model for Question Answering mapping
(ConvBertConfig, TFConvBertForQuestionAnswering),
(DistilBertConfig, TFDistilBertForQuestionAnswering),
(AlbertConfig, TFAlbertForQuestionAnswering),
(CamembertConfig, TFCamembertForQuestionAnswering),
@@ -391,6 +405,7 @@ TF_MODEL_FOR_QUESTION_ANSWERING_MAPPING = OrderedDict(
TF_MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING = OrderedDict(
[
# Model for Token Classification mapping
(ConvBertConfig, TFConvBertForTokenClassification),
(DistilBertConfig, TFDistilBertForTokenClassification),
(AlbertConfig, TFAlbertForTokenClassification),
(CamembertConfig, TFCamembertForTokenClassification),
@@ -411,6 +426,7 @@ TF_MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING = OrderedDict(
TF_MODEL_FOR_MULTIPLE_CHOICE_MAPPING = OrderedDict(
[
# Model for Multiple Choice mapping
(ConvBertConfig, TFConvBertForMultipleChoice),
(CamembertConfig, TFCamembertForMultipleChoice),
(XLMConfig, TFXLMForMultipleChoice),
(XLMRobertaConfig, TFXLMRobertaForMultipleChoice),

View File

@@ -26,6 +26,7 @@ from ..bert_japanese.tokenization_bert_japanese import BertJapaneseTokenizer
from ..bertweet.tokenization_bertweet import BertweetTokenizer
from ..blenderbot.tokenization_blenderbot import BlenderbotTokenizer
from ..blenderbot_small.tokenization_blenderbot_small import BlenderbotSmallTokenizer
from ..convbert.tokenization_convbert import ConvBertTokenizer
from ..ctrl.tokenization_ctrl import CTRLTokenizer
from ..deberta.tokenization_deberta import DebertaTokenizer
from ..distilbert.tokenization_distilbert import DistilBertTokenizer
@@ -61,6 +62,7 @@ from .configuration_auto import (
BlenderbotConfig,
BlenderbotSmallConfig,
CamembertConfig,
ConvBertConfig,
CTRLConfig,
DebertaConfig,
DistilBertConfig,
@@ -134,6 +136,7 @@ if is_tokenizers_available():
from ..barthez.tokenization_barthez_fast import BarthezTokenizerFast
from ..bert.tokenization_bert_fast import BertTokenizerFast
from ..camembert.tokenization_camembert_fast import CamembertTokenizerFast
from ..convbert.tokenization_convbert_fast import ConvBertTokenizerFast
from ..distilbert.tokenization_distilbert_fast import DistilBertTokenizerFast
from ..dpr.tokenization_dpr_fast import DPRQuestionEncoderTokenizerFast
from ..electra.tokenization_electra_fast import ElectraTokenizerFast
@@ -163,6 +166,7 @@ else:
BarthezTokenizerFast = None
BertTokenizerFast = None
CamembertTokenizerFast = None
ConvBertTokenizerFast = None
DistilBertTokenizerFast = None
DPRQuestionEncoderTokenizerFast = None
ElectraTokenizerFast = None
@@ -233,6 +237,7 @@ TOKENIZER_MAPPING = OrderedDict(
(MPNetConfig, (MPNetTokenizer, MPNetTokenizerFast)),
(TapasConfig, (TapasTokenizer, None)),
(LEDConfig, (LEDTokenizer, LEDTokenizerFast)),
(ConvBertConfig, (ConvBertTokenizer, ConvBertTokenizerFast)),
]
)

View File

@@ -0,0 +1,111 @@
# flake8: noqa
# There's no way to ignore "F401 '...' imported but unused" warnings in this
# module, but to preserve other warnings. So, don't check this module at all.
# Copyright 2020 The HuggingFace Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from typing import TYPE_CHECKING
from ...file_utils import _BaseLazyModule, is_tf_available, is_tokenizers_available, is_torch_available
_import_structure = {
"configuration_convbert": ["CONVBERT_PRETRAINED_CONFIG_ARCHIVE_MAP", "ConvBertConfig"],
"tokenization_convbert": ["ConvBertTokenizer"],
}
if is_tokenizers_available():
_import_structure["tokenization_convbert_fast"] = ["ConvBertTokenizerFast"]
if is_torch_available():
_import_structure["modeling_convbert"] = [
"CONVBERT_PRETRAINED_MODEL_ARCHIVE_LIST",
"ConvBertForMaskedLM",
"ConvBertForMultipleChoice",
"ConvBertForQuestionAnswering",
"ConvBertForSequenceClassification",
"ConvBertForTokenClassification",
"ConvBertLayer",
"ConvBertModel",
"ConvBertPreTrainedModel",
"load_tf_weights_in_convbert",
]
if is_tf_available():
_import_structure["modeling_tf_convbert"] = [
"TF_CONVBERT_PRETRAINED_MODEL_ARCHIVE_LIST",
"TFConvBertForMaskedLM",
"TFConvBertForMultipleChoice",
"TFConvBertForQuestionAnswering",
"TFConvBertForSequenceClassification",
"TFConvBertForTokenClassification",
"TFConvBertLayer",
"TFConvBertModel",
"TFConvBertPreTrainedModel",
]
if TYPE_CHECKING:
from .configuration_convbert import CONVBERT_PRETRAINED_CONFIG_ARCHIVE_MAP, ConvBertConfig
from .tokenization_convbert import ConvBertTokenizer
if is_tokenizers_available():
from .tokenization_convbert_fast import ConvBertTokenizerFast
if is_torch_available():
from .modeling_convbert import (
CONVBERT_PRETRAINED_MODEL_ARCHIVE_LIST,
ConvBertForMaskedLM,
ConvBertForMultipleChoice,
ConvBertForQuestionAnswering,
ConvBertForSequenceClassification,
ConvBertForTokenClassification,
ConvBertLayer,
ConvBertModel,
ConvBertPreTrainedModel,
load_tf_weights_in_convbert,
)
if is_tf_available():
from .modeling_tf_convbert import (
TF_CONVBERT_PRETRAINED_MODEL_ARCHIVE_LIST,
TFConvBertForMaskedLM,
TFConvBertForMultipleChoice,
TFConvBertForQuestionAnswering,
TFConvBertForSequenceClassification,
TFConvBertForTokenClassification,
TFConvBertLayer,
TFConvBertModel,
TFConvBertPreTrainedModel,
)
else:
import importlib
import os
import sys
class _LazyModule(_BaseLazyModule):
"""
Module class that surfaces all objects but only performs associated imports when the objects are requested.
"""
__file__ = globals()["__file__"]
__path__ = [os.path.dirname(__file__)]
def _get_module(self, module_name: str):
return importlib.import_module("." + module_name, self.__name__)
sys.modules[__name__] = _LazyModule(__name__, _import_structure)

View File

@@ -0,0 +1,136 @@
# coding=utf-8
# Copyright The HuggingFace team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
""" ConvBERT model configuration """
from ...configuration_utils import PretrainedConfig
from ...utils import logging
logger = logging.get_logger(__name__)
CONVBERT_PRETRAINED_CONFIG_ARCHIVE_MAP = {
"YituTech/conv-bert-base": "https://huggingface.co/YituTech/conv-bert-base/resolve/main/config.json",
"YituTech/conv-bert-medium-small": "https://huggingface.co/YituTech/conv-bert-medium-small/resolve/main/config.json",
"YituTech/conv-bert-small": "https://huggingface.co/YituTech/conv-bert-small/resolve/main/config.json",
# See all ConvBERT models at https://huggingface.co/models?filter=convbert
}
class ConvBertConfig(PretrainedConfig):
r"""
This is the configuration class to store the configuration of a :class:`~transformers.ConvBertModel`. It is used to
instantiate an ConvBERT model according to the specified arguments, defining the model architecture. Instantiating
a configuration with the defaults will yield a similar configuration to that of the ConvBERT `conv-bert-base
<https://huggingface.co/YituTech/conv-bert-base>`__ architecture. Configuration objects inherit from
:class:`~transformers.PretrainedConfig` and can be used to control the model outputs. Read the documentation from
:class:`~transformers.PretrainedConfig` for more information.
Args:
vocab_size (:obj:`int`, `optional`, defaults to 30522):
Vocabulary size of the ConvBERT model. Defines the number of different tokens that can be represented by
the :obj:`inputs_ids` passed when calling :class:`~transformers.ConvBertModel` or
:class:`~transformers.TFConvBertModel`.
hidden_size (:obj:`int`, `optional`, defaults to 768):
Dimensionality of the encoder layers and the pooler layer.
num_hidden_layers (:obj:`int`, `optional`, defaults to 12):
Number of hidden layers in the Transformer encoder.
num_attention_heads (:obj:`int`, `optional`, defaults to 12):
Number of attention heads for each attention layer in the Transformer encoder.
intermediate_size (:obj:`int`, `optional`, defaults to 3072):
Dimensionality of the "intermediate" (i.e., feed-forward) layer in the Transformer encoder.
hidden_act (:obj:`str` or :obj:`function`, `optional`, defaults to :obj:`"gelu"`):
The non-linear activation function (function or string) in the encoder and pooler. If string,
:obj:`"gelu"`, :obj:`"relu"`, :obj:`"selu"` and :obj:`"gelu_new"` are supported.
hidden_dropout_prob (:obj:`float`, `optional`, defaults to 0.1):
The dropout probabilitiy for all fully connected layers in the embeddings, encoder, and pooler.
attention_probs_dropout_prob (:obj:`float`, `optional`, defaults to 0.1):
The dropout ratio for the attention probabilities.
max_position_embeddings (:obj:`int`, `optional`, defaults to 512):
The maximum sequence length that this model might ever be used with. Typically set this to something large
just in case (e.g., 512 or 1024 or 2048).
type_vocab_size (:obj:`int`, `optional`, defaults to 2):
The vocabulary size of the :obj:`token_type_ids` passed when calling :class:`~transformers.ConvBertModel`
or :class:`~transformers.TFConvBertModel`.
initializer_range (:obj:`float`, `optional`, defaults to 0.02):
The standard deviation of the truncated_normal_initializer for initializing all weight matrices.
layer_norm_eps (:obj:`float`, `optional`, defaults to 1e-12):
The epsilon used by the layer normalization layers.
head_ratio (:obj:`int`, `optional`, defaults to 2):
Ratio gamma to reduce the number of attention heads.
num_groups (:obj:`int`, `optional`, defaults to 1):
The number of groups for grouped linear layers for ConvBert model
conv_kernel_size (:obj:`int`, `optional`, defaults to 9):
The size of the convolutional kernel.
Example::
>>> from transformers import ConvBertModel, ConvBertConfig
>>> # Initializing a ConvBERT convbert-base-uncased style configuration
>>> configuration = ConvBertConfig()
>>> # Initializing a model from the convbert-base-uncased style configuration
>>> model = ConvBertModel(configuration)
>>> # Accessing the model configuration
>>> configuration = model.config
"""
model_type = "convbert"
def __init__(
self,
vocab_size=30522,
hidden_size=768,
is_encoder_decoder=False,
num_hidden_layers=12,
num_attention_heads=12,
intermediate_size=3072,
hidden_act="gelu",
hidden_dropout_prob=0.1,
attention_probs_dropout_prob=0.1,
max_position_embeddings=512,
type_vocab_size=2,
initializer_range=0.02,
layer_norm_eps=1e-12,
pad_token_id=1,
bos_token_id=0,
eos_token_id=2,
embedding_size=768,
head_ratio=2,
conv_kernel_size=9,
num_groups=1,
**kwargs,
):
super().__init__(
pad_token_id=pad_token_id,
is_encoder_decoder=is_encoder_decoder,
bos_token_id=bos_token_id,
eos_token_id=eos_token_id,
**kwargs,
)
self.vocab_size = vocab_size
self.hidden_size = hidden_size
self.num_hidden_layers = num_hidden_layers
self.num_attention_heads = num_attention_heads
self.intermediate_size = intermediate_size
self.hidden_act = hidden_act
self.hidden_dropout_prob = hidden_dropout_prob
self.attention_probs_dropout_prob = attention_probs_dropout_prob
self.max_position_embeddings = max_position_embeddings
self.type_vocab_size = type_vocab_size
self.initializer_range = initializer_range
self.layer_norm_eps = layer_norm_eps
self.embedding_size = embedding_size
self.head_ratio = head_ratio
self.conv_kernel_size = conv_kernel_size
self.num_groups = num_groups

View File

@@ -0,0 +1,52 @@
# coding=utf-8
# Copyright 2020 The HuggingFace Inc. team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Convert ConvBERT checkpoint."""
import argparse
from ...utils import logging
from .modeling_convbert import ConvBertConfig, ConvBertModel, load_tf_weights_in_convbert
logging.set_verbosity_info()
def convert_orig_tf1_checkpoint_to_pytorch(tf_checkpoint_path, convbert_config_file, pytorch_dump_path):
conf = ConvBertConfig.from_json_file(convbert_config_file)
model = ConvBertModel(conf)
model = load_tf_weights_in_convbert(model, conf, tf_checkpoint_path)
model.save_pretrained(pytorch_dump_path)
if __name__ == "__main__":
parser = argparse.ArgumentParser()
# Required parameters
parser.add_argument(
"--tf_checkpoint_path", default=None, type=str, required=True, help="Path to the TensorFlow checkpoint path."
)
parser.add_argument(
"--convbert_config_file",
default=None,
type=str,
required=True,
help="The config json file corresponding to the pre-trained ConvBERT model. \n"
"This specifies the model architecture.",
)
parser.add_argument(
"--pytorch_dump_path", default=None, type=str, required=True, help="Path to the output PyTorch model."
)
args = parser.parse_args()
convert_orig_tf1_checkpoint_to_pytorch(args.tf_checkpoint_path, args.conv_bert_config_file, args.pytorch_dump_path)

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,56 @@
# coding=utf-8
# Copyright 2018 The HuggingFace Inc. team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tokenization classes for ConvBERT."""
from ...utils import logging
from ..bert.tokenization_bert import BertTokenizer
logger = logging.get_logger(__name__)
VOCAB_FILES_NAMES = {"vocab_file": "vocab.txt"}
PRETRAINED_VOCAB_FILES_MAP = {
"vocab_file": {
"YituTech/conv-bert-base": "https://huggingface.co/YituTech/conv-bert-base/resolve/main/vocab.txt",
"YituTech/conv-bert-medium-small": "https://huggingface.co/YituTech/conv-bert-medium-small/resolve/main/vocab.txt",
"YituTech/conv-bert-small": "https://huggingface.co/YituTech/conv-bert-small/resolve/main/vocab.txt",
}
}
PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES = {
"YituTech/conv-bert-base": 512,
"YituTech/conv-bert-medium-small": 512,
"YituTech/conv-bert-small": 512,
}
PRETRAINED_INIT_CONFIGURATION = {
"YituTech/conv-bert-base": {"do_lower_case": True},
"YituTech/conv-bert-medium-small": {"do_lower_case": True},
"YituTech/conv-bert-small": {"do_lower_case": True},
}
class ConvBertTokenizer(BertTokenizer):
r"""
Construct a ConvBERT tokenizer. :class:`~transformers.ConvBertTokenizer` is identical to
:class:`~transformers.BertTokenizer` and runs end-to-end tokenization: punctuation splitting and wordpiece. Refer
to superclass :class:`~transformers.BertTokenizer` for usage examples and documentation concerning parameters.
"""
vocab_files_names = VOCAB_FILES_NAMES
pretrained_vocab_files_map = PRETRAINED_VOCAB_FILES_MAP
max_model_input_sizes = PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES
pretrained_init_configuration = PRETRAINED_INIT_CONFIGURATION

View File

@@ -0,0 +1,61 @@
# coding=utf-8
# Copyright The HuggingFace Inc. team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tokenization classes for ConvBERT."""
from ...utils import logging
from ..bert.tokenization_bert_fast import BertTokenizerFast
from .tokenization_convbert import ConvBertTokenizer
logger = logging.get_logger(__name__)
VOCAB_FILES_NAMES = {"vocab_file": "vocab.txt"}
PRETRAINED_VOCAB_FILES_MAP = {
"vocab_file": {
"YituTech/conv-bert-base": "https://huggingface.co/YituTech/conv-bert-base/resolve/main/vocab.txt",
"YituTech/conv-bert-medium-small": "https://huggingface.co/YituTech/conv-bert-medium-small/resolve/main/vocab.txt",
"YituTech/conv-bert-small": "https://huggingface.co/YituTech/conv-bert-small/resolve/main/vocab.txt",
}
}
PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES = {
"YituTech/conv-bert-base": 512,
"YituTech/conv-bert-medium-small": 512,
"YituTech/conv-bert-small": 512,
}
PRETRAINED_INIT_CONFIGURATION = {
"YituTech/conv-bert-base": {"do_lower_case": True},
"YituTech/conv-bert-medium-small": {"do_lower_case": True},
"YituTech/conv-bert-small": {"do_lower_case": True},
}
class ConvBertTokenizerFast(BertTokenizerFast):
r"""
Construct a "fast" ConvBERT tokenizer (backed by HuggingFace's `tokenizers` library).
:class:`~transformers.ConvBertTokenizerFast` is identical to :class:`~transformers.BertTokenizerFast` and runs
end-to-end tokenization: punctuation splitting and wordpiece.
Refer to superclass :class:`~transformers.BertTokenizerFast` for usage examples and documentation concerning
parameters.
"""
vocab_files_names = VOCAB_FILES_NAMES
pretrained_vocab_files_map = PRETRAINED_VOCAB_FILES_MAP
max_model_input_sizes = PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES
pretrained_init_configuration = PRETRAINED_INIT_CONFIGURATION
slow_tokenizer_class = ConvBertTokenizer

View File

@@ -697,6 +697,81 @@ class CamembertModel:
requires_pytorch(self)
CONVBERT_PRETRAINED_MODEL_ARCHIVE_LIST = None
class ConvBertForMaskedLM:
def __init__(self, *args, **kwargs):
requires_pytorch(self)
@classmethod
def from_pretrained(self, *args, **kwargs):
requires_pytorch(self)
class ConvBertForMultipleChoice:
def __init__(self, *args, **kwargs):
requires_pytorch(self)
@classmethod
def from_pretrained(self, *args, **kwargs):
requires_pytorch(self)
class ConvBertForQuestionAnswering:
def __init__(self, *args, **kwargs):
requires_pytorch(self)
@classmethod
def from_pretrained(self, *args, **kwargs):
requires_pytorch(self)
class ConvBertForSequenceClassification:
def __init__(self, *args, **kwargs):
requires_pytorch(self)
@classmethod
def from_pretrained(self, *args, **kwargs):
requires_pytorch(self)
class ConvBertForTokenClassification:
def __init__(self, *args, **kwargs):
requires_pytorch(self)
@classmethod
def from_pretrained(self, *args, **kwargs):
requires_pytorch(self)
class ConvBertLayer:
def __init__(self, *args, **kwargs):
requires_pytorch(self)
class ConvBertModel:
def __init__(self, *args, **kwargs):
requires_pytorch(self)
@classmethod
def from_pretrained(self, *args, **kwargs):
requires_pytorch(self)
class ConvBertPreTrainedModel:
def __init__(self, *args, **kwargs):
requires_pytorch(self)
@classmethod
def from_pretrained(self, *args, **kwargs):
requires_pytorch(self)
def load_tf_weights_in_convbert(*args, **kwargs):
requires_pytorch(load_tf_weights_in_convbert)
CTRL_PRETRAINED_MODEL_ARCHIVE_LIST = None

View File

@@ -453,6 +453,77 @@ class TFCamembertModel:
requires_tf(self)
TF_CONVBERT_PRETRAINED_MODEL_ARCHIVE_LIST = None
class TFConvBertForMaskedLM:
def __init__(self, *args, **kwargs):
requires_tf(self)
@classmethod
def from_pretrained(self, *args, **kwargs):
requires_tf(self)
class TFConvBertForMultipleChoice:
def __init__(self, *args, **kwargs):
requires_tf(self)
@classmethod
def from_pretrained(self, *args, **kwargs):
requires_tf(self)
class TFConvBertForQuestionAnswering:
def __init__(self, *args, **kwargs):
requires_tf(self)
@classmethod
def from_pretrained(self, *args, **kwargs):
requires_tf(self)
class TFConvBertForSequenceClassification:
def __init__(self, *args, **kwargs):
requires_tf(self)
@classmethod
def from_pretrained(self, *args, **kwargs):
requires_tf(self)
class TFConvBertForTokenClassification:
def __init__(self, *args, **kwargs):
requires_tf(self)
@classmethod
def from_pretrained(self, *args, **kwargs):
requires_tf(self)
class TFConvBertLayer:
def __init__(self, *args, **kwargs):
requires_tf(self)
class TFConvBertModel:
def __init__(self, *args, **kwargs):
requires_tf(self)
@classmethod
def from_pretrained(self, *args, **kwargs):
requires_tf(self)
class TFConvBertPreTrainedModel:
def __init__(self, *args, **kwargs):
requires_tf(self)
@classmethod
def from_pretrained(self, *args, **kwargs):
requires_tf(self)
TF_CTRL_PRETRAINED_MODEL_ARCHIVE_LIST = None

View File

@@ -47,6 +47,15 @@ class CamembertTokenizerFast:
requires_tokenizers(self)
class ConvBertTokenizerFast:
def __init__(self, *args, **kwargs):
requires_tokenizers(self)
@classmethod
def from_pretrained(self, *args, **kwargs):
requires_tokenizers(self)
class DistilBertTokenizerFast:
def __init__(self, *args, **kwargs):
requires_tokenizers(self)