Integrate DeBERTa v2(the 1.5B model surpassed human performance on Su… (#10018)

* Integrate DeBERTa v2(the 1.5B model surpassed human performance on SuperGLUE); Add DeBERTa v2 900M,1.5B models;

* DeBERTa-v2

* Fix v2 model loading issue (#10129)

* Doc members

* Update src/transformers/models/deberta/modeling_deberta.py

Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com>

* Address Sylvain's comments

* Address Patrick's comments

Co-authored-by: Patrick von Platen <patrick.v.platen@gmail.com>

* Style

Co-authored-by: Lysandre <lysandre.debut@reseau.eseo.fr>
Co-authored-by: Lysandre Debut <lysandre@huggingface.co>
Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com>
Co-authored-by: Patrick von Platen <patrick.v.platen@gmail.com>
This commit is contained in:
Pengcheng He
2021-02-19 15:34:44 -08:00
committed by GitHub
parent f6e53e3c2b
commit 9a7e63729f
19 changed files with 3012 additions and 73 deletions

View File

@@ -157,6 +157,7 @@ _import_structure = {
"models.camembert": ["CAMEMBERT_PRETRAINED_CONFIG_ARCHIVE_MAP", "CamembertConfig"],
"models.ctrl": ["CTRL_PRETRAINED_CONFIG_ARCHIVE_MAP", "CTRLConfig", "CTRLTokenizer"],
"models.deberta": ["DEBERTA_PRETRAINED_CONFIG_ARCHIVE_MAP", "DebertaConfig", "DebertaTokenizer"],
"models.deberta_v2": ["DEBERTA_V2_PRETRAINED_CONFIG_ARCHIVE_MAP", "DebertaV2Config", "DebertaV2Tokenizer"],
"models.distilbert": ["DISTILBERT_PRETRAINED_CONFIG_ARCHIVE_MAP", "DistilBertConfig", "DistilBertTokenizer"],
"models.dpr": [
"DPR_PRETRAINED_CONFIG_ARCHIVE_MAP",
@@ -515,6 +516,17 @@ if is_torch_available():
"DebertaForQuestionAnswering",
]
)
_import_structure["models.deberta_v2"].extend(
[
"DEBERTA_V2_PRETRAINED_MODEL_ARCHIVE_LIST",
"DebertaV2ForSequenceClassification",
"DebertaV2Model",
"DebertaV2ForMaskedLM",
"DebertaV2PreTrainedModel",
"DebertaV2ForTokenClassification",
"DebertaV2ForQuestionAnswering",
]
)
_import_structure["models.distilbert"].extend(
[
"DISTILBERT_PRETRAINED_MODEL_ARCHIVE_LIST",
@@ -1287,6 +1299,7 @@ if TYPE_CHECKING:
from .models.convbert import CONVBERT_PRETRAINED_CONFIG_ARCHIVE_MAP, ConvBertConfig, ConvBertTokenizer
from .models.ctrl import CTRL_PRETRAINED_CONFIG_ARCHIVE_MAP, CTRLConfig, CTRLTokenizer
from .models.deberta import DEBERTA_PRETRAINED_CONFIG_ARCHIVE_MAP, DebertaConfig, DebertaTokenizer
from .models.deberta_v2 import DEBERTA_V2_PRETRAINED_CONFIG_ARCHIVE_MAP, DebertaV2Config, DebertaV2Tokenizer
from .models.distilbert import DISTILBERT_PRETRAINED_CONFIG_ARCHIVE_MAP, DistilBertConfig, DistilBertTokenizer
from .models.dpr import (
DPR_PRETRAINED_CONFIG_ARCHIVE_MAP,
@@ -1604,6 +1617,15 @@ if TYPE_CHECKING:
DebertaModel,
DebertaPreTrainedModel,
)
from .models.deberta_v2 import (
DEBERTA_V2_PRETRAINED_MODEL_ARCHIVE_LIST,
DebertaV2ForMaskedLM,
DebertaV2ForQuestionAnswering,
DebertaV2ForSequenceClassification,
DebertaV2ForTokenClassification,
DebertaV2Model,
DebertaV2PreTrainedModel,
)
from .models.distilbert import (
DISTILBERT_PRETRAINED_MODEL_ARCHIVE_LIST,
DistilBertForMaskedLM,

View File

@@ -31,6 +31,7 @@ from ..camembert.configuration_camembert import CAMEMBERT_PRETRAINED_CONFIG_ARCH
from ..convbert.configuration_convbert import CONVBERT_PRETRAINED_CONFIG_ARCHIVE_MAP, ConvBertConfig
from ..ctrl.configuration_ctrl import CTRL_PRETRAINED_CONFIG_ARCHIVE_MAP, CTRLConfig
from ..deberta.configuration_deberta import DEBERTA_PRETRAINED_CONFIG_ARCHIVE_MAP, DebertaConfig
from ..deberta_v2.configuration_deberta_v2 import DEBERTA_V2_PRETRAINED_CONFIG_ARCHIVE_MAP, DebertaV2Config
from ..distilbert.configuration_distilbert import DISTILBERT_PRETRAINED_CONFIG_ARCHIVE_MAP, DistilBertConfig
from ..dpr.configuration_dpr import DPR_PRETRAINED_CONFIG_ARCHIVE_MAP, DPRConfig
from ..electra.configuration_electra import ELECTRA_PRETRAINED_CONFIG_ARCHIVE_MAP, ElectraConfig
@@ -103,6 +104,7 @@ ALL_PRETRAINED_CONFIG_ARCHIVE_MAP = dict(
LAYOUTLM_PRETRAINED_CONFIG_ARCHIVE_MAP,
DPR_PRETRAINED_CONFIG_ARCHIVE_MAP,
DEBERTA_PRETRAINED_CONFIG_ARCHIVE_MAP,
DEBERTA_V2_PRETRAINED_CONFIG_ARCHIVE_MAP,
SQUEEZEBERT_PRETRAINED_CONFIG_ARCHIVE_MAP,
XLM_PROPHETNET_PRETRAINED_CONFIG_ARCHIVE_MAP,
PROPHETNET_PRETRAINED_CONFIG_ARCHIVE_MAP,
@@ -138,6 +140,7 @@ CONFIG_MAPPING = OrderedDict(
("reformer", ReformerConfig),
("longformer", LongformerConfig),
("roberta", RobertaConfig),
("deberta-v2", DebertaV2Config),
("deberta", DebertaConfig),
("flaubert", FlaubertConfig),
("fsmt", FSMTConfig),
@@ -199,6 +202,7 @@ MODEL_NAMES_MAPPING = OrderedDict(
("encoder-decoder", "Encoder decoder"),
("funnel", "Funnel Transformer"),
("lxmert", "LXMERT"),
("deberta-v2", "DeBERTa-v2"),
("deberta", "DeBERTa"),
("layoutlm", "LayoutLM"),
("dpr", "DPR"),
@@ -366,7 +370,6 @@ class AutoConfig:
{'foo': False}
"""
config_dict, _ = PretrainedConfig.get_config_dict(pretrained_model_name_or_path, **kwargs)
if "model_type" in config_dict:
config_class = CONFIG_MAPPING[config_dict["model_type"]]
return config_class.from_dict(config_dict, **kwargs)

View File

@@ -84,6 +84,13 @@ from ..deberta.modeling_deberta import (
DebertaForTokenClassification,
DebertaModel,
)
from ..deberta_v2.modeling_deberta_v2 import (
DebertaV2ForMaskedLM,
DebertaV2ForQuestionAnswering,
DebertaV2ForSequenceClassification,
DebertaV2ForTokenClassification,
DebertaV2Model,
)
from ..distilbert.modeling_distilbert import (
DistilBertForMaskedLM,
DistilBertForMultipleChoice,
@@ -254,6 +261,7 @@ from .configuration_auto import (
ConvBertConfig,
CTRLConfig,
DebertaConfig,
DebertaV2Config,
DistilBertConfig,
DPRConfig,
ElectraConfig,
@@ -332,6 +340,7 @@ MODEL_MAPPING = OrderedDict(
(LxmertConfig, LxmertModel),
(BertGenerationConfig, BertGenerationEncoder),
(DebertaConfig, DebertaModel),
(DebertaV2Config, DebertaV2Model),
(DPRConfig, DPRQuestionEncoder),
(XLMProphetNetConfig, XLMProphetNetModel),
(ProphetNetConfig, ProphetNetModel),
@@ -408,6 +417,7 @@ MODEL_WITH_LM_HEAD_MAPPING = OrderedDict(
(MPNetConfig, MPNetForMaskedLM),
(TapasConfig, TapasForMaskedLM),
(DebertaConfig, DebertaForMaskedLM),
(DebertaV2Config, DebertaV2ForMaskedLM),
]
)
@@ -465,6 +475,7 @@ MODEL_FOR_MASKED_LM_MAPPING = OrderedDict(
(MPNetConfig, MPNetForMaskedLM),
(TapasConfig, TapasForMaskedLM),
(DebertaConfig, DebertaForMaskedLM),
(DebertaV2Config, DebertaV2ForMaskedLM),
]
)
@@ -510,6 +521,7 @@ MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING = OrderedDict(
(ElectraConfig, ElectraForSequenceClassification),
(FunnelConfig, FunnelForSequenceClassification),
(DebertaConfig, DebertaForSequenceClassification),
(DebertaV2Config, DebertaV2ForSequenceClassification),
(GPT2Config, GPT2ForSequenceClassification),
(OpenAIGPTConfig, OpenAIGPTForSequenceClassification),
(ReformerConfig, ReformerForSequenceClassification),
@@ -545,6 +557,7 @@ MODEL_FOR_QUESTION_ANSWERING_MAPPING = OrderedDict(
(LxmertConfig, LxmertForQuestionAnswering),
(MPNetConfig, MPNetForQuestionAnswering),
(DebertaConfig, DebertaForQuestionAnswering),
(DebertaV2Config, DebertaV2ForQuestionAnswering),
]
)
@@ -577,6 +590,7 @@ MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING = OrderedDict(
(FunnelConfig, FunnelForTokenClassification),
(MPNetConfig, MPNetForTokenClassification),
(DebertaConfig, DebertaForTokenClassification),
(DebertaV2Config, DebertaV2ForTokenClassification),
]
)

View File

@@ -66,6 +66,7 @@ from .configuration_auto import (
ConvBertConfig,
CTRLConfig,
DebertaConfig,
DebertaV2Config,
DistilBertConfig,
DPRConfig,
ElectraConfig,
@@ -108,6 +109,7 @@ if is_sentencepiece_available():
from ..barthez.tokenization_barthez import BarthezTokenizer
from ..bert_generation.tokenization_bert_generation import BertGenerationTokenizer
from ..camembert.tokenization_camembert import CamembertTokenizer
from ..deberta_v2.tokenization_deberta_v2 import DebertaV2Tokenizer
from ..marian.tokenization_marian import MarianTokenizer
from ..mbart.tokenization_mbart import MBartTokenizer
from ..mt5 import MT5Tokenizer
@@ -122,6 +124,7 @@ else:
BarthezTokenizer = None
BertGenerationTokenizer = None
CamembertTokenizer = None
DebertaV2Tokenizer = None
MarianTokenizer = None
MBartTokenizer = None
MT5Tokenizer = None
@@ -233,6 +236,7 @@ TOKENIZER_MAPPING = OrderedDict(
(FSMTConfig, (FSMTTokenizer, None)),
(BertGenerationConfig, (BertGenerationTokenizer, None)),
(DebertaConfig, (DebertaTokenizer, None)),
(DebertaV2Config, (DebertaV2Tokenizer, None)),
(RagConfig, (RagTokenizer, None)),
(XLMProphetNetConfig, (XLMProphetNetTokenizer, None)),
(ProphetNetConfig, (ProphetNetTokenizer, None)),

View File

@@ -23,6 +23,10 @@ logger = logging.get_logger(__name__)
DEBERTA_PRETRAINED_CONFIG_ARCHIVE_MAP = {
"microsoft/deberta-base": "https://huggingface.co/microsoft/deberta-base/resolve/main/config.json",
"microsoft/deberta-large": "https://huggingface.co/microsoft/deberta-large/resolve/main/config.json",
"microsoft/deberta-xlarge": "https://huggingface.co/microsoft/deberta-xlarge/resolve/main/config.json",
"microsoft/deberta-base-mnli": "https://huggingface.co/microsoft/deberta-base-mnli/resolve/main/config.json",
"microsoft/deberta-large-mnli": "https://huggingface.co/microsoft/deberta-large-mnli/resolve/main/config.json",
"microsoft/deberta-xlarge-mnli": "https://huggingface.co/microsoft/deberta-xlarge-mnli/resolve/main/config.json",
}

View File

@@ -18,7 +18,6 @@ import math
from collections.abc import Sequence
import torch
from packaging import version
from torch import _softmax_backward_data, nn
from torch.nn import CrossEntropyLoss
@@ -40,10 +39,15 @@ logger = logging.get_logger(__name__)
_CONFIG_FOR_DOC = "DebertaConfig"
_TOKENIZER_FOR_DOC = "DebertaTokenizer"
_CHECKPOINT_FOR_DOC = "microsoft/deberta-base"
DEBERTA_PRETRAINED_MODEL_ARCHIVE_LIST = [
"microsoft/deberta-base",
"microsoft/deberta-large",
"microsoft/deberta-xlarge",
"microsoft/deberta-base-mnli",
"microsoft/deberta-large-mnli",
"microsoft/deberta-xlarge-mnli",
]
@@ -54,7 +58,7 @@ class ContextPooler(nn.Module):
self.dropout = StableDropout(config.pooler_dropout)
self.config = config
def forward(self, hidden_states, mask=None):
def forward(self, hidden_states):
# We "pool" the model by simply taking the hidden state corresponding
# to the first token.
@@ -74,27 +78,28 @@ class XSoftmax(torch.autograd.Function):
Masked Softmax which is optimized for saving memory
Args:
input (:obj:`torch.tensor`): The input tensor that will apply softmax.
mask (:obj:`torch.IntTensor`): The mask matrix where 0 indicate that element will be ignored in the softmax calculation.
dim (int): The dimension that will apply softmax
input (:obj:`torch.tensor`): The input tensor that will apply softmax.
mask (:obj:`torch.IntTensor`): The mask matrix where 0 indicate that element will be ignored in the softmax calculation.
dim (int): The dimension that will apply softmax
Example::
import torch
from transformers.models.deberta import XSoftmax
# Make a tensor
x = torch.randn([4,20,100])
# Create a mask
mask = (x>0).int()
y = XSoftmax.apply(x, mask, dim=-1)
>>> import torch
>>> from transformers.models.deberta.modeling_deberta import XSoftmax
>>> # Make a tensor
>>> x = torch.randn([4,20,100])
>>> # Create a mask
>>> mask = (x>0).int()
>>> y = XSoftmax.apply(x, mask, dim=-1)
"""
@staticmethod
def forward(self, input, mask, dim):
self.dim = dim
if version.Version(torch.__version__) >= version.Version("1.2.0a"):
rmask = ~(mask.bool())
else:
rmask = (1 - mask).byte() # This line is not supported by Onnx tracing.
rmask = ~(mask.bool())
output = input.masked_fill(rmask, float("-inf"))
output = torch.softmax(output, self.dim)
@@ -127,10 +132,7 @@ def get_mask(input, local_context):
mask = local_context.mask if local_context.reuse_mask else None
if dropout > 0 and mask is None:
if version.Version(torch.__version__) >= version.Version("1.2.0a"):
mask = (1 - torch.empty_like(input).bernoulli_(1 - dropout)).bool()
else:
mask = (1 - torch.empty_like(input).bernoulli_(1 - dropout)).byte()
mask = (1 - torch.empty_like(input).bernoulli_(1 - dropout)).bool()
if isinstance(local_context, DropoutContext):
if local_context.mask is None:
@@ -166,9 +168,7 @@ class StableDropout(torch.nn.Module):
Optimized dropout module for stabilizing the training
Args:
drop_prob (float): the dropout probabilities
"""
def __init__(self, drop_prob):
@@ -183,8 +183,6 @@ class StableDropout(torch.nn.Module):
Args:
x (:obj:`torch.tensor`): The input tensor to apply dropout
"""
if self.training and self.drop_prob > 0:
return XDropout.apply(x, self.get_context())
@@ -302,7 +300,7 @@ class DebertaIntermediate(nn.Module):
class DebertaOutput(nn.Module):
def __init__(self, config):
super(DebertaOutput, self).__init__()
super().__init__()
self.dense = nn.Linear(config.intermediate_size, config.hidden_size)
self.LayerNorm = DebertaLayerNorm(config.hidden_size, config.layer_norm_eps)
self.dropout = StableDropout(config.hidden_dropout_prob)
@@ -317,7 +315,7 @@ class DebertaOutput(nn.Module):
class DebertaLayer(nn.Module):
def __init__(self, config):
super(DebertaLayer, self).__init__()
super().__init__()
self.attention = DebertaAttention(config)
self.intermediate = DebertaIntermediate(config)
self.output = DebertaOutput(config)
@@ -701,7 +699,6 @@ class DebertaEmbeddings(nn.Module):
self.embed_proj = nn.Linear(self.embedding_size, config.hidden_size, bias=False)
self.LayerNorm = DebertaLayerNorm(config.hidden_size, config.layer_norm_eps)
self.dropout = StableDropout(config.hidden_dropout_prob)
self.output_to_half = False
self.config = config
# position_ids (1, len position emb) is contiguous in memory and exported when serialized
@@ -763,6 +760,11 @@ class DebertaPreTrainedModel(PreTrainedModel):
config_class = DebertaConfig
base_model_prefix = "deberta"
_keys_to_ignore_on_load_missing = ["position_ids"]
_keys_to_ignore_on_load_unexpected = ["position_embeddings"]
def __init__(self, config):
super().__init__(config)
self._register_load_state_dict_pre_hook(self._pre_load_hook)
def _init_weights(self, module):
""" Initialize the weights """
@@ -773,6 +775,25 @@ class DebertaPreTrainedModel(PreTrainedModel):
if isinstance(module, nn.Linear) and module.bias is not None:
module.bias.data.zero_()
def _pre_load_hook(self, state_dict, prefix, local_metadata, strict, missing_keys, unexpected_keys, error_msgs):
"""
Removes the classifier if it doesn't have the correct number of labels.
"""
self_state = self.state_dict()
if (
("classifier.weight" in self_state)
and ("classifier.weight" in state_dict)
and self_state["classifier.weight"].size() != state_dict["classifier.weight"].size()
):
logger.warning(
f"The checkpoint classifier head has a shape {state_dict['classifier.weight'].size()} and this model "
f"classifier head has a shape {self_state['classifier.weight'].size()}. Ignoring the checkpoint "
f"weights. You should train your model on new data."
)
del state_dict["classifier.weight"]
if "classifier.bias" in state_dict:
del state_dict["classifier.bias"]
DEBERTA_START_DOCSTRING = r"""
The DeBERTa model was proposed in `DeBERTa: Decoding-enhanced BERT with Disentangled Attention
@@ -867,7 +888,7 @@ class DebertaModel(DebertaPreTrainedModel):
@add_start_docstrings_to_model_forward(DEBERTA_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
@add_code_sample_docstrings(
tokenizer_class=_TOKENIZER_FOR_DOC,
checkpoint="microsoft/deberta-base",
checkpoint=_CHECKPOINT_FOR_DOC,
output_type=SequenceClassifierOutput,
config_class=_CONFIG_FOR_DOC,
)
@@ -953,7 +974,6 @@ class DebertaModel(DebertaPreTrainedModel):
@add_start_docstrings("""DeBERTa Model with a `language modeling` head on top. """, DEBERTA_START_DOCSTRING)
class DebertaForMaskedLM(DebertaPreTrainedModel):
_keys_to_ignore_on_load_unexpected = [r"pooler"]
_keys_to_ignore_on_load_missing = [r"position_ids", r"predictions.decoder.bias"]
@@ -974,7 +994,7 @@ class DebertaForMaskedLM(DebertaPreTrainedModel):
@add_start_docstrings_to_model_forward(DEBERTA_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
@add_code_sample_docstrings(
tokenizer_class=_TOKENIZER_FOR_DOC,
checkpoint="microsoft/deberta-base",
checkpoint=_CHECKPOINT_FOR_DOC,
output_type=MaskedLMOutput,
config_class=_CONFIG_FOR_DOC,
)
@@ -1114,7 +1134,7 @@ class DebertaForSequenceClassification(DebertaPreTrainedModel):
@add_start_docstrings_to_model_forward(DEBERTA_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
@add_code_sample_docstrings(
tokenizer_class=_TOKENIZER_FOR_DOC,
checkpoint="microsoft/deberta-base",
checkpoint=_CHECKPOINT_FOR_DOC,
output_type=SequenceClassifierOutput,
config_class=_CONFIG_FOR_DOC,
)
@@ -1194,7 +1214,6 @@ class DebertaForSequenceClassification(DebertaPreTrainedModel):
DEBERTA_START_DOCSTRING,
)
class DebertaForTokenClassification(DebertaPreTrainedModel):
_keys_to_ignore_on_load_unexpected = [r"pooler"]
def __init__(self, config):
@@ -1210,7 +1229,7 @@ class DebertaForTokenClassification(DebertaPreTrainedModel):
@add_start_docstrings_to_model_forward(DEBERTA_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
@add_code_sample_docstrings(
tokenizer_class=_TOKENIZER_FOR_DOC,
checkpoint="microsoft/deberta-base",
checkpoint=_CHECKPOINT_FOR_DOC,
output_type=TokenClassifierOutput,
config_class=_CONFIG_FOR_DOC,
)
@@ -1283,7 +1302,6 @@ class DebertaForTokenClassification(DebertaPreTrainedModel):
DEBERTA_START_DOCSTRING,
)
class DebertaForQuestionAnswering(DebertaPreTrainedModel):
_keys_to_ignore_on_load_unexpected = [r"pooler"]
def __init__(self, config):
@@ -1298,7 +1316,7 @@ class DebertaForQuestionAnswering(DebertaPreTrainedModel):
@add_start_docstrings_to_model_forward(DEBERTA_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
@add_code_sample_docstrings(
tokenizer_class=_TOKENIZER_FOR_DOC,
checkpoint="microsoft/deberta-base",
checkpoint=_CHECKPOINT_FOR_DOC,
output_type=QuestionAnsweringModelOutput,
config_class=_CONFIG_FOR_DOC,
)

View File

@@ -44,12 +44,20 @@ PRETRAINED_VOCAB_FILES_MAP = {
"vocab_file": {
"microsoft/deberta-base": "https://huggingface.co/microsoft/deberta-base/resolve/main/bpe_encoder.bin",
"microsoft/deberta-large": "https://huggingface.co/microsoft/deberta-large/resolve/main/bpe_encoder.bin",
"microsoft/deberta-xlarge": "https://huggingface.co/microsoft/deberta-xlarge/resolve/main/bpe_encoder.bin",
"microsoft/deberta-base-mnli": "https://huggingface.co/microsoft/deberta-base-mnli/resolve/main/bpe_encoder.bin",
"microsoft/deberta-large-mnli": "https://huggingface.co/microsoft/deberta-large-mnli/resolve/main/bpe_encoder.bin",
"microsoft/deberta-xlarge-mnli": "https://huggingface.co/microsoft/deberta-xlarge-mnli/resolve/main/bpe_encoder.bin",
}
}
PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES = {
"microsoft/deberta-base": 512,
"microsoft/deberta-large": 512,
"microsoft/deberta-xlarge": 512,
"microsoft/deberta-base-mnli": 512,
"microsoft/deberta-large-mnli": 512,
"microsoft/deberta-xlarge-mnli": 512,
}
PRETRAINED_INIT_CONFIGURATION = {

View File

@@ -0,0 +1,72 @@
# flake8: noqa
# There's no way to ignore "F401 '...' imported but unused" warnings in this
# module, but to preserve other warnings. So, don't check this module at all.
# Copyright 2020 The HuggingFace Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from typing import TYPE_CHECKING
from ...file_utils import _BaseLazyModule, is_torch_available
_import_structure = {
"configuration_deberta_v2": ["DEBERTA_V2_PRETRAINED_CONFIG_ARCHIVE_MAP", "DebertaV2Config"],
"tokenization_deberta_v2": ["DebertaV2Tokenizer"],
}
if is_torch_available():
_import_structure["modeling_deberta_v2"] = [
"DEBERTA_PRETRAINED_MODEL_ARCHIVE_LIST",
"DebertaV2ForSequenceClassification",
"DebertaV2Model",
"DebertaV2ForMaskedLM",
"DebertaV2PreTrainedModel",
"DebertaV2ForTokenClassification",
"DebertaV2ForQuestionAnswering",
]
if TYPE_CHECKING:
from .configuration_deberta_v2 import DEBERTA_V2_PRETRAINED_CONFIG_ARCHIVE_MAP, DebertaV2Config
from .tokenization_deberta_v2 import DebertaV2Tokenizer
if is_torch_available():
from .modeling_deberta_v2 import (
DEBERTA_V2_PRETRAINED_MODEL_ARCHIVE_LIST,
DebertaV2ForMaskedLM,
DebertaV2ForQuestionAnswering,
DebertaV2ForSequenceClassification,
DebertaV2ForTokenClassification,
DebertaV2Model,
DebertaV2PreTrainedModel,
)
else:
import importlib
import os
import sys
class _LazyModule(_BaseLazyModule):
"""
Module class that surfaces all objects but only performs associated imports when the objects are requested.
"""
__file__ = globals()["__file__"]
__path__ = [os.path.dirname(__file__)]
def _get_module(self, module_name: str):
return importlib.import_module("." + module_name, self.__name__)
sys.modules[__name__] = _LazyModule(__name__, _import_structure)

View File

@@ -0,0 +1,138 @@
# coding=utf-8
# Copyright 2020, Microsoft and the HuggingFace Inc. team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
""" DeBERTa-v2 model configuration """
from ...configuration_utils import PretrainedConfig
from ...utils import logging
logger = logging.get_logger(__name__)
DEBERTA_V2_PRETRAINED_CONFIG_ARCHIVE_MAP = {
"microsoft/deberta-v2-xlarge": "https://huggingface.co/microsoft/deberta-v2-xlarge/resolve/main/config.json",
"microsoft/deberta-v2-xxlarge": "https://huggingface.co/microsoft/deberta-v2-xxlarge/resolve/main/config.json",
"microsoft/deberta-v2-xlarge-mnli": "https://huggingface.co/microsoft/deberta-v2-xlarge-mnli/resolve/main/config.json",
"microsoft/deberta-v2-xxlarge-mnli": "https://huggingface.co/microsoft/deberta-v2-xxlarge-mnli/resolve/main/config.json",
}
class DebertaV2Config(PretrainedConfig):
r"""
This is the configuration class to store the configuration of a :class:`~transformers.DebertaV2Model`. It is used
to instantiate a DeBERTa-v2 model according to the specified arguments, defining the model architecture.
Instantiating a configuration with the defaults will yield a similar configuration to that of the DeBERTa
`microsoft/deberta-v2-xlarge <https://huggingface.co/microsoft/deberta-base>`__ architecture.
Configuration objects inherit from :class:`~transformers.PretrainedConfig` and can be used to control the model
outputs. Read the documentation from :class:`~transformers.PretrainedConfig` for more information.
Arguments:
vocab_size (:obj:`int`, `optional`, defaults to 128100):
Vocabulary size of the DeBERTa-v2 model. Defines the number of different tokens that can be represented by
the :obj:`inputs_ids` passed when calling :class:`~transformers.DebertaV2Model`.
hidden_size (:obj:`int`, `optional`, defaults to 1536):
Dimensionality of the encoder layers and the pooler layer.
num_hidden_layers (:obj:`int`, `optional`, defaults to 24):
Number of hidden layers in the Transformer encoder.
num_attention_heads (:obj:`int`, `optional`, defaults to 24):
Number of attention heads for each attention layer in the Transformer encoder.
intermediate_size (:obj:`int`, `optional`, defaults to 6144):
Dimensionality of the "intermediate" (often named feed-forward) layer in the Transformer encoder.
hidden_act (:obj:`str` or :obj:`Callable`, `optional`, defaults to :obj:`"gelu"`):
The non-linear activation function (function or string) in the encoder and pooler. If string,
:obj:`"gelu"`, :obj:`"relu"`, :obj:`"silu"`, :obj:`"gelu"`, :obj:`"tanh"`, :obj:`"gelu_fast"`,
:obj:`"mish"`, :obj:`"linear"`, :obj:`"sigmoid"` and :obj:`"gelu_new"` are supported.
hidden_dropout_prob (:obj:`float`, `optional`, defaults to 0.1):
The dropout probability for all fully connected layers in the embeddings, encoder, and pooler.
attention_probs_dropout_prob (:obj:`float`, `optional`, defaults to 0.1):
The dropout ratio for the attention probabilities.
max_position_embeddings (:obj:`int`, `optional`, defaults to 512):
The maximum sequence length that this model might ever be used with. Typically set this to something large
just in case (e.g., 512 or 1024 or 2048).
type_vocab_size (:obj:`int`, `optional`, defaults to 0):
The vocabulary size of the :obj:`token_type_ids` passed when calling :class:`~transformers.DebertaModel` or
:class:`~transformers.TFDebertaModel`.
initializer_range (:obj:`float`, `optional`, defaults to 0.02):
The standard deviation of the truncated_normal_initializer for initializing all weight matrices.
layer_norm_eps (:obj:`float`, `optional`, defaults to 1e-7):
The epsilon used by the layer normalization layers.
relative_attention (:obj:`bool`, `optional`, defaults to :obj:`True`):
Whether use relative position encoding.
max_relative_positions (:obj:`int`, `optional`, defaults to -1):
The range of relative positions :obj:`[-max_position_embeddings, max_position_embeddings]`. Use the same
value as :obj:`max_position_embeddings`.
pad_token_id (:obj:`int`, `optional`, defaults to 0):
The value used to pad input_ids.
position_biased_input (:obj:`bool`, `optional`, defaults to :obj:`False`):
Whether add absolute position embedding to content embedding.
pos_att_type (:obj:`List[str]`, `optional`):
The type of relative position attention, it can be a combination of :obj:`["p2c", "c2p", "p2p"]`, e.g.
:obj:`["p2c"]`, :obj:`["p2c", "c2p"]`, :obj:`["p2c", "c2p", 'p2p"]`.
layer_norm_eps (:obj:`float`, optional, defaults to 1e-12):
The epsilon used by the layer normalization layers.
"""
model_type = "deberta-v2"
def __init__(
self,
vocab_size=128100,
hidden_size=1536,
num_hidden_layers=24,
num_attention_heads=24,
intermediate_size=6144,
hidden_act="gelu",
hidden_dropout_prob=0.1,
attention_probs_dropout_prob=0.1,
max_position_embeddings=512,
type_vocab_size=0,
initializer_range=0.02,
layer_norm_eps=1e-7,
relative_attention=False,
max_relative_positions=-1,
pad_token_id=0,
position_biased_input=True,
pos_att_type=None,
pooler_dropout=0,
pooler_hidden_act="gelu",
**kwargs
):
super().__init__(**kwargs)
self.hidden_size = hidden_size
self.num_hidden_layers = num_hidden_layers
self.num_attention_heads = num_attention_heads
self.intermediate_size = intermediate_size
self.hidden_act = hidden_act
self.hidden_dropout_prob = hidden_dropout_prob
self.attention_probs_dropout_prob = attention_probs_dropout_prob
self.max_position_embeddings = max_position_embeddings
self.type_vocab_size = type_vocab_size
self.initializer_range = initializer_range
self.relative_attention = relative_attention
self.max_relative_positions = max_relative_positions
self.pad_token_id = pad_token_id
self.position_biased_input = position_biased_input
# Backwards compatibility
if type(pos_att_type) == str:
pos_att_type = [x.strip() for x in pos_att_type.lower().split("|")]
self.pos_att_type = pos_att_type
self.vocab_size = vocab_size
self.layer_norm_eps = layer_norm_eps
self.pooler_hidden_size = kwargs.get("pooler_hidden_size", hidden_size)
self.pooler_dropout = pooler_dropout
self.pooler_hidden_act = pooler_hidden_act

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,491 @@
# coding=utf-8
# Copyright 2020 Microsoft and the HuggingFace Inc. team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
""" Tokenization class for model DeBERTa."""
import os
import unicodedata
from typing import Optional, Tuple
import sentencepiece as sp
import six
from ...tokenization_utils import PreTrainedTokenizer
PRETRAINED_VOCAB_FILES_MAP = {
"vocab_file": {
"microsoft/deberta-v2-xlarge": "https://huggingface.co/microsoft/deberta-v2-xlarge/resolve/main/spm.model",
"microsoft/deberta-v2-xxlarge": "https://huggingface.co/microsoft/deberta-v2-xxlarge/resolve/main/spm.model",
"microsoft/deberta-v2-xlarge-mnli": "https://huggingface.co/microsoft/deberta-v2-xlarge-mnli/resolve/main/spm.model",
"microsoft/deberta-v2-xxlarge-mnli": "https://huggingface.co/microsoft/deberta-v2-xxlarge-mnli/resolve/main/spm.model",
}
}
PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES = {
"microsoft/deberta-v2-xlarge": 512,
"microsoft/deberta-v2-xxlarge": 512,
"microsoft/deberta-v2-xlarge-mnli": 512,
"microsoft/deberta-v2-xxlarge-mnli": 512,
}
PRETRAINED_INIT_CONFIGURATION = {
"microsoft/deberta-v2-xlarge": {"do_lower_case": False},
"microsoft/deberta-v2-xxlarge": {"do_lower_case": False},
"microsoft/deberta-v2-xlarge-mnli": {"do_lower_case": False},
"microsoft/deberta-v2-xxlarge-mnli": {"do_lower_case": False},
}
VOCAB_FILES_NAMES = {"vocab_file": "spm.model"}
class DebertaV2Tokenizer(PreTrainedTokenizer):
r"""
Constructs a DeBERTa-v2 tokenizer. Based on `SentencePiece <https://github.com/google/sentencepiece>`__.
Args:
vocab_file (:obj:`str`):
`SentencePiece <https://github.com/google/sentencepiece>`__ file (generally has a `.spm` extension) that
contains the vocabulary necessary to instantiate a tokenizer.
do_lower_case (:obj:`bool`, `optional`, defaults to :obj:`False`):
Whether or not to lowercase the input when tokenizing.
unk_token (:obj:`str`, `optional`, defaults to :obj:`"[UNK]"`):
The unknown token. A token that is not in the vocabulary cannot be converted to an ID and is set to be this
token instead.
sep_token (:obj:`str`, `optional`, defaults to :obj:`"[SEP]"`):
The separator token, which is used when building a sequence from multiple sequences, e.g. two sequences for
sequence classification or for a text and a question for question answering. It is also used as the last
token of a sequence built with special tokens.
pad_token (:obj:`str`, `optional`, defaults to :obj:`"[PAD]"`):
The token used for padding, for example when batching sequences of different lengths.
cls_token (:obj:`str`, `optional`, defaults to :obj:`"[CLS]"`):
The classifier token which is used when doing sequence classification (classification of the whole sequence
instead of per-token classification). It is the first token of the sequence when built with special tokens.
mask_token (:obj:`str`, `optional`, defaults to :obj:`"[MASK]"`):
The token used for masking values. This is the token used when training this model with masked language
modeling. This is the token which the model will try to predict.
"""
vocab_files_names = VOCAB_FILES_NAMES
pretrained_vocab_files_map = PRETRAINED_VOCAB_FILES_MAP
pretrained_init_configuration = PRETRAINED_INIT_CONFIGURATION
max_model_input_sizes = PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES
def __init__(
self,
vocab_file,
do_lower_case=False,
split_by_punct=False,
unk_token="[UNK]",
sep_token="[SEP]",
pad_token="[PAD]",
cls_token="[CLS]",
mask_token="[MASK]",
**kwargs
):
super().__init__(
do_lower_case=do_lower_case,
unk_token=unk_token,
sep_token=sep_token,
pad_token=pad_token,
cls_token=cls_token,
mask_token=mask_token,
split_by_punct=split_by_punct,
**kwargs,
)
if not os.path.isfile(vocab_file):
raise ValueError(
"Can't find a vocabulary file at path '{}'. To load the vocabulary from a Google pretrained "
"model use `tokenizer = DebertaV2Tokenizer.from_pretrained(PRETRAINED_MODEL_NAME)`".format(vocab_file)
)
self.do_lower_case = do_lower_case
self.split_by_punct = split_by_punct
self._tokenizer = SPMTokenizer(vocab_file, split_by_punct=split_by_punct)
@property
def vocab_size(self):
return len(self.vocab)
@property
def vocab(self):
return self._tokenizer.vocab
def get_vocab(self):
vocab = self.vocab.copy()
vocab.update(self.get_added_vocab())
return vocab
def _tokenize(self, text):
"""Take as input a string and return a list of strings (tokens) for words/sub-words"""
if self.do_lower_case:
text = text.lower()
return self._tokenizer.tokenize(text)
def _convert_token_to_id(self, token):
""" Converts a token (str) in an id using the vocab. """
return self._tokenizer.spm.PieceToId(token)
def _convert_id_to_token(self, index):
"""Converts an index (integer) in a token (str) using the vocab."""
return self._tokenizer.spm.IdToPiece(index) if index < self.vocab_size else self.unk_token
def convert_tokens_to_string(self, tokens):
""" Converts a sequence of tokens (string) in a single string. """
return self._tokenizer.decode(tokens)
def build_inputs_with_special_tokens(self, token_ids_0, token_ids_1=None):
"""
Build model inputs from a sequence or a pair of sequence for sequence classification tasks by concatenating and
adding special tokens. A DeBERTa sequence has the following format:
- single sequence: [CLS] X [SEP]
- pair of sequences: [CLS] A [SEP] B [SEP]
Args:
token_ids_0 (:obj:`List[int]`):
List of IDs to which the special tokens will be added.
token_ids_1 (:obj:`List[int]`, `optional`):
Optional second list of IDs for sequence pairs.
Returns:
:obj:`List[int]`: List of `input IDs <../glossary.html#input-ids>`__ with the appropriate special tokens.
"""
if token_ids_1 is None:
return [self.cls_token_id] + token_ids_0 + [self.sep_token_id]
cls = [self.cls_token_id]
sep = [self.sep_token_id]
return cls + token_ids_0 + sep + token_ids_1 + sep
def get_special_tokens_mask(self, token_ids_0, token_ids_1=None, already_has_special_tokens=False):
"""
Retrieves sequence ids from a token list that has no special tokens added. This method is called when adding
special tokens using the tokenizer ``prepare_for_model`` or ``encode_plus`` methods.
Args:
token_ids_0 (:obj:`List[int]`):
List of IDs.
token_ids_1 (:obj:`List[int]`, `optional`):
Optional second list of IDs for sequence pairs.
already_has_special_tokens (:obj:`bool`, `optional`, defaults to :obj:`False`):
Whether or not the token list is already formatted with special tokens for the model.
Returns:
:obj:`List[int]`: A list of integers in the range [0, 1]: 1 for a special token, 0 for a sequence token.
"""
if already_has_special_tokens:
if token_ids_1 is not None:
raise ValueError(
"You should not supply a second sequence if the provided sequence of "
"ids is already formatted with special tokens for the model."
)
return list(
map(
lambda x: 1 if x in [self.sep_token_id, self.cls_token_id] else 0,
token_ids_0,
)
)
if token_ids_1 is not None:
return [1] + ([0] * len(token_ids_0)) + [1] + ([0] * len(token_ids_1)) + [1]
return [1] + ([0] * len(token_ids_0)) + [1]
def create_token_type_ids_from_sequences(self, token_ids_0, token_ids_1=None):
"""
Create a mask from the two sequences passed to be used in a sequence-pair classification task. A DeBERTa
sequence pair mask has the following format:
::
0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1
| first sequence | second sequence |
If :obj:`token_ids_1` is :obj:`None`, this method only returns the first portion of the mask (0s).
Args:
token_ids_0 (:obj:`List[int]`):
List of IDs.
token_ids_1 (:obj:`List[int]`, `optional`):
Optional second list of IDs for sequence pairs.
Returns:
:obj:`List[int]`: List of `token type IDs <../glossary.html#token-type-ids>`_ according to the given
sequence(s).
"""
sep = [self.sep_token_id]
cls = [self.cls_token_id]
if token_ids_1 is None:
return len(cls + token_ids_0 + sep) * [0]
return len(cls + token_ids_0 + sep) * [0] + len(token_ids_1 + sep) * [1]
def prepare_for_tokenization(self, text, is_split_into_words=False, **kwargs):
add_prefix_space = kwargs.pop("add_prefix_space", False)
if is_split_into_words or add_prefix_space:
text = " " + text
return (text, kwargs)
def save_vocabulary(self, save_directory: str, filename_prefix: Optional[str] = None) -> Tuple[str]:
return self._tokenizer.save_pretrained(save_directory, filename_prefix=filename_prefix)
class SPMTokenizer:
def __init__(self, vocab_file, split_by_punct=False):
self.split_by_punct = split_by_punct
self.vocab_file = vocab_file
spm = sp.SentencePieceProcessor()
assert os.path.exists(vocab_file)
spm.load(vocab_file)
bpe_vocab_size = spm.GetPieceSize()
# Token map
# <unk> 0+1
# <s> 1+1
# </s> 2+1
self.vocab = {spm.IdToPiece(i): i for i in range(bpe_vocab_size)}
self.id_to_tokens = [spm.IdToPiece(i) for i in range(bpe_vocab_size)]
# self.vocab['[PAD]'] = 0
# self.vocab['[CLS]'] = 1
# self.vocab['[SEP]'] = 2
# self.vocab['[UNK]'] = 3
self.spm = spm
def __getstate__(self):
state = self.__dict__.copy()
state["spm"] = None
return state
def __setstate__(self, d):
self.__dict__ = d
self.spm = sp.SentencePieceProcessor()
self.spm.Load(self.vocab_file)
def tokenize(self, text):
pieces = self._encode_as_pieces(text)
def _norm(x):
if x not in self.vocab or x == "<unk>":
return "[UNK]"
else:
return x
pieces = [_norm(p) for p in pieces]
return pieces
def convert_ids_to_tokens(self, ids):
tokens = []
for i in ids:
tokens.append(self.ids_to_tokens[i])
return tokens
def decode(self, tokens, start=-1, end=-1, raw_text=None):
if raw_text is None:
return self.spm.decode_pieces([t for t in tokens])
else:
words = self.split_to_words(raw_text)
word_tokens = [self.tokenize(w) for w in words]
token2words = [0] * len(tokens)
tid = 0
for i, w in enumerate(word_tokens):
for k, t in enumerate(w):
token2words[tid] = i
tid += 1
word_start = token2words[start]
word_end = token2words[end] if end < len(tokens) else len(words)
text = "".join(words[word_start:word_end])
return text
def add_special_token(self, token):
if token not in self.special_tokens:
self.special_tokens.append(token)
if token not in self.vocab:
self.vocab[token] = len(self.vocab) - 1
self.id_to_tokens.append(token)
return self.id(token)
def part_of_whole_word(self, token, is_bos=False):
if is_bos:
return True
if (
len(token) == 1
and (_is_whitespace(list(token)[0]) or _is_control(list(token)[0]) or _is_punctuation(list(token)[0]))
) or token in self.special_tokens:
return False
word_start = b"\xe2\x96\x81".decode("utf-8")
return not token.startswith(word_start)
def pad(self):
return "[PAD]"
def bos(self):
return "[CLS]"
def eos(self):
return "[SEP]"
def unk(self):
return "[UNK]"
def mask(self):
return "[MASK]"
def sym(self, id):
return self.ids_to_tokens[id]
def id(self, sym):
return self.vocab[sym] if sym in self.vocab else 1
def _encode_as_pieces(self, text):
text = convert_to_unicode(text)
if self.split_by_punct:
words = self._run_split_on_punc(text)
pieces = [self.spm.encode_as_pieces(w) for w in words]
return [p for w in pieces for p in w]
else:
return self.spm.encode_as_pieces(text)
def split_to_words(self, text):
pieces = self._encode_as_pieces(text)
word_start = b"\xe2\x96\x81".decode("utf-8")
words = []
offset = 0
prev_end = 0
for i, p in enumerate(pieces):
if p.startswith(word_start):
if offset > prev_end:
words.append(text[prev_end:offset])
prev_end = offset
w = p.replace(word_start, "")
else:
w = p
try:
s = text.index(w, offset)
pn = ""
k = i + 1
while k < len(pieces):
pn = pieces[k].replace(word_start, "")
if len(pn) > 0:
break
k += 1
if len(pn) > 0 and pn in text[offset:s]:
offset = offset + 1
else:
offset = s + len(w)
except Exception:
offset = offset + 1
if prev_end < offset:
words.append(text[prev_end:offset])
return words
def _run_strip_accents(self, text):
"""Strips accents from a piece of text."""
text = unicodedata.normalize("NFD", text)
output = []
for char in text:
cat = unicodedata.category(char)
if cat == "Mn":
continue
output.append(char)
return "".join(output)
def _run_split_on_punc(self, text):
"""Splits punctuation on a piece of text."""
chars = list(text)
i = 0
start_new_word = True
output = []
while i < len(chars):
char = chars[i]
if _is_punctuation(char):
output.append([char])
start_new_word = True
else:
if start_new_word:
output.append([])
start_new_word = False
output[-1].append(char)
i += 1
return ["".join(x) for x in output]
def save_pretrained(self, path: str, filename_prefix: str = None):
filename = VOCAB_FILES_NAMES[list(VOCAB_FILES_NAMES.keys())[0]]
if filename_prefix is not None:
filename = filename_prefix + "-" + filename
full_path = os.path.join(path, filename)
with open(full_path, "wb") as fs:
fs.write(self.spm.serialized_model_proto())
return (full_path,)
def _is_whitespace(char):
"""Checks whether `chars` is a whitespace character."""
# \t, \n, and \r are technically contorl characters but we treat them
# as whitespace since they are generally considered as such.
if char == " " or char == "\t" or char == "\n" or char == "\r":
return True
cat = unicodedata.category(char)
if cat == "Zs":
return True
return False
def _is_control(char):
"""Checks whether `chars` is a control character."""
# These are technically control characters but we count them as whitespace
# characters.
if char == "\t" or char == "\n" or char == "\r":
return False
cat = unicodedata.category(char)
if cat.startswith("C"):
return True
return False
def _is_punctuation(char):
"""Checks whether `chars` is a punctuation character."""
cp = ord(char)
# We treat all non-letter/number ASCII as punctuation.
# Characters such as "^", "$", and "`" are not in the Unicode
# Punctuation class but we treat them as punctuation anyways, for
# consistency.
if (cp >= 33 and cp <= 47) or (cp >= 58 and cp <= 64) or (cp >= 91 and cp <= 96) or (cp >= 123 and cp <= 126):
return True
cat = unicodedata.category(char)
if cat.startswith("P"):
return True
return False
def convert_to_unicode(text):
"""Converts `text` to Unicode (if it's not already), assuming utf-8 input."""
if six.PY3:
if isinstance(text, str):
return text
elif isinstance(text, bytes):
return text.decode("utf-8", "ignore")
else:
raise ValueError("Unsupported string type: %s" % (type(text)))
elif six.PY2:
if isinstance(text, str):
return text.decode("utf-8", "ignore")
else:
raise ValueError("Unsupported string type: %s" % (type(text)))
else:
raise ValueError("Not running on Python2 or Python 3?")

View File

@@ -883,6 +883,63 @@ class DebertaPreTrainedModel:
requires_pytorch(self)
DEBERTA_V2_PRETRAINED_MODEL_ARCHIVE_LIST = None
class DebertaV2ForMaskedLM:
def __init__(self, *args, **kwargs):
requires_pytorch(self)
@classmethod
def from_pretrained(self, *args, **kwargs):
requires_pytorch(self)
class DebertaV2ForQuestionAnswering:
def __init__(self, *args, **kwargs):
requires_pytorch(self)
@classmethod
def from_pretrained(self, *args, **kwargs):
requires_pytorch(self)
class DebertaV2ForSequenceClassification:
def __init__(self, *args, **kwargs):
requires_pytorch(self)
@classmethod
def from_pretrained(self, *args, **kwargs):
requires_pytorch(self)
class DebertaV2ForTokenClassification:
def __init__(self, *args, **kwargs):
requires_pytorch(self)
@classmethod
def from_pretrained(self, *args, **kwargs):
requires_pytorch(self)
class DebertaV2Model:
def __init__(self, *args, **kwargs):
requires_pytorch(self)
@classmethod
def from_pretrained(self, *args, **kwargs):
requires_pytorch(self)
class DebertaV2PreTrainedModel:
def __init__(self, *args, **kwargs):
requires_pytorch(self)
@classmethod
def from_pretrained(self, *args, **kwargs):
requires_pytorch(self)
DISTILBERT_PRETRAINED_MODEL_ARCHIVE_LIST = None