Funnel transformer (#6908)
* Initial model * Fix upsampling * Add special cls token id and test * Formatting * Test and fist FunnelTokenizerFast * Common tests * Fix the check_repo script and document Funnel * Doc fixes * Add all models * Write doc * Fix test * Initial model * Fix upsampling * Add special cls token id and test * Formatting * Test and fist FunnelTokenizerFast * Common tests * Fix the check_repo script and document Funnel * Doc fixes * Add all models * Write doc * Fix test * Fix copyright * Forgot some layers can be repeated * Apply suggestions from code review Co-authored-by: Lysandre Debut <lysandre@huggingface.co> Co-authored-by: Patrick von Platen <patrick.v.platen@gmail.com> * Update src/transformers/modeling_funnel.py Co-authored-by: Lysandre Debut <lysandre@huggingface.co> * Address review comments * Update src/transformers/modeling_funnel.py Co-authored-by: Patrick von Platen <patrick.v.platen@gmail.com> * Address review comments * Update src/transformers/modeling_funnel.py Co-authored-by: Sam Shleifer <sshleifer@gmail.com> * Slow integration test * Make small integration test * Formatting * Add checkpoint and separate classification head * Formatting * Expand list, fix link and add in pretrained models * Styling * Add the model in all summaries * Typo fixes Co-authored-by: Lysandre Debut <lysandre@huggingface.co> Co-authored-by: Patrick von Platen <patrick.v.platen@gmail.com> Co-authored-by: Sam Shleifer <sshleifer@gmail.com>
This commit is contained in:
@@ -29,6 +29,7 @@ from .configuration_dpr import DPR_PRETRAINED_CONFIG_ARCHIVE_MAP, DPRConfig
|
||||
from .configuration_electra import ELECTRA_PRETRAINED_CONFIG_ARCHIVE_MAP, ElectraConfig
|
||||
from .configuration_encoder_decoder import EncoderDecoderConfig
|
||||
from .configuration_flaubert import FLAUBERT_PRETRAINED_CONFIG_ARCHIVE_MAP, FlaubertConfig
|
||||
from .configuration_funnel import FUNNEL_PRETRAINED_CONFIG_ARCHIVE_MAP, FunnelConfig
|
||||
from .configuration_gpt2 import GPT2_PRETRAINED_CONFIG_ARCHIVE_MAP, GPT2Config
|
||||
from .configuration_longformer import LONGFORMER_PRETRAINED_CONFIG_ARCHIVE_MAP, LongformerConfig
|
||||
from .configuration_lxmert import LXMERT_PRETRAINED_CONFIG_ARCHIVE_MAP, LxmertConfig
|
||||
@@ -155,6 +156,7 @@ from .tokenization_dpr import (
|
||||
)
|
||||
from .tokenization_electra import ElectraTokenizer, ElectraTokenizerFast
|
||||
from .tokenization_flaubert import FlaubertTokenizer
|
||||
from .tokenization_funnel import FunnelTokenizer, FunnelTokenizerFast
|
||||
from .tokenization_gpt2 import GPT2Tokenizer, GPT2TokenizerFast
|
||||
from .tokenization_longformer import LongformerTokenizer, LongformerTokenizerFast
|
||||
from .tokenization_lxmert import LxmertTokenizer, LxmertTokenizerFast
|
||||
@@ -327,6 +329,18 @@ if is_torch_available():
|
||||
FlaubertModel,
|
||||
FlaubertWithLMHeadModel,
|
||||
)
|
||||
from .modeling_funnel import (
|
||||
FUNNEL_PRETRAINED_MODEL_ARCHIVE_LIST,
|
||||
FunnelBaseModel,
|
||||
FunnelForMaskedLM,
|
||||
FunnelForMultipleChoice,
|
||||
FunnelForPreTraining,
|
||||
FunnelForQuestionAnswering,
|
||||
FunnelForSequenceClassification,
|
||||
FunnelForTokenClassification,
|
||||
FunnelModel,
|
||||
load_tf_weights_in_funnel,
|
||||
)
|
||||
from .modeling_gpt2 import (
|
||||
GPT2_PRETRAINED_MODEL_ARCHIVE_LIST,
|
||||
GPT2DoubleHeadsModel,
|
||||
|
||||
@@ -15,6 +15,12 @@ def convert_command_factory(args: Namespace):
|
||||
)
|
||||
|
||||
|
||||
IMPORT_ERROR_MESSAGE = """transformers can only be used from the commandline to convert TensorFlow models in PyTorch,
|
||||
In that case, it requires TensorFlow to be installed. Please see
|
||||
https://www.tensorflow.org/install/ for installation instructions.
|
||||
"""
|
||||
|
||||
|
||||
class ConvertCommand(BaseTransformersCLICommand):
|
||||
@staticmethod
|
||||
def register_subcommand(parser: ArgumentParser):
|
||||
@@ -69,12 +75,7 @@ class ConvertCommand(BaseTransformersCLICommand):
|
||||
convert_tf_checkpoint_to_pytorch,
|
||||
)
|
||||
except ImportError:
|
||||
msg = (
|
||||
"transformers can only be used from the commandline to convert TensorFlow models in PyTorch, "
|
||||
"In that case, it requires TensorFlow to be installed. Please see "
|
||||
"https://www.tensorflow.org/install/ for installation instructions."
|
||||
)
|
||||
raise ImportError(msg)
|
||||
raise ImportError(IMPORT_ERROR_MESSAGE)
|
||||
|
||||
convert_tf_checkpoint_to_pytorch(self._tf_checkpoint, self._config, self._pytorch_dump_output)
|
||||
elif self._model_type == "bert":
|
||||
@@ -83,12 +84,16 @@ class ConvertCommand(BaseTransformersCLICommand):
|
||||
convert_tf_checkpoint_to_pytorch,
|
||||
)
|
||||
except ImportError:
|
||||
msg = (
|
||||
"transformers can only be used from the commandline to convert TensorFlow models in PyTorch, "
|
||||
"In that case, it requires TensorFlow to be installed. Please see "
|
||||
"https://www.tensorflow.org/install/ for installation instructions."
|
||||
raise ImportError(IMPORT_ERROR_MESSAGE)
|
||||
|
||||
convert_tf_checkpoint_to_pytorch(self._tf_checkpoint, self._config, self._pytorch_dump_output)
|
||||
elif self._model_type == "funnel":
|
||||
try:
|
||||
from transformers.convert_funnel_original_tf_checkpoint_to_pytorch import (
|
||||
convert_tf_checkpoint_to_pytorch,
|
||||
)
|
||||
raise ImportError(msg)
|
||||
except ImportError:
|
||||
raise ImportError(IMPORT_ERROR_MESSAGE)
|
||||
|
||||
convert_tf_checkpoint_to_pytorch(self._tf_checkpoint, self._config, self._pytorch_dump_output)
|
||||
elif self._model_type == "gpt":
|
||||
@@ -103,12 +108,7 @@ class ConvertCommand(BaseTransformersCLICommand):
|
||||
convert_transfo_xl_checkpoint_to_pytorch,
|
||||
)
|
||||
except ImportError:
|
||||
msg = (
|
||||
"transformers can only be used from the commandline to convert TensorFlow models in PyTorch, "
|
||||
"In that case, it requires TensorFlow to be installed. Please see "
|
||||
"https://www.tensorflow.org/install/ for installation instructions."
|
||||
)
|
||||
raise ImportError(msg)
|
||||
raise ImportError(IMPORT_ERROR_MESSAGE)
|
||||
|
||||
if "ckpt" in self._tf_checkpoint.lower():
|
||||
TF_CHECKPOINT = self._tf_checkpoint
|
||||
@@ -125,12 +125,7 @@ class ConvertCommand(BaseTransformersCLICommand):
|
||||
convert_gpt2_checkpoint_to_pytorch,
|
||||
)
|
||||
except ImportError:
|
||||
msg = (
|
||||
"transformers can only be used from the commandline to convert TensorFlow models in PyTorch, "
|
||||
"In that case, it requires TensorFlow to be installed. Please see "
|
||||
"https://www.tensorflow.org/install/ for installation instructions."
|
||||
)
|
||||
raise ImportError(msg)
|
||||
raise ImportError(IMPORT_ERROR_MESSAGE)
|
||||
|
||||
convert_gpt2_checkpoint_to_pytorch(self._tf_checkpoint, self._config, self._pytorch_dump_output)
|
||||
elif self._model_type == "xlnet":
|
||||
@@ -139,12 +134,7 @@ class ConvertCommand(BaseTransformersCLICommand):
|
||||
convert_xlnet_checkpoint_to_pytorch,
|
||||
)
|
||||
except ImportError:
|
||||
msg = (
|
||||
"transformers can only be used from the commandline to convert TensorFlow models in PyTorch, "
|
||||
"In that case, it requires TensorFlow to be installed. Please see "
|
||||
"https://www.tensorflow.org/install/ for installation instructions."
|
||||
)
|
||||
raise ImportError(msg)
|
||||
raise ImportError(IMPORT_ERROR_MESSAGE)
|
||||
|
||||
convert_xlnet_checkpoint_to_pytorch(
|
||||
self._tf_checkpoint, self._config, self._pytorch_dump_output, self._finetuning_task_name
|
||||
|
||||
@@ -26,6 +26,7 @@ from .configuration_distilbert import DISTILBERT_PRETRAINED_CONFIG_ARCHIVE_MAP,
|
||||
from .configuration_electra import ELECTRA_PRETRAINED_CONFIG_ARCHIVE_MAP, ElectraConfig
|
||||
from .configuration_encoder_decoder import EncoderDecoderConfig
|
||||
from .configuration_flaubert import FLAUBERT_PRETRAINED_CONFIG_ARCHIVE_MAP, FlaubertConfig
|
||||
from .configuration_funnel import FUNNEL_PRETRAINED_CONFIG_ARCHIVE_MAP, FunnelConfig
|
||||
from .configuration_gpt2 import GPT2_PRETRAINED_CONFIG_ARCHIVE_MAP, GPT2Config
|
||||
from .configuration_longformer import LONGFORMER_PRETRAINED_CONFIG_ARCHIVE_MAP, LongformerConfig
|
||||
from .configuration_lxmert import LXMERT_PRETRAINED_CONFIG_ARCHIVE_MAP, LxmertConfig
|
||||
@@ -67,6 +68,7 @@ ALL_PRETRAINED_CONFIG_ARCHIVE_MAP = dict(
|
||||
ELECTRA_PRETRAINED_CONFIG_ARCHIVE_MAP,
|
||||
LONGFORMER_PRETRAINED_CONFIG_ARCHIVE_MAP,
|
||||
RETRIBERT_PRETRAINED_CONFIG_ARCHIVE_MAP,
|
||||
FUNNEL_PRETRAINED_CONFIG_ARCHIVE_MAP,
|
||||
LXMERT_PRETRAINED_CONFIG_ARCHIVE_MAP,
|
||||
]
|
||||
for key, value, in pretrained_map.items()
|
||||
@@ -168,6 +170,10 @@ CONFIG_MAPPING = OrderedDict(
|
||||
"encoder-decoder",
|
||||
EncoderDecoderConfig,
|
||||
),
|
||||
(
|
||||
"funnel",
|
||||
FunnelConfig,
|
||||
),
|
||||
(
|
||||
"lxmert",
|
||||
LxmertConfig,
|
||||
@@ -230,6 +236,7 @@ class AutoConfig:
|
||||
- `ctrl` : :class:`~transformers.CTRLConfig` (CTRL model)
|
||||
- `flaubert` : :class:`~transformers.FlaubertConfig` (Flaubert model)
|
||||
- `electra` : :class:`~transformers.ElectraConfig` (ELECTRA model)
|
||||
- `funnel`: :class:`~transformers.FunnelConfig` (Funnel Transformer model)
|
||||
|
||||
Args:
|
||||
pretrained_model_name_or_path (:obj:`string`):
|
||||
|
||||
183
src/transformers/configuration_funnel.py
Normal file
183
src/transformers/configuration_funnel.py
Normal file
@@ -0,0 +1,183 @@
|
||||
# coding=utf-8
|
||||
# Copyright 2020, Hugging Face
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
""" Funnel Transformer model configuration """
|
||||
|
||||
from .configuration_utils import PretrainedConfig
|
||||
from .utils import logging
|
||||
|
||||
|
||||
logger = logging.get_logger(__name__)
|
||||
|
||||
FUNNEL_PRETRAINED_CONFIG_ARCHIVE_MAP = {
|
||||
"funnel-transformer/small": "https://s3.amazonaws.com/models.huggingface.co/bert/funnel-transformer/small/config.json",
|
||||
"funnel-transformer/small-base": "https://s3.amazonaws.com/models.huggingface.co/bert/funnel-transformer/small-base/config.json",
|
||||
"funnel-transformer/medium": "https://s3.amazonaws.com/models.huggingface.co/bert/funnel-transformer/medium/config.json",
|
||||
"funnel-transformer/medium-base": "https://s3.amazonaws.com/models.huggingface.co/bert/funnel-transformer/medium-base/config.json",
|
||||
"funnel-transformer/intermediate": "https://s3.amazonaws.com/models.huggingface.co/bert/funnel-transformer/intermediate/config.json",
|
||||
"funnel-transformer/intermediate-base": "https://s3.amazonaws.com/models.huggingface.co/bert/funnel-transformer/intermediate-base/config.json",
|
||||
"funnel-transformer/large": "https://s3.amazonaws.com/models.huggingface.co/bert/funnel-transformer/large/config.json",
|
||||
"funnel-transformer/large-base": "https://s3.amazonaws.com/models.huggingface.co/bert/funnel-transformer/large-base/config.json",
|
||||
"funnel-transformer/xlarge": "https://s3.amazonaws.com/models.huggingface.co/bert/funnel-transformer/xlarge/config.json",
|
||||
"funnel-transformer/xlarge-base": "https://s3.amazonaws.com/models.huggingface.co/bert/funnel-transformer/xlarge-base/config.json",
|
||||
}
|
||||
|
||||
|
||||
class FunnelConfig(PretrainedConfig):
    r"""
    This is the configuration class to store the configuration of a :class:`~transformers.FunnelModel`.
    It is used to instantiate a Funnel Transformer model according to the specified arguments, defining the model
    architecture. Instantiating a configuration with the defaults will yield a similar configuration to that of
    the Funnel Transformer `funnel-transformer/small <https://huggingface.co/funnel-transformer/small>`__ architecture.

    Configuration objects inherit from :class:`~transformers.PretrainedConfig` and can be used
    to control the model outputs. Read the documentation from :class:`~transformers.PretrainedConfig`
    for more information.


    Args:
        vocab_size (:obj:`int`, `optional`, defaults to 30522):
            Vocabulary size of the Funnel transformer. Defines the different tokens that
            can be represented by the `input_ids` passed to the forward method of :class:`~transformers.FunnelModel`.
        block_sizes (:obj:`List[int]`, `optional`, defaults to :obj:`[4, 4, 4]`):
            The sizes of the blocks used in the model.
        block_repeats (:obj:`List[int]`, `optional`):
            If passed along, each layer of each block is repeated the number of times indicated. Must have the
            same length as :obj:`block_sizes`. Defaults to a list of ones (no repetition).
        num_decoder_layers (:obj:`int`, `optional`, defaults to 2):
            The number of layers in the decoder (when not using the base model).
        d_model (:obj:`int`, `optional`, defaults to 768):
            Dimensionality of the model's hidden states.
        n_head (:obj:`int`, `optional`, defaults to 12):
            Number of attention heads for each attention layer in the Transformer encoder.
        d_head (:obj:`int`, `optional`, defaults to 64):
            Dimensionality of the model's heads.
        d_inner (:obj:`int`, `optional`, defaults to 3072):
            Inner dimension in the feed-forward blocks.
        hidden_act (:obj:`str` or :obj:`callable`, `optional`, defaults to :obj:`"gelu_new"`):
            The non-linear activation function (function or string) in the encoder and pooler.
            If string, :obj:`"gelu"`, :obj:`"relu"`, :obj:`"swish"` and :obj:`"gelu_new"` are supported.
        hidden_dropout (:obj:`float`, `optional`, defaults to 0.1):
            The dropout probability for all fully connected layers in the embeddings, encoder, and pooler.
        attention_dropout (:obj:`float`, `optional`, defaults to 0.1):
            The dropout probability for the attention probabilities.
        activation_dropout (:obj:`float`, `optional`, defaults to 0.0):
            The dropout probability used between the two layers of the feed-forward blocks.
        max_position_embeddings (:obj:`int`, `optional`, defaults to 512):
            The maximum sequence length that this model might ever be used with.
            Typically set this to something large just in case (e.g., 512 or 1024 or 2048).
        type_vocab_size (:obj:`int`, `optional`, defaults to 3):
            The vocabulary size of the `token_type_ids` passed into :class:`~transformers.FunnelModel`.
        initializer_range (:obj:`float`, `optional`, defaults to 0.1):
            The standard deviation of the `uniform initializer` for initializing all weight matrices in attention
            layers.
        initializer_std (:obj:`float`, `optional`):
            The standard deviation of the `normal initializer` for initializing the embedding matrix and the weight of
            linear layers. Will default to 1 for the embedding matrix and the value given by Xavier initialization for
            linear layers.
        layer_norm_eps (:obj:`float`, `optional`, defaults to 1e-9):
            The epsilon used by the layer normalization layers.
        pooling_type (:obj:`str`, `optional`, defaults to :obj:`"mean"`):
            Possible values are ``"mean"`` or ``"max"``. The way pooling is performed at the beginning of each
            block.
        attention_type (:obj:`str`, `optional`, defaults to :obj:`"relative_shift"`):
            Possible values are ``"relative_shift"`` or ``"factorized"``. The former is faster on CPU/GPU while
            the latter is faster on TPU.
        separate_cls (:obj:`bool`, `optional`, defaults to :obj:`True`):
            Whether or not to separate the cls token when applying pooling.
        truncate_seq (:obj:`bool`, `optional`, defaults to :obj:`True`):
            When using ``separate_cls``, whether or not to truncate the last token when pooling, to avoid getting
            a sequence length that is not a multiple of 2.
        pool_q_only (:obj:`bool`, `optional`, defaults to :obj:`True`):
            Whether or not to apply the pooling only to the query or to query, key and values for the attention
            layers.

    Raises:
        AssertionError: If :obj:`block_sizes` and :obj:`block_repeats` have different lengths, or if
            :obj:`pooling_type` or :obj:`attention_type` is not one of the supported values.
    """
    model_type = "funnel"

    def __init__(
        self,
        vocab_size=30522,
        block_sizes=[4, 4, 4],
        block_repeats=None,
        num_decoder_layers=2,
        d_model=768,
        n_head=12,
        d_head=64,
        d_inner=3072,
        hidden_act="gelu_new",
        hidden_dropout=0.1,
        attention_dropout=0.1,
        activation_dropout=0.0,
        max_position_embeddings=512,
        type_vocab_size=3,
        initializer_range=0.1,
        initializer_std=None,
        layer_norm_eps=1e-9,
        pooling_type="mean",
        attention_type="relative_shift",
        separate_cls=True,
        truncate_seq=True,
        pool_q_only=True,
        **kwargs
    ):
        super().__init__(**kwargs)

        self.vocab_size = vocab_size
        # Store a copy: the default value is a single list object shared by every call, so keeping a
        # reference to it would let a mutation of one config's `block_sizes` leak into all later configs.
        self.block_sizes = list(block_sizes)
        # Without explicit repeats, every block's layers are used exactly once.
        self.block_repeats = [1] * len(block_sizes) if block_repeats is None else block_repeats
        assert len(block_sizes) == len(
            self.block_repeats
        ), "`block_sizes` and `block_repeats` should have the same length."
        self.num_decoder_layers = num_decoder_layers
        self.d_model = d_model
        self.n_head = n_head
        self.d_head = d_head
        self.d_inner = d_inner
        self.hidden_act = hidden_act
        self.hidden_dropout = hidden_dropout
        self.attention_dropout = attention_dropout
        self.activation_dropout = activation_dropout
        self.max_position_embeddings = max_position_embeddings
        self.type_vocab_size = type_vocab_size
        self.initializer_range = initializer_range
        self.initializer_std = initializer_std
        self.layer_norm_eps = layer_norm_eps
        assert pooling_type in [
            "mean",
            "max",
        ], f"Got {pooling_type} for `pooling_type` but only 'mean' and 'max' are supported."
        self.pooling_type = pooling_type
        assert attention_type in [
            "relative_shift",
            "factorized",
        ], f"Got {attention_type} for `attention_type` but only 'relative_shift' and 'factorized' are supported."
        self.attention_type = attention_type
        self.separate_cls = separate_cls
        self.truncate_seq = truncate_seq
        self.pool_q_only = pool_q_only

    @property
    def hidden_size(self):
        """Alias for :obj:`d_model`."""
        return self.d_model

    @property
    def num_attention_heads(self):
        """Alias for :obj:`n_head`."""
        return self.n_head

    @property
    def num_hidden_layers(self):
        """Sum of all the entries of :obj:`block_sizes`."""
        return sum(self.block_sizes)

    @property
    def num_blocks(self):
        """Number of blocks, i.e. the length of :obj:`block_sizes`."""
        return len(self.block_sizes)
|
||||
61
src/transformers/convert_funnel_original_tf_checkpoint_to_pytorch.py
Executable file
61
src/transformers/convert_funnel_original_tf_checkpoint_to_pytorch.py
Executable file
@@ -0,0 +1,61 @@
|
||||
# coding=utf-8
|
||||
# Copyright 2020 The HuggingFace Inc. team.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
"""Convert Funnel checkpoint."""
|
||||
|
||||
|
||||
import argparse
|
||||
import logging
|
||||
|
||||
import torch
|
||||
|
||||
from transformers import FunnelConfig, FunnelForPreTraining, load_tf_weights_in_funnel
|
||||
|
||||
|
||||
logging.basicConfig(level=logging.INFO)
|
||||
|
||||
|
||||
def convert_tf_checkpoint_to_pytorch(tf_checkpoint_path, config_file, pytorch_dump_path):
    """
    Convert a TensorFlow Funnel checkpoint into a saved PyTorch state dict.

    Args:
        tf_checkpoint_path: Location of the TensorFlow checkpoint whose weights are loaded.
        config_file: JSON file from which the :class:`FunnelConfig` is read.
        pytorch_dump_path: Destination to which the model's ``state_dict`` is written with ``torch.save``.
    """
    # Build an untrained pre-training model from the JSON configuration.
    funnel_config = FunnelConfig.from_json_file(config_file)
    building_message = "Building PyTorch model from configuration: {}".format(str(funnel_config))
    print(building_message)
    pytorch_model = FunnelForPreTraining(funnel_config)

    # Fill the model with the weights stored in the TensorFlow checkpoint.
    load_tf_weights_in_funnel(pytorch_model, funnel_config, tf_checkpoint_path)

    # Persist only the weights (state dict), not the whole module object.
    saving_message = "Save PyTorch model to {}".format(pytorch_dump_path)
    print(saving_message)
    weights = pytorch_model.state_dict()
    torch.save(weights, pytorch_dump_path)
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Command-line entry point: all three options are mandatory strings, so they are
    # declared as (flag, help text) pairs and registered in a single loop.
    required_options = (
        ("--tf_checkpoint_path", "Path to the TensorFlow checkpoint path."),
        (
            "--config_file",
            "The config json file corresponding to the pre-trained model. \n"
            "This specifies the model architecture.",
        ),
        ("--pytorch_dump_path", "Path to the output PyTorch model."),
    )

    parser = argparse.ArgumentParser()
    for flag, help_text in required_options:
        parser.add_argument(flag, default=None, type=str, required=True, help=help_text)

    cli_args = parser.parse_args()
    convert_tf_checkpoint_to_pytorch(
        cli_args.tf_checkpoint_path,
        cli_args.config_file,
        cli_args.pytorch_dump_path,
    )
|
||||
@@ -29,6 +29,7 @@ from .configuration_auto import (
|
||||
ElectraConfig,
|
||||
EncoderDecoderConfig,
|
||||
FlaubertConfig,
|
||||
FunnelConfig,
|
||||
GPT2Config,
|
||||
LongformerConfig,
|
||||
LxmertConfig,
|
||||
@@ -108,6 +109,14 @@ from .modeling_flaubert import (
|
||||
FlaubertModel,
|
||||
FlaubertWithLMHeadModel,
|
||||
)
|
||||
from .modeling_funnel import (
|
||||
FunnelForMaskedLM,
|
||||
FunnelForMultipleChoice,
|
||||
FunnelForQuestionAnswering,
|
||||
FunnelForSequenceClassification,
|
||||
FunnelForTokenClassification,
|
||||
FunnelModel,
|
||||
)
|
||||
from .modeling_gpt2 import GPT2LMHeadModel, GPT2Model
|
||||
from .modeling_longformer import (
|
||||
LongformerForMaskedLM,
|
||||
@@ -202,6 +211,7 @@ MODEL_MAPPING = OrderedDict(
|
||||
(CTRLConfig, CTRLModel),
|
||||
(ElectraConfig, ElectraModel),
|
||||
(ReformerConfig, ReformerModel),
|
||||
(FunnelConfig, FunnelModel),
|
||||
(LxmertConfig, LxmertModel),
|
||||
]
|
||||
)
|
||||
@@ -254,6 +264,7 @@ MODEL_WITH_LM_HEAD_MAPPING = OrderedDict(
|
||||
(ElectraConfig, ElectraForMaskedLM),
|
||||
(EncoderDecoderConfig, EncoderDecoderModel),
|
||||
(ReformerConfig, ReformerModelWithLMHead),
|
||||
(FunnelConfig, FunnelForMaskedLM),
|
||||
]
|
||||
)
|
||||
|
||||
@@ -291,6 +302,7 @@ MODEL_FOR_MASKED_LM_MAPPING = OrderedDict(
|
||||
(XLMConfig, XLMWithLMHeadModel),
|
||||
(ElectraConfig, ElectraForMaskedLM),
|
||||
(ReformerConfig, ReformerForMaskedLM),
|
||||
(FunnelConfig, FunnelForMaskedLM),
|
||||
]
|
||||
)
|
||||
|
||||
@@ -320,6 +332,7 @@ MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING = OrderedDict(
|
||||
(FlaubertConfig, FlaubertForSequenceClassification),
|
||||
(XLMConfig, XLMForSequenceClassification),
|
||||
(ElectraConfig, ElectraForSequenceClassification),
|
||||
(FunnelConfig, FunnelForSequenceClassification),
|
||||
]
|
||||
)
|
||||
|
||||
@@ -339,6 +352,7 @@ MODEL_FOR_QUESTION_ANSWERING_MAPPING = OrderedDict(
|
||||
(XLMConfig, XLMForQuestionAnsweringSimple),
|
||||
(ElectraConfig, ElectraForQuestionAnswering),
|
||||
(ReformerConfig, ReformerForQuestionAnswering),
|
||||
(FunnelConfig, FunnelForQuestionAnswering),
|
||||
]
|
||||
)
|
||||
|
||||
@@ -357,6 +371,7 @@ MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING = OrderedDict(
|
||||
(AlbertConfig, AlbertForTokenClassification),
|
||||
(ElectraConfig, ElectraForTokenClassification),
|
||||
(FlaubertConfig, FlaubertForTokenClassification),
|
||||
(FunnelConfig, FunnelForTokenClassification),
|
||||
]
|
||||
)
|
||||
|
||||
@@ -374,6 +389,7 @@ MODEL_FOR_MULTIPLE_CHOICE_MAPPING = OrderedDict(
|
||||
(AlbertConfig, AlbertForMultipleChoice),
|
||||
(XLMConfig, XLMForMultipleChoice),
|
||||
(FlaubertConfig, FlaubertForMultipleChoice),
|
||||
(FunnelConfig, FunnelForMultipleChoice),
|
||||
]
|
||||
)
|
||||
|
||||
@@ -421,6 +437,7 @@ class AutoModel:
|
||||
- isInstance of `xlm` configuration class: :class:`~transformers.XLMModel` (XLM model)
|
||||
- isInstance of `flaubert` configuration class: :class:`~transformers.FlaubertModel` (Flaubert model)
|
||||
- isInstance of `electra` configuration class: :class:`~transformers.ElectraModel` (Electra model)
|
||||
- isInstance of `funnel` configuration class: :class:`~transformers.FunnelModel` (Funnel Transformer model)
|
||||
|
||||
Examples::
|
||||
|
||||
@@ -462,6 +479,7 @@ class AutoModel:
|
||||
- `ctrl`: :class:`~transformers.CTRLModel` (Salesforce CTRL model)
|
||||
- `flaubert`: :class:`~transformers.FlaubertModel` (Flaubert model)
|
||||
- `electra`: :class:`~transformers.ElectraModel` (Electra model)
|
||||
- `funnel`: :class:`~transformers.FunnelModel` (Funnel Transformer model)
|
||||
|
||||
The model is set in evaluation mode by default using `model.eval()` (Dropout modules are deactivated)
|
||||
To train the model, you should first set it back in training mode with `model.train()`
|
||||
@@ -729,6 +747,7 @@ class AutoModelWithLMHead:
|
||||
- isInstance of `xlm` configuration class: :class:`~transformers.XLMWithLMHeadModel` (XLM model)
|
||||
- isInstance of `flaubert` configuration class: :class:`~transformers.FlaubertWithLMHeadModel` (Flaubert model)
|
||||
- isInstance of `electra` configuration class: :class:`~transformers.ElectraForMaskedLM` (Electra model)
|
||||
- isInstance of `funnel` configuration class: :class:`~transformers.FunnelForMaskedLM` (Funnel Transformer model)
|
||||
|
||||
Examples::
|
||||
|
||||
@@ -774,6 +793,7 @@ class AutoModelWithLMHead:
|
||||
- `ctrl`: :class:`~transformers.CTRLLMHeadModel` (Salesforce CTRL model)
|
||||
- `flaubert`: :class:`~transformers.FlaubertWithLMHeadModel` (Flaubert model)
|
||||
- `electra`: :class:`~transformers.ElectraForMaskedLM` (Electra model)
|
||||
- `funnel`: :class:`~transformers.FunnelForMaskedLM` (Funnel Transformer model)
|
||||
|
||||
The model is set in evaluation mode by default using `model.eval()` (Dropout modules are deactivated)
|
||||
To train the model, you should first set it back in training mode with `model.train()`
|
||||
@@ -1024,6 +1044,7 @@ class AutoModelForMaskedLM:
|
||||
- isInstance of `electra` configuration class: :class:`~transformers.ElectraForMaskedLM` (Electra model)
|
||||
- isInstance of `camembert` configuration class: :class:`~transformers.CamembertForMaskedLM` (Camembert model)
|
||||
- isInstance of `albert` configuration class: :class:`~transformers.AlbertForMaskedLM` (Albert model)
|
||||
- isInstance of `funnel` configuration class: :class:`~transformers.FunnelForMaskedLM` (Funnel Transformer model)
|
||||
|
||||
|
||||
Examples::
|
||||
@@ -1060,6 +1081,7 @@ class AutoModelForMaskedLM:
|
||||
- `flaubert`: :class:`~transformers.FlaubertWithLMHeadModel` (Flaubert model)
|
||||
- `electra`: :class:`~transformers.ElectraForMaskedLM` (Electra model)
|
||||
- `bert`: :class:`~transformers.BertLMHeadModel` (Bert model)
|
||||
- `funnel`: :class:`~transformers.FunnelForMaskedLM` (Funnel Transformer model)
|
||||
|
||||
The model is set in evaluation mode by default using `model.eval()` (Dropout modules are deactivated)
|
||||
To train the model, you should first set it back in training mode with `model.train()`
|
||||
@@ -1304,7 +1326,7 @@ class AutoModelForSequenceClassification:
|
||||
- isInstance of `xlnet` configuration class: :class:`~transformers.XLNetForSequenceClassification` (XLNet model)
|
||||
- isInstance of `xlm` configuration class: :class:`~transformers.XLMForSequenceClassification` (XLM model)
|
||||
- isInstance of `flaubert` configuration class: :class:`~transformers.FlaubertForSequenceClassification` (Flaubert model)
|
||||
|
||||
- isInstance of `funnel` configuration class: :class:`~transformers.FunnelForSequenceClassification` (Funnel Transformer model)
|
||||
|
||||
Examples::
|
||||
|
||||
@@ -1340,6 +1362,7 @@ class AutoModelForSequenceClassification:
|
||||
- `bert`: :class:`~transformers.BertForSequenceClassification` (Bert model)
|
||||
- `xlnet`: :class:`~transformers.XLNetForSequenceClassification` (XLNet model)
|
||||
- `flaubert`: :class:`~transformers.FlaubertForSequenceClassification` (Flaubert model)
|
||||
- `funnel`: :class:`~transformers.FunnelForSequenceClassification` (Funnel Transformer model)
|
||||
|
||||
The model is set in evaluation mode by default using `model.eval()` (Dropout modules are deactivated)
|
||||
To train the model, you should first set it back in training mode with `model.train()`
|
||||
@@ -1454,6 +1477,7 @@ class AutoModelForQuestionAnswering:
|
||||
- isInstance of `xlnet` configuration class: :class:`~transformers.XLNetForQuestionAnswering` (XLNet model)
|
||||
- isInstance of `xlm` configuration class: :class:`~transformers.XLMForQuestionAnswering` (XLM model)
|
||||
- isInstance of `flaubert` configuration class: :class:`~transformers.FlaubertForQuestionAnswering` (Flaubert model)
|
||||
- isInstance of `funnel` configuration class: :class:`~transformers.FunnelForQuestionAnswering` (Funnel Transformer model)
|
||||
|
||||
Examples::
|
||||
|
||||
@@ -1488,6 +1512,7 @@ class AutoModelForQuestionAnswering:
|
||||
- `xlnet`: :class:`~transformers.XLNetForQuestionAnswering` (XLNet model)
|
||||
- `xlm`: :class:`~transformers.XLMForQuestionAnswering` (XLM model)
|
||||
- `flaubert`: :class:`~transformers.FlaubertForQuestionAnswering` (Flaubert model)
|
||||
- `funnel`: :class:`~transformers.FunnelForQuestionAnswering` (Funnel Transformer model)
|
||||
|
||||
The model is set in evaluation mode by default using `model.eval()` (Dropout modules are deactivated)
|
||||
To train the model, you should first set it back in training mode with `model.train()`
|
||||
@@ -1604,6 +1629,7 @@ class AutoModelForTokenClassification:
|
||||
- isInstance of `camembert` configuration class: :class:`~transformers.CamembertForTokenClassification` (Camembert model)
|
||||
- isInstance of `roberta` configuration class: :class:`~transformers.RobertaForTokenClassification` (Roberta model)
|
||||
- isInstance of `electra` configuration class: :class:`~transformers.ElectraForTokenClassification` (Electra model)
|
||||
- isInstance of `funnel` configuration class: :class:`~transformers.FunnelForTokenClassification` (Funnel Transformer model)
|
||||
|
||||
Examples::
|
||||
|
||||
@@ -1641,6 +1667,7 @@ class AutoModelForTokenClassification:
|
||||
- `flaubert`: :class:`~transformers.FlaubertForTokenClassification` (Flaubert model)
|
||||
- `roberta`: :class:`~transformers.RobertaForTokenClassification` (Roberta model)
|
||||
- `electra`: :class:`~transformers.ElectraForTokenClassification` (Electra model)
|
||||
- `funnel`: :class:`~transformers.FunnelForTokenClassification` (Funnel Transformer model)
|
||||
|
||||
The model is set in evaluation mode by default using `model.eval()` (Dropout modules are deactivated)
|
||||
To train the model, you should first set it back in training mode with `model.train()`
|
||||
|
||||
1544
src/transformers/modeling_funnel.py
Normal file
1544
src/transformers/modeling_funnel.py
Normal file
File diff suppressed because it is too large
Load Diff
@@ -27,6 +27,7 @@ from .configuration_auto import (
|
||||
DistilBertConfig,
|
||||
ElectraConfig,
|
||||
FlaubertConfig,
|
||||
FunnelConfig,
|
||||
GPT2Config,
|
||||
LongformerConfig,
|
||||
LxmertConfig,
|
||||
@@ -54,6 +55,7 @@ from .tokenization_ctrl import CTRLTokenizer
|
||||
from .tokenization_distilbert import DistilBertTokenizer, DistilBertTokenizerFast
|
||||
from .tokenization_electra import ElectraTokenizer, ElectraTokenizerFast
|
||||
from .tokenization_flaubert import FlaubertTokenizer
|
||||
from .tokenization_funnel import FunnelTokenizer, FunnelTokenizerFast
|
||||
from .tokenization_gpt2 import GPT2Tokenizer, GPT2TokenizerFast
|
||||
from .tokenization_longformer import LongformerTokenizer, LongformerTokenizerFast
|
||||
from .tokenization_lxmert import LxmertTokenizer, LxmertTokenizerFast
|
||||
@@ -93,6 +95,7 @@ TOKENIZER_MAPPING = OrderedDict(
|
||||
(RobertaConfig, (RobertaTokenizer, RobertaTokenizerFast)),
|
||||
(ReformerConfig, (ReformerTokenizer, None)),
|
||||
(ElectraConfig, (ElectraTokenizer, ElectraTokenizerFast)),
|
||||
(FunnelConfig, (FunnelTokenizer, FunnelTokenizerFast)),
|
||||
(LxmertConfig, (LxmertTokenizer, LxmertTokenizerFast)),
|
||||
(BertConfig, (BertTokenizer, BertTokenizerFast)),
|
||||
(OpenAIGPTConfig, (OpenAIGPTTokenizer, OpenAIGPTTokenizerFast)),
|
||||
@@ -131,6 +134,7 @@ class AutoTokenizer:
|
||||
- `xlm`: XLMTokenizer (XLM model)
|
||||
- `ctrl`: CTRLTokenizer (Salesforce CTRL model)
|
||||
- `electra`: ElectraTokenizer (Google ELECTRA model)
|
||||
- `funnel`: FunnelTokenizer (Funnel Transformer model)
|
||||
- `lxmert`: LxmertTokenizer (Lxmert model)
|
||||
|
||||
This class cannot be instantiated using `__init__()` (throws an error).
|
||||
@@ -167,6 +171,7 @@ class AutoTokenizer:
|
||||
- `xlm`: XLMTokenizer (XLM model)
|
||||
- `ctrl`: CTRLTokenizer (Salesforce CTRL model)
|
||||
- `electra`: ElectraTokenizer (Google ELECTRA model)
|
||||
- `funnel`: FunnelTokenizer (Funnel Transformer model)
|
||||
- `lxmert`: LxmertTokenizer (Lxmert model)
|
||||
|
||||
Params:
|
||||
|
||||
232
src/transformers/tokenization_funnel.py
Normal file
232
src/transformers/tokenization_funnel.py
Normal file
@@ -0,0 +1,232 @@
|
||||
# coding=utf-8
|
||||
# Copyright 2020 The HuggingFace Inc. team.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
""" Tokenization class for Funnel Transformer."""
|
||||
|
||||
from typing import List, Optional
|
||||
|
||||
from .tokenization_bert import BertTokenizer, BertTokenizerFast
|
||||
from .utils import logging
|
||||
|
||||
|
||||
# Module-level logger, obtained from the package's own `logging` utilities (imported from `.utils`).
logger = logging.get_logger(__name__)
|
||||
|
||||
# Maps the tokenizer's vocabulary argument name to the file name it is stored under.
VOCAB_FILES_NAMES = {"vocab_file": "vocab.txt"}

# Checkpoint name suffixes; every checkpoint identifier below is "funnel-transformer/<suffix>".
_model_names = [
    "small",
    "small-base",
    "medium",
    "medium-base",
    "intermediate",
    "intermediate-base",
    "large",
    "large-base",
    "xlarge",
    "xlarge-base",
]

# Location of the vocabulary file of one checkpoint, parameterized by the checkpoint name suffix.
_VOCAB_URL_TEMPLATE = "https://s3.amazonaws.com/models.huggingface.co/bert/funnel-transformer/{}/vocab.txt"

# Checkpoint identifier -> URL of its vocabulary file, in the same order as `_model_names`.
PRETRAINED_VOCAB_FILES_MAP = {
    "vocab_file": {
        "funnel-transformer/" + suffix: _VOCAB_URL_TEMPLATE.format(suffix) for suffix in _model_names
    }
}

# Checkpoint identifier -> size recorded for its positional embeddings (512 for every checkpoint).
PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES = dict.fromkeys(
    ("funnel-transformer/" + suffix for suffix in _model_names), 512
)

# Checkpoint identifier -> default init kwargs; a separate dict is built per checkpoint so that entries
# are never shared between checkpoints.
PRETRAINED_INIT_CONFIGURATION = {
    "funnel-transformer/" + suffix: {"do_lower_case": True} for suffix in _model_names
}
|
||||
|
||||
|
||||
class FunnelTokenizer(BertTokenizer):
    r"""
    Tokenizer for the Funnel Transformer models.

    :class:`~transformers.FunnelTokenizer` subclasses :class:`~transformers.BertTokenizer` and runs the same
    end-to-end tokenization: punctuation splitting + wordpiece. On top of its parent it sets its own default special
    tokens (``<unk>``, ``<sep>``, ``<pad>``, ``<cls>``, ``<mask>``, ``<s>``, ``</s>``) and gives the position of the
    ``<cls>`` token a dedicated token type id (:obj:`cls_token_type_id`).

    Refer to superclass :class:`~transformers.BertTokenizer` for usage examples and documentation concerning
    parameters.
    """

    vocab_files_names = VOCAB_FILES_NAMES
    pretrained_vocab_files_map = PRETRAINED_VOCAB_FILES_MAP
    max_model_input_sizes = PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES
    pretrained_init_configuration = PRETRAINED_INIT_CONFIGURATION
    # Token type id written at the leading position of the mask built by
    # `create_token_type_ids_from_sequences` (the position reserved for the <cls> token).
    cls_token_type_id: int = 2

    def __init__(
        self,
        vocab_file,
        do_lower_case=True,
        do_basic_tokenize=True,
        never_split=None,
        unk_token="<unk>",
        sep_token="<sep>",
        pad_token="<pad>",
        cls_token="<cls>",
        mask_token="<mask>",
        bos_token="<s>",
        eos_token="</s>",
        tokenize_chinese_chars=True,
        strip_accents=None,
        **kwargs
    ):
        """
        Forward every argument unchanged to the :class:`~transformers.BertTokenizer` constructor; this override only
        exists to supply the Funnel Transformer defaults for the special tokens.
        """
        super().__init__(
            vocab_file,
            do_lower_case=do_lower_case,
            do_basic_tokenize=do_basic_tokenize,
            never_split=never_split,
            unk_token=unk_token,
            sep_token=sep_token,
            pad_token=pad_token,
            cls_token=cls_token,
            mask_token=mask_token,
            bos_token=bos_token,
            eos_token=eos_token,
            tokenize_chinese_chars=tokenize_chinese_chars,
            strip_accents=strip_accents,
            **kwargs,
        )

    def create_token_type_ids_from_sequences(
        self, token_ids_0: List[int], token_ids_1: Optional[List[int]] = None
    ) -> List[int]:
        """
        Creates a mask from the two sequences passed to be used in a sequence-pair classification task.
        Funnel Transformer expects a sequence pair mask that has the following format:

        ::

            2 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1
            | first sequence    | second sequence |

        The leading position always carries :obj:`cls_token_type_id` (2). Each sequence is counted together with one
        trailing separator position. If :obj:`token_ids_1` is :obj:`None`, only the first portion of the mask is
        returned (the leading 2 followed by 0's).

        Args:
            token_ids_0 (:obj:`List[int]`):
                List of ids.
            token_ids_1 (:obj:`List[int]`, `optional`):
                Optional second list of IDs for sequence pairs.

        Returns:
            :obj:`List[int]`: List of `token type IDs <../glossary.html#token-type-ids>`_ according to the given
            sequence(s).
        """
        # One position for the <cls> token, then the first sequence plus one position for its separator.
        # Only lengths matter here, so no intermediate id lists are built.
        token_type_ids = [self.cls_token_type_id] + [0] * (len(token_ids_0) + 1)
        if token_ids_1 is not None:
            # Second sequence plus one position for its separator.
            token_type_ids += [1] * (len(token_ids_1) + 1)
        return token_type_ids
|
||||
|
||||
|
||||
class FunnelTokenizerFast(BertTokenizerFast):
    r"""
    "Fast" tokenizer for the Funnel Transformer models (backed by HuggingFace's :obj:`tokenizers` library).

    :class:`~transformers.FunnelTokenizerFast` subclasses :class:`~transformers.BertTokenizerFast` and runs the same
    end-to-end tokenization: punctuation splitting + wordpiece. On top of its parent it sets its own default special
    tokens (``<unk>``, ``<sep>``, ``<pad>``, ``<cls>``, ``<mask>``, ``<s>``, ``</s>``) and gives every ``<cls>``
    token a dedicated token type id (:obj:`cls_token_type_id`).

    Refer to superclass :class:`~transformers.BertTokenizerFast` for usage examples and documentation concerning
    parameters.
    """

    vocab_files_names = VOCAB_FILES_NAMES
    pretrained_vocab_files_map = PRETRAINED_VOCAB_FILES_MAP
    max_model_input_sizes = PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES
    pretrained_init_configuration = PRETRAINED_INIT_CONFIGURATION
    # Token type id assigned to <cls> tokens, both by `create_token_type_ids_from_sequences` and by
    # `_convert_encoding`.
    cls_token_type_id: int = 2

    def __init__(
        self,
        vocab_file,
        do_lower_case=True,
        unk_token="<unk>",
        sep_token="<sep>",
        pad_token="<pad>",
        cls_token="<cls>",
        mask_token="<mask>",
        bos_token="<s>",
        eos_token="</s>",
        clean_text=True,
        tokenize_chinese_chars=True,
        strip_accents=None,
        wordpieces_prefix="##",
        **kwargs
    ):
        """
        Forward every argument unchanged to the :class:`~transformers.BertTokenizerFast` constructor; this override
        only exists to supply the Funnel Transformer defaults for the special tokens.
        """
        super().__init__(
            vocab_file,
            do_lower_case=do_lower_case,
            unk_token=unk_token,
            sep_token=sep_token,
            pad_token=pad_token,
            cls_token=cls_token,
            mask_token=mask_token,
            bos_token=bos_token,
            eos_token=eos_token,
            clean_text=clean_text,
            tokenize_chinese_chars=tokenize_chinese_chars,
            strip_accents=strip_accents,
            wordpieces_prefix=wordpieces_prefix,
            **kwargs,
        )

    def create_token_type_ids_from_sequences(
        self, token_ids_0: List[int], token_ids_1: Optional[List[int]] = None
    ) -> List[int]:
        """
        Creates a mask from the two sequences passed to be used in a sequence-pair classification task.
        Funnel Transformer expects a sequence pair mask that has the following format:

        ::

            2 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1
            | first sequence    | second sequence |

        The leading position always carries :obj:`cls_token_type_id` (2). Each sequence is counted together with one
        trailing separator position. If :obj:`token_ids_1` is :obj:`None`, only the first portion of the mask is
        returned (the leading 2 followed by 0's).

        Args:
            token_ids_0 (:obj:`List[int]`):
                List of ids.
            token_ids_1 (:obj:`List[int]`, `optional`):
                Optional second list of IDs for sequence pairs.

        Returns:
            :obj:`List[int]`: List of `token type IDs <../glossary.html#token-type-ids>`_ according to the given
            sequence(s).
        """
        # One position for the <cls> token, then the first sequence plus one position for its separator.
        # Only lengths matter here, so no intermediate id lists are built.
        token_type_ids = [self.cls_token_type_id] + [0] * (len(token_ids_0) + 1)
        if token_ids_1 is not None:
            # Second sequence plus one position for its separator.
            token_type_ids += [1] * (len(token_ids_1) + 1)
        return token_type_ids

    def _convert_encoding(self, encoding, **kwargs):
        """
        Convert a fast-tokenizer encoding with the parent implementation, then overwrite the token type id of every
        ``<cls>`` token with :obj:`cls_token_type_id`.

        Args:
            encoding: The encoding to convert; passed through unchanged to the parent implementation.
            **kwargs: Extra keyword arguments forwarded unchanged to the parent implementation.

        Returns:
            The dictionary produced by the parent implementation, with its ``"token_type_ids"`` entry (when present)
            rewritten as described above.
        """
        # The fast tokenizer doesn't use the function above so we fix the cls token type id when decoding the fast
        # tokenizer output.
        encoding_dict = super()._convert_encoding(encoding, **kwargs)
        if "token_type_ids" in encoding_dict:
            # Both values are constant for the whole conversion: look them up once instead of once per token.
            cls_id = self.cls_token_id
            cls_type_id = self.cls_token_type_id
            # Note: we can't assume the <cls> token is in first position because left padding is a thing, hence the
            # double list comprehension that inspects every token of every sequence.
            encoding_dict["token_type_ids"] = [
                [cls_type_id if token_id == cls_id else type_id for token_id, type_id in zip(input_ids, type_ids)]
                for input_ids, type_ids in zip(encoding_dict["input_ids"], encoding_dict["token_type_ids"])
            ]
        return encoding_dict
|
||||
Reference in New Issue
Block a user