Model Templates for Seq2Seq (#9251)
* adapt cookie cutter * fix copy past statement * delete copy statements for now * remove unused import from template * make doc rst * correct config docstring * correct training * correct inputs processing tf enc dec * make style * adapt templates * clean tabs * correct tensor -> Tensor naming * correct indent * correct templates * fix the test * break lines to avoid > 119 * Apply suggestions from code review
This commit is contained in:
committed by
GitHub
parent
e6c1f1cad8
commit
cbe63949d7
2
.github/workflows/model-templates.yml
vendored
2
.github/workflows/model-templates.yml
vendored
@@ -40,6 +40,8 @@ jobs:
|
|||||||
transformers-cli add-new-model --testing --testing_file=templates/adding_a_new_model/tests/pt-encoder-bert-tokenizer.json --path=templates/adding_a_new_model
|
transformers-cli add-new-model --testing --testing_file=templates/adding_a_new_model/tests/pt-encoder-bert-tokenizer.json --path=templates/adding_a_new_model
|
||||||
transformers-cli add-new-model --testing --testing_file=templates/adding_a_new_model/tests/standalone.json --path=templates/adding_a_new_model
|
transformers-cli add-new-model --testing --testing_file=templates/adding_a_new_model/tests/standalone.json --path=templates/adding_a_new_model
|
||||||
transformers-cli add-new-model --testing --testing_file=templates/adding_a_new_model/tests/tf-encoder-bert-tokenizer.json --path=templates/adding_a_new_model
|
transformers-cli add-new-model --testing --testing_file=templates/adding_a_new_model/tests/tf-encoder-bert-tokenizer.json --path=templates/adding_a_new_model
|
||||||
|
transformers-cli add-new-model --testing --testing_file=templates/adding_a_new_model/tests/tf-seq-2-seq-bart-tokenizer.json --path=templates/adding_a_new_model
|
||||||
|
transformers-cli add-new-model --testing --testing_file=templates/adding_a_new_model/tests/pt-seq-2-seq-bart-tokenizer.json --path=templates/adding_a_new_model
|
||||||
make style
|
make style
|
||||||
python utils/check_table.py --fix_and_overwrite
|
python utils/check_table.py --fix_and_overwrite
|
||||||
python utils/check_dummies.py --fix_and_overwrite
|
python utils/check_dummies.py --fix_and_overwrite
|
||||||
|
|||||||
@@ -1077,7 +1077,7 @@ class TFBartModel(TFBartPretrainedModel):
|
|||||||
|
|
||||||
decoder_outputs = self.decoder(
|
decoder_outputs = self.decoder(
|
||||||
inputs["decoder_input_ids"],
|
inputs["decoder_input_ids"],
|
||||||
attention_mask=decoder_attention_mask,
|
attention_mask=inputs["decoder_attention_mask"],
|
||||||
encoder_hidden_states=inputs["encoder_outputs"][0],
|
encoder_hidden_states=inputs["encoder_outputs"][0],
|
||||||
encoder_attention_mask=inputs["attention_mask"],
|
encoder_attention_mask=inputs["attention_mask"],
|
||||||
past_key_values=inputs["past_key_values"],
|
past_key_values=inputs["past_key_values"],
|
||||||
@@ -1228,6 +1228,7 @@ class TFBartForConditionalGeneration(TFBartPretrainedModel):
|
|||||||
output_attentions=inputs["output_attentions"],
|
output_attentions=inputs["output_attentions"],
|
||||||
output_hidden_states=inputs["output_hidden_states"],
|
output_hidden_states=inputs["output_hidden_states"],
|
||||||
return_dict=inputs["return_dict"],
|
return_dict=inputs["return_dict"],
|
||||||
|
training=inputs["training"],
|
||||||
)
|
)
|
||||||
lm_logits = self.model.shared(outputs[0], mode="linear")
|
lm_logits = self.model.shared(outputs[0], mode="linear")
|
||||||
lm_logits = lm_logits + self.final_logits_bias
|
lm_logits = lm_logits + self.final_logits_bias
|
||||||
|
|||||||
@@ -30,6 +30,7 @@ if is_tokenizers_available():
|
|||||||
from .tokenization_{{cookiecutter.lowercase_modelname}}_fast import {{cookiecutter.camelcase_modelname}}TokenizerFast
|
from .tokenization_{{cookiecutter.lowercase_modelname}}_fast import {{cookiecutter.camelcase_modelname}}TokenizerFast
|
||||||
|
|
||||||
{%- if (cookiecutter.generate_tensorflow_and_pytorch == "PyTorch & TensorFlow" or cookiecutter.generate_tensorflow_and_pytorch == "PyTorch") %}
|
{%- if (cookiecutter.generate_tensorflow_and_pytorch == "PyTorch & TensorFlow" or cookiecutter.generate_tensorflow_and_pytorch == "PyTorch") %}
|
||||||
|
{% if cookiecutter.is_encoder_decoder_model == "False" %}
|
||||||
if is_torch_available():
|
if is_torch_available():
|
||||||
from .modeling_{{cookiecutter.lowercase_modelname}} import (
|
from .modeling_{{cookiecutter.lowercase_modelname}} import (
|
||||||
{{cookiecutter.uppercase_modelname}}_PRETRAINED_MODEL_ARCHIVE_LIST,
|
{{cookiecutter.uppercase_modelname}}_PRETRAINED_MODEL_ARCHIVE_LIST,
|
||||||
@@ -44,8 +45,20 @@ if is_torch_available():
|
|||||||
{{cookiecutter.camelcase_modelname}}PreTrainedModel,
|
{{cookiecutter.camelcase_modelname}}PreTrainedModel,
|
||||||
load_tf_weights_in_{{cookiecutter.lowercase_modelname}},
|
load_tf_weights_in_{{cookiecutter.lowercase_modelname}},
|
||||||
)
|
)
|
||||||
|
{% else %}
|
||||||
|
if is_torch_available():
|
||||||
|
from .modeling_{{cookiecutter.lowercase_modelname}} import (
|
||||||
|
{{cookiecutter.uppercase_modelname}}_PRETRAINED_MODEL_ARCHIVE_LIST,
|
||||||
|
{{cookiecutter.camelcase_modelname}}ForConditionalGeneration,
|
||||||
|
{{cookiecutter.camelcase_modelname}}ForQuestionAnswering,
|
||||||
|
{{cookiecutter.camelcase_modelname}}ForSequenceClassification,
|
||||||
|
{{cookiecutter.camelcase_modelname}}Model,
|
||||||
|
{{cookiecutter.camelcase_modelname}}PreTrainedModel,
|
||||||
|
)
|
||||||
|
{% endif %}
|
||||||
{% endif %}
|
{% endif %}
|
||||||
{%- if (cookiecutter.generate_tensorflow_and_pytorch == "PyTorch & TensorFlow" or cookiecutter.generate_tensorflow_and_pytorch == "TensorFlow") %}
|
{%- if (cookiecutter.generate_tensorflow_and_pytorch == "PyTorch & TensorFlow" or cookiecutter.generate_tensorflow_and_pytorch == "TensorFlow") %}
|
||||||
|
{% if cookiecutter.is_encoder_decoder_model == "False" %}
|
||||||
if is_tf_available():
|
if is_tf_available():
|
||||||
from .modeling_tf_{{cookiecutter.lowercase_modelname}} import (
|
from .modeling_tf_{{cookiecutter.lowercase_modelname}} import (
|
||||||
TF_{{cookiecutter.uppercase_modelname}}_PRETRAINED_MODEL_ARCHIVE_LIST,
|
TF_{{cookiecutter.uppercase_modelname}}_PRETRAINED_MODEL_ARCHIVE_LIST,
|
||||||
@@ -59,4 +72,12 @@ if is_tf_available():
|
|||||||
TF{{cookiecutter.camelcase_modelname}}Model,
|
TF{{cookiecutter.camelcase_modelname}}Model,
|
||||||
TF{{cookiecutter.camelcase_modelname}}PreTrainedModel,
|
TF{{cookiecutter.camelcase_modelname}}PreTrainedModel,
|
||||||
)
|
)
|
||||||
{% endif %}
|
{% else %}
|
||||||
|
if is_tf_available():
|
||||||
|
from .modeling_tf_{{cookiecutter.lowercase_modelname}} import (
|
||||||
|
TF{{cookiecutter.camelcase_modelname}}ForConditionalGeneration,
|
||||||
|
TF{{cookiecutter.camelcase_modelname}}Model,
|
||||||
|
TF{{cookiecutter.camelcase_modelname}}PreTrainedModel,
|
||||||
|
)
|
||||||
|
{% endif %}
|
||||||
|
{% endif %}
|
||||||
|
|||||||
@@ -6,5 +6,6 @@
|
|||||||
"authors": "{{cookiecutter.authors}}",
|
"authors": "{{cookiecutter.authors}}",
|
||||||
"checkpoint_identifier": "{{cookiecutter.checkpoint_identifier}}",
|
"checkpoint_identifier": "{{cookiecutter.checkpoint_identifier}}",
|
||||||
"tokenizer_type": "{{cookiecutter.tokenizer_type}}",
|
"tokenizer_type": "{{cookiecutter.tokenizer_type}}",
|
||||||
"generate_tensorflow_and_pytorch": "{{cookiecutter.generate_tensorflow_and_pytorch}}"
|
"generate_tensorflow_and_pytorch": "{{cookiecutter.generate_tensorflow_and_pytorch}}",
|
||||||
|
"is_encoder_decoder_model": ["True", "False"]
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -39,6 +39,7 @@ class {{cookiecutter.camelcase_modelname}}Config(PretrainedConfig):
|
|||||||
|
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
|
{% if cookiecutter.is_encoder_decoder_model == "False" -%}
|
||||||
vocab_size (:obj:`int`, `optional`, defaults to 30522):
|
vocab_size (:obj:`int`, `optional`, defaults to 30522):
|
||||||
Vocabulary size of the {{cookiecutter.modelname}} model. Defines the number of different tokens that can be represented by the
|
Vocabulary size of the {{cookiecutter.modelname}} model. Defines the number of different tokens that can be represented by the
|
||||||
:obj:`inputs_ids` passed when calling :class:`~transformers.{{cookiecutter.camelcase_modelname}}Model` or
|
:obj:`inputs_ids` passed when calling :class:`~transformers.{{cookiecutter.camelcase_modelname}}Model` or
|
||||||
@@ -70,6 +71,50 @@ class {{cookiecutter.camelcase_modelname}}Config(PretrainedConfig):
|
|||||||
The standard deviation of the truncated_normal_initializer for initializing all weight matrices.
|
The standard deviation of the truncated_normal_initializer for initializing all weight matrices.
|
||||||
layer_norm_eps (:obj:`float`, `optional`, defaults to 1e-12):
|
layer_norm_eps (:obj:`float`, `optional`, defaults to 1e-12):
|
||||||
The epsilon used by the layer normalization layers.
|
The epsilon used by the layer normalization layers.
|
||||||
|
{% else -%}
|
||||||
|
vocab_size (:obj:`int`, `optional`, defaults to 50265):
|
||||||
|
Vocabulary size of the {{cookiecutter.modelname}} model. Defines the number of different tokens that can be represented by the
|
||||||
|
:obj:`inputs_ids` passed when calling :class:`~transformers.{{cookiecutter.camelcase_modelname}}Model` or
|
||||||
|
:class:`~transformers.TF{{cookiecutter.camelcase_modelname}}Model`.
|
||||||
|
d_model (:obj:`int`, `optional`, defaults to 1024):
|
||||||
|
Dimensionality of the layers and the pooler layer.
|
||||||
|
encoder_layers (:obj:`int`, `optional`, defaults to 12):
|
||||||
|
Number of encoder layers.
|
||||||
|
decoder_layers (:obj:`int`, `optional`, defaults to 12):
|
||||||
|
Number of decoder layers.
|
||||||
|
encoder_attention_heads (:obj:`int`, `optional`, defaults to 16):
|
||||||
|
Number of attention heads for each attention layer in the Transformer encoder.
|
||||||
|
decoder_attention_heads (:obj:`int`, `optional`, defaults to 16):
|
||||||
|
Number of attention heads for each attention layer in the Transformer decoder.
|
||||||
|
decoder_ffn_dim (:obj:`int`, `optional`, defaults to 4096):
|
||||||
|
Dimensionality of the "intermediate" (often named feed-forward) layer in decoder.
|
||||||
|
encoder_ffn_dim (:obj:`int`, `optional`, defaults to 4096):
|
||||||
|
Dimensionality of the "intermediate" (often named feed-forward) layer in encoder.
|
||||||
|
activation_function (:obj:`str` or :obj:`function`, `optional`, defaults to :obj:`"gelu"`):
|
||||||
|
The non-linear activation function (function or string) in the encoder and pooler. If string,
|
||||||
|
:obj:`"gelu"`, :obj:`"relu"`, :obj:`"silu"` and :obj:`"gelu_new"` are supported.
|
||||||
|
dropout (:obj:`float`, `optional`, defaults to 0.1):
|
||||||
|
The dropout probability for all fully connected layers in the embeddings, encoder, and pooler.
|
||||||
|
attention_dropout (:obj:`float`, `optional`, defaults to 0.0):
|
||||||
|
The dropout ratio for the attention probabilities.
|
||||||
|
activation_dropout (:obj:`float`, `optional`, defaults to 0.0):
|
||||||
|
The dropout ratio for activations inside the fully connected layer.
|
||||||
|
classifier_dropout (:obj:`float`, `optional`, defaults to 0.0):
|
||||||
|
The dropout ratio for classifier.
|
||||||
|
max_position_embeddings (:obj:`int`, `optional`, defaults to 1024):
|
||||||
|
The maximum sequence length that this model might ever be used with. Typically set this to something large
|
||||||
|
just in case (e.g., 512 or 1024 or 2048).
|
||||||
|
init_std (:obj:`float`, `optional`, defaults to 0.02):
|
||||||
|
The standard deviation of the truncated_normal_initializer for initializing all weight matrices.
|
||||||
|
encoder_layerdrop (:obj:`float`, `optional`, defaults to 0.0):
|
||||||
|
The LayerDrop probability for the encoder. See the `LayerDrop paper <
|
||||||
|
https://arxiv.org/abs/1909.11556>`__ for more details.
|
||||||
|
decoder_layerdrop (:obj:`float`, `optional`, defaults to 0.0):
|
||||||
|
The LayerDrop probability for the decoder. See the `LayerDrop paper <
|
||||||
|
https://arxiv.org/abs/1909.11556>`__ for more details.
|
||||||
|
use_cache (:obj:`bool`, `optional`, defaults to :obj:`True`):
|
||||||
|
Whether or not the model should return the last key/values attentions (not used by all models).
|
||||||
|
{% endif -%}
|
||||||
|
|
||||||
Example::
|
Example::
|
||||||
|
|
||||||
@@ -88,9 +133,9 @@ class {{cookiecutter.camelcase_modelname}}Config(PretrainedConfig):
|
|||||||
|
|
||||||
def __init__(
|
def __init__(
|
||||||
self,
|
self,
|
||||||
|
{% if cookiecutter.is_encoder_decoder_model == "False" -%}
|
||||||
vocab_size=30522,
|
vocab_size=30522,
|
||||||
hidden_size=768,
|
hidden_size=768,
|
||||||
is_encoder_decoder=False,
|
|
||||||
num_hidden_layers=12,
|
num_hidden_layers=12,
|
||||||
num_attention_heads=12,
|
num_attention_heads=12,
|
||||||
intermediate_size=3072,
|
intermediate_size=3072,
|
||||||
@@ -101,6 +146,29 @@ class {{cookiecutter.camelcase_modelname}}Config(PretrainedConfig):
|
|||||||
type_vocab_size=2,
|
type_vocab_size=2,
|
||||||
initializer_range=0.02,
|
initializer_range=0.02,
|
||||||
layer_norm_eps=1e-12,
|
layer_norm_eps=1e-12,
|
||||||
|
is_encoder_decoder=False,
|
||||||
|
{% else -%}
|
||||||
|
vocab_size=50265,
|
||||||
|
max_position_embeddings=1024,
|
||||||
|
encoder_layers=12,
|
||||||
|
encoder_ffn_dim=4096,
|
||||||
|
encoder_attention_heads=16,
|
||||||
|
decoder_layers=12,
|
||||||
|
decoder_ffn_dim=4096,
|
||||||
|
decoder_attention_heads=16,
|
||||||
|
encoder_layerdrop=0.0,
|
||||||
|
decoder_layerdrop=0.0,
|
||||||
|
use_cache=True,
|
||||||
|
is_encoder_decoder=True,
|
||||||
|
activation_function="gelu",
|
||||||
|
d_model=1024,
|
||||||
|
dropout=0.1,
|
||||||
|
attention_dropout=0.0,
|
||||||
|
activation_dropout=0.0,
|
||||||
|
init_std=0.02,
|
||||||
|
decoder_start_token_id=2,
|
||||||
|
classifier_dropout=0.0,
|
||||||
|
{% endif -%}
|
||||||
pad_token_id=1,
|
pad_token_id=1,
|
||||||
bos_token_id=0,
|
bos_token_id=0,
|
||||||
eos_token_id=2,
|
eos_token_id=2,
|
||||||
@@ -108,13 +176,19 @@ class {{cookiecutter.camelcase_modelname}}Config(PretrainedConfig):
|
|||||||
):
|
):
|
||||||
super().__init__(
|
super().__init__(
|
||||||
pad_token_id=pad_token_id,
|
pad_token_id=pad_token_id,
|
||||||
is_encoder_decoder=is_encoder_decoder,
|
|
||||||
bos_token_id=bos_token_id,
|
bos_token_id=bos_token_id,
|
||||||
eos_token_id=eos_token_id,
|
eos_token_id=eos_token_id,
|
||||||
|
{% if cookiecutter.is_encoder_decoder_model == "False" -%}
|
||||||
|
{% else -%}
|
||||||
|
is_encoder_decoder=is_encoder_decoder,
|
||||||
|
decoder_start_token_id=decoder_start_token_id,
|
||||||
|
{% endif -%}
|
||||||
**kwargs
|
**kwargs
|
||||||
)
|
)
|
||||||
|
|
||||||
self.vocab_size = vocab_size
|
self.vocab_size = vocab_size
|
||||||
|
self.max_position_embeddings = max_position_embeddings
|
||||||
|
{% if cookiecutter.is_encoder_decoder_model == "False" -%}
|
||||||
self.hidden_size = hidden_size
|
self.hidden_size = hidden_size
|
||||||
self.num_hidden_layers = num_hidden_layers
|
self.num_hidden_layers = num_hidden_layers
|
||||||
self.num_attention_heads = num_attention_heads
|
self.num_attention_heads = num_attention_heads
|
||||||
@@ -122,8 +196,36 @@ class {{cookiecutter.camelcase_modelname}}Config(PretrainedConfig):
|
|||||||
self.hidden_act = hidden_act
|
self.hidden_act = hidden_act
|
||||||
self.hidden_dropout_prob = hidden_dropout_prob
|
self.hidden_dropout_prob = hidden_dropout_prob
|
||||||
self.attention_probs_dropout_prob = attention_probs_dropout_prob
|
self.attention_probs_dropout_prob = attention_probs_dropout_prob
|
||||||
self.max_position_embeddings = max_position_embeddings
|
|
||||||
self.type_vocab_size = type_vocab_size
|
|
||||||
self.initializer_range = initializer_range
|
self.initializer_range = initializer_range
|
||||||
|
self.type_vocab_size = type_vocab_size
|
||||||
self.layer_norm_eps = layer_norm_eps
|
self.layer_norm_eps = layer_norm_eps
|
||||||
|
{% else -%}
|
||||||
|
self.d_model = d_model
|
||||||
|
self.encoder_ffn_dim = encoder_ffn_dim
|
||||||
|
self.encoder_layers = encoder_layers
|
||||||
|
self.encoder_attention_heads = encoder_attention_heads
|
||||||
|
self.decoder_ffn_dim = decoder_ffn_dim
|
||||||
|
self.decoder_layers = decoder_layers
|
||||||
|
self.decoder_attention_heads = decoder_attention_heads
|
||||||
|
self.dropout = dropout
|
||||||
|
self.attention_dropout = attention_dropout
|
||||||
|
self.activation_dropout = activation_dropout
|
||||||
|
self.activation_function = activation_function
|
||||||
|
self.init_std = init_std
|
||||||
|
self.encoder_layerdrop = encoder_layerdrop
|
||||||
|
self.decoder_layerdrop = decoder_layerdrop
|
||||||
|
self.classifier_dropout = classifier_dropout
|
||||||
|
self.use_cache = use_cache
|
||||||
|
self.num_hidden_layers = encoder_layers
|
||||||
|
{% endif -%}
|
||||||
|
|
||||||
|
{% if cookiecutter.is_encoder_decoder_model == "False" %}
|
||||||
|
{%- else %}
|
||||||
|
@property
|
||||||
|
def num_attention_heads(self) -> int:
|
||||||
|
return self.encoder_attention_heads
|
||||||
|
|
||||||
|
@property
|
||||||
|
def hidden_size(self) -> int:
|
||||||
|
return self.d_model
|
||||||
|
{%- endif %}
|
||||||
|
|||||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@@ -13,6 +13,7 @@
|
|||||||
# See the License for the specific language governing permissions and
|
# See the License for the specific language governing permissions and
|
||||||
# limitations under the License.
|
# limitations under the License.
|
||||||
|
|
||||||
|
{% if cookiecutter.is_encoder_decoder_model == "False" %}
|
||||||
|
|
||||||
import unittest
|
import unittest
|
||||||
|
|
||||||
@@ -318,3 +319,272 @@ class TF{{cookiecutter.camelcase_modelname}}ModelIntegrationTest(unittest.TestCa
|
|||||||
]
|
]
|
||||||
)
|
)
|
||||||
tf.debugging.assert_near(output[:, :3, :3], expected_slice, atol=1e-4)
|
tf.debugging.assert_near(output[:, :3, :3], expected_slice, atol=1e-4)
|
||||||
|
|
||||||
|
{% else %}
|
||||||
|
import unittest
|
||||||
|
|
||||||
|
from transformers import {{cookiecutter.camelcase_modelname}}Config, {{cookiecutter.camelcase_modelname}}Tokenizer, is_tf_available
|
||||||
|
from transformers.testing_utils import require_sentencepiece, require_tokenizers, require_tf, slow
|
||||||
|
|
||||||
|
from .test_configuration_common import ConfigTester
|
||||||
|
from .test_modeling_tf_common import TFModelTesterMixin, ids_tensor
|
||||||
|
|
||||||
|
|
||||||
|
if is_tf_available():
|
||||||
|
import tensorflow as tf
|
||||||
|
|
||||||
|
from transformers import TF{{cookiecutter.camelcase_modelname}}ForConditionalGeneration, TF{{cookiecutter.camelcase_modelname}}Model
|
||||||
|
|
||||||
|
|
||||||
|
@require_tf
|
||||||
|
class TF{{cookiecutter.camelcase_modelname}}ModelTester:
|
||||||
|
config_cls = {{cookiecutter.camelcase_modelname}}Config
|
||||||
|
config_updates = {}
|
||||||
|
hidden_act = "gelu"
|
||||||
|
|
||||||
|
def __init__(
|
||||||
|
self,
|
||||||
|
parent,
|
||||||
|
batch_size=13,
|
||||||
|
seq_length=7,
|
||||||
|
is_training=True,
|
||||||
|
use_labels=False,
|
||||||
|
vocab_size=99,
|
||||||
|
hidden_size=32,
|
||||||
|
num_hidden_layers=5,
|
||||||
|
num_attention_heads=4,
|
||||||
|
intermediate_size=37,
|
||||||
|
hidden_dropout_prob=0.1,
|
||||||
|
attention_probs_dropout_prob=0.1,
|
||||||
|
max_position_embeddings=20,
|
||||||
|
eos_token_id=2,
|
||||||
|
pad_token_id=1,
|
||||||
|
bos_token_id=0,
|
||||||
|
):
|
||||||
|
self.parent = parent
|
||||||
|
self.batch_size = batch_size
|
||||||
|
self.seq_length = seq_length
|
||||||
|
self.is_training = is_training
|
||||||
|
self.use_labels = use_labels
|
||||||
|
self.vocab_size = vocab_size
|
||||||
|
self.hidden_size = hidden_size
|
||||||
|
self.num_hidden_layers = num_hidden_layers
|
||||||
|
self.num_attention_heads = num_attention_heads
|
||||||
|
self.intermediate_size = intermediate_size
|
||||||
|
|
||||||
|
self.hidden_dropout_prob = hidden_dropout_prob
|
||||||
|
self.attention_probs_dropout_prob = attention_probs_dropout_prob
|
||||||
|
self.max_position_embeddings = max_position_embeddings
|
||||||
|
self.eos_token_id = eos_token_id
|
||||||
|
self.pad_token_id = pad_token_id
|
||||||
|
self.bos_token_id = bos_token_id
|
||||||
|
|
||||||
|
def prepare_config_and_inputs_for_common(self):
|
||||||
|
input_ids = ids_tensor([self.batch_size, self.seq_length - 1], self.vocab_size)
|
||||||
|
eos_tensor = tf.expand_dims(tf.constant([self.eos_token_id] * self.batch_size), 1)
|
||||||
|
input_ids = tf.concat([input_ids, eos_tensor], axis=1)
|
||||||
|
|
||||||
|
decoder_input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)
|
||||||
|
|
||||||
|
config = self.config_cls(
|
||||||
|
vocab_size=self.vocab_size,
|
||||||
|
d_model=self.hidden_size,
|
||||||
|
encoder_layers=self.num_hidden_layers,
|
||||||
|
decoder_layers=self.num_hidden_layers,
|
||||||
|
encoder_attention_heads=self.num_attention_heads,
|
||||||
|
decoder_attention_heads=self.num_attention_heads,
|
||||||
|
encoder_ffn_dim=self.intermediate_size,
|
||||||
|
decoder_ffn_dim=self.intermediate_size,
|
||||||
|
dropout=self.hidden_dropout_prob,
|
||||||
|
attention_dropout=self.attention_probs_dropout_prob,
|
||||||
|
max_position_embeddings=self.max_position_embeddings,
|
||||||
|
eos_token_ids=[2],
|
||||||
|
bos_token_id=self.bos_token_id,
|
||||||
|
pad_token_id=self.pad_token_id,
|
||||||
|
decoder_start_token_id=self.pad_token_id,
|
||||||
|
**self.config_updates,
|
||||||
|
)
|
||||||
|
inputs_dict = prepare_{{cookiecutter.lowercase_modelname}}_inputs_dict(config, input_ids, decoder_input_ids)
|
||||||
|
return config, inputs_dict
|
||||||
|
|
||||||
|
def check_decoder_model_past_large_inputs(self, config, inputs_dict):
|
||||||
|
model = TF{{cookiecutter.camelcase_modelname}}Model(config=config).get_decoder()
|
||||||
|
input_ids = inputs_dict["input_ids"]
|
||||||
|
|
||||||
|
input_ids = input_ids[:1, :]
|
||||||
|
attention_mask = inputs_dict["attention_mask"][:1, :]
|
||||||
|
self.batch_size = 1
|
||||||
|
|
||||||
|
# first forward pass
|
||||||
|
outputs = model(input_ids, attention_mask=attention_mask, use_cache=True)
|
||||||
|
|
||||||
|
output, past_key_values = outputs.to_tuple()
|
||||||
|
past_key_values = past_key_values[1]
|
||||||
|
|
||||||
|
# create hypothetical next tokens and extend to next_input_ids
|
||||||
|
next_tokens = ids_tensor((self.batch_size, 3), config.vocab_size)
|
||||||
|
next_attn_mask = tf.cast(ids_tensor((self.batch_size, 3), 2), tf.int8)
|
||||||
|
|
||||||
|
# append to next input_ids and attention_mask
|
||||||
|
next_input_ids = tf.concat([input_ids, next_tokens], axis=-1)
|
||||||
|
next_attention_mask = tf.concat([attention_mask, next_attn_mask], axis=-1)
|
||||||
|
|
||||||
|
output_from_no_past = model(next_input_ids, attention_mask=next_attention_mask)[0]
|
||||||
|
output_from_past = model(next_tokens, attention_mask=next_attention_mask, past_key_values=past_key_values)[0]
|
||||||
|
|
||||||
|
self.parent.assertEqual(next_tokens.shape[1], output_from_past.shape[1])
|
||||||
|
|
||||||
|
# select random slice
|
||||||
|
random_slice_idx = int(ids_tensor((1,), output_from_past.shape[-1]))
|
||||||
|
output_from_no_past_slice = output_from_no_past[:, -3:, random_slice_idx]
|
||||||
|
output_from_past_slice = output_from_past[:, :, random_slice_idx]
|
||||||
|
|
||||||
|
# test that outputs are equal for slice
|
||||||
|
tf.debugging.assert_near(output_from_past_slice, output_from_no_past_slice, rtol=1e-3)
|
||||||
|
|
||||||
|
|
||||||
|
def prepare_{{cookiecutter.lowercase_modelname}}_inputs_dict(
|
||||||
|
config,
|
||||||
|
input_ids,
|
||||||
|
decoder_input_ids,
|
||||||
|
attention_mask=None,
|
||||||
|
decoder_attention_mask=None,
|
||||||
|
):
|
||||||
|
if attention_mask is None:
|
||||||
|
attention_mask = tf.cast(tf.math.not_equal(input_ids, config.pad_token_id), tf.int8)
|
||||||
|
if decoder_attention_mask is None:
|
||||||
|
decoder_attention_mask = tf.cast(tf.math.not_equal(decoder_input_ids, config.pad_token_id), tf.int8)
|
||||||
|
return {
|
||||||
|
"input_ids": input_ids,
|
||||||
|
"decoder_input_ids": decoder_input_ids,
|
||||||
|
"attention_mask": attention_mask,
|
||||||
|
"decoder_attention_mask": decoder_attention_mask,
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@require_tf
|
||||||
|
class TF{{cookiecutter.camelcase_modelname}}ModelTest(TFModelTesterMixin, unittest.TestCase):
|
||||||
|
all_model_classes = (TF{{cookiecutter.camelcase_modelname}}ForConditionalGeneration, TF{{cookiecutter.camelcase_modelname}}Model) if is_tf_available() else ()
|
||||||
|
all_generative_model_classes = (TF{{cookiecutter.camelcase_modelname}}ForConditionalGeneration,) if is_tf_available() else ()
|
||||||
|
is_encoder_decoder = True
|
||||||
|
test_pruning = False
|
||||||
|
|
||||||
|
def setUp(self):
|
||||||
|
self.model_tester = TF{{cookiecutter.camelcase_modelname}}ModelTester(self)
|
||||||
|
self.config_tester = ConfigTester(self, config_class={{cookiecutter.camelcase_modelname}}Config)
|
||||||
|
|
||||||
|
def test_config(self):
|
||||||
|
self.config_tester.run_common_tests()
|
||||||
|
|
||||||
|
def test_decoder_model_past_large_inputs(self):
|
||||||
|
config_and_inputs = self.model_tester.prepare_config_and_inputs_for_common()
|
||||||
|
self.model_tester.check_decoder_model_past_large_inputs(*config_and_inputs)
|
||||||
|
|
||||||
|
def test_model_common_attributes(self):
|
||||||
|
config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
|
||||||
|
|
||||||
|
for model_class in self.all_model_classes:
|
||||||
|
model = model_class(config)
|
||||||
|
assert isinstance(model.get_input_embeddings(), tf.keras.layers.Layer)
|
||||||
|
x = model.get_output_layer_with_bias()
|
||||||
|
assert x is None
|
||||||
|
name = model.get_prefix_bias_name()
|
||||||
|
assert name is None
|
||||||
|
|
||||||
|
|
||||||
|
def _assert_tensors_equal(a, b, atol=1e-12, prefix=""):
|
||||||
|
"""If tensors are not close, or a and b aren't both tensors, raise a nice AssertionError."""
|
||||||
|
if a is None and b is None:
|
||||||
|
return True
|
||||||
|
try:
|
||||||
|
if tf.debugging.assert_near(a, b, atol=atol):
|
||||||
|
return True
|
||||||
|
raise
|
||||||
|
except Exception:
|
||||||
|
msg = "{} != {}".format(a, b)
|
||||||
|
if prefix:
|
||||||
|
msg = prefix + ": " + msg
|
||||||
|
raise AssertionError(msg)
|
||||||
|
|
||||||
|
|
||||||
|
def _long_tensor(tok_lst):
|
||||||
|
return tf.constant(tok_lst, dtype=tf.int32)
|
||||||
|
|
||||||
|
|
||||||
|
TOLERANCE = 1e-4
|
||||||
|
|
||||||
|
|
||||||
|
@slow
|
||||||
|
@require_sentencepiece
|
||||||
|
@require_tokenizers
|
||||||
|
@require_tf
|
||||||
|
class TF{{cookiecutter.camelcase_modelname}}ModelIntegrationTest(unittest.TestCase):
|
||||||
|
def test_inference_no_head(self):
|
||||||
|
model = TF{{cookiecutter.camelcase_modelname}}Model.from_pretrained('{{cookiecutter.checkpoint_identifier}}')
|
||||||
|
# change to intended input here
|
||||||
|
input_ids = _long_tensor([[0, 31414, 232, 328, 740, 1140, 12695, 69, 46078, 1588, 2]])
|
||||||
|
decoder_input_ids = _long_tensor([[0, 31414, 232, 328, 740, 1140, 12695, 69, 46078, 1588, 2]])
|
||||||
|
inputs_dict = prepare_{{cookiecutter.lowercase_modelname}}_inputs_dict(model.config, input_ids, decoder_input_ids)
|
||||||
|
output = model(**inputs_dict)[0]
|
||||||
|
expected_shape = (1, 11, 1024)
|
||||||
|
self.assertEqual(output.shape, expected_shape)
|
||||||
|
# change to expected output here
|
||||||
|
expected_slice = tf.Tensor(
|
||||||
|
[[0.7144, 0.8143, -1.2813], [0.7144, 0.8143, -1.2813], [-0.0467, 2.5911, -2.1845]],
|
||||||
|
)
|
||||||
|
self.assertTrue(tf.debugging.assert_near(output[:, :3, :3], expected_slice, atol=TOLERANCE))
|
||||||
|
|
||||||
|
def test_inference_with_head(self):
|
||||||
|
model = TF{{cookiecutter.camelcase_modelname}}ForConditionalGeneration.from_pretrained('{{cookiecutter.checkpoint_identifier}}')
|
||||||
|
# change to intended input here
|
||||||
|
input_ids = _long_tensor([[0, 31414, 232, 328, 740, 1140, 12695, 69, 46078, 1588, 2]])
|
||||||
|
decoder_input_ids = _long_tensor([[0, 31414, 232, 328, 740, 1140, 12695, 69, 46078, 1588, 2]])
|
||||||
|
inputs_dict = prepare_{{cookiecutter.lowercase_modelname}}_inputs_dict(model.config, input_ids, decoder_input_ids)
|
||||||
|
output = model(**inputs_dict)[0]
|
||||||
|
expected_shape = (1, 11, 1024)
|
||||||
|
self.assertEqual(output.shape, expected_shape)
|
||||||
|
# change to expected output here
|
||||||
|
expected_slice = tf.Tensor(
|
||||||
|
[[0.7144, 0.8143, -1.2813], [0.7144, 0.8143, -1.2813], [-0.0467, 2.5911, -2.1845]],
|
||||||
|
)
|
||||||
|
self.assertTrue(tf.debugging.assert_near(output[:, :3, :3], expected_slice, atol=TOLERANCE))
|
||||||
|
|
||||||
|
def test_seq_to_seq_generation(self):
|
||||||
|
hf = TF{{cookiecutter.camelcase_modelname}}ForConditionalGeneration.from_pretrained('{{cookiecutter.checkpoint_identifier}}')
|
||||||
|
tok = {{cookiecutter.camelcase_modelname}}Tokenizer.from_pretrained('{{cookiecutter.checkpoint_identifier}}')
|
||||||
|
|
||||||
|
batch_input = [
|
||||||
|
# string 1,
|
||||||
|
# string 2,
|
||||||
|
# string 3,
|
||||||
|
# string 4,
|
||||||
|
]
|
||||||
|
|
||||||
|
# The below article tests that we don't add any hypotheses outside of the top n_beams
|
||||||
|
dct = tok.batch_encode_plus(
|
||||||
|
batch_input,
|
||||||
|
max_length=512,
|
||||||
|
padding="max_length",
|
||||||
|
truncation_strategy="only_first",
|
||||||
|
truncation=True,
|
||||||
|
return_tensors="tf",
|
||||||
|
)
|
||||||
|
|
||||||
|
hypotheses_batch = hf.generate(
|
||||||
|
input_ids=dct["input_ids"],
|
||||||
|
attention_mask=dct["attention_mask"],
|
||||||
|
num_beams=2,
|
||||||
|
)
|
||||||
|
|
||||||
|
EXPECTED = [
|
||||||
|
# here expected 1,
|
||||||
|
# here expected 2,
|
||||||
|
# here expected 3,
|
||||||
|
# here expected 4,
|
||||||
|
]
|
||||||
|
|
||||||
|
generated = tok.batch_decode(
|
||||||
|
hypotheses_batch.tolist(), clean_up_tokenization_spaces=True, skip_special_tokens=True
|
||||||
|
)
|
||||||
|
assert generated == EXPECTED
|
||||||
|
{%- endif %}
|
||||||
|
|||||||
@@ -15,6 +15,7 @@
|
|||||||
""" Testing suite for the PyTorch {{cookiecutter.modelname}} model. """
|
""" Testing suite for the PyTorch {{cookiecutter.modelname}} model. """
|
||||||
|
|
||||||
|
|
||||||
|
{% if cookiecutter.is_encoder_decoder_model == "False" -%}
|
||||||
import unittest
|
import unittest
|
||||||
|
|
||||||
from tests.test_modeling_common import floats_tensor
|
from tests.test_modeling_common import floats_tensor
|
||||||
@@ -406,3 +407,395 @@ class {{cookiecutter.camelcase_modelname}}ModelIntegrationTest(unittest.TestCase
|
|||||||
)
|
)
|
||||||
|
|
||||||
self.assertTrue(torch.allclose(output[:, :3, :3], expected_slice, atol=1e-4))
|
self.assertTrue(torch.allclose(output[:, :3, :3], expected_slice, atol=1e-4))
|
||||||
|
|
||||||
|
|
||||||
|
{% else -%}
|
||||||
|
import copy
|
||||||
|
import tempfile
|
||||||
|
import unittest
|
||||||
|
|
||||||
|
import timeout_decorator # noqa
|
||||||
|
|
||||||
|
from transformers import is_torch_available
|
||||||
|
from transformers.file_utils import cached_property
|
||||||
|
from transformers.testing_utils import require_sentencepiece, require_tokenizers, require_torch, slow, torch_device
|
||||||
|
|
||||||
|
from .test_configuration_common import ConfigTester
|
||||||
|
from .test_generation_utils import GenerationTesterMixin
|
||||||
|
from .test_modeling_common import ModelTesterMixin, ids_tensor
|
||||||
|
|
||||||
|
|
||||||
|
if is_torch_available():
|
||||||
|
import torch
|
||||||
|
|
||||||
|
from transformers import (
|
||||||
|
{{cookiecutter.camelcase_modelname}}Config,
|
||||||
|
{{cookiecutter.camelcase_modelname}}ForConditionalGeneration,
|
||||||
|
{{cookiecutter.camelcase_modelname}}ForQuestionAnswering,
|
||||||
|
{{cookiecutter.camelcase_modelname}}ForSequenceClassification,
|
||||||
|
{{cookiecutter.camelcase_modelname}}Model,
|
||||||
|
{{cookiecutter.camelcase_modelname}}Tokenizer,
|
||||||
|
)
|
||||||
|
from transformers.models.{{cookiecutter.lowercase_modelname}}.modeling_{{cookiecutter.lowercase_modelname}} import (
|
||||||
|
{{cookiecutter.camelcase_modelname}}Decoder,
|
||||||
|
{{cookiecutter.camelcase_modelname}}Encoder,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def prepare_{{cookiecutter.lowercase_modelname}}_inputs_dict(
|
||||||
|
config,
|
||||||
|
input_ids,
|
||||||
|
decoder_input_ids,
|
||||||
|
attention_mask=None,
|
||||||
|
decoder_attention_mask=None,
|
||||||
|
):
|
||||||
|
if attention_mask is None:
|
||||||
|
attention_mask = input_ids.ne(config.pad_token_id)
|
||||||
|
if decoder_attention_mask is None:
|
||||||
|
decoder_attention_mask = decoder_input_ids.ne(config.pad_token_id)
|
||||||
|
return {
|
||||||
|
"input_ids": input_ids,
|
||||||
|
"decoder_input_ids": decoder_input_ids,
|
||||||
|
"attention_mask": attention_mask,
|
||||||
|
"decoder_attention_mask": attention_mask,
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@require_torch
|
||||||
|
class {{cookiecutter.camelcase_modelname}}ModelTester:
|
||||||
|
def __init__(
|
||||||
|
self,
|
||||||
|
parent,
|
||||||
|
batch_size=13,
|
||||||
|
seq_length=7,
|
||||||
|
is_training=True,
|
||||||
|
use_labels=False,
|
||||||
|
vocab_size=99,
|
||||||
|
hidden_size=16,
|
||||||
|
num_hidden_layers=2,
|
||||||
|
num_attention_heads=4,
|
||||||
|
intermediate_size=4,
|
||||||
|
hidden_act="gelu",
|
||||||
|
hidden_dropout_prob=0.1,
|
||||||
|
attention_probs_dropout_prob=0.1,
|
||||||
|
max_position_embeddings=20,
|
||||||
|
eos_token_id=2,
|
||||||
|
pad_token_id=1,
|
||||||
|
bos_token_id=0,
|
||||||
|
):
|
||||||
|
self.parent = parent
|
||||||
|
self.batch_size = batch_size
|
||||||
|
self.seq_length = seq_length
|
||||||
|
self.is_training = is_training
|
||||||
|
self.use_labels = use_labels
|
||||||
|
self.vocab_size = vocab_size
|
||||||
|
self.hidden_size = hidden_size
|
||||||
|
self.num_hidden_layers = num_hidden_layers
|
||||||
|
self.num_attention_heads = num_attention_heads
|
||||||
|
self.intermediate_size = intermediate_size
|
||||||
|
self.hidden_act = hidden_act
|
||||||
|
self.hidden_dropout_prob = hidden_dropout_prob
|
||||||
|
self.attention_probs_dropout_prob = attention_probs_dropout_prob
|
||||||
|
self.max_position_embeddings = max_position_embeddings
|
||||||
|
self.eos_token_id = eos_token_id
|
||||||
|
self.pad_token_id = pad_token_id
|
||||||
|
self.bos_token_id = bos_token_id
|
||||||
|
|
||||||
|
def prepare_config_and_inputs(self):
|
||||||
|
input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)
|
||||||
|
input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size).clamp(
|
||||||
|
3,
|
||||||
|
)
|
||||||
|
input_ids[:, -1] = self.eos_token_id # Eos Token
|
||||||
|
|
||||||
|
decoder_input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)
|
||||||
|
|
||||||
|
config = {{cookiecutter.camelcase_modelname}}Config(
|
||||||
|
vocab_size=self.vocab_size,
|
||||||
|
d_model=self.hidden_size,
|
||||||
|
encoder_layers=self.num_hidden_layers,
|
||||||
|
decoder_layers=self.num_hidden_layers,
|
||||||
|
encoder_attention_heads=self.num_attention_heads,
|
||||||
|
decoder_attention_heads=self.num_attention_heads,
|
||||||
|
encoder_ffn_dim=self.intermediate_size,
|
||||||
|
decoder_ffn_dim=self.intermediate_size,
|
||||||
|
dropout=self.hidden_dropout_prob,
|
||||||
|
attention_dropout=self.attention_probs_dropout_prob,
|
||||||
|
max_position_embeddings=self.max_position_embeddings,
|
||||||
|
eos_token_id=self.eos_token_id,
|
||||||
|
bos_token_id=self.bos_token_id,
|
||||||
|
pad_token_id=self.pad_token_id,
|
||||||
|
)
|
||||||
|
inputs_dict = prepare_{{cookiecutter.lowercase_modelname}}_inputs_dict(config, input_ids, decoder_input_ids)
|
||||||
|
return config, inputs_dict
|
||||||
|
|
||||||
|
def prepare_config_and_inputs_for_common(self):
|
||||||
|
config, inputs_dict = self.prepare_config_and_inputs()
|
||||||
|
return config, inputs_dict
|
||||||
|
|
||||||
|
def create_and_check_decoder_model_past_large_inputs(self, config, inputs_dict):
|
||||||
|
model = {{cookiecutter.camelcase_modelname}}Model(config=config).get_decoder().to(torch_device).eval()
|
||||||
|
input_ids = inputs_dict["input_ids"]
|
||||||
|
attention_mask = inputs_dict["attention_mask"]
|
||||||
|
|
||||||
|
# first forward pass
|
||||||
|
outputs = model(input_ids, attention_mask=attention_mask, use_cache=True)
|
||||||
|
|
||||||
|
output, past_key_values = outputs.to_tuple()
|
||||||
|
|
||||||
|
# create hypothetical multiple next token and extent to next_input_ids
|
||||||
|
next_tokens = ids_tensor((self.batch_size, 3), config.vocab_size)
|
||||||
|
next_attn_mask = ids_tensor((self.batch_size, 3), 2)
|
||||||
|
|
||||||
|
# append to next input_ids and
|
||||||
|
next_input_ids = torch.cat([input_ids, next_tokens], dim=-1)
|
||||||
|
next_attention_mask = torch.cat([attention_mask, next_attn_mask], dim=-1)
|
||||||
|
|
||||||
|
output_from_no_past = model(next_input_ids, attention_mask=next_attention_mask)["last_hidden_state"]
|
||||||
|
output_from_past = model(next_tokens, attention_mask=next_attention_mask, past_key_values=past_key_values)["last_hidden_state"]
|
||||||
|
|
||||||
|
# select random slice
|
||||||
|
random_slice_idx = ids_tensor((1,), output_from_past.shape[-1]).item()
|
||||||
|
output_from_no_past_slice = output_from_no_past[:, -3:, random_slice_idx].detach()
|
||||||
|
output_from_past_slice = output_from_past[:, :, random_slice_idx].detach()
|
||||||
|
|
||||||
|
self.parent.assertTrue(output_from_past_slice.shape[1] == next_tokens.shape[1])
|
||||||
|
|
||||||
|
# test that outputs are equal for slice
|
||||||
|
self.parent.assertTrue(torch.allclose(output_from_past_slice, output_from_no_past_slice, atol=1e-2))
|
||||||
|
|
||||||
|
def check_encoder_decoder_model_standalone(self, config, inputs_dict):
|
||||||
|
model = {{cookiecutter.camelcase_modelname}}Model(config=config).to(torch_device).eval()
|
||||||
|
outputs = model(**inputs_dict)
|
||||||
|
|
||||||
|
encoder_last_hidden_state = outputs.encoder_last_hidden_state
|
||||||
|
last_hidden_state = outputs.last_hidden_state
|
||||||
|
|
||||||
|
with tempfile.TemporaryDirectory() as tmpdirname:
|
||||||
|
encoder = model.get_encoder()
|
||||||
|
encoder.save_pretrained(tmpdirname)
|
||||||
|
encoder = {{cookiecutter.camelcase_modelname}}Encoder.from_pretrained(tmpdirname).to(torch_device)
|
||||||
|
|
||||||
|
encoder_last_hidden_state_2 = encoder(inputs_dict["input_ids"], attention_mask=inputs_dict["attention_mask"])[
|
||||||
|
0
|
||||||
|
]
|
||||||
|
|
||||||
|
self.parent.assertTrue((encoder_last_hidden_state_2 - encoder_last_hidden_state).abs().max().item() < 1e-3)
|
||||||
|
|
||||||
|
with tempfile.TemporaryDirectory() as tmpdirname:
|
||||||
|
decoder = model.get_decoder()
|
||||||
|
decoder.save_pretrained(tmpdirname)
|
||||||
|
decoder = {{cookiecutter.camelcase_modelname}}Decoder.from_pretrained(tmpdirname).to(torch_device)
|
||||||
|
|
||||||
|
last_hidden_state_2 = decoder(
|
||||||
|
input_ids=inputs_dict["decoder_input_ids"],
|
||||||
|
attention_mask=inputs_dict["decoder_attention_mask"],
|
||||||
|
encoder_hidden_states=encoder_last_hidden_state,
|
||||||
|
encoder_attention_mask=inputs_dict["attention_mask"],
|
||||||
|
)[0]
|
||||||
|
|
||||||
|
self.parent.assertTrue((last_hidden_state_2 - last_hidden_state).abs().max().item() < 1e-3)
|
||||||
|
|
||||||
|
|
||||||
|
@require_torch
|
||||||
|
class {{cookiecutter.camelcase_modelname}}ModelTest(ModelTesterMixin, GenerationTesterMixin, unittest.TestCase):
|
||||||
|
all_model_classes = (
|
||||||
|
({{cookiecutter.camelcase_modelname}}Model, {{cookiecutter.camelcase_modelname}}ForConditionalGeneration, {{cookiecutter.camelcase_modelname}}ForSequenceClassification, {{cookiecutter.camelcase_modelname}}ForQuestionAnswering)
|
||||||
|
if is_torch_available()
|
||||||
|
else ()
|
||||||
|
)
|
||||||
|
all_generative_model_classes = ({{cookiecutter.camelcase_modelname}}ForConditionalGeneration,) if is_torch_available() else ()
|
||||||
|
is_encoder_decoder = True
|
||||||
|
test_pruning = False
|
||||||
|
test_head_masking = False
|
||||||
|
test_missing_keys = False
|
||||||
|
|
||||||
|
def setUp(self):
|
||||||
|
self.model_tester = {{cookiecutter.camelcase_modelname}}ModelTester(self)
|
||||||
|
self.config_tester = ConfigTester(self, config_class={{cookiecutter.camelcase_modelname}}Config)
|
||||||
|
|
||||||
|
def test_config(self):
|
||||||
|
self.config_tester.run_common_tests()
|
||||||
|
|
||||||
|
def test_initialization_more(self):
|
||||||
|
config, inputs_dict = self.model_tester.prepare_config_and_inputs()
|
||||||
|
model = {{cookiecutter.camelcase_modelname}}Model(config)
|
||||||
|
model.to(torch_device)
|
||||||
|
model.eval()
|
||||||
|
# test init
|
||||||
|
self.assertTrue((model.encoder.embed_tokens.weight == model.shared.weight).all().item())
|
||||||
|
|
||||||
|
def _check_var(module):
|
||||||
|
"""Check that we initialized various parameters from N(0, config.init_std)."""
|
||||||
|
self.assertAlmostEqual(torch.std(module.weight).item(), config.init_std, 2)
|
||||||
|
|
||||||
|
_check_var(model.encoder.embed_tokens)
|
||||||
|
_check_var(model.encoder.layers[0].self_attn.k_proj)
|
||||||
|
_check_var(model.encoder.layers[0].fc1)
|
||||||
|
_check_var(model.encoder.embed_positions)
|
||||||
|
|
||||||
|
def test_save_load_strict(self):
|
||||||
|
config, inputs_dict = self.model_tester.prepare_config_and_inputs()
|
||||||
|
for model_class in self.all_model_classes:
|
||||||
|
model = model_class(config)
|
||||||
|
|
||||||
|
with tempfile.TemporaryDirectory() as tmpdirname:
|
||||||
|
model.save_pretrained(tmpdirname)
|
||||||
|
model2, info = model_class.from_pretrained(tmpdirname, output_loading_info=True)
|
||||||
|
self.assertEqual(info["missing_keys"], [])
|
||||||
|
|
||||||
|
def test_decoder_model_past_with_large_inputs(self):
|
||||||
|
config_and_inputs = self.model_tester.prepare_config_and_inputs()
|
||||||
|
self.model_tester.create_and_check_decoder_model_past_large_inputs(*config_and_inputs)
|
||||||
|
|
||||||
|
def test_encoder_decoder_model_standalone(self):
|
||||||
|
config_and_inputs = self.model_tester.prepare_config_and_inputs_for_common()
|
||||||
|
self.model_tester.check_encoder_decoder_model_standalone(*config_and_inputs)
|
||||||
|
|
||||||
|
# {{cookiecutter.camelcase_modelname}}ForSequenceClassification does not support inputs_embeds
|
||||||
|
def test_inputs_embeds(self):
|
||||||
|
config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
|
||||||
|
|
||||||
|
for model_class in ({{cookiecutter.camelcase_modelname}}Model, {{cookiecutter.camelcase_modelname}}ForConditionalGeneration, {{cookiecutter.camelcase_modelname}}ForQuestionAnswering):
|
||||||
|
model = model_class(config)
|
||||||
|
model.to(torch_device)
|
||||||
|
model.eval()
|
||||||
|
|
||||||
|
inputs = copy.deepcopy(self._prepare_for_class(inputs_dict, model_class))
|
||||||
|
|
||||||
|
if not self.is_encoder_decoder:
|
||||||
|
input_ids = inputs["input_ids"]
|
||||||
|
del inputs["input_ids"]
|
||||||
|
else:
|
||||||
|
encoder_input_ids = inputs["input_ids"]
|
||||||
|
decoder_input_ids = inputs.get("decoder_input_ids", encoder_input_ids)
|
||||||
|
del inputs["input_ids"]
|
||||||
|
inputs.pop("decoder_input_ids", None)
|
||||||
|
|
||||||
|
wte = model.get_input_embeddings()
|
||||||
|
if not self.is_encoder_decoder:
|
||||||
|
inputs["inputs_embeds"] = wte(input_ids)
|
||||||
|
else:
|
||||||
|
inputs["inputs_embeds"] = wte(encoder_input_ids)
|
||||||
|
inputs["decoder_inputs_embeds"] = wte(decoder_input_ids)
|
||||||
|
|
||||||
|
with torch.no_grad():
|
||||||
|
model(**inputs)[0]
|
||||||
|
|
||||||
|
def test_generate_fp16(self):
|
||||||
|
config, input_dict = self.model_tester.prepare_config_and_inputs()
|
||||||
|
input_ids = input_dict["input_ids"]
|
||||||
|
attention_mask = input_ids.ne(1).to(torch_device)
|
||||||
|
model = {{cookiecutter.camelcase_modelname}}ForConditionalGeneration(config).eval().to(torch_device)
|
||||||
|
if torch_device == "cuda":
|
||||||
|
model.half()
|
||||||
|
model.generate(input_ids, attention_mask=attention_mask)
|
||||||
|
model.generate(num_beams=4, do_sample=True, early_stopping=False, num_return_sequences=3)
|
||||||
|
|
||||||
|
|
||||||
|
def assert_tensors_close(a, b, atol=1e-12, prefix=""):
    """
    Return ``True`` when ``a`` and ``b`` are element-wise close, otherwise raise a readable ``AssertionError``.

    Args:
        a: first tensor, or ``None``.
        b: second tensor, or ``None``.
        atol: absolute tolerance forwarded to ``torch.allclose``.
        prefix: optional label put in front of the failure message, followed by ``": "``.

    Returns:
        ``True`` if both arguments are ``None`` or ``torch.allclose(a, b, atol=atol)`` holds.

    Raises:
        AssertionError: if exactly one argument is not a tensor, if the tensors cannot be compared (for example
            because their shapes do not broadcast), or if their values differ by more than the tolerance.
    """
    if a is None and b is None:
        return True

    def _fail(msg):
        raise AssertionError(prefix + ": " + msg if prefix else msg)

    # A non-tensor operand can never be "close"; report it instead of letting torch raise a TypeError.
    if not (torch.is_tensor(a) and torch.is_tensor(b)):
        _fail(f"{a} != {b}")

    try:
        if torch.allclose(a, b, atol=atol):
            return True
        pct_different = torch.gt((a - b).abs(), atol).float().mean().item()
    except RuntimeError as err:
        # torch raises RuntimeError for operands it cannot combine, e.g. non-broadcastable shapes.
        raise AssertionError(
            (prefix + ": " if prefix else "")
            + f"tensors could not be compared (shapes {tuple(a.shape)} vs {tuple(b.shape)}, "
            + f"dtypes {a.dtype} vs {b.dtype}): {err}"
        ) from err

    # Large tensors are summarised; small ones are printed in full.
    if a.numel() > 100:
        _fail(f"tensor values are {pct_different:.1%} percent different.")
    _fail(f"{a} != {b}")
|
||||||
|
|
||||||
|
|
||||||
|
def _long_tensor(tok_lst):
    """Convert a (nested) list of token ids into a ``torch.long`` tensor placed on ``torch_device``."""
    tensor_kwargs = {"dtype": torch.long, "device": torch_device}
    return torch.tensor(tok_lst, **tensor_kwargs)
|
||||||
|
|
||||||
|
|
||||||
|
# Absolute tolerance (``atol``) used by the integration tests when comparing model outputs to expected slices.
TOLERANCE = 1e-4
|
||||||
|
|
||||||
|
|
||||||
|
@require_torch
|
||||||
|
@require_sentencepiece
|
||||||
|
@require_tokenizers
|
||||||
|
@slow
|
||||||
|
class {{cookiecutter.camelcase_modelname}}ModelIntegrationTests(unittest.TestCase):
|
||||||
|
@cached_property
|
||||||
|
def default_tokenizer(self):
|
||||||
|
return {{cookiecutter.camelcase_modelname}}Tokenizer.from_pretrained('{{cookiecutter.checkpoint_identifier}}')
|
||||||
|
|
||||||
|
def test_inference_no_head(self):
|
||||||
|
model = {{cookiecutter.camelcase_modelname}}Model.from_pretrained('{{cookiecutter.checkpoint_identifier}}').to(torch_device)
|
||||||
|
input_ids = _long_tensor([[0, 31414, 232, 328, 740, 1140, 12695, 69, 46078, 1588, 2]])
|
||||||
|
decoder_input_ids = _long_tensor([[2, 0, 31414, 232, 328, 740, 1140, 12695, 69, 46078, 1588]])
|
||||||
|
inputs_dict = prepare_{{cookiecutter.lowercase_modelname}}_inputs_dict(model.config, input_ids, decoder_input_ids)
|
||||||
|
with torch.no_grad():
|
||||||
|
output = model(**inputs_dict)[0]
|
||||||
|
expected_shape = torch.Size((1, 11, 1024))
|
||||||
|
self.assertEqual(output.shape, expected_shape)
|
||||||
|
# change to expected output here
|
||||||
|
expected_slice = torch.tensor(
|
||||||
|
[[0.7144, 0.8143, -1.2813], [0.7144, 0.8143, -1.2813], [-0.0467, 2.5911, -2.1845]], device=torch_device
|
||||||
|
)
|
||||||
|
self.assertTrue(torch.allclose(output[:, :3, :3], expected_slice, atol=TOLERANCE))
|
||||||
|
|
||||||
|
def test_inference_head(self):
|
||||||
|
model = {{cookiecutter.camelcase_modelname}}ForConditionalGeneration.from_pretrained('{{cookiecutter.checkpoint_identifier}}').to(torch_device)
|
||||||
|
|
||||||
|
# change to intended input
|
||||||
|
input_ids = _long_tensor([[0, 31414, 232, 328, 740, 1140, 12695, 69, 46078, 1588, 2]])
|
||||||
|
decoder_input_ids = _long_tensor([[0, 31414, 232, 328, 740, 1140, 12695, 69, 46078, 1588, 2]])
|
||||||
|
inputs_dict = prepare_{{cookiecutter.lowercase_modelname}}_inputs_dict(model.config, input_ids, decoder_input_ids)
|
||||||
|
with torch.no_grad():
|
||||||
|
output = model(**inputs_dict)[0]
|
||||||
|
expected_shape = torch.Size((1, 11, model.config.vocab_size))
|
||||||
|
self.assertEqual(output.shape, expected_shape)
|
||||||
|
# change to expected output here
|
||||||
|
expected_slice = torch.tensor(
|
||||||
|
[[0.7144, 0.8143, -1.2813], [0.7144, 0.8143, -1.2813], [-0.0467, 2.5911, -2.1845]], device=torch_device
|
||||||
|
)
|
||||||
|
self.assertTrue(torch.allclose(output[:, :3, :3], expected_slice, atol=TOLERANCE))
|
||||||
|
|
||||||
|
def test_seq_to_seq_generation(self):
|
||||||
|
hf = {{cookiecutter.camelcase_modelname}}ForConditionalGeneration.from_pretrained('{{cookiecutter.checkpoint_identifier}}').to(torch_device)
|
||||||
|
tok = {{cookiecutter.camelcase_modelname}}Tokenizer.from_pretrained('{{cookiecutter.checkpoint_identifier}}')
|
||||||
|
|
||||||
|
batch_input = [
|
||||||
|
# string 1,
|
||||||
|
# string 2,
|
||||||
|
# string 3,
|
||||||
|
# string 4,
|
||||||
|
]
|
||||||
|
|
||||||
|
# The below article tests that we don't add any hypotheses outside of the top n_beams
|
||||||
|
dct = tok.batch_encode_plus(
|
||||||
|
batch_input,
|
||||||
|
max_length=512,
|
||||||
|
padding="max_length",
|
||||||
|
truncation_strategy="only_first",
|
||||||
|
truncation=True,
|
||||||
|
return_tensors="pt",
|
||||||
|
)
|
||||||
|
|
||||||
|
hypotheses_batch = hf.generate(
|
||||||
|
input_ids=dct["input_ids"].to(torch_device),
|
||||||
|
attention_mask=dct["attention_mask"].to(torch_device),
|
||||||
|
num_beams=2,
|
||||||
|
)
|
||||||
|
|
||||||
|
EXPECTED = [
|
||||||
|
# here expected 1,
|
||||||
|
# here expected 2,
|
||||||
|
# here expected 3,
|
||||||
|
# here expected 4,
|
||||||
|
]
|
||||||
|
|
||||||
|
generated = tok.batch_decode(
|
||||||
|
hypotheses_batch.tolist(), clean_up_tokenization_spaces=True, skip_special_tokens=True
|
||||||
|
)
|
||||||
|
assert generated == EXPECTED
|
||||||
|
{% endif -%}
|
||||||
|
|||||||
@@ -28,6 +28,7 @@
|
|||||||
# To replace in: "src/transformers/__init__.py"
|
# To replace in: "src/transformers/__init__.py"
|
||||||
# Below: "if is_torch_available():" if generating PyTorch
|
# Below: "if is_torch_available():" if generating PyTorch
|
||||||
# Replace with:
|
# Replace with:
|
||||||
|
{% if cookiecutter.is_encoder_decoder_model == "False" %}
|
||||||
from .models.{{cookiecutter.lowercase_modelname}} import (
|
from .models.{{cookiecutter.lowercase_modelname}} import (
|
||||||
{{cookiecutter.uppercase_modelname}}_PRETRAINED_MODEL_ARCHIVE_LIST,
|
{{cookiecutter.uppercase_modelname}}_PRETRAINED_MODEL_ARCHIVE_LIST,
|
||||||
{{cookiecutter.camelcase_modelname}}ForMaskedLM,
|
{{cookiecutter.camelcase_modelname}}ForMaskedLM,
|
||||||
@@ -41,10 +42,20 @@
|
|||||||
{{cookiecutter.camelcase_modelname}}PreTrainedModel,
|
{{cookiecutter.camelcase_modelname}}PreTrainedModel,
|
||||||
load_tf_weights_in_{{cookiecutter.lowercase_modelname}},
|
load_tf_weights_in_{{cookiecutter.lowercase_modelname}},
|
||||||
)
|
)
|
||||||
|
{% else %}
|
||||||
|
from .models.{{cookiecutter.lowercase_modelname}} import (
|
||||||
|
{{cookiecutter.uppercase_modelname}}_PRETRAINED_MODEL_ARCHIVE_LIST,
|
||||||
|
{{cookiecutter.camelcase_modelname}}ForConditionalGeneration,
|
||||||
|
{{cookiecutter.camelcase_modelname}}ForQuestionAnswering,
|
||||||
|
{{cookiecutter.camelcase_modelname}}ForSequenceClassification,
|
||||||
|
{{cookiecutter.camelcase_modelname}}Model,
|
||||||
|
)
|
||||||
|
{% endif -%}
|
||||||
# End.
|
# End.
|
||||||
|
|
||||||
# Below: "if is_tf_available():" if generating TensorFlow
|
# Below: "if is_tf_available():" if generating TensorFlow
|
||||||
# Replace with:
|
# Replace with:
|
||||||
|
{% if cookiecutter.is_encoder_decoder_model == "False" %}
|
||||||
from .models.{{cookiecutter.lowercase_modelname}} import (
|
from .models.{{cookiecutter.lowercase_modelname}} import (
|
||||||
TF_{{cookiecutter.uppercase_modelname}}_PRETRAINED_MODEL_ARCHIVE_LIST,
|
TF_{{cookiecutter.uppercase_modelname}}_PRETRAINED_MODEL_ARCHIVE_LIST,
|
||||||
TF{{cookiecutter.camelcase_modelname}}ForMaskedLM,
|
TF{{cookiecutter.camelcase_modelname}}ForMaskedLM,
|
||||||
@@ -57,11 +68,19 @@
|
|||||||
TF{{cookiecutter.camelcase_modelname}}Model,
|
TF{{cookiecutter.camelcase_modelname}}Model,
|
||||||
TF{{cookiecutter.camelcase_modelname}}PreTrainedModel,
|
TF{{cookiecutter.camelcase_modelname}}PreTrainedModel,
|
||||||
)
|
)
|
||||||
|
{% else %}
|
||||||
|
from .models.{{cookiecutter.lowercase_modelname}} import (
|
||||||
|
TF{{cookiecutter.camelcase_modelname}}ForConditionalGeneration,
|
||||||
|
TF{{cookiecutter.camelcase_modelname}}Model,
|
||||||
|
TF{{cookiecutter.camelcase_modelname}}PreTrainedModel,
|
||||||
|
)
|
||||||
|
{% endif -%}
|
||||||
# End.
|
# End.
|
||||||
|
|
||||||
# Below: "if is_tokenizers_available():"
|
# Below: "if is_tokenizers_available():"
|
||||||
# Replace with:
|
# Replace with:
|
||||||
from models.{{cookiecutter.lowercase_modelname}} import {{cookiecutter.camelcase_modelname}}TokenizerFast
|
from .models.{{cookiecutter.lowercase_modelname}} import {{cookiecutter.camelcase_modelname}}TokenizerFast
|
||||||
|
# End.
|
||||||
|
|
||||||
# Below: "from .models.albert import ALBERT_PRETRAINED_CONFIG_ARCHIVE_MAP, AlbertConfig"
|
# Below: "from .models.albert import ALBERT_PRETRAINED_CONFIG_ARCHIVE_MAP, AlbertConfig"
|
||||||
# Replace with:
|
# Replace with:
|
||||||
@@ -101,6 +120,7 @@ from ..{{cookiecutter.lowercase_modelname}}.configuration_{{cookiecutter.lowerca
|
|||||||
|
|
||||||
# Below: "# Add modeling imports here"
|
# Below: "# Add modeling imports here"
|
||||||
# Replace with:
|
# Replace with:
|
||||||
|
{% if cookiecutter.is_encoder_decoder_model == "False" -%}
|
||||||
from ..{{cookiecutter.lowercase_modelname}}.modeling_{{cookiecutter.lowercase_modelname}} import (
|
from ..{{cookiecutter.lowercase_modelname}}.modeling_{{cookiecutter.lowercase_modelname}} import (
|
||||||
{{cookiecutter.camelcase_modelname}}ForMaskedLM,
|
{{cookiecutter.camelcase_modelname}}ForMaskedLM,
|
||||||
{{cookiecutter.camelcase_modelname}}ForCausalLM,
|
{{cookiecutter.camelcase_modelname}}ForCausalLM,
|
||||||
@@ -110,6 +130,14 @@ from ..{{cookiecutter.lowercase_modelname}}.modeling_{{cookiecutter.lowercase_mo
|
|||||||
{{cookiecutter.camelcase_modelname}}ForTokenClassification,
|
{{cookiecutter.camelcase_modelname}}ForTokenClassification,
|
||||||
{{cookiecutter.camelcase_modelname}}Model,
|
{{cookiecutter.camelcase_modelname}}Model,
|
||||||
)
|
)
|
||||||
|
{% else -%}
|
||||||
|
from ..{{cookiecutter.lowercase_modelname}}.modeling_{{cookiecutter.lowercase_modelname}} import (
|
||||||
|
{{cookiecutter.camelcase_modelname}}ForConditionalGeneration,
|
||||||
|
{{cookiecutter.camelcase_modelname}}ForQuestionAnswering,
|
||||||
|
{{cookiecutter.camelcase_modelname}}ForSequenceClassification,
|
||||||
|
{{cookiecutter.camelcase_modelname}}Model,
|
||||||
|
)
|
||||||
|
{% endif -%}
|
||||||
# End.
|
# End.
|
||||||
|
|
||||||
# Below: "# Base model mapping"
|
# Below: "# Base model mapping"
|
||||||
@@ -119,17 +147,27 @@ from ..{{cookiecutter.lowercase_modelname}}.modeling_{{cookiecutter.lowercase_mo
|
|||||||
|
|
||||||
# Below: "# Model with LM heads mapping"
|
# Below: "# Model with LM heads mapping"
|
||||||
# Replace with:
|
# Replace with:
|
||||||
|
{% if cookiecutter.is_encoder_decoder_model == "False" -%}
|
||||||
({{cookiecutter.camelcase_modelname}}Config, {{cookiecutter.camelcase_modelname}}ForMaskedLM),
|
({{cookiecutter.camelcase_modelname}}Config, {{cookiecutter.camelcase_modelname}}ForMaskedLM),
|
||||||
|
{% else %}
|
||||||
|
({{cookiecutter.camelcase_modelname}}Config, {{cookiecutter.camelcase_modelname}}ForConditionalGeneration),
|
||||||
|
{% endif -%}
|
||||||
# End.
|
# End.
|
||||||
|
|
||||||
# Below: "# Model for Causal LM mapping"
|
# Below: "# Model for Causal LM mapping"
|
||||||
# Replace with:
|
# Replace with:
|
||||||
|
{% if cookiecutter.is_encoder_decoder_model == "False" -%}
|
||||||
({{cookiecutter.camelcase_modelname}}Config, {{cookiecutter.camelcase_modelname}}ForCausalLM),
|
({{cookiecutter.camelcase_modelname}}Config, {{cookiecutter.camelcase_modelname}}ForCausalLM),
|
||||||
|
{% else -%}
|
||||||
|
{% endif -%}
|
||||||
# End.
|
# End.
|
||||||
|
|
||||||
# Below: "# Model for Masked LM mapping"
|
# Below: "# Model for Masked LM mapping"
|
||||||
# Replace with:
|
# Replace with:
|
||||||
|
{% if cookiecutter.is_encoder_decoder_model == "False" -%}
|
||||||
({{cookiecutter.camelcase_modelname}}Config, {{cookiecutter.camelcase_modelname}}ForMaskedLM),
|
({{cookiecutter.camelcase_modelname}}Config, {{cookiecutter.camelcase_modelname}}ForMaskedLM),
|
||||||
|
{% else -%}
|
||||||
|
{% endif -%}
|
||||||
# End.
|
# End.
|
||||||
|
|
||||||
# Below: "# Model for Sequence Classification mapping"
|
# Below: "# Model for Sequence Classification mapping"
|
||||||
@@ -144,14 +182,27 @@ from ..{{cookiecutter.lowercase_modelname}}.modeling_{{cookiecutter.lowercase_mo
|
|||||||
|
|
||||||
# Below: "# Model for Token Classification mapping"
|
# Below: "# Model for Token Classification mapping"
|
||||||
# Replace with:
|
# Replace with:
|
||||||
|
{% if cookiecutter.is_encoder_decoder_model == "False" -%}
|
||||||
({{cookiecutter.camelcase_modelname}}Config, {{cookiecutter.camelcase_modelname}}ForTokenClassification),
|
({{cookiecutter.camelcase_modelname}}Config, {{cookiecutter.camelcase_modelname}}ForTokenClassification),
|
||||||
|
{% else -%}
|
||||||
|
{% endif -%}
|
||||||
# End.
|
# End.
|
||||||
|
|
||||||
# Below: "# Model for Multiple Choice mapping"
|
# Below: "# Model for Multiple Choice mapping"
|
||||||
# Replace with:
|
# Replace with:
|
||||||
|
{% if cookiecutter.is_encoder_decoder_model == "False" -%}
|
||||||
({{cookiecutter.camelcase_modelname}}Config, {{cookiecutter.camelcase_modelname}}ForMultipleChoice),
|
({{cookiecutter.camelcase_modelname}}Config, {{cookiecutter.camelcase_modelname}}ForMultipleChoice),
|
||||||
|
{% else -%}
|
||||||
|
{% endif -%}
|
||||||
# End.
|
# End.
|
||||||
|
|
||||||
|
# Below: "# Model for Seq2Seq Causal LM mapping"
|
||||||
|
# Replace with:
|
||||||
|
{% if cookiecutter.is_encoder_decoder_model == "False" -%}
|
||||||
|
{% else %}
|
||||||
|
({{cookiecutter.camelcase_modelname}}Config, {{cookiecutter.camelcase_modelname}}ForConditionalGeneration),
|
||||||
|
{% endif -%}
|
||||||
|
# End.
|
||||||
|
|
||||||
# To replace in: "src/transformers/models/auto/modeling_tf_auto.py" if generating TensorFlow
|
# To replace in: "src/transformers/models/auto/modeling_tf_auto.py" if generating TensorFlow
|
||||||
# Below: "from .configuration_auto import ("
|
# Below: "from .configuration_auto import ("
|
||||||
@@ -161,6 +212,7 @@ from ..{{cookiecutter.lowercase_modelname}}.modeling_{{cookiecutter.lowercase_mo
|
|||||||
|
|
||||||
# Below: "# Add modeling imports here"
|
# Below: "# Add modeling imports here"
|
||||||
# Replace with:
|
# Replace with:
|
||||||
|
{% if cookiecutter.is_encoder_decoder_model == "False" -%}
|
||||||
from ..{{cookiecutter.lowercase_modelname}}.modeling_tf_{{cookiecutter.lowercase_modelname}} import (
|
from ..{{cookiecutter.lowercase_modelname}}.modeling_tf_{{cookiecutter.lowercase_modelname}} import (
|
||||||
TF{{cookiecutter.camelcase_modelname}}ForMaskedLM,
|
TF{{cookiecutter.camelcase_modelname}}ForMaskedLM,
|
||||||
TF{{cookiecutter.camelcase_modelname}}ForCausalLM,
|
TF{{cookiecutter.camelcase_modelname}}ForCausalLM,
|
||||||
@@ -170,6 +222,12 @@ from ..{{cookiecutter.lowercase_modelname}}.modeling_tf_{{cookiecutter.lowercase
|
|||||||
TF{{cookiecutter.camelcase_modelname}}ForTokenClassification,
|
TF{{cookiecutter.camelcase_modelname}}ForTokenClassification,
|
||||||
TF{{cookiecutter.camelcase_modelname}}Model,
|
TF{{cookiecutter.camelcase_modelname}}Model,
|
||||||
)
|
)
|
||||||
|
{% else -%}
|
||||||
|
from ..{{cookiecutter.lowercase_modelname}}.modeling_tf_{{cookiecutter.lowercase_modelname}} import (
|
||||||
|
TF{{cookiecutter.camelcase_modelname}}ForConditionalGeneration,
|
||||||
|
TF{{cookiecutter.camelcase_modelname}}Model,
|
||||||
|
)
|
||||||
|
{% endif -%}
|
||||||
# End.
|
# End.
|
||||||
|
|
||||||
# Below: "# Base model mapping"
|
# Below: "# Base model mapping"
|
||||||
@@ -179,35 +237,65 @@ from ..{{cookiecutter.lowercase_modelname}}.modeling_tf_{{cookiecutter.lowercase
|
|||||||
|
|
||||||
# Below: "# Model with LM heads mapping"
|
# Below: "# Model with LM heads mapping"
|
||||||
# Replace with:
|
# Replace with:
|
||||||
|
{% if cookiecutter.is_encoder_decoder_model == "False" -%}
|
||||||
({{cookiecutter.camelcase_modelname}}Config, TF{{cookiecutter.camelcase_modelname}}ForMaskedLM),
|
({{cookiecutter.camelcase_modelname}}Config, TF{{cookiecutter.camelcase_modelname}}ForMaskedLM),
|
||||||
|
{% else %}
|
||||||
|
({{cookiecutter.camelcase_modelname}}Config, TF{{cookiecutter.camelcase_modelname}}ForConditionalGeneration),
|
||||||
|
{% endif -%}
|
||||||
# End.
|
# End.
|
||||||
|
|
||||||
# Below: "# Model for Causal LM mapping"
|
# Below: "# Model for Causal LM mapping"
|
||||||
# Replace with:
|
# Replace with:
|
||||||
|
{% if cookiecutter.is_encoder_decoder_model == "False" -%}
|
||||||
({{cookiecutter.camelcase_modelname}}Config, TF{{cookiecutter.camelcase_modelname}}ForCausalLM),
|
({{cookiecutter.camelcase_modelname}}Config, TF{{cookiecutter.camelcase_modelname}}ForCausalLM),
|
||||||
|
{% else -%}
|
||||||
|
{% endif -%}
|
||||||
# End.
|
# End.
|
||||||
|
|
||||||
# Below: "# Model for Masked LM mapping"
|
# Below: "# Model for Masked LM mapping"
|
||||||
# Replace with:
|
# Replace with:
|
||||||
|
{% if cookiecutter.is_encoder_decoder_model == "False" -%}
|
||||||
({{cookiecutter.camelcase_modelname}}Config, TF{{cookiecutter.camelcase_modelname}}ForMaskedLM),
|
({{cookiecutter.camelcase_modelname}}Config, TF{{cookiecutter.camelcase_modelname}}ForMaskedLM),
|
||||||
|
{% else -%}
|
||||||
|
{% endif -%}
|
||||||
# End.
|
# End.
|
||||||
|
|
||||||
# Below: "# Model for Sequence Classification mapping"
|
# Below: "# Model for Sequence Classification mapping"
|
||||||
# Replace with:
|
# Replace with:
|
||||||
|
{% if cookiecutter.is_encoder_decoder_model == "False" -%}
|
||||||
({{cookiecutter.camelcase_modelname}}Config, TF{{cookiecutter.camelcase_modelname}}ForSequenceClassification),
|
({{cookiecutter.camelcase_modelname}}Config, TF{{cookiecutter.camelcase_modelname}}ForSequenceClassification),
|
||||||
|
{% else -%}
|
||||||
|
{% endif -%}
|
||||||
# End.
|
# End.
|
||||||
|
|
||||||
# Below: "# Model for Question Answering mapping"
|
# Below: "# Model for Question Answering mapping"
|
||||||
# Replace with:
|
# Replace with:
|
||||||
|
{% if cookiecutter.is_encoder_decoder_model == "False" -%}
|
||||||
({{cookiecutter.camelcase_modelname}}Config, TF{{cookiecutter.camelcase_modelname}}ForQuestionAnswering),
|
({{cookiecutter.camelcase_modelname}}Config, TF{{cookiecutter.camelcase_modelname}}ForQuestionAnswering),
|
||||||
|
{% else -%}
|
||||||
|
{% endif -%}
|
||||||
# End.
|
# End.
|
||||||
|
|
||||||
# Below: "# Model for Token Classification mapping"
|
# Below: "# Model for Token Classification mapping"
|
||||||
# Replace with:
|
# Replace with:
|
||||||
|
{% if cookiecutter.is_encoder_decoder_model == "False" -%}
|
||||||
({{cookiecutter.camelcase_modelname}}Config, TF{{cookiecutter.camelcase_modelname}}ForTokenClassification),
|
({{cookiecutter.camelcase_modelname}}Config, TF{{cookiecutter.camelcase_modelname}}ForTokenClassification),
|
||||||
|
{% else -%}
|
||||||
|
{% endif -%}
|
||||||
# End.
|
# End.
|
||||||
|
|
||||||
# Below: "# Model for Multiple Choice mapping"
|
# Below: "# Model for Multiple Choice mapping"
|
||||||
# Replace with:
|
# Replace with:
|
||||||
|
{% if cookiecutter.is_encoder_decoder_model == "False" -%}
|
||||||
({{cookiecutter.camelcase_modelname}}Config, TF{{cookiecutter.camelcase_modelname}}ForMultipleChoice),
|
({{cookiecutter.camelcase_modelname}}Config, TF{{cookiecutter.camelcase_modelname}}ForMultipleChoice),
|
||||||
|
{% else -%}
|
||||||
|
{% endif -%}
|
||||||
|
# End.
|
||||||
|
|
||||||
|
# Below: "# Model for Seq2Seq Causal LM mapping"
|
||||||
|
# Replace with:
|
||||||
|
{% if cookiecutter.is_encoder_decoder_model == "False" -%}
|
||||||
|
{% else %}
|
||||||
|
({{cookiecutter.camelcase_modelname}}Config, TF{{cookiecutter.camelcase_modelname}}ForConditionalGeneration),
|
||||||
|
{% endif -%}
|
||||||
# End.
|
# End.
|
||||||
|
|||||||
@@ -56,6 +56,47 @@ class {{cookiecutter.camelcase_modelname}}TokenizerFast(BertTokenizerFast):
|
|||||||
max_model_input_sizes = PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES
|
max_model_input_sizes = PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES
|
||||||
pretrained_init_configuration = PRETRAINED_INIT_CONFIGURATION
|
pretrained_init_configuration = PRETRAINED_INIT_CONFIGURATION
|
||||||
slow_tokenizer_class = {{cookiecutter.camelcase_modelname}}Tokenizer
|
slow_tokenizer_class = {{cookiecutter.camelcase_modelname}}Tokenizer
|
||||||
|
|
||||||
|
{%- elif cookiecutter.tokenizer_type == "Based on BART" %}
|
||||||
|
from ...utils import logging
|
||||||
|
from ..bart.tokenization_bart_fast import BartTokenizerFast
|
||||||
|
from .tokenization_{{cookiecutter.lowercase_modelname}} import {{cookiecutter.camelcase_modelname}}Tokenizer
|
||||||
|
|
||||||
|
|
||||||
|
logger = logging.get_logger(__name__)
|
||||||
|
|
||||||
|
PRETRAINED_VOCAB_FILES_MAP = {
|
||||||
|
"vocab_file": {
|
||||||
|
"{{cookiecutter.checkpoint_identifier}}": "https://huggingface.co/{{cookiecutter.checkpoint_identifier}}/resolve/main/vocab.json",
|
||||||
|
},
|
||||||
|
"merges_file": {
|
||||||
|
"{{cookiecutter.checkpoint_identifier}}": "https://huggingface.co/{{cookiecutter.checkpoint_identifier}}/resolve/main/merges.txt",
|
||||||
|
},
|
||||||
|
"tokenizer_file": {
|
||||||
|
"{{cookiecutter.checkpoint_identifier}}": "https://huggingface.co/{{cookiecutter.checkpoint_identifier}}/resolve/main/tokenizer.json",
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES = {
|
||||||
|
"{{cookiecutter.checkpoint_identifier}}": 1024,
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
class {{cookiecutter.camelcase_modelname}}TokenizerFast(BartTokenizerFast):
|
||||||
|
r"""
|
||||||
|
Construct a "fast" {{cookiecutter.modelname}} tokenizer (backed by HuggingFace's `tokenizers` library).
|
||||||
|
|
||||||
|
:class:`~transformers.{{cookiecutter.camelcase_modelname}}TokenizerFast` is identical to :class:`~transformers.BartTokenizerFast` and runs
|
||||||
|
end-to-end tokenization: byte-level Byte-Pair-Encoding.
|
||||||
|
|
||||||
|
Refer to superclass :class:`~transformers.BartTokenizerFast` for usage examples and documentation concerning
|
||||||
|
parameters.
|
||||||
|
"""
|
||||||
|
|
||||||
|
pretrained_vocab_files_map = PRETRAINED_VOCAB_FILES_MAP
|
||||||
|
max_model_input_sizes = PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES
|
||||||
|
slow_tokenizer_class = {{cookiecutter.camelcase_modelname}}Tokenizer
|
||||||
|
|
||||||
{%- elif cookiecutter.tokenizer_type == "Standalone" %}
|
{%- elif cookiecutter.tokenizer_type == "Standalone" %}
|
||||||
from typing import List, Optional
|
from typing import List, Optional
|
||||||
|
|
||||||
|
|||||||
@@ -54,6 +54,45 @@ class {{cookiecutter.camelcase_modelname}}Tokenizer(BertTokenizer):
|
|||||||
pretrained_vocab_files_map = PRETRAINED_VOCAB_FILES_MAP
|
pretrained_vocab_files_map = PRETRAINED_VOCAB_FILES_MAP
|
||||||
max_model_input_sizes = PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES
|
max_model_input_sizes = PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES
|
||||||
pretrained_init_configuration = PRETRAINED_INIT_CONFIGURATION
|
pretrained_init_configuration = PRETRAINED_INIT_CONFIGURATION
|
||||||
|
|
||||||
|
{%- elif cookiecutter.tokenizer_type == "Based on BART" %}
|
||||||
|
from ...utils import logging
|
||||||
|
from ..bart.tokenization_bart import BartTokenizer
|
||||||
|
|
||||||
|
|
||||||
|
logger = logging.get_logger(__name__)
|
||||||
|
|
||||||
|
PRETRAINED_VOCAB_FILES_MAP = {
|
||||||
|
"vocab_file": {
|
||||||
|
"{{cookiecutter.checkpoint_identifier}}": "https://huggingface.co/{{cookiecutter.checkpoint_identifier}}/resolve/main/vocab.json",
|
||||||
|
},
|
||||||
|
"merges_file": {
|
||||||
|
"{{cookiecutter.checkpoint_identifier}}": "https://huggingface.co/{{cookiecutter.checkpoint_identifier}}/resolve/main/merges.txt",
|
||||||
|
},
|
||||||
|
"tokenizer_file": {
|
||||||
|
"{{cookiecutter.checkpoint_identifier}}": "https://huggingface.co/{{cookiecutter.checkpoint_identifier}}/resolve/main/tokenizer.json",
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES = {
|
||||||
|
"{{cookiecutter.checkpoint_identifier}}": 1024,
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
class {{cookiecutter.camelcase_modelname}}Tokenizer(BartTokenizer):
|
||||||
|
"""
|
||||||
|
Construct a {{cookiecutter.modelname}} tokenizer.
|
||||||
|
|
||||||
|
:class:`~transformers.{{cookiecutter.camelcase_modelname}}Tokenizer` is identical to :class:`~transformers.BartTokenizer` and runs end-to-end
|
||||||
|
tokenization: byte-level Byte-Pair-Encoding.
|
||||||
|
|
||||||
|
Refer to superclass :class:`~transformers.BartTokenizer` for usage examples and documentation concerning
|
||||||
|
parameters.
|
||||||
|
"""
|
||||||
|
|
||||||
|
pretrained_vocab_files_map = PRETRAINED_VOCAB_FILES_MAP
|
||||||
|
max_model_input_sizes = PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES
|
||||||
|
|
||||||
{%- elif cookiecutter.tokenizer_type == "Standalone" %}
|
{%- elif cookiecutter.tokenizer_type == "Standalone" %}
|
||||||
from typing import List, Optional
|
from typing import List, Optional
|
||||||
|
|
||||||
@@ -289,5 +328,4 @@ class {{cookiecutter.camelcase_modelname}}TokenizerFast(PreTrainedTokenizerFast)
|
|||||||
return len(cls + token_ids_0 + sep) * [0]
|
return len(cls + token_ids_0 + sep) * [0]
|
||||||
return len(cls + token_ids_0 + sep + sep + token_ids_1 + sep) * [0]
|
return len(cls + token_ids_0 + sep + sep + token_ids_1 + sep) * [0]
|
||||||
|
|
||||||
|
|
||||||
{% endif %}
|
{% endif %}
|
||||||
|
|||||||
@@ -57,7 +57,7 @@ Tips:
|
|||||||
.. autoclass:: transformers.{{cookiecutter.camelcase_modelname}}Model
|
.. autoclass:: transformers.{{cookiecutter.camelcase_modelname}}Model
|
||||||
:members: forward
|
:members: forward
|
||||||
|
|
||||||
|
{% if cookiecutter.is_encoder_decoder_model == "False" %}
|
||||||
{{cookiecutter.camelcase_modelname}}ForCausalLM
|
{{cookiecutter.camelcase_modelname}}ForCausalLM
|
||||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||||
|
|
||||||
@@ -83,7 +83,7 @@ Tips:
|
|||||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||||
|
|
||||||
.. autoclass:: transformers.{{cookiecutter.camelcase_modelname}}ForMultipleChoice
|
.. autoclass:: transformers.{{cookiecutter.camelcase_modelname}}ForMultipleChoice
|
||||||
:members:
|
:members: forward
|
||||||
|
|
||||||
|
|
||||||
{{cookiecutter.camelcase_modelname}}ForTokenClassification
|
{{cookiecutter.camelcase_modelname}}ForTokenClassification
|
||||||
@@ -99,6 +99,29 @@ Tips:
|
|||||||
.. autoclass:: transformers.{{cookiecutter.camelcase_modelname}}ForQuestionAnswering
|
.. autoclass:: transformers.{{cookiecutter.camelcase_modelname}}ForQuestionAnswering
|
||||||
:members: forward
|
:members: forward
|
||||||
|
|
||||||
|
{%- else %}
|
||||||
|
{{cookiecutter.camelcase_modelname}}ForConditionalGeneration
|
||||||
|
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||||
|
|
||||||
|
.. autoclass:: transformers.{{cookiecutter.camelcase_modelname}}ForConditionalGeneration
|
||||||
|
:members: forward
|
||||||
|
|
||||||
|
|
||||||
|
{{cookiecutter.camelcase_modelname}}ForSequenceClassification
|
||||||
|
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||||
|
|
||||||
|
.. autoclass:: transformers.{{cookiecutter.camelcase_modelname}}ForSequenceClassification
|
||||||
|
:members: forward
|
||||||
|
|
||||||
|
|
||||||
|
{{cookiecutter.camelcase_modelname}}ForQuestionAnswering
|
||||||
|
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||||
|
|
||||||
|
.. autoclass:: transformers.{{cookiecutter.camelcase_modelname}}ForQuestionAnswering
|
||||||
|
:members: forward
|
||||||
|
|
||||||
|
|
||||||
|
{% endif -%}
|
||||||
{% endif -%}
|
{% endif -%}
|
||||||
{% if "TensorFlow" in cookiecutter.generate_tensorflow_and_pytorch -%}
|
{% if "TensorFlow" in cookiecutter.generate_tensorflow_and_pytorch -%}
|
||||||
|
|
||||||
@@ -108,7 +131,7 @@ TF{{cookiecutter.camelcase_modelname}}Model
|
|||||||
.. autoclass:: transformers.TF{{cookiecutter.camelcase_modelname}}Model
|
.. autoclass:: transformers.TF{{cookiecutter.camelcase_modelname}}Model
|
||||||
:members: call
|
:members: call
|
||||||
|
|
||||||
|
{% if cookiecutter.is_encoder_decoder_model == "False" %}
|
||||||
TF{{cookiecutter.camelcase_modelname}}ForMaskedLM
|
TF{{cookiecutter.camelcase_modelname}}ForMaskedLM
|
||||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||||
|
|
||||||
@@ -120,7 +143,7 @@ TF{{cookiecutter.camelcase_modelname}}ForCausalLM
|
|||||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||||
|
|
||||||
.. autoclass:: transformers.TF{{cookiecutter.camelcase_modelname}}ForCausalLM
|
.. autoclass:: transformers.TF{{cookiecutter.camelcase_modelname}}ForCausalLM
|
||||||
:members: forward
|
:members: call
|
||||||
|
|
||||||
|
|
||||||
TF{{cookiecutter.camelcase_modelname}}ForSequenceClassification
|
TF{{cookiecutter.camelcase_modelname}}ForSequenceClassification
|
||||||
@@ -151,4 +174,11 @@ TF{{cookiecutter.camelcase_modelname}}ForQuestionAnswering
|
|||||||
:members: call
|
:members: call
|
||||||
|
|
||||||
|
|
||||||
|
{%- else %}
|
||||||
|
TF{{cookiecutter.camelcase_modelname}}ForConditionalGeneration
|
||||||
|
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||||
|
|
||||||
|
.. autoclass:: transformers.TF{{cookiecutter.camelcase_modelname}}ForConditionalGeneration
|
||||||
|
:members: call
|
||||||
|
{% endif -%}
|
||||||
{% endif -%}
|
{% endif -%}
|
||||||
|
|||||||
@@ -5,6 +5,7 @@
|
|||||||
"camelcase_modelname": "BrandNewBert",
|
"camelcase_modelname": "BrandNewBert",
|
||||||
"authors": "The HuggingFace Team",
|
"authors": "The HuggingFace Team",
|
||||||
"checkpoint_identifier": "brand-new-bert-base-cased",
|
"checkpoint_identifier": "brand-new-bert-base-cased",
|
||||||
"tokenizer_type": ["Based on BERT", "Standalone"],
|
"tokenizer_type": ["Based on BERT", "Based on BART", "Standalone"],
|
||||||
"generate_tensorflow_and_pytorch": ["PyTorch & TensorFlow", "PyTorch", "TensorFlow"]
|
"generate_tensorflow_and_pytorch": ["PyTorch & TensorFlow", "PyTorch", "TensorFlow"],
|
||||||
}
|
"is_encoder_decoder_model": ["True", "False"]
|
||||||
|
}
|
||||||
|
|||||||
@@ -6,5 +6,6 @@
|
|||||||
"authors": "The HuggingFace Team",
|
"authors": "The HuggingFace Team",
|
||||||
"checkpoint_identifier": "brand-new-bert-base-cased",
|
"checkpoint_identifier": "brand-new-bert-base-cased",
|
||||||
"tokenizer_type": "Based on BERT",
|
"tokenizer_type": "Based on BERT",
|
||||||
"generate_tensorflow_and_pytorch": "PyTorch & TensorFlow"
|
"generate_tensorflow_and_pytorch": "PyTorch & TensorFlow",
|
||||||
|
"is_encoder_decoder_model": "False"
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -6,5 +6,6 @@
|
|||||||
"authors": "The HuggingFace Team",
|
"authors": "The HuggingFace Team",
|
||||||
"checkpoint_identifier": "brand-new-bert-base-cased",
|
"checkpoint_identifier": "brand-new-bert-base-cased",
|
||||||
"tokenizer_type": "Based on BERT",
|
"tokenizer_type": "Based on BERT",
|
||||||
"generate_tensorflow_and_pytorch": "PyTorch"
|
"generate_tensorflow_and_pytorch": "PyTorch",
|
||||||
|
"is_encoder_decoder_model": "False"
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -0,0 +1,11 @@
|
|||||||
|
{
|
||||||
|
"modelname": "NewENCDEC",
|
||||||
|
"uppercase_modelname": "NEW_ENC_DEC",
|
||||||
|
"lowercase_modelname": "new_enc_dec",
|
||||||
|
"camelcase_modelname": "NewEncDec",
|
||||||
|
"authors": "The HuggingFace Team",
|
||||||
|
"checkpoint_identifier": "new-enc-dec-base",
|
||||||
|
"tokenizer_type": "Based on BART",
|
||||||
|
"generate_tensorflow_and_pytorch": "PyTorch",
|
||||||
|
"is_encoder_decoder_model": "True"
|
||||||
|
}
|
||||||
@@ -6,5 +6,6 @@
|
|||||||
"authors": "The HuggingFace Team",
|
"authors": "The HuggingFace Team",
|
||||||
"checkpoint_identifier": "bi-brand-new-bert-base-cased",
|
"checkpoint_identifier": "bi-brand-new-bert-base-cased",
|
||||||
"tokenizer_type": "Standalone",
|
"tokenizer_type": "Standalone",
|
||||||
"generate_tensorflow_and_pytorch": "PyTorch & TensorFlow"
|
"generate_tensorflow_and_pytorch": "PyTorch & TensorFlow",
|
||||||
|
"is_encoder_decoder_model": "False"
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -6,5 +6,6 @@
|
|||||||
"authors": "The HuggingFace Team",
|
"authors": "The HuggingFace Team",
|
||||||
"checkpoint_identifier": "brand-new-bert-base-cased",
|
"checkpoint_identifier": "brand-new-bert-base-cased",
|
||||||
"tokenizer_type": "Based on BERT",
|
"tokenizer_type": "Based on BERT",
|
||||||
"generate_tensorflow_and_pytorch": "TensorFlow"
|
"generate_tensorflow_and_pytorch": "TensorFlow",
|
||||||
|
"is_encoder_decoder_model": "False"
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -0,0 +1,11 @@
|
|||||||
|
{
|
||||||
|
"modelname": "NewTFENCDEC",
|
||||||
|
"uppercase_modelname": "NEW_TF_ENC_DEC",
|
||||||
|
"lowercase_modelname": "new_tf_enc_dec",
|
||||||
|
"camelcase_modelname": "NewTFEncDec",
|
||||||
|
"authors": "The HuggingFace Team",
|
||||||
|
"checkpoint_identifier": "new-tf-enc-dec-base",
|
||||||
|
"tokenizer_type": "Based on BART",
|
||||||
|
"generate_tensorflow_and_pytorch": "TensorFlow",
|
||||||
|
"is_encoder_decoder_model": "True"
|
||||||
|
}
|
||||||
@@ -94,7 +94,7 @@ class TFBartModelTester:
|
|||||||
self.batch_size = 1
|
self.batch_size = 1
|
||||||
|
|
||||||
# first forward pass
|
# first forward pass
|
||||||
outputs = model(input_ids, use_cache=True)
|
outputs = model(input_ids, attention_mask=attention_mask, use_cache=True)
|
||||||
|
|
||||||
output, past_key_values = outputs.to_tuple()
|
output, past_key_values = outputs.to_tuple()
|
||||||
past_key_values = past_key_values[1]
|
past_key_values = past_key_values[1]
|
||||||
|
|||||||
Reference in New Issue
Block a user