Model Templates for Seq2Seq (#9251)
* adapt cookie cutter * fix copy past statement * delete copy statements for now * remove unused import from template * make doc rst * correct config docstring * correct training * correct inputs processing tf enc dec * make style * adapt templates * clean tabs * correct tensor -> Tensor naming * correct indent * correct templates * fix the test * break lines to avoid > 119 * Apply suggestions from code review
This commit is contained in:
committed by
GitHub
parent
e6c1f1cad8
commit
cbe63949d7
2
.github/workflows/model-templates.yml
vendored
2
.github/workflows/model-templates.yml
vendored
@@ -40,6 +40,8 @@ jobs:
|
||||
transformers-cli add-new-model --testing --testing_file=templates/adding_a_new_model/tests/pt-encoder-bert-tokenizer.json --path=templates/adding_a_new_model
|
||||
transformers-cli add-new-model --testing --testing_file=templates/adding_a_new_model/tests/standalone.json --path=templates/adding_a_new_model
|
||||
transformers-cli add-new-model --testing --testing_file=templates/adding_a_new_model/tests/tf-encoder-bert-tokenizer.json --path=templates/adding_a_new_model
|
||||
transformers-cli add-new-model --testing --testing_file=templates/adding_a_new_model/tests/tf-seq-2-seq-bart-tokenizer.json --path=templates/adding_a_new_model
|
||||
transformers-cli add-new-model --testing --testing_file=templates/adding_a_new_model/tests/pt-seq-2-seq-bart-tokenizer.json --path=templates/adding_a_new_model
|
||||
make style
|
||||
python utils/check_table.py --fix_and_overwrite
|
||||
python utils/check_dummies.py --fix_and_overwrite
|
||||
|
||||
@@ -1077,7 +1077,7 @@ class TFBartModel(TFBartPretrainedModel):
|
||||
|
||||
decoder_outputs = self.decoder(
|
||||
inputs["decoder_input_ids"],
|
||||
attention_mask=decoder_attention_mask,
|
||||
attention_mask=inputs["decoder_attention_mask"],
|
||||
encoder_hidden_states=inputs["encoder_outputs"][0],
|
||||
encoder_attention_mask=inputs["attention_mask"],
|
||||
past_key_values=inputs["past_key_values"],
|
||||
@@ -1228,6 +1228,7 @@ class TFBartForConditionalGeneration(TFBartPretrainedModel):
|
||||
output_attentions=inputs["output_attentions"],
|
||||
output_hidden_states=inputs["output_hidden_states"],
|
||||
return_dict=inputs["return_dict"],
|
||||
training=inputs["training"],
|
||||
)
|
||||
lm_logits = self.model.shared(outputs[0], mode="linear")
|
||||
lm_logits = lm_logits + self.final_logits_bias
|
||||
|
||||
@@ -30,6 +30,7 @@ if is_tokenizers_available():
|
||||
from .tokenization_{{cookiecutter.lowercase_modelname}}_fast import {{cookiecutter.camelcase_modelname}}TokenizerFast
|
||||
|
||||
{%- if (cookiecutter.generate_tensorflow_and_pytorch == "PyTorch & TensorFlow" or cookiecutter.generate_tensorflow_and_pytorch == "PyTorch") %}
|
||||
{% if cookiecutter.is_encoder_decoder_model == "False" %}
|
||||
if is_torch_available():
|
||||
from .modeling_{{cookiecutter.lowercase_modelname}} import (
|
||||
{{cookiecutter.uppercase_modelname}}_PRETRAINED_MODEL_ARCHIVE_LIST,
|
||||
@@ -44,8 +45,20 @@ if is_torch_available():
|
||||
{{cookiecutter.camelcase_modelname}}PreTrainedModel,
|
||||
load_tf_weights_in_{{cookiecutter.lowercase_modelname}},
|
||||
)
|
||||
{% else %}
|
||||
if is_torch_available():
|
||||
from .modeling_{{cookiecutter.lowercase_modelname}} import (
|
||||
{{cookiecutter.uppercase_modelname}}_PRETRAINED_MODEL_ARCHIVE_LIST,
|
||||
{{cookiecutter.camelcase_modelname}}ForConditionalGeneration,
|
||||
{{cookiecutter.camelcase_modelname}}ForQuestionAnswering,
|
||||
{{cookiecutter.camelcase_modelname}}ForSequenceClassification,
|
||||
{{cookiecutter.camelcase_modelname}}Model,
|
||||
{{cookiecutter.camelcase_modelname}}PreTrainedModel,
|
||||
)
|
||||
{% endif %}
|
||||
{% endif %}
|
||||
{%- if (cookiecutter.generate_tensorflow_and_pytorch == "PyTorch & TensorFlow" or cookiecutter.generate_tensorflow_and_pytorch == "TensorFlow") %}
|
||||
{% if cookiecutter.is_encoder_decoder_model == "False" %}
|
||||
if is_tf_available():
|
||||
from .modeling_tf_{{cookiecutter.lowercase_modelname}} import (
|
||||
TF_{{cookiecutter.uppercase_modelname}}_PRETRAINED_MODEL_ARCHIVE_LIST,
|
||||
@@ -59,4 +72,12 @@ if is_tf_available():
|
||||
TF{{cookiecutter.camelcase_modelname}}Model,
|
||||
TF{{cookiecutter.camelcase_modelname}}PreTrainedModel,
|
||||
)
|
||||
{% else %}
|
||||
if is_tf_available():
|
||||
from .modeling_tf_{{cookiecutter.lowercase_modelname}} import (
|
||||
TF{{cookiecutter.camelcase_modelname}}ForConditionalGeneration,
|
||||
TF{{cookiecutter.camelcase_modelname}}Model,
|
||||
TF{{cookiecutter.camelcase_modelname}}PreTrainedModel,
|
||||
)
|
||||
{% endif %}
|
||||
{% endif %}
|
||||
@@ -6,5 +6,6 @@
|
||||
"authors": "{{cookiecutter.authors}}",
|
||||
"checkpoint_identifier": "{{cookiecutter.checkpoint_identifier}}",
|
||||
"tokenizer_type": "{{cookiecutter.tokenizer_type}}",
|
||||
"generate_tensorflow_and_pytorch": "{{cookiecutter.generate_tensorflow_and_pytorch}}"
|
||||
"generate_tensorflow_and_pytorch": "{{cookiecutter.generate_tensorflow_and_pytorch}}",
|
||||
"is_encoder_decoder_model": ["True", "False"]
|
||||
}
|
||||
|
||||
@@ -39,6 +39,7 @@ class {{cookiecutter.camelcase_modelname}}Config(PretrainedConfig):
|
||||
|
||||
|
||||
Args:
|
||||
{% if cookiecutter.is_encoder_decoder_model == "False" -%}
|
||||
vocab_size (:obj:`int`, `optional`, defaults to 30522):
|
||||
Vocabulary size of the {{cookiecutter.modelname}} model. Defines the number of different tokens that can be represented by the
|
||||
:obj:`inputs_ids` passed when calling :class:`~transformers.{{cookiecutter.camelcase_modelname}}Model` or
|
||||
@@ -70,6 +71,50 @@ class {{cookiecutter.camelcase_modelname}}Config(PretrainedConfig):
|
||||
The standard deviation of the truncated_normal_initializer for initializing all weight matrices.
|
||||
layer_norm_eps (:obj:`float`, `optional`, defaults to 1e-12):
|
||||
The epsilon used by the layer normalization layers.
|
||||
{% else -%}
|
||||
vocab_size (:obj:`int`, `optional`, defaults to 50265):
|
||||
Vocabulary size of the {{cookiecutter.modelname}} model. Defines the number of different tokens that can be represented by the
|
||||
:obj:`inputs_ids` passed when calling :class:`~transformers.{{cookiecutter.camelcase_modelname}}Model` or
|
||||
:class:`~transformers.TF{{cookiecutter.camelcase_modelname}}Model`.
|
||||
d_model (:obj:`int`, `optional`, defaults to 1024):
|
||||
Dimensionality of the layers and the pooler layer.
|
||||
encoder_layers (:obj:`int`, `optional`, defaults to 12):
|
||||
Number of encoder layers.
|
||||
decoder_layers (:obj:`int`, `optional`, defaults to 12):
|
||||
Number of decoder layers.
|
||||
encoder_attention_heads (:obj:`int`, `optional`, defaults to 16):
|
||||
Number of attention heads for each attention layer in the Transformer encoder.
|
||||
decoder_attention_heads (:obj:`int`, `optional`, defaults to 16):
|
||||
Number of attention heads for each attention layer in the Transformer decoder.
|
||||
decoder_ffn_dim (:obj:`int`, `optional`, defaults to 4096):
|
||||
Dimensionality of the "intermediate" (often named feed-forward) layer in decoder.
|
||||
encoder_ffn_dim (:obj:`int`, `optional`, defaults to 4096):
|
||||
Dimensionality of the "intermediate" (often named feed-forward) layer in decoder.
|
||||
activation_function (:obj:`str` or :obj:`function`, `optional`, defaults to :obj:`"gelu"`):
|
||||
The non-linear activation function (function or string) in the encoder and pooler. If string,
|
||||
:obj:`"gelu"`, :obj:`"relu"`, :obj:`"silu"` and :obj:`"gelu_new"` are supported.
|
||||
dropout (:obj:`float`, `optional`, defaults to 0.1):
|
||||
The dropout probability for all fully connected layers in the embeddings, encoder, and pooler.
|
||||
attention_dropout (:obj:`float`, `optional`, defaults to 0.0):
|
||||
The dropout ratio for the attention probabilities.
|
||||
activation_dropout (:obj:`float`, `optional`, defaults to 0.0):
|
||||
The dropout ratio for activations inside the fully connected layer.
|
||||
classifier_dropout (:obj:`float`, `optional`, defaults to 0.0):
|
||||
The dropout ratio for classifier.
|
||||
max_position_embeddings (:obj:`int`, `optional`, defaults to 1024):
|
||||
The maximum sequence length that this model might ever be used with. Typically set this to something large
|
||||
just in case (e.g., 512 or 1024 or 2048).
|
||||
init_std (:obj:`float`, `optional`, defaults to 0.02):
|
||||
The standard deviation of the truncated_normal_initializer for initializing all weight matrices.
|
||||
encoder_layerdrop: (:obj:`float`, `optional`, defaults to 0.0):
|
||||
The LayerDrop probability for the encoder. See the `LayerDrop paper <see
|
||||
https://arxiv.org/abs/1909.11556>`__ for more details.
|
||||
decoder_layerdrop: (:obj:`float`, `optional`, defaults to 0.0):
|
||||
The LayerDrop probability for the decoder. See the `LayerDrop paper <see
|
||||
https://arxiv.org/abs/1909.11556>`__ for more details.
|
||||
use_cache (:obj:`bool`, `optional`, defaults to :obj:`True`):
|
||||
Whether or not the model should return the last key/values attentions (not used by all models).
|
||||
{% endif -%}
|
||||
|
||||
Example::
|
||||
|
||||
@@ -88,9 +133,9 @@ class {{cookiecutter.camelcase_modelname}}Config(PretrainedConfig):
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
{% if cookiecutter.is_encoder_decoder_model == "False" -%}
|
||||
vocab_size=30522,
|
||||
hidden_size=768,
|
||||
is_encoder_decoder=False,
|
||||
num_hidden_layers=12,
|
||||
num_attention_heads=12,
|
||||
intermediate_size=3072,
|
||||
@@ -101,6 +146,29 @@ class {{cookiecutter.camelcase_modelname}}Config(PretrainedConfig):
|
||||
type_vocab_size=2,
|
||||
initializer_range=0.02,
|
||||
layer_norm_eps=1e-12,
|
||||
is_encoder_decoder=False,
|
||||
{% else -%}
|
||||
vocab_size=50265,
|
||||
max_position_embeddings=1024,
|
||||
encoder_layers=12,
|
||||
encoder_ffn_dim=4096,
|
||||
encoder_attention_heads=16,
|
||||
decoder_layers=12,
|
||||
decoder_ffn_dim=4096,
|
||||
decoder_attention_heads=16,
|
||||
encoder_layerdrop=0.0,
|
||||
decoder_layerdrop=0.0,
|
||||
use_cache=True,
|
||||
is_encoder_decoder=True,
|
||||
activation_function="gelu",
|
||||
d_model=1024,
|
||||
dropout=0.1,
|
||||
attention_dropout=0.0,
|
||||
activation_dropout=0.0,
|
||||
init_std=0.02,
|
||||
decoder_start_token_id=2,
|
||||
classifier_dropout=0.0,
|
||||
{% endif -%}
|
||||
pad_token_id=1,
|
||||
bos_token_id=0,
|
||||
eos_token_id=2,
|
||||
@@ -108,13 +176,19 @@ class {{cookiecutter.camelcase_modelname}}Config(PretrainedConfig):
|
||||
):
|
||||
super().__init__(
|
||||
pad_token_id=pad_token_id,
|
||||
is_encoder_decoder=is_encoder_decoder,
|
||||
bos_token_id=bos_token_id,
|
||||
eos_token_id=eos_token_id,
|
||||
{% if cookiecutter.is_encoder_decoder_model == "False" -%}
|
||||
{% else -%}
|
||||
is_encoder_decoder=is_encoder_decoder,
|
||||
decoder_start_token_id=decoder_start_token_id,
|
||||
{% endif -%}
|
||||
**kwargs
|
||||
)
|
||||
|
||||
self.vocab_size = vocab_size
|
||||
self.max_position_embeddings = max_position_embeddings
|
||||
{% if cookiecutter.is_encoder_decoder_model == "False" -%}
|
||||
self.hidden_size = hidden_size
|
||||
self.num_hidden_layers = num_hidden_layers
|
||||
self.num_attention_heads = num_attention_heads
|
||||
@@ -122,8 +196,36 @@ class {{cookiecutter.camelcase_modelname}}Config(PretrainedConfig):
|
||||
self.hidden_act = hidden_act
|
||||
self.hidden_dropout_prob = hidden_dropout_prob
|
||||
self.attention_probs_dropout_prob = attention_probs_dropout_prob
|
||||
self.max_position_embeddings = max_position_embeddings
|
||||
self.type_vocab_size = type_vocab_size
|
||||
self.initializer_range = initializer_range
|
||||
self.type_vocab_size = type_vocab_size
|
||||
self.layer_norm_eps = layer_norm_eps
|
||||
{% else -%}
|
||||
self.d_model = d_model
|
||||
self.encoder_ffn_dim = encoder_ffn_dim
|
||||
self.encoder_layers = encoder_layers
|
||||
self.encoder_attention_heads = encoder_attention_heads
|
||||
self.decoder_ffn_dim = decoder_ffn_dim
|
||||
self.decoder_layers = decoder_layers
|
||||
self.decoder_attention_heads = decoder_attention_heads
|
||||
self.dropout = dropout
|
||||
self.attention_dropout = attention_dropout
|
||||
self.activation_dropout = activation_dropout
|
||||
self.activation_function = activation_function
|
||||
self.init_std = init_std
|
||||
self.encoder_layerdrop = encoder_layerdrop
|
||||
self.decoder_layerdrop = decoder_layerdrop
|
||||
self.classifier_dropout = classifier_dropout
|
||||
self.use_cache = use_cache
|
||||
self.num_hidden_layers = encoder_layers
|
||||
{% endif -%}
|
||||
|
||||
{% if cookiecutter.is_encoder_decoder_model == "False" %}
|
||||
{%- else %}
|
||||
@property
|
||||
def num_attention_heads(self) -> int:
|
||||
return self.encoder_attention_heads
|
||||
|
||||
@property
|
||||
def hidden_size(self) -> int:
|
||||
return self.d_model
|
||||
{%- endif %}
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@@ -13,6 +13,7 @@
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
{% if cookiecutter.is_encoder_decoder_model == "False" %}
|
||||
|
||||
import unittest
|
||||
|
||||
@@ -318,3 +319,272 @@ class TF{{cookiecutter.camelcase_modelname}}ModelIntegrationTest(unittest.TestCa
|
||||
]
|
||||
)
|
||||
tf.debugging.assert_near(output[:, :3, :3], expected_slice, atol=1e-4)
|
||||
|
||||
{% else %}
|
||||
import unittest
|
||||
|
||||
from transformers import {{cookiecutter.camelcase_modelname}}Config, {{cookiecutter.camelcase_modelname}}Tokenizer, is_tf_available
|
||||
from transformers.testing_utils import require_sentencepiece, require_tokenizers, require_tf, slow
|
||||
|
||||
from .test_configuration_common import ConfigTester
|
||||
from .test_modeling_tf_common import TFModelTesterMixin, ids_tensor
|
||||
|
||||
|
||||
if is_tf_available():
|
||||
import tensorflow as tf
|
||||
|
||||
from transformers import TF{{cookiecutter.camelcase_modelname}}ForConditionalGeneration, TF{{cookiecutter.camelcase_modelname}}Model
|
||||
|
||||
|
||||
@require_tf
|
||||
class TF{{cookiecutter.camelcase_modelname}}ModelTester:
|
||||
config_cls = {{cookiecutter.camelcase_modelname}}Config
|
||||
config_updates = {}
|
||||
hidden_act = "gelu"
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
parent,
|
||||
batch_size=13,
|
||||
seq_length=7,
|
||||
is_training=True,
|
||||
use_labels=False,
|
||||
vocab_size=99,
|
||||
hidden_size=32,
|
||||
num_hidden_layers=5,
|
||||
num_attention_heads=4,
|
||||
intermediate_size=37,
|
||||
hidden_dropout_prob=0.1,
|
||||
attention_probs_dropout_prob=0.1,
|
||||
max_position_embeddings=20,
|
||||
eos_token_id=2,
|
||||
pad_token_id=1,
|
||||
bos_token_id=0,
|
||||
):
|
||||
self.parent = parent
|
||||
self.batch_size = batch_size
|
||||
self.seq_length = seq_length
|
||||
self.is_training = is_training
|
||||
self.use_labels = use_labels
|
||||
self.vocab_size = vocab_size
|
||||
self.hidden_size = hidden_size
|
||||
self.num_hidden_layers = num_hidden_layers
|
||||
self.num_attention_heads = num_attention_heads
|
||||
self.intermediate_size = intermediate_size
|
||||
|
||||
self.hidden_dropout_prob = hidden_dropout_prob
|
||||
self.attention_probs_dropout_prob = attention_probs_dropout_prob
|
||||
self.max_position_embeddings = max_position_embeddings
|
||||
self.eos_token_id = eos_token_id
|
||||
self.pad_token_id = pad_token_id
|
||||
self.bos_token_id = bos_token_id
|
||||
|
||||
def prepare_config_and_inputs_for_common(self):
|
||||
input_ids = ids_tensor([self.batch_size, self.seq_length - 1], self.vocab_size)
|
||||
eos_tensor = tf.expand_dims(tf.constant([self.eos_token_id] * self.batch_size), 1)
|
||||
input_ids = tf.concat([input_ids, eos_tensor], axis=1)
|
||||
|
||||
decoder_input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)
|
||||
|
||||
config = self.config_cls(
|
||||
vocab_size=self.vocab_size,
|
||||
d_model=self.hidden_size,
|
||||
encoder_layers=self.num_hidden_layers,
|
||||
decoder_layers=self.num_hidden_layers,
|
||||
encoder_attention_heads=self.num_attention_heads,
|
||||
decoder_attention_heads=self.num_attention_heads,
|
||||
encoder_ffn_dim=self.intermediate_size,
|
||||
decoder_ffn_dim=self.intermediate_size,
|
||||
dropout=self.hidden_dropout_prob,
|
||||
attention_dropout=self.attention_probs_dropout_prob,
|
||||
max_position_embeddings=self.max_position_embeddings,
|
||||
eos_token_ids=[2],
|
||||
bos_token_id=self.bos_token_id,
|
||||
pad_token_id=self.pad_token_id,
|
||||
decoder_start_token_id=self.pad_token_id,
|
||||
**self.config_updates,
|
||||
)
|
||||
inputs_dict = prepare_{{cookiecutter.lowercase_modelname}}_inputs_dict(config, input_ids, decoder_input_ids)
|
||||
return config, inputs_dict
|
||||
|
||||
def check_decoder_model_past_large_inputs(self, config, inputs_dict):
|
||||
model = TF{{cookiecutter.camelcase_modelname}}Model(config=config).get_decoder()
|
||||
input_ids = inputs_dict["input_ids"]
|
||||
|
||||
input_ids = input_ids[:1, :]
|
||||
attention_mask = inputs_dict["attention_mask"][:1, :]
|
||||
self.batch_size = 1
|
||||
|
||||
# first forward pass
|
||||
outputs = model(input_ids, attention_mask=attention_mask, use_cache=True)
|
||||
|
||||
output, past_key_values = outputs.to_tuple()
|
||||
past_key_values = past_key_values[1]
|
||||
|
||||
# create hypothetical next token and extent to next_input_ids
|
||||
next_tokens = ids_tensor((self.batch_size, 3), config.vocab_size)
|
||||
next_attn_mask = tf.cast(ids_tensor((self.batch_size, 3), 2), tf.int8)
|
||||
|
||||
# append to next input_ids and
|
||||
next_input_ids = tf.concat([input_ids, next_tokens], axis=-1)
|
||||
next_attention_mask = tf.concat([attention_mask, next_attn_mask], axis=-1)
|
||||
|
||||
output_from_no_past = model(next_input_ids, attention_mask=next_attention_mask)[0]
|
||||
output_from_past = model(next_tokens, attention_mask=next_attention_mask, past_key_values=past_key_values)[0]
|
||||
|
||||
self.parent.assertEqual(next_tokens.shape[1], output_from_past.shape[1])
|
||||
|
||||
# select random slice
|
||||
random_slice_idx = int(ids_tensor((1,), output_from_past.shape[-1]))
|
||||
output_from_no_past_slice = output_from_no_past[:, -3:, random_slice_idx]
|
||||
output_from_past_slice = output_from_past[:, :, random_slice_idx]
|
||||
|
||||
# test that outputs are equal for slice
|
||||
tf.debugging.assert_near(output_from_past_slice, output_from_no_past_slice, rtol=1e-3)
|
||||
|
||||
|
||||
def prepare_{{cookiecutter.lowercase_modelname}}_inputs_dict(
|
||||
config,
|
||||
input_ids,
|
||||
decoder_input_ids,
|
||||
attention_mask=None,
|
||||
decoder_attention_mask=None,
|
||||
):
|
||||
if attention_mask is None:
|
||||
attention_mask = tf.cast(tf.math.not_equal(input_ids, config.pad_token_id), tf.int8)
|
||||
if decoder_attention_mask is None:
|
||||
decoder_attention_mask = tf.cast(tf.math.not_equal(decoder_input_ids, config.pad_token_id), tf.int8)
|
||||
return {
|
||||
"input_ids": input_ids,
|
||||
"decoder_input_ids": decoder_input_ids,
|
||||
"attention_mask": attention_mask,
|
||||
"decoder_attention_mask": decoder_attention_mask,
|
||||
}
|
||||
|
||||
|
||||
@require_tf
|
||||
class TF{{cookiecutter.camelcase_modelname}}ModelTest(TFModelTesterMixin, unittest.TestCase):
|
||||
all_model_classes = (TF{{cookiecutter.camelcase_modelname}}ForConditionalGeneration, TF{{cookiecutter.camelcase_modelname}}Model) if is_tf_available() else ()
|
||||
all_generative_model_classes = (TF{{cookiecutter.camelcase_modelname}}ForConditionalGeneration,) if is_tf_available() else ()
|
||||
is_encoder_decoder = True
|
||||
test_pruning = False
|
||||
|
||||
def setUp(self):
|
||||
self.model_tester = TF{{cookiecutter.camelcase_modelname}}ModelTester(self)
|
||||
self.config_tester = ConfigTester(self, config_class={{cookiecutter.camelcase_modelname}}Config)
|
||||
|
||||
def test_config(self):
|
||||
self.config_tester.run_common_tests()
|
||||
|
||||
def test_decoder_model_past_large_inputs(self):
|
||||
config_and_inputs = self.model_tester.prepare_config_and_inputs_for_common()
|
||||
self.model_tester.check_decoder_model_past_large_inputs(*config_and_inputs)
|
||||
|
||||
def test_model_common_attributes(self):
|
||||
config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
|
||||
|
||||
for model_class in self.all_model_classes:
|
||||
model = model_class(config)
|
||||
assert isinstance(model.get_input_embeddings(), tf.keras.layers.Layer)
|
||||
x = model.get_output_layer_with_bias()
|
||||
assert x is None
|
||||
name = model.get_prefix_bias_name()
|
||||
assert name is None
|
||||
|
||||
|
||||
def _assert_tensors_equal(a, b, atol=1e-12, prefix=""):
|
||||
"""If tensors not close, or a and b arent both tensors, raise a nice Assertion error."""
|
||||
if a is None and b is None:
|
||||
return True
|
||||
try:
|
||||
if tf.debugging.assert_near(a, b, atol=atol):
|
||||
return True
|
||||
raise
|
||||
except Exception:
|
||||
msg = "{} != {}".format(a, b)
|
||||
if prefix:
|
||||
msg = prefix + ": " + msg
|
||||
raise AssertionError(msg)
|
||||
|
||||
|
||||
def _long_tensor(tok_lst):
|
||||
return tf.constant(tok_lst, dtype=tf.int32)
|
||||
|
||||
|
||||
TOLERANCE = 1e-4
|
||||
|
||||
|
||||
@slow
|
||||
@require_sentencepiece
|
||||
@require_tokenizers
|
||||
@require_tf
|
||||
class TF{{cookiecutter.camelcase_modelname}}ModelIntegrationTest(unittest.TestCase):
|
||||
def test_inference_no_head(self):
|
||||
model = TF{{cookiecutter.camelcase_modelname}}Model.from_pretrained('{{cookiecutter.checkpoint_identifier}}')
|
||||
# change to intended input here
|
||||
input_ids = _long_tensor([[0, 31414, 232, 328, 740, 1140, 12695, 69, 46078, 1588, 2]])
|
||||
decoder_input_ids = _long_tensor([[0, 31414, 232, 328, 740, 1140, 12695, 69, 46078, 1588, 2]])
|
||||
inputs_dict = prepare_{{cookiecutter.lowercase_modelname}}_inputs_dict(model.config, input_ids, decoder_input_ids)
|
||||
output = model(**inputs_dict)[0]
|
||||
expected_shape = (1, 11, 1024)
|
||||
self.assertEqual(output.shape, expected_shape)
|
||||
# change to expected output here
|
||||
expected_slice = tf.Tensor(
|
||||
[[0.7144, 0.8143, -1.2813], [0.7144, 0.8143, -1.2813], [-0.0467, 2.5911, -2.1845]],
|
||||
)
|
||||
self.assertTrue(tf.debugging.assert_near(output[:, :3, :3], expected_slice, atol=TOLERANCE))
|
||||
|
||||
def test_inference_with_head(self):
|
||||
model = TF{{cookiecutter.camelcase_modelname}}ForConditionalGeneration.from_pretrained('{{cookiecutter.checkpoint_identifier}}')
|
||||
# change to intended input here
|
||||
input_ids = _long_tensor([[0, 31414, 232, 328, 740, 1140, 12695, 69, 46078, 1588, 2]])
|
||||
decoder_input_ids = _long_tensor([[0, 31414, 232, 328, 740, 1140, 12695, 69, 46078, 1588, 2]])
|
||||
inputs_dict = prepare_{{cookiecutter.lowercase_modelname}}_inputs_dict(model.config, input_ids, decoder_input_ids)
|
||||
output = model(**inputs_dict)[0]
|
||||
expected_shape = (1, 11, 1024)
|
||||
self.assertEqual(output.shape, expected_shape)
|
||||
# change to expected output here
|
||||
expected_slice = tf.Tensor(
|
||||
[[0.7144, 0.8143, -1.2813], [0.7144, 0.8143, -1.2813], [-0.0467, 2.5911, -2.1845]],
|
||||
)
|
||||
self.assertTrue(tf.debugging.assert_near(output[:, :3, :3], expected_slice, atol=TOLERANCE))
|
||||
|
||||
def test_seq_to_seq_generation(self):
|
||||
hf = TF{{cookiecutter.camelcase_modelname}}ForConditionalGeneration.from_pretrained('{{cookiecutter.checkpoint_identifier}}')
|
||||
tok = {{cookiecutter.camelcase_modelname}}Tokenizer.from_pretrained('{{cookiecutter.checkpoint_identifier}}')
|
||||
|
||||
batch_input = [
|
||||
# string 1,
|
||||
# string 2,
|
||||
# string 3,
|
||||
# string 4,
|
||||
]
|
||||
|
||||
# The below article tests that we don't add any hypotheses outside of the top n_beams
|
||||
dct = tok.batch_encode_plus(
|
||||
batch_input,
|
||||
max_length=512,
|
||||
padding="max_length",
|
||||
truncation_strategy="only_first",
|
||||
truncation=True,
|
||||
return_tensors="tf",
|
||||
)
|
||||
|
||||
hypotheses_batch = hf.generate(
|
||||
input_ids=dct["input_ids"],
|
||||
attention_mask=dct["attention_mask"],
|
||||
num_beams=2,
|
||||
)
|
||||
|
||||
EXPECTED = [
|
||||
# here expected 1,
|
||||
# here expected 2,
|
||||
# here expected 3,
|
||||
# here expected 4,
|
||||
]
|
||||
|
||||
generated = tok.batch_decode(
|
||||
hypotheses_batch.tolist(), clean_up_tokenization_spaces=True, skip_special_tokens=True
|
||||
)
|
||||
assert generated == EXPECTED
|
||||
{%- endif %}
|
||||
|
||||
@@ -15,6 +15,7 @@
|
||||
""" Testing suite for the PyTorch {{cookiecutter.modelname}} model. """
|
||||
|
||||
|
||||
{% if cookiecutter.is_encoder_decoder_model == "False" -%}
|
||||
import unittest
|
||||
|
||||
from tests.test_modeling_common import floats_tensor
|
||||
@@ -406,3 +407,395 @@ class {{cookiecutter.camelcase_modelname}}ModelIntegrationTest(unittest.TestCase
|
||||
)
|
||||
|
||||
self.assertTrue(torch.allclose(output[:, :3, :3], expected_slice, atol=1e-4))
|
||||
|
||||
|
||||
{% else -%}
|
||||
import copy
|
||||
import tempfile
|
||||
import unittest
|
||||
|
||||
import timeout_decorator # noqa
|
||||
|
||||
from transformers import is_torch_available
|
||||
from transformers.file_utils import cached_property
|
||||
from transformers.testing_utils import require_sentencepiece, require_tokenizers, require_torch, slow, torch_device
|
||||
|
||||
from .test_configuration_common import ConfigTester
|
||||
from .test_generation_utils import GenerationTesterMixin
|
||||
from .test_modeling_common import ModelTesterMixin, ids_tensor
|
||||
|
||||
|
||||
if is_torch_available():
|
||||
import torch
|
||||
|
||||
from transformers import (
|
||||
{{cookiecutter.camelcase_modelname}}Config,
|
||||
{{cookiecutter.camelcase_modelname}}ForConditionalGeneration,
|
||||
{{cookiecutter.camelcase_modelname}}ForQuestionAnswering,
|
||||
{{cookiecutter.camelcase_modelname}}ForSequenceClassification,
|
||||
{{cookiecutter.camelcase_modelname}}Model,
|
||||
{{cookiecutter.camelcase_modelname}}Tokenizer,
|
||||
)
|
||||
from transformers.models.{{cookiecutter.lowercase_modelname}}.modeling_{{cookiecutter.lowercase_modelname}} import (
|
||||
{{cookiecutter.camelcase_modelname}}Decoder,
|
||||
{{cookiecutter.camelcase_modelname}}Encoder,
|
||||
)
|
||||
|
||||
|
||||
def prepare_{{cookiecutter.lowercase_modelname}}_inputs_dict(
|
||||
config,
|
||||
input_ids,
|
||||
decoder_input_ids,
|
||||
attention_mask=None,
|
||||
decoder_attention_mask=None,
|
||||
):
|
||||
if attention_mask is None:
|
||||
attention_mask = input_ids.ne(config.pad_token_id)
|
||||
if decoder_attention_mask is None:
|
||||
decoder_attention_mask = decoder_input_ids.ne(config.pad_token_id)
|
||||
return {
|
||||
"input_ids": input_ids,
|
||||
"decoder_input_ids": decoder_input_ids,
|
||||
"attention_mask": attention_mask,
|
||||
"decoder_attention_mask": attention_mask,
|
||||
}
|
||||
|
||||
|
||||
@require_torch
|
||||
class {{cookiecutter.camelcase_modelname}}ModelTester:
|
||||
def __init__(
|
||||
self,
|
||||
parent,
|
||||
batch_size=13,
|
||||
seq_length=7,
|
||||
is_training=True,
|
||||
use_labels=False,
|
||||
vocab_size=99,
|
||||
hidden_size=16,
|
||||
num_hidden_layers=2,
|
||||
num_attention_heads=4,
|
||||
intermediate_size=4,
|
||||
hidden_act="gelu",
|
||||
hidden_dropout_prob=0.1,
|
||||
attention_probs_dropout_prob=0.1,
|
||||
max_position_embeddings=20,
|
||||
eos_token_id=2,
|
||||
pad_token_id=1,
|
||||
bos_token_id=0,
|
||||
):
|
||||
self.parent = parent
|
||||
self.batch_size = batch_size
|
||||
self.seq_length = seq_length
|
||||
self.is_training = is_training
|
||||
self.use_labels = use_labels
|
||||
self.vocab_size = vocab_size
|
||||
self.hidden_size = hidden_size
|
||||
self.num_hidden_layers = num_hidden_layers
|
||||
self.num_attention_heads = num_attention_heads
|
||||
self.intermediate_size = intermediate_size
|
||||
self.hidden_act = hidden_act
|
||||
self.hidden_dropout_prob = hidden_dropout_prob
|
||||
self.attention_probs_dropout_prob = attention_probs_dropout_prob
|
||||
self.max_position_embeddings = max_position_embeddings
|
||||
self.eos_token_id = eos_token_id
|
||||
self.pad_token_id = pad_token_id
|
||||
self.bos_token_id = bos_token_id
|
||||
|
||||
def prepare_config_and_inputs(self):
|
||||
input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)
|
||||
input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size).clamp(
|
||||
3,
|
||||
)
|
||||
input_ids[:, -1] = self.eos_token_id # Eos Token
|
||||
|
||||
decoder_input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)
|
||||
|
||||
config = {{cookiecutter.camelcase_modelname}}Config(
|
||||
vocab_size=self.vocab_size,
|
||||
d_model=self.hidden_size,
|
||||
encoder_layers=self.num_hidden_layers,
|
||||
decoder_layers=self.num_hidden_layers,
|
||||
encoder_attention_heads=self.num_attention_heads,
|
||||
decoder_attention_heads=self.num_attention_heads,
|
||||
encoder_ffn_dim=self.intermediate_size,
|
||||
decoder_ffn_dim=self.intermediate_size,
|
||||
dropout=self.hidden_dropout_prob,
|
||||
attention_dropout=self.attention_probs_dropout_prob,
|
||||
max_position_embeddings=self.max_position_embeddings,
|
||||
eos_token_id=self.eos_token_id,
|
||||
bos_token_id=self.bos_token_id,
|
||||
pad_token_id=self.pad_token_id,
|
||||
)
|
||||
inputs_dict = prepare_{{cookiecutter.lowercase_modelname}}_inputs_dict(config, input_ids, decoder_input_ids)
|
||||
return config, inputs_dict
|
||||
|
||||
def prepare_config_and_inputs_for_common(self):
|
||||
config, inputs_dict = self.prepare_config_and_inputs()
|
||||
return config, inputs_dict
|
||||
|
||||
def create_and_check_decoder_model_past_large_inputs(self, config, inputs_dict):
|
||||
model = {{cookiecutter.camelcase_modelname}}Model(config=config).get_decoder().to(torch_device).eval()
|
||||
input_ids = inputs_dict["input_ids"]
|
||||
attention_mask = inputs_dict["attention_mask"]
|
||||
|
||||
# first forward pass
|
||||
outputs = model(input_ids, attention_mask=attention_mask, use_cache=True)
|
||||
|
||||
output, past_key_values = outputs.to_tuple()
|
||||
|
||||
# create hypothetical multiple next token and extent to next_input_ids
|
||||
next_tokens = ids_tensor((self.batch_size, 3), config.vocab_size)
|
||||
next_attn_mask = ids_tensor((self.batch_size, 3), 2)
|
||||
|
||||
# append to next input_ids and
|
||||
next_input_ids = torch.cat([input_ids, next_tokens], dim=-1)
|
||||
next_attention_mask = torch.cat([attention_mask, next_attn_mask], dim=-1)
|
||||
|
||||
output_from_no_past = model(next_input_ids, attention_mask=next_attention_mask)["last_hidden_state"]
|
||||
output_from_past = model(next_tokens, attention_mask=next_attention_mask, past_key_values=past_key_values)["last_hidden_state"]
|
||||
|
||||
# select random slice
|
||||
random_slice_idx = ids_tensor((1,), output_from_past.shape[-1]).item()
|
||||
output_from_no_past_slice = output_from_no_past[:, -3:, random_slice_idx].detach()
|
||||
output_from_past_slice = output_from_past[:, :, random_slice_idx].detach()
|
||||
|
||||
self.parent.assertTrue(output_from_past_slice.shape[1] == next_tokens.shape[1])
|
||||
|
||||
# test that outputs are equal for slice
|
||||
self.parent.assertTrue(torch.allclose(output_from_past_slice, output_from_no_past_slice, atol=1e-2))
|
||||
|
||||
def check_encoder_decoder_model_standalone(self, config, inputs_dict):
|
||||
model = {{cookiecutter.camelcase_modelname}}Model(config=config).to(torch_device).eval()
|
||||
outputs = model(**inputs_dict)
|
||||
|
||||
encoder_last_hidden_state = outputs.encoder_last_hidden_state
|
||||
last_hidden_state = outputs.last_hidden_state
|
||||
|
||||
with tempfile.TemporaryDirectory() as tmpdirname:
|
||||
encoder = model.get_encoder()
|
||||
encoder.save_pretrained(tmpdirname)
|
||||
encoder = {{cookiecutter.camelcase_modelname}}Encoder.from_pretrained(tmpdirname).to(torch_device)
|
||||
|
||||
encoder_last_hidden_state_2 = encoder(inputs_dict["input_ids"], attention_mask=inputs_dict["attention_mask"])[
|
||||
0
|
||||
]
|
||||
|
||||
self.parent.assertTrue((encoder_last_hidden_state_2 - encoder_last_hidden_state).abs().max().item() < 1e-3)
|
||||
|
||||
with tempfile.TemporaryDirectory() as tmpdirname:
|
||||
decoder = model.get_decoder()
|
||||
decoder.save_pretrained(tmpdirname)
|
||||
decoder = {{cookiecutter.camelcase_modelname}}Decoder.from_pretrained(tmpdirname).to(torch_device)
|
||||
|
||||
last_hidden_state_2 = decoder(
|
||||
input_ids=inputs_dict["decoder_input_ids"],
|
||||
attention_mask=inputs_dict["decoder_attention_mask"],
|
||||
encoder_hidden_states=encoder_last_hidden_state,
|
||||
encoder_attention_mask=inputs_dict["attention_mask"],
|
||||
)[0]
|
||||
|
||||
self.parent.assertTrue((last_hidden_state_2 - last_hidden_state).abs().max().item() < 1e-3)
|
||||
|
||||
|
||||
@require_torch
|
||||
class {{cookiecutter.camelcase_modelname}}ModelTest(ModelTesterMixin, GenerationTesterMixin, unittest.TestCase):
|
||||
all_model_classes = (
|
||||
({{cookiecutter.camelcase_modelname}}Model, {{cookiecutter.camelcase_modelname}}ForConditionalGeneration, {{cookiecutter.camelcase_modelname}}ForSequenceClassification, {{cookiecutter.camelcase_modelname}}ForQuestionAnswering)
|
||||
if is_torch_available()
|
||||
else ()
|
||||
)
|
||||
all_generative_model_classes = ({{cookiecutter.camelcase_modelname}}ForConditionalGeneration,) if is_torch_available() else ()
|
||||
is_encoder_decoder = True
|
||||
test_pruning = False
|
||||
test_head_masking = False
|
||||
test_missing_keys = False
|
||||
|
||||
def setUp(self):
|
||||
self.model_tester = {{cookiecutter.camelcase_modelname}}ModelTester(self)
|
||||
self.config_tester = ConfigTester(self, config_class={{cookiecutter.camelcase_modelname}}Config)
|
||||
|
||||
def test_config(self):
|
||||
self.config_tester.run_common_tests()
|
||||
|
||||
def test_initialization_more(self):
|
||||
config, inputs_dict = self.model_tester.prepare_config_and_inputs()
|
||||
model = {{cookiecutter.camelcase_modelname}}Model(config)
|
||||
model.to(torch_device)
|
||||
model.eval()
|
||||
# test init
|
||||
self.assertTrue((model.encoder.embed_tokens.weight == model.shared.weight).all().item())
|
||||
|
||||
def _check_var(module):
|
||||
"""Check that we initialized various parameters from N(0, config.init_std)."""
|
||||
self.assertAlmostEqual(torch.std(module.weight).item(), config.init_std, 2)
|
||||
|
||||
_check_var(model.encoder.embed_tokens)
|
||||
_check_var(model.encoder.layers[0].self_attn.k_proj)
|
||||
_check_var(model.encoder.layers[0].fc1)
|
||||
_check_var(model.encoder.embed_positions)
|
||||
|
||||
def test_save_load_strict(self):
|
||||
config, inputs_dict = self.model_tester.prepare_config_and_inputs()
|
||||
for model_class in self.all_model_classes:
|
||||
model = model_class(config)
|
||||
|
||||
with tempfile.TemporaryDirectory() as tmpdirname:
|
||||
model.save_pretrained(tmpdirname)
|
||||
model2, info = model_class.from_pretrained(tmpdirname, output_loading_info=True)
|
||||
self.assertEqual(info["missing_keys"], [])
|
||||
|
||||
def test_decoder_model_past_with_large_inputs(self):
|
||||
config_and_inputs = self.model_tester.prepare_config_and_inputs()
|
||||
self.model_tester.create_and_check_decoder_model_past_large_inputs(*config_and_inputs)
|
||||
|
||||
def test_encoder_decoder_model_standalone(self):
|
||||
config_and_inputs = self.model_tester.prepare_config_and_inputs_for_common()
|
||||
self.model_tester.check_encoder_decoder_model_standalone(*config_and_inputs)
|
||||
|
||||
# {{cookiecutter.camelcase_modelname}}ForSequenceClassification does not support inputs_embeds
|
||||
def test_inputs_embeds(self):
|
||||
config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
|
||||
|
||||
for model_class in ({{cookiecutter.camelcase_modelname}}Model, {{cookiecutter.camelcase_modelname}}ForConditionalGeneration, {{cookiecutter.camelcase_modelname}}ForQuestionAnswering):
|
||||
model = model_class(config)
|
||||
model.to(torch_device)
|
||||
model.eval()
|
||||
|
||||
inputs = copy.deepcopy(self._prepare_for_class(inputs_dict, model_class))
|
||||
|
||||
if not self.is_encoder_decoder:
|
||||
input_ids = inputs["input_ids"]
|
||||
del inputs["input_ids"]
|
||||
else:
|
||||
encoder_input_ids = inputs["input_ids"]
|
||||
decoder_input_ids = inputs.get("decoder_input_ids", encoder_input_ids)
|
||||
del inputs["input_ids"]
|
||||
inputs.pop("decoder_input_ids", None)
|
||||
|
||||
wte = model.get_input_embeddings()
|
||||
if not self.is_encoder_decoder:
|
||||
inputs["inputs_embeds"] = wte(input_ids)
|
||||
else:
|
||||
inputs["inputs_embeds"] = wte(encoder_input_ids)
|
||||
inputs["decoder_inputs_embeds"] = wte(decoder_input_ids)
|
||||
|
||||
with torch.no_grad():
|
||||
model(**inputs)[0]
|
||||
|
||||
def test_generate_fp16(self):
|
||||
config, input_dict = self.model_tester.prepare_config_and_inputs()
|
||||
input_ids = input_dict["input_ids"]
|
||||
attention_mask = input_ids.ne(1).to(torch_device)
|
||||
model = {{cookiecutter.camelcase_modelname}}ForConditionalGeneration(config).eval().to(torch_device)
|
||||
if torch_device == "cuda":
|
||||
model.half()
|
||||
model.generate(input_ids, attention_mask=attention_mask)
|
||||
model.generate(num_beams=4, do_sample=True, early_stopping=False, num_return_sequences=3)
|
||||
|
||||
|
||||
def assert_tensors_close(a, b, atol=1e-12, prefix=""):
|
||||
"""If tensors have different shapes, different values or a and b are not both tensors, raise a nice Assertion error."""
|
||||
if a is None and b is None:
|
||||
return True
|
||||
try:
|
||||
if torch.allclose(a, b, atol=atol):
|
||||
return True
|
||||
raise
|
||||
except Exception:
|
||||
pct_different = (torch.gt((a - b).abs(), atol)).float().mean().item()
|
||||
if a.numel() > 100:
|
||||
msg = f"tensor values are {pct_different:.1%} percent different."
|
||||
else:
|
||||
msg = f"{a} != {b}"
|
||||
if prefix:
|
||||
msg = prefix + ": " + msg
|
||||
raise AssertionError(msg)
|
||||
|
||||
|
||||
def _long_tensor(tok_lst):
|
||||
return torch.tensor(tok_lst, dtype=torch.long, device=torch_device)
|
||||
|
||||
|
||||
TOLERANCE = 1e-4
|
||||
|
||||
|
||||
@require_torch
|
||||
@require_sentencepiece
|
||||
@require_tokenizers
|
||||
@slow
|
||||
class {{cookiecutter.camelcase_modelname}}ModelIntegrationTests(unittest.TestCase):
|
||||
@cached_property
|
||||
def default_tokenizer(self):
|
||||
return {{cookiecutter.camelcase_modelname}}Tokenizer.from_pretrained('{{cookiecutter.checkpoint_identifier}}')
|
||||
|
||||
def test_inference_no_head(self):
|
||||
model = {{cookiecutter.camelcase_modelname}}Model.from_pretrained('{{cookiecutter.checkpoint_identifier}}').to(torch_device)
|
||||
input_ids = _long_tensor([[0, 31414, 232, 328, 740, 1140, 12695, 69, 46078, 1588, 2]])
|
||||
decoder_input_ids = _long_tensor([[2, 0, 31414, 232, 328, 740, 1140, 12695, 69, 46078, 1588]])
|
||||
inputs_dict = prepare_{{cookiecutter.lowercase_modelname}}_inputs_dict(model.config, input_ids, decoder_input_ids)
|
||||
with torch.no_grad():
|
||||
output = model(**inputs_dict)[0]
|
||||
expected_shape = torch.Size((1, 11, 1024))
|
||||
self.assertEqual(output.shape, expected_shape)
|
||||
# change to expected output here
|
||||
expected_slice = torch.tensor(
|
||||
[[0.7144, 0.8143, -1.2813], [0.7144, 0.8143, -1.2813], [-0.0467, 2.5911, -2.1845]], device=torch_device
|
||||
)
|
||||
self.assertTrue(torch.allclose(output[:, :3, :3], expected_slice, atol=TOLERANCE))
|
||||
|
||||
def test_inference_head(self):
|
||||
model = {{cookiecutter.camelcase_modelname}}ForConditionalGeneration.from_pretrained('{{cookiecutter.checkpoint_identifier}}').to(torch_device)
|
||||
|
||||
# change to intended input
|
||||
input_ids = _long_tensor([[0, 31414, 232, 328, 740, 1140, 12695, 69, 46078, 1588, 2]])
|
||||
decoder_input_ids = _long_tensor([[0, 31414, 232, 328, 740, 1140, 12695, 69, 46078, 1588, 2]])
|
||||
inputs_dict = prepare_{{cookiecutter.lowercase_modelname}}_inputs_dict(model.config, input_ids, decoder_input_ids)
|
||||
with torch.no_grad():
|
||||
output = model(**inputs_dict)[0]
|
||||
expected_shape = torch.Size((1, 11, model.config.vocab_size))
|
||||
self.assertEqual(output.shape, expected_shape)
|
||||
# change to expected output here
|
||||
expected_slice = torch.tensor(
|
||||
[[0.7144, 0.8143, -1.2813], [0.7144, 0.8143, -1.2813], [-0.0467, 2.5911, -2.1845]], device=torch_device
|
||||
)
|
||||
self.assertTrue(torch.allclose(output[:, :3, :3], expected_slice, atol=TOLERANCE))
|
||||
|
||||
def test_seq_to_seq_generation(self):
|
||||
hf = {{cookiecutter.camelcase_modelname}}ForConditionalGeneration.from_pretrained('{{cookiecutter.checkpoint_identifier}}').to(torch_device)
|
||||
tok = {{cookiecutter.camelcase_modelname}}Tokenizer.from_pretrained('{{cookiecutter.checkpoint_identifier}}')
|
||||
|
||||
batch_input = [
|
||||
# string 1,
|
||||
# string 2,
|
||||
# string 3,
|
||||
# string 4,
|
||||
]
|
||||
|
||||
# The below article tests that we don't add any hypotheses outside of the top n_beams
|
||||
dct = tok.batch_encode_plus(
|
||||
batch_input,
|
||||
max_length=512,
|
||||
padding="max_length",
|
||||
truncation_strategy="only_first",
|
||||
truncation=True,
|
||||
return_tensors="pt",
|
||||
)
|
||||
|
||||
hypotheses_batch = hf.generate(
|
||||
input_ids=dct["input_ids"].to(torch_device),
|
||||
attention_mask=dct["attention_mask"].to(torch_device),
|
||||
num_beams=2,
|
||||
)
|
||||
|
||||
EXPECTED = [
|
||||
# here expected 1,
|
||||
# here expected 2,
|
||||
# here expected 3,
|
||||
# here expected 4,
|
||||
]
|
||||
|
||||
generated = tok.batch_decode(
|
||||
hypotheses_batch.tolist(), clean_up_tokenization_spaces=True, skip_special_tokens=True
|
||||
)
|
||||
assert generated == EXPECTED
|
||||
{% endif -%}
|
||||
|
||||
@@ -28,6 +28,7 @@
|
||||
# To replace in: "src/transformers/__init__.py"
|
||||
# Below: "if is_torch_available():" if generating PyTorch
|
||||
# Replace with:
|
||||
{% if cookiecutter.is_encoder_decoder_model == "False" %}
|
||||
from .models.{{cookiecutter.lowercase_modelname}} import (
|
||||
{{cookiecutter.uppercase_modelname}}_PRETRAINED_MODEL_ARCHIVE_LIST,
|
||||
{{cookiecutter.camelcase_modelname}}ForMaskedLM,
|
||||
@@ -41,10 +42,20 @@
|
||||
{{cookiecutter.camelcase_modelname}}PreTrainedModel,
|
||||
load_tf_weights_in_{{cookiecutter.lowercase_modelname}},
|
||||
)
|
||||
{% else %}
|
||||
from .models.{{cookiecutter.lowercase_modelname}} import (
|
||||
{{cookiecutter.uppercase_modelname}}_PRETRAINED_MODEL_ARCHIVE_LIST,
|
||||
{{cookiecutter.camelcase_modelname}}ForConditionalGeneration,
|
||||
{{cookiecutter.camelcase_modelname}}ForQuestionAnswering,
|
||||
{{cookiecutter.camelcase_modelname}}ForSequenceClassification,
|
||||
{{cookiecutter.camelcase_modelname}}Model,
|
||||
)
|
||||
{% endif -%}
|
||||
# End.
|
||||
|
||||
# Below: "if is_tf_available():" if generating TensorFlow
|
||||
# Replace with:
|
||||
{% if cookiecutter.is_encoder_decoder_model == "False" %}
|
||||
from .models.{{cookiecutter.lowercase_modelname}} import (
|
||||
TF_{{cookiecutter.uppercase_modelname}}_PRETRAINED_MODEL_ARCHIVE_LIST,
|
||||
TF{{cookiecutter.camelcase_modelname}}ForMaskedLM,
|
||||
@@ -57,11 +68,19 @@
|
||||
TF{{cookiecutter.camelcase_modelname}}Model,
|
||||
TF{{cookiecutter.camelcase_modelname}}PreTrainedModel,
|
||||
)
|
||||
{% else %}
|
||||
from .models.{{cookiecutter.lowercase_modelname}} import (
|
||||
TF{{cookiecutter.camelcase_modelname}}ForConditionalGeneration,
|
||||
TF{{cookiecutter.camelcase_modelname}}Model,
|
||||
TF{{cookiecutter.camelcase_modelname}}PreTrainedModel,
|
||||
)
|
||||
{% endif -%}
|
||||
# End.
|
||||
|
||||
# Below: "if is_tokenizers_available():"
|
||||
# Replace with:
|
||||
from models.{{cookiecutter.lowercase_modelname}} import {{cookiecutter.camelcase_modelname}}TokenizerFast
|
||||
from .models.{{cookiecutter.lowercase_modelname}} import {{cookiecutter.camelcase_modelname}}TokenizerFast
|
||||
# End.
|
||||
|
||||
# Below: "from .models.albert import ALBERT_PRETRAINED_CONFIG_ARCHIVE_MAP, AlbertConfig"
|
||||
# Replace with:
|
||||
@@ -101,6 +120,7 @@ from ..{{cookiecutter.lowercase_modelname}}.configuration_{{cookiecutter.lowerca
|
||||
|
||||
# Below: "# Add modeling imports here"
|
||||
# Replace with:
|
||||
{% if cookiecutter.is_encoder_decoder_model == "False" -%}
|
||||
from ..{{cookiecutter.lowercase_modelname}}.modeling_{{cookiecutter.lowercase_modelname}} import (
|
||||
{{cookiecutter.camelcase_modelname}}ForMaskedLM,
|
||||
{{cookiecutter.camelcase_modelname}}ForCausalLM,
|
||||
@@ -110,6 +130,14 @@ from ..{{cookiecutter.lowercase_modelname}}.modeling_{{cookiecutter.lowercase_mo
|
||||
{{cookiecutter.camelcase_modelname}}ForTokenClassification,
|
||||
{{cookiecutter.camelcase_modelname}}Model,
|
||||
)
|
||||
{% else -%}
|
||||
from ..{{cookiecutter.lowercase_modelname}}.modeling_{{cookiecutter.lowercase_modelname}} import (
|
||||
{{cookiecutter.camelcase_modelname}}ForConditionalGeneration,
|
||||
{{cookiecutter.camelcase_modelname}}ForQuestionAnswering,
|
||||
{{cookiecutter.camelcase_modelname}}ForSequenceClassification,
|
||||
{{cookiecutter.camelcase_modelname}}Model,
|
||||
)
|
||||
{% endif -%}
|
||||
# End.
|
||||
|
||||
# Below: "# Base model mapping"
|
||||
@@ -119,17 +147,27 @@ from ..{{cookiecutter.lowercase_modelname}}.modeling_{{cookiecutter.lowercase_mo
|
||||
|
||||
# Below: "# Model with LM heads mapping"
|
||||
# Replace with:
|
||||
{% if cookiecutter.is_encoder_decoder_model == "False" -%}
|
||||
({{cookiecutter.camelcase_modelname}}Config, {{cookiecutter.camelcase_modelname}}ForMaskedLM),
|
||||
{% else %}
|
||||
({{cookiecutter.camelcase_modelname}}Config, {{cookiecutter.camelcase_modelname}}ForConditionalGeneration),
|
||||
{% endif -%}
|
||||
# End.
|
||||
|
||||
# Below: "# Model for Causal LM mapping"
|
||||
# Replace with:
|
||||
{% if cookiecutter.is_encoder_decoder_model == "False" -%}
|
||||
({{cookiecutter.camelcase_modelname}}Config, {{cookiecutter.camelcase_modelname}}ForCausalLM),
|
||||
{% else -%}
|
||||
{% endif -%}
|
||||
# End.
|
||||
|
||||
# Below: "# Model for Masked LM mapping"
|
||||
# Replace with:
|
||||
{% if cookiecutter.is_encoder_decoder_model == "False" -%}
|
||||
({{cookiecutter.camelcase_modelname}}Config, {{cookiecutter.camelcase_modelname}}ForMaskedLM),
|
||||
{% else -%}
|
||||
{% endif -%}
|
||||
# End.
|
||||
|
||||
# Below: "# Model for Sequence Classification mapping"
|
||||
@@ -144,14 +182,27 @@ from ..{{cookiecutter.lowercase_modelname}}.modeling_{{cookiecutter.lowercase_mo
|
||||
|
||||
# Below: "# Model for Token Classification mapping"
|
||||
# Replace with:
|
||||
{% if cookiecutter.is_encoder_decoder_model == "False" -%}
|
||||
({{cookiecutter.camelcase_modelname}}Config, {{cookiecutter.camelcase_modelname}}ForTokenClassification),
|
||||
{% else -%}
|
||||
{% endif -%}
|
||||
# End.
|
||||
|
||||
# Below: "# Model for Multiple Choice mapping"
|
||||
# Replace with:
|
||||
{% if cookiecutter.is_encoder_decoder_model == "False" -%}
|
||||
({{cookiecutter.camelcase_modelname}}Config, {{cookiecutter.camelcase_modelname}}ForMultipleChoice),
|
||||
{% else -%}
|
||||
{% endif -%}
|
||||
# End.
|
||||
|
||||
# Below: "# Model for Seq2Seq Causal LM mapping"
|
||||
# Replace with:
|
||||
{% if cookiecutter.is_encoder_decoder_model == "False" -%}
|
||||
{% else %}
|
||||
({{cookiecutter.camelcase_modelname}}Config, {{cookiecutter.camelcase_modelname}}ForConditionalGeneration),
|
||||
{% endif -%}
|
||||
# End.
|
||||
|
||||
# To replace in: "src/transformers/models/auto/modeling_tf_auto.py" if generating TensorFlow
|
||||
# Below: "from .configuration_auto import ("
|
||||
@@ -161,6 +212,7 @@ from ..{{cookiecutter.lowercase_modelname}}.modeling_{{cookiecutter.lowercase_mo
|
||||
|
||||
# Below: "# Add modeling imports here"
|
||||
# Replace with:
|
||||
{% if cookiecutter.is_encoder_decoder_model == "False" -%}
|
||||
from ..{{cookiecutter.lowercase_modelname}}.modeling_tf_{{cookiecutter.lowercase_modelname}} import (
|
||||
TF{{cookiecutter.camelcase_modelname}}ForMaskedLM,
|
||||
TF{{cookiecutter.camelcase_modelname}}ForCausalLM,
|
||||
@@ -170,6 +222,12 @@ from ..{{cookiecutter.lowercase_modelname}}.modeling_tf_{{cookiecutter.lowercase
|
||||
TF{{cookiecutter.camelcase_modelname}}ForTokenClassification,
|
||||
TF{{cookiecutter.camelcase_modelname}}Model,
|
||||
)
|
||||
{% else -%}
|
||||
from ..{{cookiecutter.lowercase_modelname}}.modeling_tf_{{cookiecutter.lowercase_modelname}} import (
|
||||
TF{{cookiecutter.camelcase_modelname}}ForConditionalGeneration,
|
||||
TF{{cookiecutter.camelcase_modelname}}Model,
|
||||
)
|
||||
{% endif -%}
|
||||
# End.
|
||||
|
||||
# Below: "# Base model mapping"
|
||||
@@ -179,35 +237,65 @@ from ..{{cookiecutter.lowercase_modelname}}.modeling_tf_{{cookiecutter.lowercase
|
||||
|
||||
# Below: "# Model with LM heads mapping"
|
||||
# Replace with:
|
||||
{% if cookiecutter.is_encoder_decoder_model == "False" -%}
|
||||
({{cookiecutter.camelcase_modelname}}Config, TF{{cookiecutter.camelcase_modelname}}ForMaskedLM),
|
||||
{% else %}
|
||||
({{cookiecutter.camelcase_modelname}}Config, TF{{cookiecutter.camelcase_modelname}}ForConditionalGeneration),
|
||||
{% endif -%}
|
||||
# End.
|
||||
|
||||
# Below: "# Model for Causal LM mapping"
|
||||
# Replace with:
|
||||
{% if cookiecutter.is_encoder_decoder_model == "False" -%}
|
||||
({{cookiecutter.camelcase_modelname}}Config, TF{{cookiecutter.camelcase_modelname}}ForCausalLM),
|
||||
{% else -%}
|
||||
{% endif -%}
|
||||
# End.
|
||||
|
||||
# Below: "# Model for Masked LM mapping"
|
||||
# Replace with:
|
||||
{% if cookiecutter.is_encoder_decoder_model == "False" -%}
|
||||
({{cookiecutter.camelcase_modelname}}Config, TF{{cookiecutter.camelcase_modelname}}ForMaskedLM),
|
||||
{% else -%}
|
||||
{% endif -%}
|
||||
# End.
|
||||
|
||||
# Below: "# Model for Sequence Classification mapping"
|
||||
# Replace with:
|
||||
{% if cookiecutter.is_encoder_decoder_model == "False" -%}
|
||||
({{cookiecutter.camelcase_modelname}}Config, TF{{cookiecutter.camelcase_modelname}}ForSequenceClassification),
|
||||
{% else -%}
|
||||
{% endif -%}
|
||||
# End.
|
||||
|
||||
# Below: "# Model for Question Answering mapping"
|
||||
# Replace with:
|
||||
{% if cookiecutter.is_encoder_decoder_model == "False" -%}
|
||||
({{cookiecutter.camelcase_modelname}}Config, TF{{cookiecutter.camelcase_modelname}}ForQuestionAnswering),
|
||||
{% else -%}
|
||||
{% endif -%}
|
||||
# End.
|
||||
|
||||
# Below: "# Model for Token Classification mapping"
|
||||
# Replace with:
|
||||
{% if cookiecutter.is_encoder_decoder_model == "False" -%}
|
||||
({{cookiecutter.camelcase_modelname}}Config, TF{{cookiecutter.camelcase_modelname}}ForTokenClassification),
|
||||
{% else -%}
|
||||
{% endif -%}
|
||||
# End.
|
||||
|
||||
# Below: "# Model for Multiple Choice mapping"
|
||||
# Replace with:
|
||||
{% if cookiecutter.is_encoder_decoder_model == "False" -%}
|
||||
({{cookiecutter.camelcase_modelname}}Config, TF{{cookiecutter.camelcase_modelname}}ForMultipleChoice),
|
||||
{% else -%}
|
||||
{% endif -%}
|
||||
# End.
|
||||
|
||||
# Below: "# Model for Seq2Seq Causal LM mapping"
|
||||
# Replace with:
|
||||
{% if cookiecutter.is_encoder_decoder_model == "False" -%}
|
||||
{% else %}
|
||||
({{cookiecutter.camelcase_modelname}}Config, TF{{cookiecutter.camelcase_modelname}}ForConditionalGeneration),
|
||||
{% endif -%}
|
||||
# End.
|
||||
|
||||
@@ -56,6 +56,47 @@ class {{cookiecutter.camelcase_modelname}}TokenizerFast(BertTokenizerFast):
|
||||
max_model_input_sizes = PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES
|
||||
pretrained_init_configuration = PRETRAINED_INIT_CONFIGURATION
|
||||
slow_tokenizer_class = {{cookiecutter.camelcase_modelname}}Tokenizer
|
||||
|
||||
{%- elif cookiecutter.tokenizer_type == "Based on BART" %}
|
||||
from ...utils import logging
|
||||
from ..bart.tokenization_bart_fast import BartTokenizerFast
|
||||
from .tokenization_{{cookiecutter.lowercase_modelname}} import {{cookiecutter.camelcase_modelname}}Tokenizer
|
||||
|
||||
|
||||
logger = logging.get_logger(__name__)
|
||||
|
||||
PRETRAINED_VOCAB_FILES_MAP = {
|
||||
"vocab_file": {
|
||||
"{{cookiecutter.checkpoint_identifier}}": "https://huggingface.co/{{cookiecutter.checkpoint_identifier}}/resolve/main/vocab.json",
|
||||
},
|
||||
"merges_file": {
|
||||
"{{cookiecutter.checkpoint_identifier}}": "https://huggingface.co/{{cookiecutter.checkpoint_identifier}}/resolve/main/merges.txt",
|
||||
},
|
||||
"tokenizer_file": {
|
||||
"{{cookiecutter.checkpoint_identifier}}": "https://huggingface.co/{{cookiecutter.checkpoint_identifier}}/resolve/main/tokenizer.json",
|
||||
},
|
||||
}
|
||||
|
||||
PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES = {
|
||||
"{{cookiecutter.checkpoint_identifier}}": 1024,
|
||||
}
|
||||
|
||||
|
||||
class {{cookiecutter.camelcase_modelname}}TokenizerFast(BartTokenizerFast):
|
||||
r"""
|
||||
Construct a "fast" {{cookiecutter.modelname}} tokenizer (backed by HuggingFace's `tokenizers` library).
|
||||
|
||||
:class:`~transformers.{{cookiecutter.camelcase_modelname}}TokenizerFast` is identical to :class:`~transformers.BartTokenizerFast` and runs
|
||||
end-to-end tokenization: punctuation splitting and wordpiece.
|
||||
|
||||
Refer to superclass :class:`~transformers.BartTokenizerFast` for usage examples and documentation concerning
|
||||
parameters.
|
||||
"""
|
||||
|
||||
pretrained_vocab_files_map = PRETRAINED_VOCAB_FILES_MAP
|
||||
max_model_input_sizes = PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES
|
||||
slow_tokenizer_class = {{cookiecutter.camelcase_modelname}}Tokenizer
|
||||
|
||||
{%- elif cookiecutter.tokenizer_type == "Standalone" %}
|
||||
from typing import List, Optional
|
||||
|
||||
|
||||
@@ -54,6 +54,45 @@ class {{cookiecutter.camelcase_modelname}}Tokenizer(BertTokenizer):
|
||||
pretrained_vocab_files_map = PRETRAINED_VOCAB_FILES_MAP
|
||||
max_model_input_sizes = PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES
|
||||
pretrained_init_configuration = PRETRAINED_INIT_CONFIGURATION
|
||||
|
||||
{%- elif cookiecutter.tokenizer_type == "Based on BART" %}
|
||||
from ...utils import logging
|
||||
from ..bart.tokenization_bart import BartTokenizer
|
||||
|
||||
|
||||
logger = logging.get_logger(__name__)
|
||||
|
||||
PRETRAINED_VOCAB_FILES_MAP = {
|
||||
"vocab_file": {
|
||||
"{{cookiecutter.checkpoint_identifier}}": "https://huggingface.co/{{cookiecutter.checkpoint_identifier}}/resolve/main/vocab.json",
|
||||
},
|
||||
"merges_file": {
|
||||
"{{cookiecutter.checkpoint_identifier}}": "https://huggingface.co/{{cookiecutter.checkpoint_identifier}}/resolve/main/merges.txt",
|
||||
},
|
||||
"tokenizer_file": {
|
||||
"{{cookiecutter.checkpoint_identifier}}": "https://huggingface.co/{{cookiecutter.checkpoint_identifier}}/resolve/main/tokenizer.json",
|
||||
},
|
||||
}
|
||||
|
||||
PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES = {
|
||||
"{{cookiecutter.checkpoint_identifier}}": 1024,
|
||||
}
|
||||
|
||||
|
||||
class {{cookiecutter.camelcase_modelname}}Tokenizer(BartTokenizer):
|
||||
"""
|
||||
Construct a {{cookiecutter.modelname}} tokenizer.
|
||||
|
||||
:class:`~transformers.{{cookiecutter.camelcase_modelname}}Tokenizer` is identical to :class:`~transformers.BartTokenizer` and runs end-to-end
|
||||
tokenization: punctuation splitting and wordpiece.
|
||||
|
||||
Refer to superclass :class:`~transformers.BartTokenizer` for usage examples and documentation concerning
|
||||
parameters.
|
||||
"""
|
||||
|
||||
pretrained_vocab_files_map = PRETRAINED_VOCAB_FILES_MAP
|
||||
max_model_input_sizes = PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES
|
||||
|
||||
{%- elif cookiecutter.tokenizer_type == "Standalone" %}
|
||||
from typing import List, Optional
|
||||
|
||||
@@ -289,5 +328,4 @@ class {{cookiecutter.camelcase_modelname}}TokenizerFast(PreTrainedTokenizerFast)
|
||||
return len(cls + token_ids_0 + sep) * [0]
|
||||
return len(cls + token_ids_0 + sep + sep + token_ids_1 + sep) * [0]
|
||||
|
||||
|
||||
{% endif %}
|
||||
|
||||
@@ -57,7 +57,7 @@ Tips:
|
||||
.. autoclass:: transformers.{{cookiecutter.camelcase_modelname}}Model
|
||||
:members: forward
|
||||
|
||||
|
||||
{% if cookiecutter.is_encoder_decoder_model == "False" %}
|
||||
{{cookiecutter.camelcase_modelname}}ForCausalLM
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
@@ -83,7 +83,7 @@ Tips:
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
.. autoclass:: transformers.{{cookiecutter.camelcase_modelname}}ForMultipleChoice
|
||||
:members:
|
||||
:members: forward
|
||||
|
||||
|
||||
{{cookiecutter.camelcase_modelname}}ForTokenClassification
|
||||
@@ -99,6 +99,29 @@ Tips:
|
||||
.. autoclass:: transformers.{{cookiecutter.camelcase_modelname}}ForQuestionAnswering
|
||||
:members: forward
|
||||
|
||||
{%- else %}
|
||||
{{cookiecutter.camelcase_modelname}}ForConditionalGeneration
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
.. autoclass:: transformers.{{cookiecutter.camelcase_modelname}}ForConditionalGeneration
|
||||
:members: forward
|
||||
|
||||
|
||||
{{cookiecutter.camelcase_modelname}}ForSequenceClassification
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
.. autoclass:: transformers.{{cookiecutter.camelcase_modelname}}ForSequenceClassification
|
||||
:members: forward
|
||||
|
||||
|
||||
{{cookiecutter.camelcase_modelname}}ForQuestionAnswering
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
.. autoclass:: transformers.{{cookiecutter.camelcase_modelname}}ForQuestionAnswering
|
||||
:members: forward
|
||||
|
||||
|
||||
{% endif -%}
|
||||
{% endif -%}
|
||||
{% if "TensorFlow" in cookiecutter.generate_tensorflow_and_pytorch -%}
|
||||
|
||||
@@ -108,7 +131,7 @@ TF{{cookiecutter.camelcase_modelname}}Model
|
||||
.. autoclass:: transformers.TF{{cookiecutter.camelcase_modelname}}Model
|
||||
:members: call
|
||||
|
||||
|
||||
{% if cookiecutter.is_encoder_decoder_model == "False" %}
|
||||
TF{{cookiecutter.camelcase_modelname}}ForMaskedLM
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
@@ -120,7 +143,7 @@ TF{{cookiecutter.camelcase_modelname}}ForCausalLM
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
.. autoclass:: transformers.TF{{cookiecutter.camelcase_modelname}}ForCausalLM
|
||||
:members: forward
|
||||
:members: call
|
||||
|
||||
|
||||
TF{{cookiecutter.camelcase_modelname}}ForSequenceClassification
|
||||
@@ -151,4 +174,11 @@ TF{{cookiecutter.camelcase_modelname}}ForQuestionAnswering
|
||||
:members: call
|
||||
|
||||
|
||||
{%- else %}
|
||||
TF{{cookiecutter.camelcase_modelname}}ForConditionalGeneration
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
.. autoclass:: transformers.TF{{cookiecutter.camelcase_modelname}}ForConditionalGeneration
|
||||
:members: call
|
||||
{% endif -%}
|
||||
{% endif -%}
|
||||
|
||||
@@ -5,6 +5,7 @@
|
||||
"camelcase_modelname": "BrandNewBert",
|
||||
"authors": "The HuggingFace Team",
|
||||
"checkpoint_identifier": "brand-new-bert-base-cased",
|
||||
"tokenizer_type": ["Based on BERT", "Standalone"],
|
||||
"generate_tensorflow_and_pytorch": ["PyTorch & TensorFlow", "PyTorch", "TensorFlow"]
|
||||
"tokenizer_type": ["Based on BERT", "Based on BART", "Standalone"],
|
||||
"generate_tensorflow_and_pytorch": ["PyTorch & TensorFlow", "PyTorch", "TensorFlow"],
|
||||
"is_encoder_decoder_model": ["True", "False"]
|
||||
}
|
||||
@@ -6,5 +6,6 @@
|
||||
"authors": "The HuggingFace Team",
|
||||
"checkpoint_identifier": "brand-new-bert-base-cased",
|
||||
"tokenizer_type": "Based on BERT",
|
||||
"generate_tensorflow_and_pytorch": "PyTorch & TensorFlow"
|
||||
"generate_tensorflow_and_pytorch": "PyTorch & TensorFlow",
|
||||
"is_encoder_decoder_model": "False"
|
||||
}
|
||||
|
||||
@@ -6,5 +6,6 @@
|
||||
"authors": "The HuggingFace Team",
|
||||
"checkpoint_identifier": "brand-new-bert-base-cased",
|
||||
"tokenizer_type": "Based on BERT",
|
||||
"generate_tensorflow_and_pytorch": "PyTorch"
|
||||
"generate_tensorflow_and_pytorch": "PyTorch",
|
||||
"is_encoder_decoder_model": "False"
|
||||
}
|
||||
|
||||
@@ -0,0 +1,11 @@
|
||||
{
|
||||
"modelname": "NewENCDEC",
|
||||
"uppercase_modelname": "NEW_ENC_DEC",
|
||||
"lowercase_modelname": "new_enc_dec",
|
||||
"camelcase_modelname": "NewEncDec",
|
||||
"authors": "The HuggingFace Team",
|
||||
"checkpoint_identifier": "new-enc-dec-base",
|
||||
"tokenizer_type": "Based on BART",
|
||||
"generate_tensorflow_and_pytorch": "PyTorch",
|
||||
"is_encoder_decoder_model": "True"
|
||||
}
|
||||
@@ -6,5 +6,6 @@
|
||||
"authors": "The HuggingFace Team",
|
||||
"checkpoint_identifier": "bi-brand-new-bert-base-cased",
|
||||
"tokenizer_type": "Standalone",
|
||||
"generate_tensorflow_and_pytorch": "PyTorch & TensorFlow"
|
||||
"generate_tensorflow_and_pytorch": "PyTorch & TensorFlow",
|
||||
"is_encoder_decoder_model": "False"
|
||||
}
|
||||
|
||||
@@ -6,5 +6,6 @@
|
||||
"authors": "The HuggingFace Team",
|
||||
"checkpoint_identifier": "brand-new-bert-base-cased",
|
||||
"tokenizer_type": "Based on BERT",
|
||||
"generate_tensorflow_and_pytorch": "TensorFlow"
|
||||
"generate_tensorflow_and_pytorch": "TensorFlow",
|
||||
"is_encoder_decoder_model": "False"
|
||||
}
|
||||
|
||||
@@ -0,0 +1,11 @@
|
||||
{
|
||||
"modelname": "NewTFENCDEC",
|
||||
"uppercase_modelname": "NEW_TF_ENC_DEC",
|
||||
"lowercase_modelname": "new_tf_enc_dec",
|
||||
"camelcase_modelname": "NewTFEncDec",
|
||||
"authors": "The HuggingFace Team",
|
||||
"checkpoint_identifier": "new-tf-enc-dec-base",
|
||||
"tokenizer_type": "Based on BART",
|
||||
"generate_tensorflow_and_pytorch": "TensorFlow",
|
||||
"is_encoder_decoder_model": "True"
|
||||
}
|
||||
@@ -94,7 +94,7 @@ class TFBartModelTester:
|
||||
self.batch_size = 1
|
||||
|
||||
# first forward pass
|
||||
outputs = model(input_ids, use_cache=True)
|
||||
outputs = model(input_ids, attention_mask=attention_mask, use_cache=True)
|
||||
|
||||
output, past_key_values = outputs.to_tuple()
|
||||
past_key_values = past_key_values[1]
|
||||
|
||||
Reference in New Issue
Block a user