[TFBart] Split TF-Bart (#9497)
* make templates ready * make add_new_model_command_ready * finish tf bart * prepare tf mbart * finish tf bart * add tf mbart * add marian * prep pegasus * add tf pegasus * push blenderbot tf * add blenderbot * add blenderbot small * clean-up * make fix copy * define blend bot tok * fix * up * make style * add to docs * add copy statements * overwrite changes * improve * fix docs * finish * fix last slow test * fix missing git conflict line * fix blenderbot * up * fix blenderbot small * load changes * finish copied from * upload fix
This commit is contained in:
committed by
GitHub
parent
0ecbb69806
commit
7f28613213
@@ -1,5 +1,5 @@
|
||||
# coding=utf-8
|
||||
# Copyright {{cookiecutter.authors}} and The HuggingFace Inc. team. All rights reserved.
|
||||
# Copyright 2021 {{cookiecutter.authors}} and The HuggingFace Inc. team. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
@@ -1464,7 +1464,6 @@ class TF{{cookiecutter.camelcase_modelname}}ForQuestionAnswering(TF{{cookiecutte
|
||||
)
|
||||
|
||||
{% else %}
|
||||
import math
|
||||
import random
|
||||
from typing import Dict, Optional, Tuple, Union
|
||||
|
||||
@@ -1936,7 +1935,7 @@ class TF{{cookiecutter.camelcase_modelname}}PreTrainedModel(TFPreTrainedModel):
|
||||
input_ids (:obj:`tf.Tensor` of shape :obj:`({0})`):
|
||||
Indices of input sequence tokens in the vocabulary.
|
||||
|
||||
Indices can be obtained using :class:`~transformers.BertTokenizer`. See
|
||||
Indices can be obtained using :class:`~transformers.{{cookiecutter.camelcase_modelname}}Tokenizer`. See
|
||||
:meth:`transformers.PreTrainedTokenizer.encode` and :meth:`transformers.PreTrainedTokenizer.__call__` for
|
||||
details.
|
||||
|
||||
@@ -1949,8 +1948,21 @@ class TF{{cookiecutter.camelcase_modelname}}PreTrainedModel(TFPreTrainedModel):
|
||||
|
||||
`What are attention masks? <../glossary.html#attention-mask>`__
|
||||
decoder_input_ids (:obj:`tf.Tensor` of shape :obj:`(batch_size, target_sequence_length)`, `optional`):
|
||||
Provide for translation and summarization training. By default, the model will create this tensor by
|
||||
shifting the input_ids right, following the paper.
|
||||
Indices of decoder input sequence tokens in the vocabulary.
|
||||
|
||||
Indices can be obtained using :class:`~transformers.{{cookiecutter.camelcase_modelname}}Tokenizer`. See
|
||||
:meth:`transformers.PreTrainedTokenizer.encode` and :meth:`transformers.PreTrainedTokenizer.__call__` for
|
||||
details.
|
||||
|
||||
`What are input IDs? <../glossary.html#input-ids>`__
|
||||
|
||||
{{cookiecutter.camelcase_modelname}} uses the :obj:`eos_token_id` as the starting token for
|
||||
:obj:`decoder_input_ids` generation. If :obj:`past_key_values` is used, optionally only the last
|
||||
:obj:`decoder_input_ids` have to be input (see :obj:`past_key_values`).
|
||||
|
||||
For translation and summarization training, :obj:`decoder_input_ids` should be provided. If no
|
||||
:obj:`decoder_input_ids` is provided, the model will create this tensor by shifting the :obj:`input_ids` to
|
||||
the right for denoising pre-training following the paper.
|
||||
decoder_attention_mask (:obj:`tf.Tensor` of shape :obj:`(batch_size, target_sequence_length)`, `optional`):
|
||||
will be made by default and ignore pad tokens. It is not recommended to set this for most use cases.
|
||||
encoder_outputs (:obj:`tf.FloatTensor`, `optional`):
|
||||
@@ -1971,7 +1983,7 @@ class TF{{cookiecutter.camelcase_modelname}}PreTrainedModel(TFPreTrainedModel):
|
||||
Whether or not to return the hidden states of all layers. See ``hidden_states`` under returned tensors for
|
||||
more detail.
|
||||
return_dict (:obj:`bool`, `optional`):
|
||||
Whether or not to return a :class:`~transformers.file_utils.TFModelOutput` instead of a plain tuple.
|
||||
Whether or not to return a :class:`~transformers.file_utils.ModelOutput` instead of a plain tuple.
|
||||
training (:obj:`bool`, `optional`, defaults to :obj:`False`):
|
||||
Whether or not to use the model in training mode (some modules like dropout modules have different
|
||||
behaviors between training and evaluation).
|
||||
@@ -1996,7 +2008,7 @@ class TF{{cookiecutter.camelcase_modelname}}Encoder(tf.keras.layers.Layer):
|
||||
self.layerdrop = config.encoder_layerdrop
|
||||
self.padding_idx = config.pad_token_id
|
||||
self.max_source_positions = config.max_position_embeddings
|
||||
self.embed_scale = math.sqrt(config.d_model) if config.scale_embedding else 1.0
|
||||
self.embed_scale = tf.math.sqrt(float(config.d_model)) if config.scale_embedding else 1.0
|
||||
|
||||
|
||||
self.embed_tokens = embed_tokens
|
||||
@@ -2077,14 +2089,10 @@ class TF{{cookiecutter.camelcase_modelname}}Encoder(tf.keras.layers.Layer):
|
||||
raise ValueError("You have to specify either input_ids or inputs_embeds")
|
||||
|
||||
if inputs["inputs_embeds"] is None:
|
||||
inputs_embeds = self.embed_tokens(inputs["input_ids"]) * self.embed_scale
|
||||
else:
|
||||
inputs_embeds = inputs["inputs_embeds"]
|
||||
|
||||
inputs_embeds = inputs_embeds
|
||||
inputs["inputs_embeds"] = self.embed_tokens(inputs["input_ids"]) * self.embed_scale
|
||||
|
||||
embed_pos = self.embed_positions(input_shape)
|
||||
hidden_states = inputs_embeds + embed_pos
|
||||
hidden_states = inputs["inputs_embeds"] + embed_pos
|
||||
hidden_states = self.layernorm_embedding(hidden_states)
|
||||
hidden_states = self.dropout(hidden_states, training=inputs["training"])
|
||||
|
||||
@@ -2146,7 +2154,7 @@ class TF{{cookiecutter.camelcase_modelname}}Decoder(tf.keras.layers.Layer):
|
||||
self.padding_idx,
|
||||
name="embed_positions",
|
||||
)
|
||||
self.embed_scale = math.sqrt(config.d_model) if config.scale_embedding else 1.0
|
||||
self.embed_scale = tf.math.sqrt(float(config.d_model)) if config.scale_embedding else 1.0
|
||||
self.layers = [TF{{cookiecutter.camelcase_modelname}}DecoderLayer(config, name=f"layers.{i}") for i in range(config.decoder_layers)]
|
||||
self.layernorm_embedding = tf.keras.layers.LayerNormalization(epsilon=1e-5, name="layernorm_embedding")
|
||||
|
||||
@@ -2259,7 +2267,6 @@ class TF{{cookiecutter.camelcase_modelname}}Decoder(tf.keras.layers.Layer):
|
||||
hidden_states = inputs["inputs_embeds"]
|
||||
|
||||
# [bsz, seq_len] -> [bsz, 1, tgt_seq_len, src_seq_len]
|
||||
combined_attention_mask = None
|
||||
if input_shape[-1] > 1:
|
||||
combined_attention_mask = _make_causal_mask(input_shape, past_key_values_length=past_key_values_length)
|
||||
else:
|
||||
@@ -2267,21 +2274,8 @@ class TF{{cookiecutter.camelcase_modelname}}Decoder(tf.keras.layers.Layer):
|
||||
tf.ones((input_shape[0], input_shape[1] + past_key_values_length)), tgt_len=input_shape[-1]
|
||||
)
|
||||
|
||||
if inputs["attention_mask"] is None and inputs["input_ids"] is not None and input_shape[-1] > 1:
|
||||
inputs["attention_mask"] = tf.cast(
|
||||
tf.math.not_equal(inputs["input_ids"], self.config.pad_token_id), inputs["input_ids"].dtype
|
||||
)
|
||||
inputs["attention_mask"] = tf.concat(
|
||||
[
|
||||
tf.ones((input_shape[0], past_key_values_length), dtype=inputs["attention_mask"].dtype),
|
||||
inputs["attention_mask"],
|
||||
],
|
||||
axis=-1,
|
||||
)
|
||||
else:
|
||||
inputs["attention_mask"] = tf.ones(
|
||||
(input_shape[0], input_shape[1] + past_key_values_length), dtype=tf.int32
|
||||
)
|
||||
if inputs["attention_mask"] is not None and input_shape[-1] > 1:
|
||||
combined_attention_mask = combined_attention_mask + _expand_mask(inputs["attention_mask"], tgt_len=input_shape[-1])
|
||||
|
||||
if inputs["encoder_hidden_states"] is not None and inputs["encoder_attention_mask"] is not None:
|
||||
# [bsz, seq_len] -> [bsz, 1, tgt_seq_len, src_seq_len]
|
||||
@@ -2683,7 +2677,7 @@ class TF{{cookiecutter.camelcase_modelname}}ForConditionalGeneration(TF{{cookiec
|
||||
reordered_past = ()
|
||||
for layer_past_key_values in past_key_values:
|
||||
reordered_past += (
|
||||
tuple(tf.gather(layer_past_key_value, beam_idx) for layer_past_key_value in layer_past_key_values),
|
||||
tuple(tf.gather(layer_past_key_value, beam_idx) for layer_past_key_value in layer_past_key_values[:2]) + layer_past_key_values[2:],
|
||||
)
|
||||
return (past[0], reordered_past)
|
||||
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
# coding=utf-8
|
||||
# Copyright {{cookiecutter.authors}} and The HuggingFace Inc. team. All rights reserved.
|
||||
# Copyright 2021 {{cookiecutter.authors}} The HuggingFace Inc. team. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
# coding=utf-8
|
||||
# Copyright {{cookiecutter.authors}} and The HuggingFace Inc. team. All rights reserved.
|
||||
# Copyright 2021 The HuggingFace Inc. team. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
@@ -17,7 +17,7 @@
|
||||
|
||||
import unittest
|
||||
|
||||
from transformers import {{cookiecutter.camelcase_modelname}}Config, is_tf_available
|
||||
from transformers import is_tf_available, {{cookiecutter.camelcase_modelname}}Config
|
||||
from transformers.testing_utils import require_tf, slow
|
||||
|
||||
from .test_configuration_common import ConfigTester
|
||||
@@ -28,12 +28,12 @@ if is_tf_available():
|
||||
import tensorflow as tf
|
||||
|
||||
from transformers import (
|
||||
TF{{cookiecutter.camelcase_modelname}}ForCausalLM,
|
||||
TF{{cookiecutter.camelcase_modelname}}ForMaskedLM,
|
||||
TF{{cookiecutter.camelcase_modelname}}ForMultipleChoice,
|
||||
TF{{cookiecutter.camelcase_modelname}}ForQuestionAnswering,
|
||||
TF{{cookiecutter.camelcase_modelname}}ForSequenceClassification,
|
||||
TF{{cookiecutter.camelcase_modelname}}ForTokenClassification,
|
||||
TF{{cookiecutter.camelcase_modelname}}ForCausalLM,
|
||||
TF{{cookiecutter.camelcase_modelname}}Model,
|
||||
)
|
||||
|
||||
@@ -323,8 +323,12 @@ class TF{{cookiecutter.camelcase_modelname}}ModelIntegrationTest(unittest.TestCa
|
||||
{% else %}
|
||||
import unittest
|
||||
|
||||
from transformers import {{cookiecutter.camelcase_modelname}}Config, {{cookiecutter.camelcase_modelname}}Tokenizer, is_tf_available
|
||||
from transformers.testing_utils import require_sentencepiece, require_tokenizers, require_tf, slow
|
||||
from transformers import (
|
||||
is_tf_available,
|
||||
{{cookiecutter.camelcase_modelname}}Config,
|
||||
{{cookiecutter.camelcase_modelname}}Tokenizer,
|
||||
)
|
||||
from transformers.testing_utils import require_sentencepiece, require_tf, require_tokenizers, slow
|
||||
|
||||
from .test_configuration_common import ConfigTester
|
||||
from .test_modeling_tf_common import TFModelTesterMixin, ids_tensor
|
||||
@@ -333,7 +337,10 @@ from .test_modeling_tf_common import TFModelTesterMixin, ids_tensor
|
||||
if is_tf_available():
|
||||
import tensorflow as tf
|
||||
|
||||
from transformers import TF{{cookiecutter.camelcase_modelname}}ForConditionalGeneration, TF{{cookiecutter.camelcase_modelname}}Model
|
||||
from transformers import (
|
||||
TF{{cookiecutter.camelcase_modelname}}ForConditionalGeneration,
|
||||
TF{{cookiecutter.camelcase_modelname}}Model,
|
||||
)
|
||||
|
||||
|
||||
@require_tf
|
||||
@@ -453,7 +460,7 @@ def prepare_{{cookiecutter.lowercase_modelname}}_inputs_dict(
|
||||
if attention_mask is None:
|
||||
attention_mask = tf.cast(tf.math.not_equal(input_ids, config.pad_token_id), tf.int8)
|
||||
if decoder_attention_mask is None:
|
||||
decoder_attention_mask = tf.cast(tf.math.not_equal(decoder_input_ids, config.pad_token_id), tf.int8)
|
||||
decoder_attention_mask = tf.concat([tf.ones(decoder_input_ids[:, :1].shape, dtype=tf.int8), tf.cast(tf.math.not_equal(decoder_input_ids[:, 1:], config.pad_token_id), tf.int8)], axis=-1)
|
||||
return {
|
||||
"input_ids": input_ids,
|
||||
"decoder_input_ids": decoder_input_ids,
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
# coding=utf-8
|
||||
# Copyright {{cookiecutter.authors}} and The HuggingFace Inc. team. All rights reserved.
|
||||
# Copyright 2021 The HuggingFace Inc. team. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
@@ -21,8 +21,8 @@ import unittest
|
||||
from tests.test_modeling_common import floats_tensor
|
||||
from transformers import is_torch_available
|
||||
from transformers.testing_utils import require_torch, slow, torch_device
|
||||
from .test_configuration_common import ConfigTester
|
||||
|
||||
from .test_configuration_common import ConfigTester
|
||||
from .test_modeling_common import ModelTesterMixin, ids_tensor, random_attention_mask
|
||||
|
||||
|
||||
@@ -31,15 +31,17 @@ if is_torch_available():
|
||||
|
||||
from transformers import (
|
||||
{{cookiecutter.camelcase_modelname}}Config,
|
||||
{{cookiecutter.camelcase_modelname}}ForMaskedLM,
|
||||
{{cookiecutter.camelcase_modelname}}ForCausalLM,
|
||||
{{cookiecutter.camelcase_modelname}}ForMaskedLM,
|
||||
{{cookiecutter.camelcase_modelname}}ForMultipleChoice,
|
||||
{{cookiecutter.camelcase_modelname}}ForQuestionAnswering,
|
||||
{{cookiecutter.camelcase_modelname}}ForSequenceClassification,
|
||||
{{cookiecutter.camelcase_modelname}}ForTokenClassification,
|
||||
{{cookiecutter.camelcase_modelname}}Model,
|
||||
)
|
||||
from transformers.models.{{cookiecutter.lowercase_modelname}}.modeling_{{cookiecutter.lowercase_modelname}} import {{cookiecutter.uppercase_modelname}}_PRETRAINED_MODEL_ARCHIVE_LIST
|
||||
from transformers.models.{{cookiecutter.lowercase_modelname}}.modeling_{{cookiecutter.lowercase_modelname}} import (
|
||||
{{cookiecutter.uppercase_modelname}}_PRETRAINED_MODEL_ARCHIVE_LIST,
|
||||
)
|
||||
|
||||
|
||||
class {{cookiecutter.camelcase_modelname}}ModelTester:
|
||||
|
||||
Reference in New Issue
Block a user