Full rework of the TF input/output embeddings and bias resizing (#9193)

* Start rework resizing

* Rework bias/decoder resizing

* Full resizing rework

* Full resizing rework

* Start to update the models with the new approach

* Finish to update the models

* Update all the tests

* Update the template

* Fix tests

* Fix tests

* Test a new approach

* Refactoring

* Refactoring

* Refactoring

* New rework

* Rework BART

* Rework bert+blenderbot

* Rework CTRL

* Rework Distilbert

* Rework DPR

* Rework Electra

* Rework Flaubert

* Rework Funnel

* Rework GPT2

* Rework Longformer

* Rework Lxmert

* Rework marian+mbart

* Rework mobilebert

* Rework mpnet

* Rework openai

* Rework pegasus

* Rework Roberta

* Rework T5

* Rework xlm+xlnet

* Rework template

* Fix TFT5EncoderOnly + DPRs

* Restore previous methods

* Fix Funnel

* Fix CTRL and TransforXL

* Apply style

* Apply Sylvain's comments

* Restore a test in DPR

* Address the comments

* Fix bug

* Apply style

* remove unused import

* Fix test

* Forgot a method

* missing test

* Trigger CI

* naming update

* Rebase

* Trigger CI
This commit is contained in:
Julien Plu
2021-01-11 12:27:28 +01:00
committed by GitHub
parent cf416764f4
commit 1243ee7d0c
40 changed files with 1473 additions and 593 deletions

View File

@@ -530,6 +530,46 @@ def load_tf_weights(model, resolved_archive_file):
return missing_layers, unexpected_layers return missing_layers, unexpected_layers
def init_copy_embeddings(old_embeddings, new_num_tokens):
r"""
This function aims to reduce the embeddings in case new_num_tokens < old_num_tokens or to pad with -1 in case
new_num_tokens > old_num_tokens. A mask is also computed in order to know which weight in the embeddings should be
kept or not. Example:
- if new_num_tokens=5 and old_num_tokens=4 and old_embeddings=[w1,w2,w3,w4]
- mask=[True,True,True,True,False] and current_weights=[w1,w2,w3,w4,-1]
- if new_num_tokens=4 and old_num_tokens=5 and old_embeddings=[w1,w2,w3,w4,w5]
- mask=[True,True,True,True] and current_weights=[w1,w2,w3,w4]
"""
old_num_tokens, old_embedding_dim = shape_list(old_embeddings)
size_diff = new_num_tokens - old_num_tokens
# initialize new embeddings
# Copy token embeddings from the previous ones
if tf.math.greater(size_diff, 0):
# if the new size is greater than the old one, we extend the current embeddings with a padding until getting new size
# and we create a mask to properly identify the padded values and be replaced by the values of the newly created
# embeddings
current_weights = tf.pad(
old_embeddings.value(), tf.convert_to_tensor([[0, size_diff], [0, 0]]), constant_values=-1
)
num_tokens_to_copy = min(old_num_tokens, new_num_tokens)
mask = tf.fill(tf.convert_to_tensor([num_tokens_to_copy, 1]), True)
mask = tf.pad(mask, tf.convert_to_tensor([[0, size_diff], [0, 0]]), constant_values=False)
else:
# if the new size if lower than the old one, we take the current embeddings until the new size
current_weights = tf.slice(
old_embeddings.value(),
tf.convert_to_tensor([0, 0]),
tf.convert_to_tensor([new_num_tokens, old_embedding_dim]),
)
mask = tf.fill(tf.convert_to_tensor([new_num_tokens, 1]), True)
return mask, current_weights
class TFPreTrainedModel(tf.keras.Model, TFModelUtilsMixin, TFGenerationMixin): class TFPreTrainedModel(tf.keras.Model, TFModelUtilsMixin, TFGenerationMixin):
r""" r"""
Base class for all TF models. Base class for all TF models.
@@ -615,58 +655,132 @@ class TFPreTrainedModel(tf.keras.Model, TFModelUtilsMixin, TFGenerationMixin):
def get_input_embeddings(self) -> tf.keras.layers.Layer: def get_input_embeddings(self) -> tf.keras.layers.Layer:
""" """
Returns the model's input embeddings. Returns the model's input embeddings layer.
Returns: Returns:
:obj:`tf.keras.layers.Layer`: A torch module mapping vocabulary to hidden states. :obj:`tf.Variable`: The embeddings layer mapping vocabulary to hidden states.
""" """
base_model = getattr(self, self.base_model_prefix, self) main_layer = getattr(self, self.base_model_prefix, self)
if base_model is not self: if main_layer is not self:
return base_model.get_input_embeddings() return main_layer.get_input_embeddings()
else: else:
raise NotImplementedError raise NotImplementedError
def set_input_embeddings(self, value): def set_input_embeddings(self, value):
""" """
Set model's input embeddings. Set model's input embeddings
Args: Args:
value (:obj:`tf.keras.layers.Layer`): value (:obj:`tf.Variable`):
A module mapping vocabulary to hidden states. The new weights mapping hidden states to vocabulary.
""" """
base_model = getattr(self, self.base_model_prefix, self) main_layer = getattr(self, self.base_model_prefix)
if base_model is not self:
base_model.set_input_embeddings(value)
else:
raise NotImplementedError
def get_output_embeddings(self) -> tf.keras.layers.Layer: if main_layer is None:
raise NotImplementedError("The model does not implements the base_model_prefix attribute.")
try:
main_layer.set_input_embeddings(value)
except AttributeError:
logger.info("Building the model")
self(self.dummy_inputs)
main_layer.set_input_embeddings(value)
def get_output_embeddings(self) -> Union[None, tf.keras.layers.Layer]:
""" """
Returns the model's output embeddings Returns the model's output embeddings
Returns: Returns:
:obj:`tf.keras.layers.Layer`: A torch module mapping hidden states to vocabulary. :obj:`tf.Variable`: The new weights mapping vocabulary to hidden states.
""" """
if self.get_lm_head() is not None:
lm_head = self.get_lm_head()
return lm_head.get_output_embeddings()
return None # Overwrite for models with output embeddings return None # Overwrite for models with output embeddings
def set_output_embeddings(self, value):
"""
Set model's output embeddings
Args:
value (:obj:`tf.Variable`):
The new weights mapping hidden states to vocabulary.
"""
if self.get_lm_head() is not None:
lm_head = self.get_lm_head()
try:
lm_head.set_output_embeddings(value)
except AttributeError:
logger.info("Building the model")
self(self.dummy_inputs)
lm_head.set_output_embeddings(value)
def get_output_layer_with_bias(self) -> Union[None, tf.keras.layers.Layer]: def get_output_layer_with_bias(self) -> Union[None, tf.keras.layers.Layer]:
""" """
Get the layer that handles a bias attribute in case the model has an LM head with weights tied to the Get the layer that handles a bias attribute in case the model has an LM head with weights tied to the
embeddings. embeddings
Return: Return:
:obj:`tf.keras.layers.Layer`: The layer that handles the bias, None if not an LM model. :obj:`tf.keras.layers.Layer`: The layer that handles the bias, None if not an LM model.
""" """
return None warnings.warn(
"The method get_output_layer_with_bias is deprecated. Please use `get_lm_head` instead.", FutureWarning
)
return self.get_lm_head()
def get_prefix_bias_name(self) -> Union[None, str]: def get_prefix_bias_name(self) -> Union[None, str]:
""" """
Get the concatenated prefix name of the bias from the model name to the parent layer. Get the concatenated prefix name of the bias from the model name to the parent layer
Return: Return:
:obj:`str`: The prefix name of the bias. :obj:`str`: The prefix name of the bias.
""" """
warnings.warn("The method get_prefix_bias_name is deprecated. Please use `get_bias` instead.", FutureWarning)
return None
def get_bias(self) -> Union[None, Dict[str, tf.Variable]]:
"""
Dict of bias attached to an LM head. The key represents the name of the bias attribute.
Return:
:obj:`tf.Variable`: The weights representing the bias, None if not an LM model.
"""
if self.get_lm_head() is not None:
lm_head = self.get_lm_head()
try:
return lm_head.get_bias()
except AttributeError:
self(self.dummy_inputs)
return lm_head.get_bias()
return None
def set_bias(self, value):
"""
Set all the bias in the LM head.
Args:
value (:obj:`Dict[tf.Variable]`):
All the new bias attached to an LM head.
"""
if self.get_lm_head() is not None:
lm_head = self.get_lm_head()
try:
lm_head.set_bias(value)
except AttributeError:
self(self.dummy_inputs)
lm_head.set_bias(value)
def get_lm_head(self) -> tf.keras.layers.Layer:
"""
The LM Head layer. This method must be overwritten by all the models that have a lm head.
Return:
:obj:`tf.keras.layers.Layer`: The LM head layer if the model has one, None if not.
"""
return None return None
def resize_token_embeddings(self, new_num_tokens=None) -> tf.Variable: def resize_token_embeddings(self, new_num_tokens=None) -> tf.Variable:
@@ -685,46 +799,151 @@ class TFPreTrainedModel(tf.keras.Model, TFModelUtilsMixin, TFGenerationMixin):
Return: Return:
:obj:`tf.Variable`: Pointer to the input tokens Embeddings Module of the model. :obj:`tf.Variable`: Pointer to the input tokens Embeddings Module of the model.
""" """
if new_num_tokens is None or new_num_tokens == self.config.vocab_size:
return self._get_word_embedding_weight(self.get_input_embeddings())
model_embeds = self._resize_token_embeddings(new_num_tokens) model_embeds = self._resize_token_embeddings(new_num_tokens)
if new_num_tokens is None:
return model_embeds
return model_embeds
def _resize_token_embeddings(self, new_num_tokens):
# get_input_embeddings and set_input_embeddings need to be implemented in base layer.
base_model = getattr(self, self.base_model_prefix, self)
old_embeddings = base_model.get_input_embeddings()
new_embeddings = self._get_resized_embeddings(old_embeddings, new_num_tokens)
base_model.set_input_embeddings(new_embeddings)
# Update base model and current model config # Update base model and current model config
self.config.vocab_size = new_num_tokens self.config.vocab_size = new_num_tokens
base_model.vocab_size = new_num_tokens
return base_model.get_input_embeddings()
def _get_word_embeddings(self, embeddings): return model_embeds
if hasattr(embeddings, "word_embeddings"):
# TFBertEmbeddings, TFAlbertEmbeddings, TFElectraEmbeddings def _get_word_embedding_weight(self, embedding_layer):
return embeddings.word_embeddings if hasattr(embedding_layer, "word_embeddings"):
elif hasattr(embeddings, "weight"): return embedding_layer.word_embeddings
# TFSharedEmbeddings elif hasattr(embedding_layer, "weight"):
return embeddings.weight return embedding_layer.weight
elif hasattr(embedding_layer, "decoder"):
return embedding_layer.decoder
else: else:
# Here we build the word embeddings weights if not exists. # Here we build the word embeddings weights if not exists.
# And then we retry to get the attribute once built. # And then we retry to get the attribute once built.
embeddings.build([]) self(self.dummy_inputs)
if hasattr(embeddings, "word_embeddings"): if hasattr(embedding_layer, "word_embeddings"):
# TFBertEmbeddings, TFAlbertEmbeddings, TFElectraEmbeddings return embedding_layer.word_embeddings
return embeddings.word_embeddings elif hasattr(embedding_layer, "weight"):
elif hasattr(embeddings, "weight"): return embedding_layer.weight
# TFSharedEmbeddings elif hasattr(embedding_layer, "decoder"):
return embeddings.weight return embedding_layer.decoder
else: else:
raise ValueError("word embedding is not defined.") return None
def _resize_token_embeddings(self, new_num_tokens):
old_embeddings = self._get_word_embedding_weight(self.get_input_embeddings())
new_embeddings = self._get_resized_embeddings(old_embeddings, new_num_tokens)
# if word embeddings are not tied, make sure that lm head bias is resized as well
if self.get_bias() is not None:
old_lm_head_bias = self.get_bias()
new_lm_head_bias = self._get_resized_lm_head_bias(old_lm_head_bias, new_num_tokens)
self.set_bias(new_lm_head_bias)
# if word embeddings are not tied, make sure that lm head decoder is resized as well
if self.get_output_embeddings() is not None:
old_lm_head_decoder = self._get_word_embedding_weight(self.get_output_embeddings())
new_lm_head_decoder = self._get_resized_lm_head_decoder(old_lm_head_decoder, new_num_tokens)
self.set_output_embeddings(new_lm_head_decoder)
self.set_input_embeddings(new_embeddings)
return self.get_input_embeddings()
def _get_resized_lm_head_bias(self, old_lm_head_bias, new_num_tokens):
"""
Build a resized bias from the old ones. Increasing the size will add newly initialized vectors at the end.
Reducing the size will remove vectors from the end
Args:
old_lm_head_bias (:obj:`tf.Variable`):
Old lm head bias to be resized.
new_num_tokens (:obj:`int`, `optional`):
New number of tokens in the linear matrix.
Increasing the size will add newly initialized vectors at the end. Reducing the size will remove
vectors from the end. If not provided or :obj:`None`, just returns None
Return:
:obj:`tf.Variable`: Pointer to the resized bias.
"""
new_lm_head_bias = {}
for attr, weight in old_lm_head_bias.items():
first_dim, old_num_tokens = (None, shape_list(weight)[0]) if tf.rank(weight) == 1 else shape_list(weight)
size_diff = new_num_tokens - old_num_tokens
final_shape = [new_num_tokens] if first_dim is None else [first_dim, new_num_tokens]
# initialize new bias
if tf.math.greater(size_diff, 0):
padding_shape = [[0, size_diff]] if first_dim is None else [[0, 0], [0, size_diff]]
current_bias = tf.pad(weight.value(), tf.convert_to_tensor(padding_shape), constant_values=-1)
num_tokens_to_copy = min(old_num_tokens, new_num_tokens)
mask_shape = [num_tokens_to_copy] if first_dim is None else [1, num_tokens_to_copy]
bias_mask = tf.fill(tf.convert_to_tensor(mask_shape), True)
bias_mask = tf.pad(bias_mask, tf.convert_to_tensor(padding_shape), constant_values=False)
else:
slice_from = [0] if first_dim is None else [0, 0]
current_bias = tf.slice(
weight.value(), tf.convert_to_tensor(slice_from), tf.convert_to_tensor(final_shape)
)
bias_mask = tf.fill(tf.convert_to_tensor(final_shape), True)
new_bias = self.add_weight(
shape=final_shape,
initializer="zeros",
trainable=True,
name=weight.name.split(":")[0],
)
init_bias = tf.where(bias_mask, current_bias, new_bias.value())
new_bias.assign(init_bias)
new_lm_head_bias[attr] = new_bias
return new_lm_head_bias
def _get_resized_lm_head_decoder(self, old_lm_head_decoder, new_num_tokens):
"""
Build a resized decoder from the old ones. Increasing the size will add newly initialized vectors at the end.
Reducing the size will remove vectors from the end
Args:
old_lm_head_decoder (:obj:`tf.Variable`):
Old lm head decoder to be resized.
new_num_tokens (:obj:`int`, `optional`):
New number of tokens in the linear matrix.
Increasing the size will add newly initialized vectors at the end. Reducing the size will remove
vectors from the end. If not provided or :obj:`None`, just returns None
Return:
:obj:`tf.Variable`: Pointer to the resized decoder or None if the output embeddings are differents of the
input ones.
"""
new_lm_head_decoder = old_lm_head_decoder
is_input_output_equals = tf.reduce_any(
self._get_word_embedding_weight(self.get_input_embeddings()) == old_lm_head_decoder
)
if old_lm_head_decoder is not None and not is_input_output_equals:
old_embedding_dim = shape_list(old_lm_head_decoder)[1]
decoder_mask, current_decoder = init_copy_embeddings(old_lm_head_decoder, new_num_tokens)
new_lm_head_decoder = self.add_weight(
shape=(new_num_tokens, old_embedding_dim),
initializer="zeros",
trainable=True,
name=old_lm_head_decoder.name.split(":")[0],
)
init_decoder = tf.where(decoder_mask, current_decoder, new_lm_head_decoder.value())
new_lm_head_decoder.assign(init_decoder)
return new_lm_head_decoder
def _get_resized_embeddings(self, old_embeddings, new_num_tokens=None) -> tf.Variable: def _get_resized_embeddings(self, old_embeddings, new_num_tokens=None) -> tf.Variable:
""" """
Build a resized Embedding Module from a provided token Embedding Module. Increasing the size will add newly Build a resized Embedding weights from a provided token Embedding weights. Increasing the size will add newly
initialized vectors at the end. Reducing the size will remove vectors from the end initialized vectors at the end. Reducing the size will remove vectors from the end
Args: Args:
@@ -741,87 +960,18 @@ class TFPreTrainedModel(tf.keras.Model, TFModelUtilsMixin, TFGenerationMixin):
:obj:`tf.Variable`: Pointer to the resized Embedding Module or the old Embedding Module if :obj:`tf.Variable`: Pointer to the resized Embedding Module or the old Embedding Module if
:obj:`new_num_tokens` is :obj:`None` :obj:`new_num_tokens` is :obj:`None`
""" """
word_embeddings = self._get_word_embeddings(old_embeddings) old_embedding_dim = shape_list(old_embeddings)[1]
bias_layer = self.get_output_layer_with_bias()
if new_num_tokens is None:
return word_embeddings
old_num_tokens, old_embedding_dim = word_embeddings.shape
if old_num_tokens == new_num_tokens:
return word_embeddings
# initialize new embeddings
# todo: initializer range is not always passed in config.
init_range = getattr(self.config, "initializer_range", 0.02) init_range = getattr(self.config, "initializer_range", 0.02)
name = ( embeddings_mask, current_embeddings = init_copy_embeddings(old_embeddings, new_num_tokens)
self.name
+ "/"
+ self.base_model_prefix
+ "/"
+ old_embeddings.name
+ "/"
+ word_embeddings.name.split(":")[0]
)
new_embeddings = self.add_weight( new_embeddings = self.add_weight(
name=name, name=old_embeddings.name.split(":")[0],
shape=[new_num_tokens, old_embedding_dim], shape=[new_num_tokens, old_embedding_dim],
initializer=get_initializer(init_range), initializer=get_initializer(init_range),
dtype=tf.float32, dtype=tf.float32,
) )
init_weights = tf.make_ndarray(tf.make_tensor_proto(new_embeddings.value())) init_embeddings = tf.where(embeddings_mask, current_embeddings, new_embeddings.value())
# Copy token embeddings from the previous weights new_embeddings.assign(init_embeddings)
num_tokens_to_copy = min(old_num_tokens, new_num_tokens)
init_weights[:num_tokens_to_copy] = word_embeddings.value()[:num_tokens_to_copy, :]
new_embeddings.assign(init_weights)
if bias_layer is not None:
if not hasattr(bias_layer, "bias"):
bias_layer.build([])
# Second check in order to be sure the attribute has been properly created
if not hasattr(bias_layer, "bias"):
raise ValueError("bias is not defined.")
# initialize bias
init_bias = np.zeros((new_num_tokens,))
init_bias[:num_tokens_to_copy] = bias_layer.bias.value()[
:num_tokens_to_copy
] # tf.make_ndarray(tf.make_tensor_proto(bias_layer.bias.value()))[:num_tokens_to_copy]
bias_layer.bias = self.add_weight(
shape=(new_num_tokens,),
initializer="zeros",
trainable=True,
name=self.get_prefix_bias_name() + "/bias",
)
bias_layer.bias.assign(init_bias)
output_embeddings = self.get_output_embeddings()
if output_embeddings is not None:
if self.get_input_embeddings() != output_embeddings:
if not hasattr(output_embeddings, "decoder"):
output_embeddings.build([])
# Second check in order to be sure the attribute has been properly created
if not hasattr(output_embeddings, "decoder"):
raise ValueError("decoder is not defined.")
# initialize decoder
init_weights = np.zeros((new_num_tokens, old_embedding_dim))
init_weights[:num_tokens_to_copy] = output_embeddings.decoder.value()[:num_tokens_to_copy, :]
output_embeddings.decoder = self.add_weight(
shape=(new_num_tokens, old_embedding_dim),
initializer="zeros",
trainable=True,
name=self.get_prefix_bias_name() + "/decoder/weight",
)
output_embeddings.decoder.assign(init_weights)
return new_embeddings return new_embeddings

View File

@@ -470,6 +470,21 @@ class TFAlbertMLMHead(tf.keras.layers.Layer):
super().build(input_shape) super().build(input_shape)
def get_output_embeddings(self):
return self.decoder
def set_output_embeddings(self, value):
self.decoder.word_embeddings = value
self.decoder.vocab_size = shape_list(value)[0]
def get_bias(self):
return {"bias": self.bias, "decoder_bias": self.decoder_bias}
def set_bias(self, value):
self.bias = value["bias"]
self.decoder_bias = value["decoder_bias"]
self.vocab_size = shape_list(value["bias"])[0]
def call(self, hidden_states): def call(self, hidden_states):
hidden_states = self.dense(hidden_states) hidden_states = self.dense(hidden_states)
hidden_states = self.activation(hidden_states) hidden_states = self.activation(hidden_states)
@@ -505,10 +520,7 @@ class TFAlbertMainLayer(tf.keras.layers.Layer):
def set_input_embeddings(self, value): def set_input_embeddings(self, value):
self.embeddings.word_embeddings = value self.embeddings.word_embeddings = value
self.embeddings.vocab_size = value.shape[0] self.embeddings.vocab_size = shape_list(value)[0]
def _resize_token_embeddings(self, new_num_tokens):
raise NotImplementedError
def _prune_heads(self, heads_to_prune): def _prune_heads(self, heads_to_prune):
""" """
@@ -835,34 +847,8 @@ class TFAlbertForPreTraining(TFAlbertPreTrainedModel):
self.predictions = TFAlbertMLMHead(config, self.albert.embeddings, name="predictions") self.predictions = TFAlbertMLMHead(config, self.albert.embeddings, name="predictions")
self.sop_classifier = TFAlbertSOPHead(config, name="sop_classifier") self.sop_classifier = TFAlbertSOPHead(config, name="sop_classifier")
def get_output_embeddings(self): def get_lm_head(self):
return self.albert.embeddings return self.predictions
def resize_token_embeddings(self, new_num_tokens):
super().resize_token_embeddings(new_num_tokens=new_num_tokens)
# ALBERT is a special case where there are two bias to update
# even though self.bias is not used anywhere and is here
# just to make the loading weights from a PT model happy
if new_num_tokens is not None:
num_tokens_to_copy = min(self.predictions.bias.shape[0], new_num_tokens)
self.predictions.vocab_size = num_tokens_to_copy
init_bias = tf.zeros((new_num_tokens,))
init_bias[:num_tokens_to_copy] = self.predictions.bias.value()[:num_tokens_to_copy]
name = self.name + "/" + self.predictions.name + "/bias"
self.predictions.bias = self.add_weight(
shape=(new_num_tokens,), initializer="zeros", trainable=True, name=name
)
self.predictions.bias.assign(init_bias)
init_decoder_bias = tf.zeros((new_num_tokens,))
init_decoder_bias[:num_tokens_to_copy] = self.predictions.decoder_bias.value()[:num_tokens_to_copy]
name = self.name + "/" + self.predictions.name + "/decoder_bias"
self.predictions.decoder_bias = self.add_weight(
shape=(new_num_tokens,), initializer="zeros", trainable=True, name=name
)
self.predictions.decoder_bias.assign(init_decoder_bias)
@add_start_docstrings_to_model_forward(ALBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_start_docstrings_to_model_forward(ALBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
@replace_return_docstrings(output_type=TFAlbertForPreTrainingOutput, config_class=_CONFIG_FOR_DOC) @replace_return_docstrings(output_type=TFAlbertForPreTrainingOutput, config_class=_CONFIG_FOR_DOC)
@@ -980,34 +966,8 @@ class TFAlbertForMaskedLM(TFAlbertPreTrainedModel, TFMaskedLanguageModelingLoss)
self.albert = TFAlbertMainLayer(config, add_pooling_layer=False, name="albert") self.albert = TFAlbertMainLayer(config, add_pooling_layer=False, name="albert")
self.predictions = TFAlbertMLMHead(config, self.albert.embeddings, name="predictions") self.predictions = TFAlbertMLMHead(config, self.albert.embeddings, name="predictions")
def get_output_embeddings(self): def get_lm_head(self):
return self.albert.embeddings return self.predictions
def resize_token_embeddings(self, new_num_tokens):
super().resize_token_embeddings(new_num_tokens=new_num_tokens)
# ALBERT is a special case where there are two bias to update
# even though self.bias is not used anywhere and is here
# just to make the loading weights from a PT model happy
if new_num_tokens is not None:
num_tokens_to_copy = min(self.predictions.bias.shape[0], new_num_tokens)
self.predictions.vocab_size = num_tokens_to_copy
init_bias = tf.zeros((new_num_tokens,))
init_bias[:num_tokens_to_copy] = self.predictions.bias.value()[:num_tokens_to_copy]
name = self.name + "/" + self.predictions.name + "/bias"
self.predictions.bias = self.add_weight(
shape=(new_num_tokens,), initializer="zeros", trainable=True, name=name
)
self.predictions.bias.assign(init_bias)
init_decoder_bias = tf.zeros((new_num_tokens,))
init_decoder_bias[:num_tokens_to_copy] = self.predictions.decoder_bias.value()[:num_tokens_to_copy]
name = self.name + "/" + self.predictions.name + "/decoder_bias"
self.predictions.decoder_bias = self.add_weight(
shape=(new_num_tokens,), initializer="zeros", trainable=True, name=name
)
self.predictions.decoder_bias.assign(init_decoder_bias)
@add_start_docstrings_to_model_forward(ALBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_start_docstrings_to_model_forward(ALBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
@add_code_sample_docstrings( @add_code_sample_docstrings(

View File

@@ -481,6 +481,29 @@ class TFBartPretrainedModel(TFPreTrainedModel):
} }
return dummy_inputs return dummy_inputs
def get_input_embeddings(self):
base_model = getattr(self, self.base_model_prefix, self)
return base_model.shared
def set_input_embeddings(self, value):
base_model = getattr(self, self.base_model_prefix, self)
try:
base_model.shared.weight = value
except AttributeError:
self(self.dummy_inputs)
base_model.shared.weight = value
base_model.shared.vocab_size = shape_list(base_model.shared.weight)[0]
with tf.compat.v1.variable_scope("model.shared") as shared_abs_scope_name:
pass
embed_tokens = TFWrappedEmbeddings(base_model.shared, abs_scope_name=shared_abs_scope_name)
base_model.encoder.set_embed_tokens(embed_tokens)
base_model.decoder.set_embed_tokens(embed_tokens)
@tf.function( @tf.function(
input_signature=[ input_signature=[
{ {
@@ -634,6 +657,9 @@ class TFBartEncoder(tf.keras.layers.Layer):
else None else None
) )
def set_embed_tokens(self, embed_tokens):
self.embed_tokens = embed_tokens
def call( def call(
self, self,
input_ids=None, input_ids=None,
@@ -791,6 +817,9 @@ class TFBartDecoder(tf.keras.layers.Layer):
self.dropout = tf.keras.layers.Dropout(config.dropout) self.dropout = tf.keras.layers.Dropout(config.dropout)
self.do_blenderbot_90_layernorm = config.do_blenderbot_90_layernorm self.do_blenderbot_90_layernorm = config.do_blenderbot_90_layernorm
def set_embed_tokens(self, embed_tokens):
self.embed_tokens = embed_tokens
def call( def call(
self, self,
input_ids=None, input_ids=None,
@@ -1009,6 +1038,9 @@ class TFBartModel(TFBartPretrainedModel):
self.encoder = TFBartEncoder(config, embed_tokens, name="encoder") self.encoder = TFBartEncoder(config, embed_tokens, name="encoder")
self.decoder = TFBartDecoder(config, embed_tokens, name="decoder") self.decoder = TFBartDecoder(config, embed_tokens, name="decoder")
def get_encoder(self):
return self.encoder
def get_decoder(self): def get_decoder(self):
return self.decoder return self.decoder
@@ -1134,15 +1166,6 @@ class TFBartModel(TFBartPretrainedModel):
encoder_attentions=enc_attns, encoder_attentions=enc_attns,
) )
def get_input_embeddings(self):
return self.shared
def set_input_embeddings(self, value):
self.shared = value
def get_output_embeddings(self):
return self.shared
@add_start_docstrings( @add_start_docstrings(
"The BART Model with a language modeling head. Can be used for summarization.", "The BART Model with a language modeling head. Can be used for summarization.",
@@ -1166,22 +1189,20 @@ class TFBartForConditionalGeneration(TFBartPretrainedModel):
def get_decoder(self): def get_decoder(self):
return self.model.decoder return self.model.decoder
def resize_token_embeddings(self, new_num_tokens): def get_encoder(self):
super().resize_token_embeddings(new_num_tokens=new_num_tokens) return self.model.encoder
# BART is a special case where the bias has two dimensions def get_output_embeddings(self):
# and not named just `bias` return self.get_input_embeddings()
if new_num_tokens is not None:
num_tokens_to_copy = min(self.final_logits_bias.shape[0], new_num_tokens) def set_output_embeddings(self, value):
init_bias = tf.zeros((new_num_tokens,)) self.set_input_embeddings(value)
init_bias[:num_tokens_to_copy] = self.final_logits_bias.value()[:num_tokens_to_copy]
self.final_logits_bias = self.add_weight( def get_bias(self):
shape=(1, new_num_tokens), return {"final_logits_bias": self.final_logits_bias}
initializer="zeros",
trainable=False, def set_bias(self, value):
name="final_logits_bias", self.final_logits_bias = value["final_logits_bias"]
)
self.final_logits_bias.assign(init_bias)
@add_start_docstrings_to_model_forward(BART_INPUTS_DOCSTRING) @add_start_docstrings_to_model_forward(BART_INPUTS_DOCSTRING)
@replace_return_docstrings(output_type=TFSeq2SeqLMOutput, config_class=_CONFIG_FOR_DOC) @replace_return_docstrings(output_type=TFSeq2SeqLMOutput, config_class=_CONFIG_FOR_DOC)
@@ -1356,12 +1377,6 @@ class TFBartForConditionalGeneration(TFBartPretrainedModel):
else: else:
return logits return logits
def get_output_embeddings(self):
return self.model.shared
def get_encoder(self):
return self.model.encoder
def compute_loss(self, labels, logits): def compute_loss(self, labels, logits):
"""CrossEntropyLoss that ignores pad tokens""" """CrossEntropyLoss that ignores pad tokens"""
loss_fn = tf.keras.losses.SparseCategoricalCrossentropy( loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(

View File

@@ -15,6 +15,7 @@
# limitations under the License. # limitations under the License.
""" TF 2.0 BERT model. """ """ TF 2.0 BERT model. """
import warnings
from dataclasses import dataclass from dataclasses import dataclass
from typing import Optional, Tuple from typing import Optional, Tuple
@@ -526,6 +527,20 @@ class TFBertLMPredictionHead(tf.keras.layers.Layer):
super().build(input_shape) super().build(input_shape)
def get_output_embeddings(self):
return self.input_embeddings
def set_output_embeddings(self, value):
self.input_embeddings.word_embeddings = value
self.input_embeddings.vocab_size = shape_list(value)[0]
def get_bias(self):
return {"bias": self.bias}
def set_bias(self, value):
self.bias = value["bias"]
self.vocab_size = shape_list(value["bias"])[0]
def call(self, hidden_states): def call(self, hidden_states):
hidden_states = self.transform(hidden_states) hidden_states = self.transform(hidden_states)
hidden_states = self.input_embeddings(hidden_states, mode="linear") hidden_states = self.input_embeddings(hidden_states, mode="linear")
@@ -582,7 +597,7 @@ class TFBertMainLayer(tf.keras.layers.Layer):
def set_input_embeddings(self, value): def set_input_embeddings(self, value):
self.embeddings.word_embeddings = value self.embeddings.word_embeddings = value
self.embeddings.vocab_size = value.shape[0] self.embeddings.vocab_size = shape_list(value)[0]
def _prune_heads(self, heads_to_prune): def _prune_heads(self, heads_to_prune):
""" """
@@ -918,13 +933,11 @@ class TFBertForPreTraining(TFBertPreTrainedModel, TFBertPreTrainingLoss):
self.nsp = TFBertNSPHead(config, name="nsp___cls") self.nsp = TFBertNSPHead(config, name="nsp___cls")
self.mlm = TFBertMLMHead(config, self.bert.embeddings, name="mlm___cls") self.mlm = TFBertMLMHead(config, self.bert.embeddings, name="mlm___cls")
def get_output_embeddings(self): def get_lm_head(self):
return self.bert.embeddings
def get_output_layer_with_bias(self):
return self.mlm.predictions return self.mlm.predictions
def get_prefix_bias_name(self): def get_prefix_bias_name(self):
warnings.warn("The method get_prefix_bias_name is deprecated. Please use `get_bias` instead.", FutureWarning)
return self.name + "/" + self.mlm.name + "/" + self.mlm.predictions.name return self.name + "/" + self.mlm.name + "/" + self.mlm.predictions.name
@add_start_docstrings_to_model_forward(BERT_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_start_docstrings_to_model_forward(BERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
@@ -1044,13 +1057,11 @@ class TFBertForMaskedLM(TFBertPreTrainedModel, TFMaskedLanguageModelingLoss):
self.bert = TFBertMainLayer(config, add_pooling_layer=False, name="bert") self.bert = TFBertMainLayer(config, add_pooling_layer=False, name="bert")
self.mlm = TFBertMLMHead(config, self.bert.embeddings, name="mlm___cls") self.mlm = TFBertMLMHead(config, self.bert.embeddings, name="mlm___cls")
def get_output_embeddings(self): def get_lm_head(self):
return self.bert.embeddings
def get_output_layer_with_bias(self):
return self.mlm.predictions return self.mlm.predictions
def get_prefix_bias_name(self): def get_prefix_bias_name(self):
warnings.warn("The method get_prefix_bias_name is deprecated. Please use `get_bias` instead.", FutureWarning)
return self.name + "/" + self.mlm.name + "/" + self.mlm.predictions.name return self.name + "/" + self.mlm.name + "/" + self.mlm.predictions.name
@add_start_docstrings_to_model_forward(BERT_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_start_docstrings_to_model_forward(BERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
@@ -1149,13 +1160,11 @@ class TFBertLMHeadModel(TFBertPreTrainedModel, TFCausalLanguageModelingLoss):
self.bert = TFBertMainLayer(config, add_pooling_layer=False, name="bert") self.bert = TFBertMainLayer(config, add_pooling_layer=False, name="bert")
self.mlm = TFBertMLMHead(config, self.bert.embeddings, name="mlm___cls") self.mlm = TFBertMLMHead(config, self.bert.embeddings, name="mlm___cls")
def get_output_embeddings(self): def get_lm_head(self):
return self.bert.embeddings
def get_output_layer_with_bias(self):
return self.mlm.predictions return self.mlm.predictions
def get_prefix_bias_name(self): def get_prefix_bias_name(self):
warnings.warn("The method get_prefix_bias_name is deprecated. Please use `get_bias` instead.", FutureWarning)
return self.name + "/" + self.mlm.name + "/" + self.mlm.predictions.name return self.name + "/" + self.mlm.name + "/" + self.mlm.predictions.name
@add_code_sample_docstrings( @add_code_sample_docstrings(

View File

@@ -15,6 +15,8 @@
# limitations under the License. # limitations under the License.
""" TF 2.0 CTRL model.""" """ TF 2.0 CTRL model."""
import warnings
import numpy as np import numpy as np
import tensorflow as tf import tensorflow as tf
@@ -242,10 +244,7 @@ class TFCTRLMainLayer(tf.keras.layers.Layer):
def set_input_embeddings(self, value): def set_input_embeddings(self, value):
self.w.weight = value self.w.weight = value
self.w.vocab_size = value.shape[0] self.w.vocab_size = shape_list(value)[0]
def _resize_token_embeddings(self, new_num_tokens):
raise NotImplementedError
def _prune_heads(self, heads_to_prune): def _prune_heads(self, heads_to_prune):
""" """
@@ -618,6 +617,20 @@ class TFCTRLLMHead(tf.keras.layers.Layer):
self.bias = self.add_weight(shape=(self.vocab_size,), initializer="zeros", trainable=True, name="bias") self.bias = self.add_weight(shape=(self.vocab_size,), initializer="zeros", trainable=True, name="bias")
super().build(input_shape) super().build(input_shape)
def get_output_embeddings(self):
return self.input_embeddings
def set_output_embeddings(self, value):
self.input_embeddings.weight = value
self.input_embeddings.vocab_size = shape_list(value)[0]
def get_bias(self):
return {"bias": self.bias}
def set_bias(self, value):
self.bias = value["bias"]
self.vocab_size = shape_list(value["bias"])[0]
def call(self, hidden_states): def call(self, hidden_states):
hidden_states = self.input_embeddings(hidden_states, mode="linear") hidden_states = self.input_embeddings(hidden_states, mode="linear")
hidden_states = hidden_states + self.bias hidden_states = hidden_states + self.bias
@@ -638,13 +651,11 @@ class TFCTRLLMHeadModel(TFCTRLPreTrainedModel, TFCausalLanguageModelingLoss):
self.lm_head = TFCTRLLMHead(config, self.transformer.w, name="lm_head") self.lm_head = TFCTRLLMHead(config, self.transformer.w, name="lm_head")
def get_output_embeddings(self): def get_lm_head(self):
return self.lm_head.input_embeddings
def get_output_layer_with_bias(self):
return self.lm_head return self.lm_head
def get_prefix_bias_name(self): def get_prefix_bias_name(self):
warnings.warn("The method get_prefix_bias_name is deprecated. Please use `get_bias` instead.", FutureWarning)
return self.name + "/" + self.lm_head.name return self.name + "/" + self.lm_head.name
def prepare_inputs_for_generation(self, inputs, past, **kwargs): def prepare_inputs_for_generation(self, inputs, past, **kwargs):

View File

@@ -16,6 +16,8 @@
TF 2.0 DistilBERT model TF 2.0 DistilBERT model
""" """
import warnings
import tensorflow as tf import tensorflow as tf
from ...activations_tf import get_tf_activation from ...activations_tf import get_tf_activation
@@ -39,7 +41,6 @@ from ...modeling_tf_utils import (
TFPreTrainedModel, TFPreTrainedModel,
TFQuestionAnsweringLoss, TFQuestionAnsweringLoss,
TFSequenceClassificationLoss, TFSequenceClassificationLoss,
TFSharedEmbeddings,
TFTokenClassificationLoss, TFTokenClassificationLoss,
get_initializer, get_initializer,
input_processing, input_processing,
@@ -72,9 +73,6 @@ class TFEmbeddings(tf.keras.layers.Layer):
self.vocab_size = config.vocab_size self.vocab_size = config.vocab_size
self.dim = config.dim self.dim = config.dim
self.initializer_range = config.initializer_range self.initializer_range = config.initializer_range
self.word_embeddings = TFSharedEmbeddings(
config.vocab_size, config.dim, initializer_range=config.initializer_range, name="word_embeddings"
) # padding_idx=0)
self.position_embeddings = tf.keras.layers.Embedding( self.position_embeddings = tf.keras.layers.Embedding(
config.max_position_embeddings, config.max_position_embeddings,
config.dim, config.dim,
@@ -648,6 +646,20 @@ class TFDistilBertLMHead(tf.keras.layers.Layer):
self.bias = self.add_weight(shape=(self.vocab_size,), initializer="zeros", trainable=True, name="bias") self.bias = self.add_weight(shape=(self.vocab_size,), initializer="zeros", trainable=True, name="bias")
super().build(input_shape) super().build(input_shape)
def get_output_embeddings(self):
return self.input_embeddings
def set_output_embeddings(self, value):
self.input_embeddings.word_embeddings = value
self.input_embeddings.vocab_size = shape_list(value)[0]
def get_bias(self):
return {"bias": self.bias}
def set_bias(self, value):
self.bias = value["bias"]
self.vocab_size = shape_list(value["bias"])[0]
def call(self, hidden_states): def call(self, hidden_states):
hidden_states = self.input_embeddings(hidden_states, mode="linear") hidden_states = self.input_embeddings(hidden_states, mode="linear")
hidden_states = hidden_states + self.bias hidden_states = hidden_states + self.bias
@@ -671,13 +683,11 @@ class TFDistilBertForMaskedLM(TFDistilBertPreTrainedModel, TFMaskedLanguageModel
self.vocab_layer_norm = tf.keras.layers.LayerNormalization(epsilon=1e-12, name="vocab_layer_norm") self.vocab_layer_norm = tf.keras.layers.LayerNormalization(epsilon=1e-12, name="vocab_layer_norm")
self.vocab_projector = TFDistilBertLMHead(config, self.distilbert.embeddings, name="vocab_projector") self.vocab_projector = TFDistilBertLMHead(config, self.distilbert.embeddings, name="vocab_projector")
def get_output_embeddings(self): def get_lm_head(self):
return self.vocab_projector.input_embeddings
def get_output_layer_with_bias(self):
return self.vocab_projector return self.vocab_projector
def get_prefix_bias_name(self): def get_prefix_bias_name(self):
warnings.warn("The method get_prefix_bias_name is deprecated. Please use `get_bias` instead.", FutureWarning)
return self.name + "/" + self.vocab_projector.name return self.name + "/" + self.vocab_projector.name
@add_start_docstrings_to_model_forward(DISTILBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_start_docstrings_to_model_forward(DISTILBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))

View File

@@ -577,6 +577,10 @@ class TFDPRContextEncoder(TFDPRPretrainedContextEncoder):
self.ctx_encoder = TFDPREncoderLayer(config, name="ctx_encoder") self.ctx_encoder = TFDPREncoderLayer(config, name="ctx_encoder")
def get_input_embeddings(self): def get_input_embeddings(self):
try:
return self.ctx_encoder.bert_model.get_input_embeddings()
except AttributeError:
self(self.dummy_inputs)
return self.ctx_encoder.bert_model.get_input_embeddings() return self.ctx_encoder.bert_model.get_input_embeddings()
@add_start_docstrings_to_model_forward(TF_DPR_ENCODERS_INPUTS_DOCSTRING) @add_start_docstrings_to_model_forward(TF_DPR_ENCODERS_INPUTS_DOCSTRING)
@@ -671,6 +675,10 @@ class TFDPRQuestionEncoder(TFDPRPretrainedQuestionEncoder):
self.question_encoder = TFDPREncoderLayer(config, name="question_encoder") self.question_encoder = TFDPREncoderLayer(config, name="question_encoder")
def get_input_embeddings(self): def get_input_embeddings(self):
try:
return self.question_encoder.bert_model.get_input_embeddings()
except AttributeError:
self(self.dummy_inputs)
return self.question_encoder.bert_model.get_input_embeddings() return self.question_encoder.bert_model.get_input_embeddings()
@add_start_docstrings_to_model_forward(TF_DPR_ENCODERS_INPUTS_DOCSTRING) @add_start_docstrings_to_model_forward(TF_DPR_ENCODERS_INPUTS_DOCSTRING)
@@ -764,6 +772,10 @@ class TFDPRReader(TFDPRPretrainedReader):
self.span_predictor = TFDPRSpanPredictorLayer(config, name="span_predictor") self.span_predictor = TFDPRSpanPredictorLayer(config, name="span_predictor")
def get_input_embeddings(self): def get_input_embeddings(self):
try:
return self.span_predictor.encoder.bert_model.get_input_embeddings()
except AttributeError:
self(self.dummy_inputs)
return self.span_predictor.encoder.bert_model.get_input_embeddings() return self.span_predictor.encoder.bert_model.get_input_embeddings()
@add_start_docstrings_to_model_forward(TF_DPR_READER_INPUTS_DOCSTRING) @add_start_docstrings_to_model_forward(TF_DPR_READER_INPUTS_DOCSTRING)

View File

@@ -14,6 +14,7 @@
# limitations under the License. # limitations under the License.
""" TF Electra model. """ """ TF Electra model. """
import warnings
from dataclasses import dataclass from dataclasses import dataclass
from typing import Optional, Tuple from typing import Optional, Tuple
@@ -511,10 +512,7 @@ class TFElectraMainLayer(tf.keras.layers.Layer):
def set_input_embeddings(self, value): def set_input_embeddings(self, value):
self.embeddings.word_embeddings = value self.embeddings.word_embeddings = value
self.embeddings.vocab_size = value.shape[0] self.embeddings.vocab_size = shape_list(value)[0]
def _resize_token_embeddings(self, new_num_tokens):
raise NotImplementedError
def _prune_heads(self, heads_to_prune): def _prune_heads(self, heads_to_prune):
""" """
@@ -912,6 +910,20 @@ class TFElectraMaskedLMHead(tf.keras.layers.Layer):
super().build(input_shape) super().build(input_shape)
def get_output_embeddings(self):
return self.input_embeddings
def set_output_embeddings(self, value):
self.input_embeddings.word_embeddings = value
self.input_embeddings.vocab_size = shape_list(value)[0]
def get_bias(self):
return {"bias": self.bias}
def set_bias(self, value):
self.bias = value["bias"]
self.vocab_size = shape_list(value["bias"])[0]
def call(self, hidden_states, training=False): def call(self, hidden_states, training=False):
hidden_states = self.input_embeddings(hidden_states, mode="linear") hidden_states = self.input_embeddings(hidden_states, mode="linear")
hidden_states = hidden_states + self.bias hidden_states = hidden_states + self.bias
@@ -943,13 +955,11 @@ class TFElectraForMaskedLM(TFElectraPreTrainedModel, TFMaskedLanguageModelingLos
self.generator_lm_head = TFElectraMaskedLMHead(config, self.electra.embeddings, name="generator_lm_head") self.generator_lm_head = TFElectraMaskedLMHead(config, self.electra.embeddings, name="generator_lm_head")
def get_output_embeddings(self): def get_lm_head(self):
return self.electra.embeddings
def get_output_layer_with_bias(self):
return self.generator_lm_head return self.generator_lm_head
def get_prefix_bias_name(self): def get_prefix_bias_name(self):
warnings.warn("The method get_prefix_bias_name is deprecated. Please use `get_bias` instead.", FutureWarning)
return self.name + "/" + self.generator_lm_head.name return self.name + "/" + self.generator_lm_head.name
@add_start_docstrings_to_model_forward(ELECTRA_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_start_docstrings_to_model_forward(ELECTRA_INPUTS_DOCSTRING.format("batch_size, sequence_length"))

View File

@@ -18,6 +18,7 @@
import itertools import itertools
import random import random
import warnings
from dataclasses import dataclass from dataclasses import dataclass
from typing import Optional, Tuple from typing import Optional, Tuple
@@ -478,6 +479,10 @@ class TFFlaubertMainLayer(tf.keras.layers.Layer):
def get_input_embeddings(self): def get_input_embeddings(self):
return self.embeddings return self.embeddings
def set_input_embeddings(self, value):
self.embeddings.weight = value
self.embeddings.vocab_size = shape_list(value)[0]
def call( def call(
self, self,
input_ids=None, input_ids=None,
@@ -721,6 +726,20 @@ class TFFlaubertPredLayer(tf.keras.layers.Layer):
super().build(input_shape) super().build(input_shape)
def get_output_embeddings(self):
return self.input_embeddings
def set_output_embeddings(self, value):
self.input_embeddings.weight = value
self.input_embeddings.vocab_size = shape_list(value)[0]
def get_bias(self):
return {"bias": self.bias}
def set_bias(self, value):
self.bias = value["bias"]
self.vocab_size = shape_list(value["bias"])[0]
def call(self, hidden_states): def call(self, hidden_states):
hidden_states = self.input_embeddings(hidden_states, mode="linear") hidden_states = self.input_embeddings(hidden_states, mode="linear")
hidden_states = hidden_states + self.bias hidden_states = hidden_states + self.bias
@@ -767,13 +786,11 @@ class TFFlaubertWithLMHeadModel(TFFlaubertPreTrainedModel):
self.transformer = TFFlaubertMainLayer(config, name="transformer") self.transformer = TFFlaubertMainLayer(config, name="transformer")
self.pred_layer = TFFlaubertPredLayer(config, self.transformer.embeddings, name="pred_layer_._proj") self.pred_layer = TFFlaubertPredLayer(config, self.transformer.embeddings, name="pred_layer_._proj")
def get_output_embeddings(self): def get_lm_head(self):
return self.pred_layer.input_embeddings
def get_output_layer_with_bias(self):
return self.pred_layer return self.pred_layer
def get_prefix_bias_name(self): def get_prefix_bias_name(self):
warnings.warn("The method get_prefix_bias_name is deprecated. Please use `get_bias` instead.", FutureWarning)
return self.name + "/" + self.pred_layer.name return self.name + "/" + self.pred_layer.name
def prepare_inputs_for_generation(self, inputs, **kwargs): def prepare_inputs_for_generation(self, inputs, **kwargs):

View File

@@ -14,6 +14,7 @@
# limitations under the License. # limitations under the License.
""" TF 2.0 Funnel model. """ """ TF 2.0 Funnel model. """
import warnings
from dataclasses import dataclass from dataclasses import dataclass
from typing import Optional, Tuple from typing import Optional, Tuple
@@ -787,7 +788,7 @@ class TFFunnelBaseLayer(tf.keras.layers.Layer):
def set_input_embeddings(self, value): def set_input_embeddings(self, value):
self.embeddings.word_embeddings = value self.embeddings.word_embeddings = value
self.embeddings.vocab_size = value.shape[0] self.embeddings.vocab_size = shape_list(value)[0]
def _prune_heads(self, heads_to_prune): def _prune_heads(self, heads_to_prune):
raise NotImplementedError # Not implemented yet in the library fr TF 2.0 models raise NotImplementedError # Not implemented yet in the library fr TF 2.0 models
@@ -873,7 +874,7 @@ class TFFunnelMainLayer(tf.keras.layers.Layer):
def set_input_embeddings(self, value): def set_input_embeddings(self, value):
self.embeddings.word_embeddings = value self.embeddings.word_embeddings = value
self.embeddings.vocab_size = value.shape[0] self.embeddings.vocab_size = shape_list(value)[0]
def _prune_heads(self, heads_to_prune): def _prune_heads(self, heads_to_prune):
raise NotImplementedError # Not implemented yet in the library fr TF 2.0 models raise NotImplementedError # Not implemented yet in the library fr TF 2.0 models
@@ -992,6 +993,20 @@ class TFFunnelMaskedLMHead(tf.keras.layers.Layer):
self.bias = self.add_weight(shape=(self.vocab_size,), initializer="zeros", trainable=True, name="bias") self.bias = self.add_weight(shape=(self.vocab_size,), initializer="zeros", trainable=True, name="bias")
super().build(input_shape) super().build(input_shape)
def get_output_embeddings(self):
return self.input_embeddings
def set_output_embeddings(self, value):
self.input_embeddings.word_embeddings = value
self.input_embeddings.vocab_size = shape_list(value)[0]
def get_bias(self):
return {"bias": self.bias}
def set_bias(self, value):
self.bias = value["bias"]
self.vocab_size = shape_list(value["bias"])[0]
def call(self, hidden_states, training=False): def call(self, hidden_states, training=False):
hidden_states = self.input_embeddings(hidden_states, mode="linear") hidden_states = self.input_embeddings(hidden_states, mode="linear")
hidden_states = hidden_states + self.bias hidden_states = hidden_states + self.bias
@@ -1349,13 +1364,11 @@ class TFFunnelForMaskedLM(TFFunnelPreTrainedModel, TFMaskedLanguageModelingLoss)
self.funnel = TFFunnelMainLayer(config, name="funnel") self.funnel = TFFunnelMainLayer(config, name="funnel")
self.lm_head = TFFunnelMaskedLMHead(config, self.funnel.embeddings, name="lm_head") self.lm_head = TFFunnelMaskedLMHead(config, self.funnel.embeddings, name="lm_head")
def get_output_embeddings(self): def get_lm_head(self):
return self.funnel.embeddings
def get_output_layer_with_bias(self):
return self.lm_head return self.lm_head
def get_prefix_bias_name(self): def get_prefix_bias_name(self):
warnings.warn("The method get_prefix_bias_name is deprecated. Please use `get_bias` instead.", FutureWarning)
return self.name + "/" + self.lm_head.name return self.name + "/" + self.lm_head.name
@add_start_docstrings_to_model_forward(FUNNEL_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_start_docstrings_to_model_forward(FUNNEL_INPUTS_DOCSTRING.format("batch_size, sequence_length"))

View File

@@ -243,7 +243,7 @@ class TFGPT2MainLayer(tf.keras.layers.Layer):
def set_input_embeddings(self, value): def set_input_embeddings(self, value):
self.wte.weight = value self.wte.weight = value
self.wte.vocab_size = self.wte.weight.shape[0] self.wte.vocab_size = shape_list(value)[0]
def _prune_heads(self, heads_to_prune): def _prune_heads(self, heads_to_prune):
""" """
@@ -653,7 +653,10 @@ class TFGPT2LMHeadModel(TFGPT2PreTrainedModel, TFCausalLanguageModelingLoss):
self.transformer = TFGPT2MainLayer(config, name="transformer") self.transformer = TFGPT2MainLayer(config, name="transformer")
def get_output_embeddings(self): def get_output_embeddings(self):
return self.transformer.wte return self.get_input_embeddings()
def set_output_embeddings(self, value):
self.set_input_embeddings(value)
def prepare_inputs_for_generation(self, inputs, past, **kwargs): def prepare_inputs_for_generation(self, inputs, past, **kwargs):
# only last token for inputs_ids if past is defined in kwargs # only last token for inputs_ids if past is defined in kwargs
@@ -771,9 +774,6 @@ class TFGPT2DoubleHeadsModel(TFGPT2PreTrainedModel):
config, initializer_range=config.initializer_range, name="multiple_choice_head" config, initializer_range=config.initializer_range, name="multiple_choice_head"
) )
def get_output_embeddings(self):
return self.transformer.wte
@add_start_docstrings_to_model_forward(GPT2_INPUTS_DOCSTRING) @add_start_docstrings_to_model_forward(GPT2_INPUTS_DOCSTRING)
@replace_return_docstrings(output_type=TFGPT2DoubleHeadsModelOutput, config_class=_CONFIG_FOR_DOC) @replace_return_docstrings(output_type=TFGPT2DoubleHeadsModelOutput, config_class=_CONFIG_FOR_DOC)
def call( def call(
@@ -945,9 +945,6 @@ class TFGPT2ForSequenceClassification(TFGPT2PreTrainedModel, TFSequenceClassific
) )
self.transformer = TFGPT2MainLayer(config, name="transformer") self.transformer = TFGPT2MainLayer(config, name="transformer")
def get_output_embeddings(self):
return self.transformer.wte
@add_start_docstrings_to_model_forward(GPT2_INPUTS_DOCSTRING) @add_start_docstrings_to_model_forward(GPT2_INPUTS_DOCSTRING)
@add_code_sample_docstrings( @add_code_sample_docstrings(
tokenizer_class=_TOKENIZER_FOR_DOC, tokenizer_class=_TOKENIZER_FOR_DOC,

View File

@@ -1182,6 +1182,44 @@ class TFLEDPreTrainedModel(TFPreTrainedModel):
} }
return dummy_inputs return dummy_inputs
def get_input_embeddings(self):
base_model = getattr(self, self.base_model_prefix, self)
return base_model.shared
def set_input_embeddings(self, value):
base_model = getattr(self, self.base_model_prefix, self)
try:
base_model.shared.weight = value
except AttributeError:
self(self.dummy_inputs)
base_model.shared.weight = value
base_model.shared.vocab_size = shape_list(base_model.shared.weight)[0]
with tf.compat.v1.variable_scope("model.shared") as shared_abs_scope_name:
pass
embed_tokens = TFWrappedEmbeddings(base_model.shared, abs_scope_name=shared_abs_scope_name)
base_model.encoder.set_embed_tokens(embed_tokens)
base_model.decoder.set_embed_tokens(embed_tokens)
@tf.function(
input_signature=[
{
"input_ids": tf.TensorSpec((None, None), tf.int32, name="input_ids"),
"attention_mask": tf.TensorSpec((None, None), tf.int32, name="attention_mask"),
"decoder_input_ids": tf.TensorSpec((None, None), tf.int32, name="decoder_input_ids"),
"decoder_attention_mask": tf.TensorSpec((None, None), tf.int32, name="decoder_attention_mask"),
}
]
)
def serving(self, inputs):
output = self.call(inputs)
return self.serving_output(output)
@dataclass @dataclass
# Copied from transformers.models.longformer.modeling_tf_longformer.TFLongformerBaseModelOutput with TFLongformer->TFLEDEncoder # Copied from transformers.models.longformer.modeling_tf_longformer.TFLongformerBaseModelOutput with TFLongformer->TFLEDEncoder
@@ -1483,6 +1521,9 @@ class TFLEDEncoder(tf.keras.layers.Layer):
self.layers = [TFLEDEncoderLayer(config, i, name=f"layers.{i}") for i in range(config.encoder_layers)] self.layers = [TFLEDEncoderLayer(config, i, name=f"layers.{i}") for i in range(config.encoder_layers)]
self.layernorm_embedding = tf.keras.layers.LayerNormalization(epsilon=1e-5, name="layernorm_embedding") self.layernorm_embedding = tf.keras.layers.LayerNormalization(epsilon=1e-5, name="layernorm_embedding")
def set_embed_tokens(self, embed_tokens):
self.embed_tokens = embed_tokens
def call( def call(
self, self,
input_ids=None, input_ids=None,
@@ -1714,6 +1755,9 @@ class TFLEDDecoder(tf.keras.layers.Layer):
self.dropout = tf.keras.layers.Dropout(config.dropout) self.dropout = tf.keras.layers.Dropout(config.dropout)
def set_embed_tokens(self, embed_tokens):
self.embed_tokens = embed_tokens
def call( def call(
self, self,
input_ids=None, input_ids=None,
@@ -1921,6 +1965,9 @@ class TFLEDModel(TFLEDPreTrainedModel):
self.encoder = TFLEDEncoder(config, embed_tokens, name="encoder") self.encoder = TFLEDEncoder(config, embed_tokens, name="encoder")
self.decoder = TFLEDDecoder(config, embed_tokens, name="decoder") self.decoder = TFLEDDecoder(config, embed_tokens, name="decoder")
def get_encoder(self):
return self.encoder
def get_decoder(self): def get_decoder(self):
return self.decoder return self.decoder
@@ -2047,15 +2094,6 @@ class TFLEDModel(TFLEDPreTrainedModel):
encoder_global_attentions=enc_g_attns, encoder_global_attentions=enc_g_attns,
) )
def get_input_embeddings(self):
return self.shared
def set_input_embeddings(self, value):
self.shared = value
def get_output_embeddings(self):
return self.shared
@add_start_docstrings( @add_start_docstrings(
"The LED Model with a language modeling head. Can be used for summarization.", "The LED Model with a language modeling head. Can be used for summarization.",
@@ -2079,22 +2117,20 @@ class TFLEDForConditionalGeneration(TFLEDPreTrainedModel):
def get_decoder(self): def get_decoder(self):
return self.led.decoder return self.led.decoder
def resize_token_embeddings(self, new_num_tokens): def get_encoder(self):
super().resize_token_embeddings(new_num_tokens=new_num_tokens) return self.led.encoder
# LED is a special case where the bias has two dimensions def get_bias(self):
# and not named just `bias` return {"final_logits_bias": self.final_logits_bias}
if new_num_tokens is not None:
num_tokens_to_copy = min(shape_list(self.final_logits_bias), new_num_tokens) def set_bias(self, value):
init_bias = tf.zeros((new_num_tokens,)) self.final_logits_bias = value["final_logits_bias"]
init_bias[:num_tokens_to_copy] = self.final_logits_bias.value()[:num_tokens_to_copy]
self.final_logits_bias = self.add_weight( def get_output_embeddings(self):
shape=(1, new_num_tokens), return self.get_input_embeddings()
initializer="zeros",
trainable=False, def set_output_embeddings(self, value):
name="final_logits_bias", self.set_input_embeddings(value)
)
self.final_logits_bias.assign(init_bias)
@add_start_docstrings_to_model_forward(LED_INPUTS_DOCSTRING) @add_start_docstrings_to_model_forward(LED_INPUTS_DOCSTRING)
@replace_return_docstrings(output_type=TFLEDSeq2SeqLMOutput, config_class=_CONFIG_FOR_DOC) @replace_return_docstrings(output_type=TFLEDSeq2SeqLMOutput, config_class=_CONFIG_FOR_DOC)
@@ -2266,12 +2302,6 @@ class TFLEDForConditionalGeneration(TFLEDPreTrainedModel):
) )
return (past[0], reordered_past) return (past[0], reordered_past)
def get_output_embeddings(self):
return self.led.shared
def get_encoder(self):
return self.led.encoder
def compute_loss(self, labels, logits): def compute_loss(self, labels, logits):
"""CrossEntropyLoss that ignores pad tokens""" """CrossEntropyLoss that ignores pad tokens"""
loss_fn = tf.keras.losses.SparseCategoricalCrossentropy( loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(

View File

@@ -14,6 +14,7 @@
# limitations under the License. # limitations under the License.
"""Tensorflow Longformer model. """ """Tensorflow Longformer model. """
import warnings
from dataclasses import dataclass from dataclasses import dataclass
from typing import Optional, Tuple from typing import Optional, Tuple
@@ -437,6 +438,20 @@ class TFLongformerLMHead(tf.keras.layers.Layer):
super().build(input_shape) super().build(input_shape)
def get_output_embeddings(self):
return self.decoder
def set_output_embeddings(self, value):
self.decoder.word_embeddings = value
self.decoder.vocab_size = shape_list(value)[0]
def get_bias(self):
return {"bias": self.bias}
def set_bias(self, value):
self.bias = value["bias"]
self.vocab_size = shape_list(value["bias"])[0]
def call(self, hidden_states): def call(self, hidden_states):
hidden_states = self.dense(hidden_states) hidden_states = self.dense(hidden_states)
hidden_states = self.act(hidden_states) hidden_states = self.act(hidden_states)
@@ -1602,7 +1617,7 @@ class TFLongformerMainLayer(tf.keras.layers.Layer):
def set_input_embeddings(self, value): def set_input_embeddings(self, value):
self.embeddings.word_embeddings = value self.embeddings.word_embeddings = value
self.embeddings.vocab_size = value.shape[0] self.embeddings.vocab_size = shape_list(value)[0]
def _prune_heads(self, heads_to_prune): def _prune_heads(self, heads_to_prune):
""" """
@@ -2040,13 +2055,11 @@ class TFLongformerForMaskedLM(TFLongformerPreTrainedModel, TFMaskedLanguageModel
self.longformer = TFLongformerMainLayer(config, add_pooling_layer=False, name="longformer") self.longformer = TFLongformerMainLayer(config, add_pooling_layer=False, name="longformer")
self.lm_head = TFLongformerLMHead(config, self.longformer.embeddings, name="lm_head") self.lm_head = TFLongformerLMHead(config, self.longformer.embeddings, name="lm_head")
def get_output_embeddings(self): def get_lm_head(self):
return self.lm_head.decoder
def get_output_layer_with_bias(self):
return self.lm_head return self.lm_head
def get_prefix_bias_name(self): def get_prefix_bias_name(self):
warnings.warn("The method get_prefix_bias_name is deprecated. Please use `get_bias` instead.", FutureWarning)
return self.name + "/" + self.lm_head.name return self.name + "/" + self.lm_head.name
@add_start_docstrings_to_model_forward(LONGFORMER_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_start_docstrings_to_model_forward(LONGFORMER_INPUTS_DOCSTRING.format("batch_size, sequence_length"))

View File

@@ -16,6 +16,7 @@
# limitations under the License. # limitations under the License.
""" TF 2.0 LXMERT model. """ """ TF 2.0 LXMERT model. """
import warnings
from dataclasses import dataclass from dataclasses import dataclass
from typing import Dict, Optional, Tuple from typing import Dict, Optional, Tuple
@@ -706,10 +707,7 @@ class TFLxmertMainLayer(tf.keras.layers.Layer):
def set_input_embeddings(self, value): def set_input_embeddings(self, value):
self.embeddings.word_embeddings = value self.embeddings.word_embeddings = value
self.embeddings.vocab_size = value.shape[0] self.embeddings.vocab_size = shape_list(value)[0]
def _resize_token_embeddings(self, new_num_tokens):
raise NotImplementedError
def _prune_heads(self, heads_to_prune): def _prune_heads(self, heads_to_prune):
raise NotImplementedError raise NotImplementedError
@@ -1103,6 +1101,20 @@ class TFLxmertLMPredictionHead(tf.keras.layers.Layer):
self.bias = self.add_weight(shape=(self.vocab_size,), initializer="zeros", trainable=True, name="bias") self.bias = self.add_weight(shape=(self.vocab_size,), initializer="zeros", trainable=True, name="bias")
super().build(input_shape) super().build(input_shape)
def get_output_embeddings(self):
return self.input_embeddings
def set_output_embeddings(self, value):
self.input_embeddings.word_embeddings = value
self.input_embeddings.vocab_size = shape_list(value)[0]
def get_bias(self):
return {"bias": self.bias}
def set_bias(self, value):
self.bias = value["bias"]
self.vocab_size = shape_list(value["bias"])[0]
def call(self, hidden_states): def call(self, hidden_states):
hidden_states = self.transform(hidden_states) hidden_states = self.transform(hidden_states)
hidden_states = self.input_embeddings(hidden_states, mode="linear") hidden_states = self.input_embeddings(hidden_states, mode="linear")
@@ -1292,13 +1304,11 @@ class TFLxmertForPreTraining(TFLxmertPreTrainedModel):
**({"obj_labels": obj_labels} if self.config.task_obj_predict else {}), **({"obj_labels": obj_labels} if self.config.task_obj_predict else {}),
} }
def get_output_embeddings(self): def get_lm_head(self):
return self.lxmert.embeddings
def get_output_layer_with_bias(self):
return self.cls.predictions return self.cls.predictions
def get_prefix_bias_name(self): def get_prefix_bias_name(self):
warnings.warn("The method get_prefix_bias_name is deprecated. Please use `get_bias` instead.", FutureWarning)
return self.name + "/" + self.cls.name + "/" + self.cls.predictions.name return self.name + "/" + self.cls.name + "/" + self.cls.predictions.name
@add_start_docstrings_to_model_forward(LXMERT_INPUTS_DOCSTRING) @add_start_docstrings_to_model_forward(LXMERT_INPUTS_DOCSTRING)

View File

@@ -15,6 +15,7 @@
# limitations under the License. # limitations under the License.
""" TF 2.0 MobileBERT model. """ """ TF 2.0 MobileBERT model. """
import warnings
from dataclasses import dataclass from dataclasses import dataclass
from typing import Optional, Tuple from typing import Optional, Tuple
@@ -665,6 +666,20 @@ class TFMobileBertLMPredictionHead(tf.keras.layers.Layer):
) )
super().build(input_shape) super().build(input_shape)
def get_output_embeddings(self):
return self
def set_output_embeddings(self, value):
self.decoder = value
self.vocab_size = shape_list(value)[0]
def get_bias(self):
return {"bias": self.bias}
def set_bias(self, value):
self.bias = value["bias"]
self.vocab_size = shape_list(value["bias"])[0]
def call(self, hidden_states): def call(self, hidden_states):
hidden_states = self.transform(hidden_states) hidden_states = self.transform(hidden_states)
hidden_states = tf.matmul(hidden_states, tf.concat([tf.transpose(self.decoder), self.dense], axis=0)) hidden_states = tf.matmul(hidden_states, tf.concat([tf.transpose(self.decoder), self.dense], axis=0))
@@ -704,10 +719,7 @@ class TFMobileBertMainLayer(tf.keras.layers.Layer):
def set_input_embeddings(self, value): def set_input_embeddings(self, value):
self.embeddings.word_embeddings = value self.embeddings.word_embeddings = value
self.embeddings.vocab_size = value.shape[0] self.embeddings.vocab_size = shape_list(value)[0]
def _resize_token_embeddings(self, new_num_tokens):
raise NotImplementedError
def _prune_heads(self, heads_to_prune): def _prune_heads(self, heads_to_prune):
""" """
@@ -1039,13 +1051,11 @@ class TFMobileBertForPreTraining(TFMobileBertPreTrainedModel):
self.predictions = TFMobileBertMLMHead(config, name="predictions___cls") self.predictions = TFMobileBertMLMHead(config, name="predictions___cls")
self.seq_relationship = TFMobileBertOnlyNSPHead(2, name="seq_relationship___cls") self.seq_relationship = TFMobileBertOnlyNSPHead(2, name="seq_relationship___cls")
def get_output_embeddings(self): def get_lm_head(self):
return self.predictions.predictions
def get_output_layer_with_bias(self):
return self.predictions.predictions return self.predictions.predictions
def get_prefix_bias_name(self): def get_prefix_bias_name(self):
warnings.warn("The method get_prefix_bias_name is deprecated. Please use `get_bias` instead.", FutureWarning)
return self.name + "/" + self.predictions.name + "/" + self.predictions.predictions.name return self.name + "/" + self.predictions.name + "/" + self.predictions.predictions.name
@add_start_docstrings_to_model_forward(MOBILEBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_start_docstrings_to_model_forward(MOBILEBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
@@ -1149,13 +1159,11 @@ class TFMobileBertForMaskedLM(TFMobileBertPreTrainedModel, TFMaskedLanguageModel
self.mobilebert = TFMobileBertMainLayer(config, add_pooling_layer=False, name="mobilebert") self.mobilebert = TFMobileBertMainLayer(config, add_pooling_layer=False, name="mobilebert")
self.mlm = TFMobileBertMLMHead(config, name="mlm___cls") self.mlm = TFMobileBertMLMHead(config, name="mlm___cls")
def get_output_embeddings(self): def get_lm_head(self):
return self.mlm.predictions
def get_output_layer_with_bias(self):
return self.mlm.predictions return self.mlm.predictions
def get_prefix_bias_name(self): def get_prefix_bias_name(self):
warnings.warn("The method get_prefix_bias_name is deprecated. Please use `get_bias` instead.", FutureWarning)
return self.name + "/" + self.mlm.name + "/" + self.mlm.predictions.name return self.name + "/" + self.mlm.name + "/" + self.mlm.predictions.name
@add_start_docstrings_to_model_forward(MOBILEBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_start_docstrings_to_model_forward(MOBILEBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))

View File

@@ -17,6 +17,7 @@
import math import math
import warnings
import tensorflow as tf import tensorflow as tf
@@ -541,7 +542,7 @@ class TFMPNetMainLayer(tf.keras.layers.Layer):
# Copied from transformers.models.bert.modeling_tf_bert.TFBertMainLayer.set_input_embeddings # Copied from transformers.models.bert.modeling_tf_bert.TFBertMainLayer.set_input_embeddings
def set_input_embeddings(self, value): def set_input_embeddings(self, value):
self.embeddings.word_embeddings = value self.embeddings.word_embeddings = value
self.embeddings.vocab_size = value.shape[0] self.embeddings.vocab_size = shape_list(value)[0]
# Copied from transformers.models.bert.modeling_tf_bert.TFBertMainLayer._prune_heads # Copied from transformers.models.bert.modeling_tf_bert.TFBertMainLayer._prune_heads
def _prune_heads(self, heads_to_prune): def _prune_heads(self, heads_to_prune):
@@ -840,6 +841,20 @@ class TFMPNetLMHead(tf.keras.layers.Layer):
super().build(input_shape) super().build(input_shape)
def get_output_embeddings(self):
return self.decoder
def set_output_embeddings(self, value):
self.decoder.word_embeddings = value
self.decoder.vocab_size = shape_list(value)[0]
def get_bias(self):
return {"bias": self.bias}
def set_bias(self, value):
self.bias = value["bias"]
self.vocab_size = shape_list(value["bias"])[0]
def call(self, features): def call(self, features):
x = self.dense(features) x = self.dense(features)
x = self.act(x) x = self.act(x)
@@ -862,13 +877,11 @@ class TFMPNetForMaskedLM(TFMPNetPreTrainedModel, TFMaskedLanguageModelingLoss):
self.mpnet = TFMPNetMainLayer(config, name="mpnet") self.mpnet = TFMPNetMainLayer(config, name="mpnet")
self.lm_head = TFMPNetLMHead(config, self.mpnet.embeddings, name="lm_head") self.lm_head = TFMPNetLMHead(config, self.mpnet.embeddings, name="lm_head")
def get_output_embeddings(self): def get_lm_head(self):
return self.mpnet.embeddings
def get_output_layer_with_bias(self):
return self.lm_head return self.lm_head
def get_prefix_bias_name(self): def get_prefix_bias_name(self):
warnings.warn("The method get_prefix_bias_name is deprecated. Please use `get_bias` instead.", FutureWarning)
return self.name + "/" + self.lm_head.name return self.name + "/" + self.lm_head.name
@add_start_docstrings_to_model_forward(MPNET_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_start_docstrings_to_model_forward(MPNET_INPUTS_DOCSTRING.format("batch_size, sequence_length"))

View File

@@ -219,7 +219,7 @@ class TFOpenAIGPTMainLayer(tf.keras.layers.Layer):
def set_input_embeddings(self, value): def set_input_embeddings(self, value):
self.tokens_embed.weight = value self.tokens_embed.weight = value
self.tokens_embed.vocab_size = value.shape[0] self.tokens_embed.vocab_size = shape_list(value)[0]
def _prune_heads(self, heads_to_prune): def _prune_heads(self, heads_to_prune):
""" """
@@ -577,7 +577,10 @@ class TFOpenAIGPTLMHeadModel(TFOpenAIGPTPreTrainedModel, TFCausalLanguageModelin
self.transformer = TFOpenAIGPTMainLayer(config, name="transformer") self.transformer = TFOpenAIGPTMainLayer(config, name="transformer")
def get_output_embeddings(self): def get_output_embeddings(self):
return self.transformer.tokens_embed return self.get_input_embeddings()
def set_output_embeddings(self, value):
self.set_input_embeddings(value)
@add_start_docstrings_to_model_forward(OPENAI_GPT_INPUTS_DOCSTRING) @add_start_docstrings_to_model_forward(OPENAI_GPT_INPUTS_DOCSTRING)
@add_code_sample_docstrings( @add_code_sample_docstrings(
@@ -682,9 +685,6 @@ class TFOpenAIGPTDoubleHeadsModel(TFOpenAIGPTPreTrainedModel):
config, initializer_range=config.initializer_range, name="multiple_choice_head" config, initializer_range=config.initializer_range, name="multiple_choice_head"
) )
def get_output_embeddings(self):
return self.transformer.tokens_embed
@add_start_docstrings_to_model_forward(OPENAI_GPT_INPUTS_DOCSTRING) @add_start_docstrings_to_model_forward(OPENAI_GPT_INPUTS_DOCSTRING)
@replace_return_docstrings(output_type=TFOpenAIGPTDoubleHeadsModelOutput, config_class=_CONFIG_FOR_DOC) @replace_return_docstrings(output_type=TFOpenAIGPTDoubleHeadsModelOutput, config_class=_CONFIG_FOR_DOC)
def call( def call(
@@ -841,9 +841,6 @@ class TFOpenAIGPTForSequenceClassification(TFOpenAIGPTPreTrainedModel, TFSequenc
) )
self.transformer = TFOpenAIGPTMainLayer(config, name="transformer") self.transformer = TFOpenAIGPTMainLayer(config, name="transformer")
def get_output_embeddings(self):
return self.transformer.tokens_embed
@add_start_docstrings_to_model_forward(OPENAI_GPT_INPUTS_DOCSTRING) @add_start_docstrings_to_model_forward(OPENAI_GPT_INPUTS_DOCSTRING)
@add_code_sample_docstrings( @add_code_sample_docstrings(
tokenizer_class=_TOKENIZER_FOR_DOC, tokenizer_class=_TOKENIZER_FOR_DOC,

View File

@@ -15,6 +15,8 @@
# limitations under the License. # limitations under the License.
""" TF 2.0 RoBERTa model. """ """ TF 2.0 RoBERTa model. """
import warnings
import tensorflow as tf import tensorflow as tf
from ...activations_tf import get_tf_activation from ...activations_tf import get_tf_activation
@@ -502,7 +504,7 @@ class TFRobertaMainLayer(tf.keras.layers.Layer):
# Copied from transformers.models.bert.modeling_tf_bert.TFBertMainLayer.set_input_embeddings # Copied from transformers.models.bert.modeling_tf_bert.TFBertMainLayer.set_input_embeddings
def set_input_embeddings(self, value): def set_input_embeddings(self, value):
self.embeddings.word_embeddings = value self.embeddings.word_embeddings = value
self.embeddings.vocab_size = value.shape[0] self.embeddings.vocab_size = shape_list(value)[0]
# Copied from transformers.models.bert.modeling_tf_bert.TFBertMainLayer._prune_heads # Copied from transformers.models.bert.modeling_tf_bert.TFBertMainLayer._prune_heads
def _prune_heads(self, heads_to_prune): def _prune_heads(self, heads_to_prune):
@@ -827,6 +829,20 @@ class TFRobertaLMHead(tf.keras.layers.Layer):
super().build(input_shape) super().build(input_shape)
def get_output_embeddings(self):
return self.decoder
def set_output_embeddings(self, value):
self.decoder.word_embeddings = value
self.decoder.vocab_size = shape_list(value)[0]
def get_bias(self):
return {"bias": self.bias}
def set_bias(self, value):
self.bias = value["bias"]
self.vocab_size = shape_list(value["bias"])[0]
def call(self, hidden_states): def call(self, hidden_states):
hidden_states = self.dense(hidden_states) hidden_states = self.dense(hidden_states)
hidden_states = self.act(hidden_states) hidden_states = self.act(hidden_states)
@@ -849,13 +865,11 @@ class TFRobertaForMaskedLM(TFRobertaPreTrainedModel, TFMaskedLanguageModelingLos
self.roberta = TFRobertaMainLayer(config, add_pooling_layer=False, name="roberta") self.roberta = TFRobertaMainLayer(config, add_pooling_layer=False, name="roberta")
self.lm_head = TFRobertaLMHead(config, self.roberta.embeddings, name="lm_head") self.lm_head = TFRobertaLMHead(config, self.roberta.embeddings, name="lm_head")
def get_output_embeddings(self): def get_lm_head(self):
return self.lm_head.decoder
def get_output_layer_with_bias(self):
return self.lm_head return self.lm_head
def get_prefix_bias_name(self): def get_prefix_bias_name(self):
warnings.warn("The method get_prefix_bias_name is deprecated. Please use `get_bias` instead.", FutureWarning)
return self.name + "/" + self.lm_head.name return self.name + "/" + self.lm_head.name
@add_start_docstrings_to_model_forward(ROBERTA_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_start_docstrings_to_model_forward(ROBERTA_INPUTS_DOCSTRING.format("batch_size, sequence_length"))

View File

@@ -573,15 +573,6 @@ class TFT5MainLayer(tf.keras.layers.Layer):
self.final_layer_norm = TFT5LayerNorm(epsilon=config.layer_norm_epsilon, name="final_layer_norm") self.final_layer_norm = TFT5LayerNorm(epsilon=config.layer_norm_epsilon, name="final_layer_norm")
self.dropout = tf.keras.layers.Dropout(config.dropout_rate) self.dropout = tf.keras.layers.Dropout(config.dropout_rate)
def get_input_embeddings(self):
return self.embed_tokens
def set_embed_tokens(self, embed_tokens):
self.embed_tokens = embed_tokens
def _resize_token_embeddings(self, new_num_tokens):
raise NotImplementedError # Not implemented yet in the library fr TF 2.0 models
def _prune_heads(self, heads_to_prune): def _prune_heads(self, heads_to_prune):
raise NotImplementedError # Not implemented yet in the library fr TF 2.0 models raise NotImplementedError # Not implemented yet in the library fr TF 2.0 models
@@ -839,6 +830,26 @@ class TFT5PreTrainedModel(TFPreTrainedModel):
return self.serving_output(output) return self.serving_output(output)
def get_input_embeddings(self):
return self.shared
def set_input_embeddings(self, value):
try:
self.shared.weight = value
except AttributeError:
self(self.dummy_inputs)
self.shared.weight = value
self.shared.vocab_size = shape_list(value)[0]
# retrieve correct absolute scope for embed token wrapper
with tf.compat.v1.variable_scope("shared") as shared_abs_scope_name:
pass
# Wraps layer to avoid problems with weight restoring and ensuring we're in the correct TF scope.
embed_tokens = TFWrappedEmbeddings(self.shared, abs_scope_name=shared_abs_scope_name)
self.encoder.embed_tokens = embed_tokens
if hasattr(self, "decoder"):
self.decoder.embed_tokens = embed_tokens
def _shift_right(self, input_ids): def _shift_right(self, input_ids):
decoder_start_token_id = self.config.decoder_start_token_id decoder_start_token_id = self.config.decoder_start_token_id
pad_token_id = self.config.pad_token_id pad_token_id = self.config.pad_token_id
@@ -1050,20 +1061,6 @@ class TFT5Model(TFT5PreTrainedModel):
decoder_config.is_decoder = True decoder_config.is_decoder = True
self.decoder = TFT5MainLayer(decoder_config, embed_tokens, name="decoder") self.decoder = TFT5MainLayer(decoder_config, embed_tokens, name="decoder")
def get_input_embeddings(self):
return self.shared
def set_input_embeddings(self, new_embeddings):
self.shared.weight = new_embeddings
self.shared.vocab_size = self.shared.weight.shape[0]
# retrieve correct absolute scope for embed token wrapper
with tf.compat.v1.variable_scope("shared") as shared_abs_scope_name:
pass
# Wraps layer to avoid problems with weight restoring and ensuring we're in the correct TF scope.
embed_tokens = TFWrappedEmbeddings(self.shared, abs_scope_name=shared_abs_scope_name)
self.encoder.set_embed_tokens(embed_tokens)
self.decoder.set_embed_tokens(embed_tokens)
def get_encoder(self): def get_encoder(self):
return self.encoder return self.encoder
@@ -1222,24 +1219,23 @@ class TFT5ForConditionalGeneration(TFT5PreTrainedModel, TFCausalLanguageModeling
if not config.tie_word_embeddings: if not config.tie_word_embeddings:
self.lm_head = tf.keras.layers.Dense(config.vocab_size, use_bias=False, name="lm_head") self.lm_head = tf.keras.layers.Dense(config.vocab_size, use_bias=False, name="lm_head")
def get_input_embeddings(self):
return self.shared
def get_output_embeddings(self): def get_output_embeddings(self):
if self.config.tie_word_embeddings: if self.config.tie_word_embeddings:
return self.shared return self.get_input_embeddings()
else: else:
return self.lm_head # in a dense layer the kernel has a shape (last_dim, units), for us (dim, num_tokens)
# value has a shape (num_tokens, dim) then needs to be transposed
return tf.transpose(self.lm_head.kernel)
def set_input_embeddings(self, new_embeddings): def set_output_embeddings(self, value):
self.shared.weight = new_embeddings if self.config.tie_word_embeddings:
# retrieve correct absolute scope for embed token wrapper self.set_input_embeddings(value)
with tf.compat.v1.variable_scope("shared") as shared_abs_scope_name: else:
pass self.lm_head = tf.keras.layers.Dense(shape_list(value)[0], use_bias=False, name="lm_head")
# Wraps layer to avoid problems with weight restoring and ensuring we're in the correct TF scope. # in a dense layer the kernel has a shape (last_dim, units), for us (dim, num_tokens)
embed_tokens = TFWrappedEmbeddings(self.shared, abs_scope_name=shared_abs_scope_name) # value has a shape (num_tokens, dim) then needs to be transposed
self.encoder.set_embed_tokens(embed_tokens) transposed_value = tf.transpose(value)
self.decoder.set_embed_tokens(embed_tokens) self.lm_head.kernel = transposed_value
def get_encoder(self): def get_encoder(self):
return self.encoder return self.encoder
@@ -1358,9 +1354,9 @@ class TFT5ForConditionalGeneration(TFT5PreTrainedModel, TFCausalLanguageModeling
# T5v1.1 does not tie output word embeddings and thus does not require downscaling # T5v1.1 does not tie output word embeddings and thus does not require downscaling
if self.config.tie_word_embeddings: if self.config.tie_word_embeddings:
sequence_output = sequence_output * (self.model_dim ** -0.5) sequence_output = sequence_output * (self.model_dim ** -0.5)
logits = self.get_output_embeddings()(sequence_output, mode="linear") logits = self.shared(sequence_output, mode="linear")
else: else:
logits = self.get_output_embeddings()(sequence_output) logits = self.lm_head(sequence_output)
loss = None if inputs["labels"] is None else self.compute_loss(inputs["labels"], logits) loss = None if inputs["labels"] is None else self.compute_loss(inputs["labels"], logits)
@@ -1488,19 +1484,6 @@ class TFT5EncoderModel(TFT5PreTrainedModel):
encoder_config.use_cache = False encoder_config.use_cache = False
self.encoder = TFT5MainLayer(encoder_config, embed_tokens, name="encoder") self.encoder = TFT5MainLayer(encoder_config, embed_tokens, name="encoder")
def get_input_embeddings(self):
return self.shared
def set_input_embeddings(self, new_embeddings):
self.shared.weight = new_embeddings
self.shared.vocab_size = self.shared.weight.shape[0]
# retrieve correct absolute scope for embed token wrapper
with tf.compat.v1.variable_scope("shared") as shared_abs_scope_name:
pass
# Wraps layer to avoid problems with weight restoring and ensuring we're in the correct TF scope.
embed_tokens = TFWrappedEmbeddings(self.shared, abs_scope_name=shared_abs_scope_name)
self.encoder.set_embed_tokens(embed_tokens)
def get_encoder(self): def get_encoder(self):
return self.encoder return self.encoder

View File

@@ -468,9 +468,6 @@ class TFTransfoXLMainLayer(tf.keras.layers.Layer):
def set_input_embeddings(self, value): def set_input_embeddings(self, value):
raise NotImplementedError raise NotImplementedError
def _resize_token_embeddings(self, new_num_tokens):
return self.word_emb
def backward_compatible(self): def backward_compatible(self):
self.sample_softmax = -1 self.sample_softmax = -1
@@ -909,25 +906,6 @@ class TFTransfoXLModel(TFTransfoXLPreTrainedModel):
) )
class TFTransfoXLMHead(tf.keras.layers.Layer):
def __init__(self, config, input_embeddings, **kwargs):
super().__init__(**kwargs)
self.vocab_size = config.vocab_size
# The output weights are the same as the input embeddings, but there is
# an output-only bias for each token.
self.input_embeddings = input_embeddings
def build(self, input_shape):
self.bias = self.add_weight(shape=(self.vocab_size,), initializer="zeros", trainable=True, name="bias")
super().build(input_shape)
def call(self, hidden_states):
hidden_states = self.input_embeddings(hidden_states, mode="linear")
hidden_states = hidden_states + self.bias
return hidden_states
@add_start_docstrings( @add_start_docstrings(
""" """
The Transformer-XL Model with a language modeling head on top (adaptive softmax with weights tied to the adaptive The Transformer-XL Model with a language modeling head on top (adaptive softmax with weights tied to the adaptive
@@ -948,6 +926,9 @@ class TFTransfoXLLMHeadModel(TFTransfoXLPreTrainedModel):
config.vocab_size, config.d_embed, config.d_model, config.cutoffs, div_val=config.div_val, name="crit" config.vocab_size, config.d_embed, config.d_model, config.cutoffs, div_val=config.div_val, name="crit"
) )
def _resize_token_embeddings(self, new_num_tokens):
raise NotImplementedError()
def get_output_embeddings(self): def get_output_embeddings(self):
"""Double-check if you are using adaptive softmax.""" """Double-check if you are using adaptive softmax."""
if len(self.crit.out_layers) > 0: if len(self.crit.out_layers) > 0:

View File

@@ -17,6 +17,7 @@
""" """
import itertools import itertools
import warnings
from dataclasses import dataclass from dataclasses import dataclass
from typing import Optional, Tuple from typing import Optional, Tuple
@@ -330,10 +331,7 @@ class TFXLMMainLayer(tf.keras.layers.Layer):
def set_input_embeddings(self, value): def set_input_embeddings(self, value):
self.embeddings.weight = value self.embeddings.weight = value
self.embeddings.vocab_size = value.shape[0] self.embeddings.vocab_size = shape_list(value)[0]
def _resize_token_embeddings(self, new_num_tokens):
raise NotImplementedError
def _prune_heads(self, heads_to_prune): def _prune_heads(self, heads_to_prune):
""" """
@@ -787,6 +785,20 @@ class TFXLMPredLayer(tf.keras.layers.Layer):
super().build(input_shape) super().build(input_shape)
def get_output_embeddings(self):
return self.input_embeddings
def set_output_embeddings(self, value):
self.input_embeddings.weight = value
self.input_embeddings.vocab_size = shape_list(value)[0]
def get_bias(self):
return {"bias": self.bias}
def set_bias(self, value):
self.bias = value["bias"]
self.vocab_size = shape_list(value["bias"])[0]
def call(self, hidden_states): def call(self, hidden_states):
hidden_states = self.input_embeddings(hidden_states, mode="linear") hidden_states = self.input_embeddings(hidden_states, mode="linear")
hidden_states = hidden_states + self.bias hidden_states = hidden_states + self.bias
@@ -807,13 +819,11 @@ class TFXLMWithLMHeadModel(TFXLMPreTrainedModel):
self.transformer = TFXLMMainLayer(config, name="transformer") self.transformer = TFXLMMainLayer(config, name="transformer")
self.pred_layer = TFXLMPredLayer(config, self.transformer.embeddings, name="pred_layer_._proj") self.pred_layer = TFXLMPredLayer(config, self.transformer.embeddings, name="pred_layer_._proj")
def get_output_embeddings(self): def get_lm_head(self):
return self.pred_layer.input_embeddings
def get_output_layer_with_bias(self):
return self.pred_layer return self.pred_layer
def get_prefix_bias_name(self): def get_prefix_bias_name(self):
warnings.warn("The method get_prefix_bias_name is deprecated. Please use `get_bias` instead.", FutureWarning)
return self.name + "/" + self.pred_layer.name return self.name + "/" + self.pred_layer.name
def prepare_inputs_for_generation(self, inputs, **kwargs): def prepare_inputs_for_generation(self, inputs, **kwargs):

View File

@@ -17,6 +17,7 @@
TF 2.0 XLNet model. TF 2.0 XLNet model.
""" """
import warnings
from dataclasses import dataclass from dataclasses import dataclass
from typing import List, Optional, Tuple from typing import List, Optional, Tuple
@@ -407,6 +408,20 @@ class TFXLNetLMHead(tf.keras.layers.Layer):
self.bias = self.add_weight(shape=(self.vocab_size,), initializer="zeros", trainable=True, name="bias") self.bias = self.add_weight(shape=(self.vocab_size,), initializer="zeros", trainable=True, name="bias")
super().build(input_shape) super().build(input_shape)
def get_output_embeddings(self):
return self.input_embeddings
def set_output_embeddings(self, value):
self.input_embeddings.weight = value
self.input_embeddings.vocab_size = shape_list(value)[0]
def get_bias(self):
return {"bias": self.bias}
def set_bias(self, value):
self.bias = value["bias"]
self.vocab_size = shape_list(value["bias"])[0]
def call(self, hidden_states): def call(self, hidden_states):
hidden_states = self.input_embeddings(hidden_states, mode="linear") hidden_states = self.input_embeddings(hidden_states, mode="linear")
hidden_states = hidden_states + self.bias hidden_states = hidden_states + self.bias
@@ -450,7 +465,7 @@ class TFXLNetMainLayer(tf.keras.layers.Layer):
def set_input_embeddings(self, value): def set_input_embeddings(self, value):
self.word_embedding.weight = value self.word_embedding.weight = value
self.word_embedding.vocab_size = value.shape[0] self.word_embedding.vocab_size = shape_list(value)[0]
def build(self, input_shape): def build(self, input_shape):
initializer = get_initializer(self.initializer_range) initializer = get_initializer(self.initializer_range)
@@ -458,9 +473,6 @@ class TFXLNetMainLayer(tf.keras.layers.Layer):
shape=(1, 1, self.d_model), initializer=initializer, trainable=True, name="mask_emb" shape=(1, 1, self.d_model), initializer=initializer, trainable=True, name="mask_emb"
) )
def _resize_token_embeddings(self, new_num_tokens):
raise NotImplementedError
def _prune_heads(self, heads_to_prune): def _prune_heads(self, heads_to_prune):
raise NotImplementedError raise NotImplementedError
@@ -1227,13 +1239,11 @@ class TFXLNetLMHeadModel(TFXLNetPreTrainedModel, TFCausalLanguageModelingLoss):
self.transformer = TFXLNetMainLayer(config, name="transformer") self.transformer = TFXLNetMainLayer(config, name="transformer")
self.lm_loss = TFXLNetLMHead(config, self.transformer.word_embedding, name="lm_loss") self.lm_loss = TFXLNetLMHead(config, self.transformer.word_embedding, name="lm_loss")
def get_output_embeddings(self): def get_lm_head(self):
return self.lm_loss.input_embeddings
def get_output_layer_with_bias(self):
return self.lm_loss return self.lm_loss
def get_prefix_bias_name(self): def get_prefix_bias_name(self):
warnings.warn("The method get_prefix_bias_name is deprecated. Please use `get_bias` instead.", FutureWarning)
return self.name + "/" + self.lm_loss.name return self.name + "/" + self.lm_loss.name
def prepare_inputs_for_generation(self, inputs, past, use_mems=None, **kwargs): def prepare_inputs_for_generation(self, inputs, past, use_mems=None, **kwargs):

View File

@@ -461,6 +461,20 @@ class TF{{cookiecutter.camelcase_modelname}}LMPredictionHead(tf.keras.layers.Lay
super().build(input_shape) super().build(input_shape)
def get_output_embeddings(self):
return self.input_embeddings.word_embeddings
def set_output_embeddings(self, value):
self.input_embeddings.word_embeddings = value
self.input_embeddings.vocab_size = shape_list(value)[0]
def get_bias(self):
return {"bias": self.bias}
def set_bias(self, value):
self.bias = value["bias"]
self.vocab_size = shape_list(value["bias"])[0]
def call(self, hidden_states): def call(self, hidden_states):
hidden_states = self.transform(hidden_states) hidden_states = self.transform(hidden_states)
hidden_states = self.input_embeddings(hidden_states, mode="linear") hidden_states = self.input_embeddings(hidden_states, mode="linear")
@@ -800,15 +814,9 @@ class TF{{cookiecutter.camelcase_modelname}}ForMaskedLM(TF{{cookiecutter.camelca
self.{{cookiecutter.lowercase_modelname}} = TF{{cookiecutter.camelcase_modelname}}MainLayer(config, name="{{cookiecutter.lowercase_modelname}}") self.{{cookiecutter.lowercase_modelname}} = TF{{cookiecutter.camelcase_modelname}}MainLayer(config, name="{{cookiecutter.lowercase_modelname}}")
self.mlm = TF{{cookiecutter.camelcase_modelname}}MLMHead(config, self.{{cookiecutter.lowercase_modelname}}.embeddings, name="mlm___cls") self.mlm = TF{{cookiecutter.camelcase_modelname}}MLMHead(config, self.{{cookiecutter.lowercase_modelname}}.embeddings, name="mlm___cls")
def get_output_embeddings(self): def get_lm_head(self):
return self.{{cookiecutter.lowercase_modelname}}.embeddings
def get_output_layer_with_bias(self):
return self.mlm.predictions return self.mlm.predictions
def get_prefix_bias_name(self):
return self.name + "/" + self.mlm.name + "/" + self.mlm.predictions.name
@add_start_docstrings_to_model_forward({{cookiecutter.uppercase_modelname}}_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_start_docstrings_to_model_forward({{cookiecutter.uppercase_modelname}}_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
@add_code_sample_docstrings( @add_code_sample_docstrings(
tokenizer_class=_TOKENIZER_FOR_DOC, tokenizer_class=_TOKENIZER_FOR_DOC,
@@ -903,15 +911,9 @@ class TF{{cookiecutter.camelcase_modelname}}ForCausalLM(TF{{cookiecutter.camelca
self.{{cookiecutter.lowercase_modelname}} = TF{{cookiecutter.camelcase_modelname}}MainLayer(config, name="{{cookiecutter.lowercase_modelname}}") self.{{cookiecutter.lowercase_modelname}} = TF{{cookiecutter.camelcase_modelname}}MainLayer(config, name="{{cookiecutter.lowercase_modelname}}")
self.mlm = TF{{cookiecutter.camelcase_modelname}}MLMHead(config, self.{{cookiecutter.lowercase_modelname}}.embeddings, name="mlm___cls") self.mlm = TF{{cookiecutter.camelcase_modelname}}MLMHead(config, self.{{cookiecutter.lowercase_modelname}}.embeddings, name="mlm___cls")
def get_output_embeddings(self): def get_lm_head(self):
return self.{{cookiecutter.lowercase_modelname}}.embeddings
def get_output_layer_with_bias(self):
return self.mlm.predictions return self.mlm.predictions
def get_prefix_bias_name(self):
return self.name + "/" + self.mlm.name + "/" + self.mlm.predictions.name
@add_code_sample_docstrings( @add_code_sample_docstrings(
tokenizer_class=_TOKENIZER_FOR_DOC, tokenizer_class=_TOKENIZER_FOR_DOC,
checkpoint="{{cookiecutter.checkpoint_identifier}}", checkpoint="{{cookiecutter.checkpoint_identifier}}",
@@ -1855,6 +1857,29 @@ class TF{{cookiecutter.camelcase_modelname}}PreTrainedModel(TFPreTrainedModel):
} }
return dummy_inputs return dummy_inputs
def get_input_embeddings(self):
base_model = getattr(self, self.base_model_prefix, self)
return base_model.shared
def set_input_embeddings(self, value):
base_model = getattr(self, self.base_model_prefix, self)
try:
base_model.shared.weight = value
except AttributeError:
self(self.dummy_inputs)
base_model.shared.weight = value
base_model.shared.vocab_size = shape_list(base_model.shared.weight)[0]
with tf.compat.v1.variable_scope("model.shared") as shared_abs_scope_name:
pass
embed_tokens = TFWrappedEmbeddings(base_model.shared, abs_scope_name=shared_abs_scope_name)
base_model.encoder.set_embed_tokens(embed_tokens)
base_model.decoder.set_embed_tokens(embed_tokens)
@tf.function( @tf.function(
input_signature=[ input_signature=[
{ {
@@ -1984,6 +2009,9 @@ class TF{{cookiecutter.camelcase_modelname}}Encoder(tf.keras.layers.Layer):
self.layers = [TF{{cookiecutter.camelcase_modelname}}EncoderLayer(config, name=f"layers.{i}") for i in range(config.encoder_layers)] self.layers = [TF{{cookiecutter.camelcase_modelname}}EncoderLayer(config, name=f"layers.{i}") for i in range(config.encoder_layers)]
self.layernorm_embedding = tf.keras.layers.LayerNormalization(epsilon=1e-5, name="layernorm_embedding") self.layernorm_embedding = tf.keras.layers.LayerNormalization(epsilon=1e-5, name="layernorm_embedding")
def set_embed_tokens(self, embed_tokens):
self.embed_tokens = embed_tokens
def call( def call(
self, self,
input_ids=None, input_ids=None,
@@ -2124,6 +2152,9 @@ class TF{{cookiecutter.camelcase_modelname}}Decoder(tf.keras.layers.Layer):
self.dropout = tf.keras.layers.Dropout(config.dropout) self.dropout = tf.keras.layers.Dropout(config.dropout)
def set_embed_tokens(self, embed_tokens):
self.embed_tokens = embed_tokens
def call( def call(
self, self,
input_ids=None, input_ids=None,
@@ -2331,6 +2362,9 @@ class TF{{cookiecutter.camelcase_modelname}}Model(TF{{cookiecutter.camelcase_mod
self.encoder = TF{{cookiecutter.camelcase_modelname}}Encoder(config, embed_tokens, name="encoder") self.encoder = TF{{cookiecutter.camelcase_modelname}}Encoder(config, embed_tokens, name="encoder")
self.decoder = TF{{cookiecutter.camelcase_modelname}}Decoder(config, embed_tokens, name="decoder") self.decoder = TF{{cookiecutter.camelcase_modelname}}Decoder(config, embed_tokens, name="decoder")
def get_encoder(self):
return self.encoder
def get_decoder(self): def get_decoder(self):
return self.decoder return self.decoder
@@ -2452,15 +2486,6 @@ class TF{{cookiecutter.camelcase_modelname}}Model(TF{{cookiecutter.camelcase_mod
encoder_attentions=enc_attns, encoder_attentions=enc_attns,
) )
def get_input_embeddings(self):
return self.shared
def set_input_embeddings(self, value):
self.shared = value
def get_output_embeddings(self):
return self.shared
@add_start_docstrings( @add_start_docstrings(
"The {{cookiecutter.uppercase_modelname}} Model with a language modeling head. Can be used for summarization.", "The {{cookiecutter.uppercase_modelname}} Model with a language modeling head. Can be used for summarization.",
@@ -2484,22 +2509,20 @@ class TF{{cookiecutter.camelcase_modelname}}ForConditionalGeneration(TF{{cookiec
def get_decoder(self): def get_decoder(self):
return self.model.decoder return self.model.decoder
def resize_token_embeddings(self, new_num_tokens): def get_encoder(self):
super().resize_token_embeddings(new_num_tokens=new_num_tokens) return self.model.encoder
# {{cookiecutter.uppercase_modelname}} is a special case where the bias has two dimensions def get_bias(self):
# and not named just `bias` return {"final_logits_bias": self.final_logits_bias}
if new_num_tokens is not None:
num_tokens_to_copy = min(shape_list(self.final_logits_bias)[0], new_num_tokens) def set_bias(self, value):
init_bias = tf.zeros((new_num_tokens,)) self.final_logits_bias = value["final_logits_bias"]
init_bias[:num_tokens_to_copy] = self.final_logits_bias.value()[:num_tokens_to_copy]
self.final_logits_bias = self.add_weight( def get_output_embeddings(self):
shape=(1, new_num_tokens), return self.get_input_embeddings()
initializer="zeros",
trainable=False, def set_output_embeddings(self, value):
name="final_logits_bias", self.set_input_embeddings(value)
)
self.final_logits_bias.assign(init_bias)
@add_start_docstrings_to_model_forward({{cookiecutter.uppercase_modelname}}_INPUTS_DOCSTRING) @add_start_docstrings_to_model_forward({{cookiecutter.uppercase_modelname}}_INPUTS_DOCSTRING)
@replace_return_docstrings(output_type=TFSeq2SeqLMOutput, config_class=_CONFIG_FOR_DOC) @replace_return_docstrings(output_type=TFSeq2SeqLMOutput, config_class=_CONFIG_FOR_DOC)
@@ -2664,12 +2687,6 @@ class TF{{cookiecutter.camelcase_modelname}}ForConditionalGeneration(TF{{cookiec
) )
return (past[0], reordered_past) return (past[0], reordered_past)
def get_output_embeddings(self):
return self.model.shared
def get_encoder(self):
return self.model.encoder
def compute_loss(self, labels, logits): def compute_loss(self, labels, logits):
"""CrossEntropyLoss that ignores pad tokens""" """CrossEntropyLoss that ignores pad tokens"""
loss_fn = tf.keras.losses.SparseCategoricalCrossentropy( loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(

View File

@@ -486,11 +486,83 @@ class TF{{cookiecutter.camelcase_modelname}}ModelTest(TFModelTesterMixin, unitte
for model_class in self.all_model_classes: for model_class in self.all_model_classes:
model = model_class(config) model = model_class(config)
assert isinstance(model.get_input_embeddings(), tf.keras.layers.Layer) assert isinstance(model.get_input_embeddings(), tf.keras.layers.Layer)
x = model.get_output_layer_with_bias()
if model_class in self.all_generative_model_classes:
x = model.get_output_embeddings()
assert isinstance(x, tf.keras.layers.Layer)
name = model.get_bias()
assert isinstance(name, dict)
for k, v in name.items():
assert isinstance(v, tf.Variable)
else:
x = model.get_output_embeddings()
assert x is None assert x is None
name = model.get_prefix_bias_name() name = model.get_bias()
assert name is None assert name is None
def test_resize_token_embeddings(self):
config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
def _get_word_embedding_weight(model, embedding_layer):
if hasattr(embedding_layer, "weight"):
return embedding_layer.weight
else:
# Here we build the word embeddings weights if not exists.
# And then we retry to get the attribute once built.
model(model.dummy_inputs)
if hasattr(embedding_layer, "weight"):
return embedding_layer.weight
else:
return None
for model_class in self.all_model_classes:
for size in [config.vocab_size - 10, config.vocab_size + 10, None]:
# build the embeddings
model = model_class(config=config)
old_input_embeddings = _get_word_embedding_weight(model, model.get_input_embeddings())
old_output_embeddings = _get_word_embedding_weight(model, model.get_output_embeddings())
old_final_logits_bias = model.get_bias()
# reshape the embeddings
model.resize_token_embeddings(size)
new_input_embeddings = _get_word_embedding_weight(model, model.get_input_embeddings())
new_output_embeddings = _get_word_embedding_weight(model, model.get_output_embeddings())
new_final_logits_bias = model.get_bias()
# check that the resized embeddings size matches the desired size.
assert_size = size if size is not None else config.vocab_size
self.assertEqual(new_input_embeddings.shape[0], assert_size)
# check that weights remain the same after resizing
models_equal = True
for p1, p2 in zip(old_input_embeddings.value(), new_input_embeddings.value()):
if tf.math.reduce_sum(tf.math.abs(p1 - p2)) > 0:
models_equal = False
self.assertTrue(models_equal)
if old_output_embeddings is not None and new_output_embeddings is not None:
self.assertEqual(new_output_embeddings.shape[0], assert_size)
models_equal = True
for p1, p2 in zip(old_output_embeddings.value(), new_output_embeddings.value()):
if tf.math.reduce_sum(tf.math.abs(p1 - p2)) > 0:
models_equal = False
self.assertTrue(models_equal)
if old_final_logits_bias is not None and new_final_logits_bias is not None:
old_final_logits_bias = old_final_logits_bias["final_logits_bias"]
new_final_logits_bias = new_final_logits_bias["final_logits_bias"]
self.assertEqual(new_final_logits_bias.shape[0], 1)
self.assertEqual(new_final_logits_bias.shape[1], assert_size)
models_equal = True
for old, new in zip(old_final_logits_bias.value(), new_final_logits_bias.value()):
for p1, p2 in zip(old, new):
if tf.math.reduce_sum(tf.math.abs(p1 - p2)) > 0:
models_equal = False
self.assertTrue(models_equal)
def _assert_tensors_equal(a, b, atol=1e-12, prefix=""): def _assert_tensors_equal(a, b, atol=1e-12, prefix=""):
"""If tensors not close, or a and b arent both tensors, raise a nice Assertion error.""" """If tensors not close, or a and b arent both tensors, raise a nice Assertion error."""

View File

@@ -274,13 +274,23 @@ class TFAlbertModelTest(TFModelTesterMixin, unittest.TestCase):
def test_model_common_attributes(self): def test_model_common_attributes(self):
config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common() config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
list_lm_models = [TFAlbertForPreTraining, TFAlbertForMaskedLM]
for model_class in self.all_model_classes: for model_class in self.all_model_classes:
model = model_class(config) model = model_class(config)
assert isinstance(model.get_input_embeddings(), tf.keras.layers.Layer) assert isinstance(model.get_input_embeddings(), tf.keras.layers.Layer)
x = model.get_output_layer_with_bias()
if model_class in list_lm_models:
x = model.get_output_embeddings()
assert isinstance(x, tf.keras.layers.Layer)
name = model.get_bias()
assert isinstance(name, dict)
for k, v in name.items():
assert isinstance(v, tf.Variable)
else:
x = model.get_output_embeddings()
assert x is None assert x is None
name = model.get_prefix_bias_name() name = model.get_bias()
assert name is None assert name is None
@slow @slow

View File

@@ -159,11 +159,83 @@ class TFBartModelTest(TFModelTesterMixin, unittest.TestCase):
for model_class in self.all_model_classes: for model_class in self.all_model_classes:
model = model_class(config) model = model_class(config)
assert isinstance(model.get_input_embeddings(), tf.keras.layers.Layer) assert isinstance(model.get_input_embeddings(), tf.keras.layers.Layer)
x = model.get_output_layer_with_bias()
if model_class in self.all_generative_model_classes:
x = model.get_output_embeddings()
assert isinstance(x, tf.keras.layers.Layer)
name = model.get_bias()
assert isinstance(name, dict)
for k, v in name.items():
assert isinstance(v, tf.Variable)
else:
x = model.get_output_embeddings()
assert x is None assert x is None
name = model.get_prefix_bias_name() name = model.get_bias()
assert name is None assert name is None
def test_resize_token_embeddings(self):
config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
def _get_word_embedding_weight(model, embedding_layer):
if hasattr(embedding_layer, "weight"):
return embedding_layer.weight
else:
# Here we build the word embeddings weights if not exists.
# And then we retry to get the attribute once built.
model(model.dummy_inputs)
if hasattr(embedding_layer, "weight"):
return embedding_layer.weight
else:
return None
for model_class in self.all_model_classes:
for size in [config.vocab_size - 10, config.vocab_size + 10, None]:
# build the embeddings
model = model_class(config=config)
old_input_embeddings = _get_word_embedding_weight(model, model.get_input_embeddings())
old_output_embeddings = _get_word_embedding_weight(model, model.get_output_embeddings())
old_final_logits_bias = model.get_bias()
# reshape the embeddings
model.resize_token_embeddings(size)
new_input_embeddings = _get_word_embedding_weight(model, model.get_input_embeddings())
new_output_embeddings = _get_word_embedding_weight(model, model.get_output_embeddings())
new_final_logits_bias = model.get_bias()
# check that the resized embeddings size matches the desired size.
assert_size = size if size is not None else config.vocab_size
self.assertEqual(new_input_embeddings.shape[0], assert_size)
# check that weights remain the same after resizing
models_equal = True
for p1, p2 in zip(old_input_embeddings.value(), new_input_embeddings.value()):
if tf.math.reduce_sum(tf.math.abs(p1 - p2)) > 0:
models_equal = False
self.assertTrue(models_equal)
if old_output_embeddings is not None and new_output_embeddings is not None:
self.assertEqual(new_output_embeddings.shape[0], assert_size)
models_equal = True
for p1, p2 in zip(old_output_embeddings.value(), new_output_embeddings.value()):
if tf.math.reduce_sum(tf.math.abs(p1 - p2)) > 0:
models_equal = False
self.assertTrue(models_equal)
if old_final_logits_bias is not None and new_final_logits_bias is not None:
old_final_logits_bias = old_final_logits_bias["final_logits_bias"]
new_final_logits_bias = new_final_logits_bias["final_logits_bias"]
self.assertEqual(new_final_logits_bias.shape[0], 1)
self.assertEqual(new_final_logits_bias.shape[1], assert_size)
models_equal = True
for old, new in zip(old_final_logits_bias.value(), new_final_logits_bias.value()):
for p1, p2 in zip(old, new):
if tf.math.reduce_sum(tf.math.abs(p1 - p2)) > 0:
models_equal = False
self.assertTrue(models_equal)
def test_saved_model_creation(self): def test_saved_model_creation(self):
# This test is too long (>30sec) and makes fail the CI # This test is too long (>30sec) and makes fail the CI
pass pass

View File

@@ -340,15 +340,17 @@ class TFBertModelTest(TFModelTesterMixin, unittest.TestCase):
assert isinstance(model.get_input_embeddings(), tf.keras.layers.Layer) assert isinstance(model.get_input_embeddings(), tf.keras.layers.Layer)
if model_class in list_lm_models: if model_class in list_lm_models:
x = model.get_output_layer_with_bias() x = model.get_output_embeddings()
assert isinstance(x, tf.keras.layers.Layer) assert isinstance(x, tf.keras.layers.Layer)
name = model.get_prefix_bias_name() name = model.get_bias()
assert isinstance(name, str) assert isinstance(name, dict)
for k, v in name.items():
assert isinstance(v, tf.Variable)
else: else:
x = model.get_output_layer_with_bias() x = model.get_output_embeddings()
assert x is None
name = model.get_prefix_bias_name()
assert x is None assert x is None
name = model.get_bias()
assert name is None
def test_custom_load_tf_weights(self): def test_custom_load_tf_weights(self):
model, output_loading_info = TFBertForTokenClassification.from_pretrained( model, output_loading_info = TFBertForTokenClassification.from_pretrained(

View File

@@ -57,29 +57,93 @@ class TFBlenderbotModelTest(TFModelTesterMixin, unittest.TestCase):
# inputs_embeds not supported # inputs_embeds not supported
pass pass
def test_saved_model_with_hidden_states_output(self):
# Should be uncommented during patrick TF refactor
pass
def test_saved_model_with_attentions_output(self):
# Should be uncommented during patrick TF refactor
pass
def test_model_common_attributes(self): def test_model_common_attributes(self):
config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common() config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
for model_class in self.all_model_classes: for model_class in self.all_model_classes:
model = model_class(config) model = model_class(config)
assert isinstance(model.get_input_embeddings(), tf.keras.layers.Layer) assert isinstance(model.get_input_embeddings(), tf.keras.layers.Layer)
x = model.get_output_layer_with_bias()
if model_class in self.all_generative_model_classes:
x = model.get_output_embeddings()
assert isinstance(x, tf.keras.layers.Layer)
name = model.get_bias()
assert isinstance(name, dict)
for k, v in name.items():
assert isinstance(v, tf.Variable)
else:
x = model.get_output_embeddings()
assert x is None assert x is None
name = model.get_prefix_bias_name() name = model.get_bias()
assert name is None assert name is None
def test_saved_model_creation(self): def test_saved_model_creation(self):
# This test is too long (>30sec) and makes fail the CI # This test is too long (>30sec) and makes fail the CI
pass pass
def test_resize_token_embeddings(self):
config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
def _get_word_embedding_weight(model, embedding_layer):
if hasattr(embedding_layer, "weight"):
return embedding_layer.weight
else:
# Here we build the word embeddings weights if not exists.
# And then we retry to get the attribute once built.
model(model.dummy_inputs)
if hasattr(embedding_layer, "weight"):
return embedding_layer.weight
else:
return None
for model_class in self.all_model_classes:
for size in [config.vocab_size - 10, config.vocab_size + 10, None]:
# build the embeddings
model = model_class(config=config)
old_input_embeddings = _get_word_embedding_weight(model, model.get_input_embeddings())
old_output_embeddings = _get_word_embedding_weight(model, model.get_output_embeddings())
old_final_logits_bias = model.get_bias()
# reshape the embeddings
model.resize_token_embeddings(size)
new_input_embeddings = _get_word_embedding_weight(model, model.get_input_embeddings())
new_output_embeddings = _get_word_embedding_weight(model, model.get_output_embeddings())
new_final_logits_bias = model.get_bias()
# check that the resized embeddings size matches the desired size.
assert_size = size if size is not None else config.vocab_size
self.assertEqual(new_input_embeddings.shape[0], assert_size)
# check that weights remain the same after resizing
models_equal = True
for p1, p2 in zip(old_input_embeddings.value(), new_input_embeddings.value()):
if tf.math.reduce_sum(tf.math.abs(p1 - p2)) > 0:
models_equal = False
self.assertTrue(models_equal)
if old_output_embeddings is not None and new_output_embeddings is not None:
self.assertEqual(new_output_embeddings.shape[0], assert_size)
models_equal = True
for p1, p2 in zip(old_output_embeddings.value(), new_output_embeddings.value()):
if tf.math.reduce_sum(tf.math.abs(p1 - p2)) > 0:
models_equal = False
self.assertTrue(models_equal)
if old_final_logits_bias is not None and new_final_logits_bias is not None:
old_final_logits_bias = old_final_logits_bias["final_logits_bias"]
new_final_logits_bias = new_final_logits_bias["final_logits_bias"]
self.assertEqual(new_final_logits_bias.shape[0], 1)
self.assertEqual(new_final_logits_bias.shape[1], assert_size)
models_equal = True
for old, new in zip(old_final_logits_bias.value(), new_final_logits_bias.value()):
for p1, p2 in zip(old, new):
if tf.math.reduce_sum(tf.math.abs(p1 - p2)) > 0:
models_equal = False
self.assertTrue(models_equal)
@is_pt_tf_cross_test @is_pt_tf_cross_test
@require_tokenizers @require_tokenizers

View File

@@ -41,7 +41,6 @@ if is_tf_available():
TF_MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING, TF_MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING,
TF_MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING, TF_MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING,
TF_MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING, TF_MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING,
TFAdaptiveEmbedding,
TFSharedEmbeddings, TFSharedEmbeddings,
tf_top_k_top_p_filtering, tf_top_k_top_p_filtering,
) )
@@ -671,18 +670,20 @@ class TFModelTesterMixin:
for model_class in self.all_model_classes: for model_class in self.all_model_classes:
model = model_class(config) model = model_class(config)
assert isinstance(model.get_input_embeddings(), (tf.keras.layers.Layer, TFAdaptiveEmbedding)) assert isinstance(model.get_input_embeddings(), tf.keras.layers.Layer)
if model_class in list_lm_models: if model_class in list_lm_models:
x = model.get_output_layer_with_bias() x = model.get_output_embeddings()
assert isinstance(x, tf.keras.layers.Layer) assert isinstance(x, tf.keras.layers.Layer)
name = model.get_prefix_bias_name() name = model.get_bias()
assert isinstance(name, str) assert isinstance(name, dict)
for k, v in name.items():
assert isinstance(v, tf.Variable)
else: else:
x = model.get_output_layer_with_bias() x = model.get_output_embeddings()
assert x is None
name = model.get_prefix_bias_name()
assert x is None assert x is None
name = model.get_bias()
assert name is None
def test_determinism(self): def test_determinism(self):
config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common() config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
@@ -830,23 +831,68 @@ class TFModelTesterMixin:
if not self.test_resize_embeddings: if not self.test_resize_embeddings:
return return
config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common() config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
INPUT_SHAPE = [1, 10, config.hidden_size]
def _get_word_embedding_weight(model, embedding_layer):
if hasattr(embedding_layer, "word_embeddings"):
return embedding_layer.word_embeddings
elif hasattr(embedding_layer, "weight"):
return embedding_layer.weight
elif hasattr(embedding_layer, "decoder"):
return embedding_layer.decoder
else:
# Here we build the word embeddings weights if not exists.
# And then we retry to get the attribute once built.
model(model.dummy_inputs)
if hasattr(embedding_layer, "word_embeddings"):
return embedding_layer.word_embeddings
elif hasattr(embedding_layer, "weight"):
return embedding_layer.weight
elif hasattr(embedding_layer, "decoder"):
return embedding_layer.decoder
else:
return None
for model_class in self.all_model_classes: for model_class in self.all_model_classes:
for size in [config.vocab_size - 10, config.vocab_size + 10, None]: for size in [config.vocab_size - 10, config.vocab_size + 10, None]:
# build the embeddings # build the embeddings
model = model_class(config=config) model = model_class(config=config)
emb_old = model.get_input_embeddings() old_input_embeddings = _get_word_embedding_weight(model, model.get_input_embeddings())
emb_old.build(INPUT_SHAPE) old_bias = model.get_bias()
old_output_embeddings = _get_word_embedding_weight(model, model.get_output_embeddings())
# reshape the embeddings # reshape the embeddings
new_embeddings = model._get_resized_embeddings(emb_old, size) model.resize_token_embeddings(size)
# # check that the resized embeddings size matches the desired size. new_input_embeddings = _get_word_embedding_weight(model, model.get_input_embeddings())
new_bias = model.get_bias()
new_output_embeddings = _get_word_embedding_weight(model, model.get_output_embeddings())
# check that the resized embeddings size matches the desired size.
assert_size = size if size is not None else config.vocab_size assert_size = size if size is not None else config.vocab_size
self.assertEqual(new_embeddings.shape[0], assert_size) self.assertEqual(new_input_embeddings.shape[0], assert_size)
# check that weights remain the same after resizing # check that weights remain the same after resizing
emd_old_weights = model._get_word_embeddings(emb_old)
models_equal = True models_equal = True
for p1, p2 in zip(emd_old_weights.numpy(), new_embeddings.numpy()): for p1, p2 in zip(old_input_embeddings.value(), new_input_embeddings.value()):
if np.sum(abs(p1 - p2)) > 0: if tf.math.reduce_sum(tf.math.abs(p1 - p2)) > 0:
models_equal = False
self.assertTrue(models_equal)
if old_bias is not None and new_bias is not None:
for old_weight, new_weight in zip(old_bias.values(), new_bias.values()):
self.assertEqual(new_weight.shape[0], assert_size)
models_equal = True
for p1, p2 in zip(old_weight.value(), new_weight.value()):
if tf.math.reduce_sum(tf.math.abs(p1 - p2)) > 0:
models_equal = False
self.assertTrue(models_equal)
if old_output_embeddings is not None and new_output_embeddings is not None:
self.assertEqual(new_output_embeddings.shape[0], assert_size)
self.assertEqual(new_output_embeddings.shape[1], old_output_embeddings.shape[1])
models_equal = True
for p1, p2 in zip(old_output_embeddings.value(), new_output_embeddings.value()):
if tf.math.reduce_sum(tf.math.abs(p1 - p2)) > 0:
models_equal = False models_equal = False
self.assertTrue(models_equal) self.assertTrue(models_equal)

View File

@@ -193,6 +193,33 @@ class TFCTRLModelTest(TFModelTesterMixin, unittest.TestCase):
config_and_inputs = self.model_tester.prepare_config_and_inputs() config_and_inputs = self.model_tester.prepare_config_and_inputs()
self.model_tester.create_and_check_ctrl_for_sequence_classification(*config_and_inputs) self.model_tester.create_and_check_ctrl_for_sequence_classification(*config_and_inputs)
def test_model_common_attributes(self):
config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
list_lm_models = [TFCTRLLMHeadModel]
list_other_models_with_output_ebd = [TFCTRLForSequenceClassification]
for model_class in self.all_model_classes:
model = model_class(config)
assert isinstance(model.get_input_embeddings(), tf.keras.layers.Layer)
if model_class in list_lm_models:
x = model.get_output_embeddings()
assert isinstance(x, tf.keras.layers.Layer)
name = model.get_bias()
assert isinstance(name, dict)
for k, v in name.items():
assert isinstance(v, tf.Variable)
elif model_class in list_other_models_with_output_ebd:
x = model.get_output_embeddings()
assert isinstance(x, tf.keras.layers.Layer)
name = model.get_bias()
assert name is None
else:
x = model.get_output_embeddings()
assert x is None
name = model.get_bias()
assert name is None
@slow @slow
def test_model_from_pretrained(self): def test_model_from_pretrained(self):
for model_name in TF_CTRL_PRETRAINED_MODEL_ARCHIVE_LIST[:1]: for model_name in TF_CTRL_PRETRAINED_MODEL_ARCHIVE_LIST[:1]:

View File

@@ -370,9 +370,16 @@ class TFGPT2ModelTest(TFModelTesterMixin, unittest.TestCase):
for model_class in self.all_model_classes: for model_class in self.all_model_classes:
model = model_class(config) model = model_class(config)
assert isinstance(model.get_input_embeddings(), tf.keras.layers.Layer) assert isinstance(model.get_input_embeddings(), tf.keras.layers.Layer)
x = model.get_output_layer_with_bias()
if model_class in self.all_generative_model_classes:
x = model.get_output_embeddings()
assert isinstance(x, tf.keras.layers.Layer)
name = model.get_bias()
assert name is None
else:
x = model.get_output_embeddings()
assert x is None assert x is None
name = model.get_prefix_bias_name() name = model.get_bias()
assert name is None assert name is None
def test_gpt2_sequence_classification_model(self): def test_gpt2_sequence_classification_model(self):

View File

@@ -199,11 +199,83 @@ class TFLEDModelTest(TFModelTesterMixin, unittest.TestCase):
for model_class in self.all_model_classes: for model_class in self.all_model_classes:
model = model_class(config) model = model_class(config)
assert isinstance(model.get_input_embeddings(), tf.keras.layers.Layer) assert isinstance(model.get_input_embeddings(), tf.keras.layers.Layer)
x = model.get_output_layer_with_bias()
if model_class in self.all_generative_model_classes:
x = model.get_output_embeddings()
assert isinstance(x, tf.keras.layers.Layer)
name = model.get_bias()
assert isinstance(name, dict)
for k, v in name.items():
assert isinstance(v, tf.Variable)
else:
x = model.get_output_embeddings()
assert x is None assert x is None
name = model.get_prefix_bias_name() name = model.get_bias()
assert name is None assert name is None
def test_resize_token_embeddings(self):
config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
def _get_word_embedding_weight(model, embedding_layer):
if hasattr(embedding_layer, "weight"):
return embedding_layer.weight
else:
# Here we build the word embeddings weights if not exists.
# And then we retry to get the attribute once built.
model(model.dummy_inputs)
if hasattr(embedding_layer, "weight"):
return embedding_layer.weight
else:
return None
for model_class in self.all_model_classes:
for size in [config.vocab_size - 10, config.vocab_size + 10, None]:
# build the embeddings
model = model_class(config=config)
old_input_embeddings = _get_word_embedding_weight(model, model.get_input_embeddings())
old_output_embeddings = _get_word_embedding_weight(model, model.get_output_embeddings())
old_final_logits_bias = model.get_bias()
# reshape the embeddings
model.resize_token_embeddings(size)
new_input_embeddings = _get_word_embedding_weight(model, model.get_input_embeddings())
new_output_embeddings = _get_word_embedding_weight(model, model.get_output_embeddings())
new_final_logits_bias = model.get_bias()
# check that the resized embeddings size matches the desired size.
assert_size = size if size is not None else config.vocab_size
self.assertEqual(new_input_embeddings.shape[0], assert_size)
# check that weights remain the same after resizing
models_equal = True
for p1, p2 in zip(old_input_embeddings.value(), new_input_embeddings.value()):
if tf.math.reduce_sum(tf.math.abs(p1 - p2)) > 0:
models_equal = False
self.assertTrue(models_equal)
if old_output_embeddings is not None and new_output_embeddings is not None:
self.assertEqual(new_output_embeddings.shape[0], assert_size)
models_equal = True
for p1, p2 in zip(old_output_embeddings.value(), new_output_embeddings.value()):
if tf.math.reduce_sum(tf.math.abs(p1 - p2)) > 0:
models_equal = False
self.assertTrue(models_equal)
if old_final_logits_bias is not None and new_final_logits_bias is not None:
old_final_logits_bias = old_final_logits_bias["final_logits_bias"]
new_final_logits_bias = new_final_logits_bias["final_logits_bias"]
self.assertEqual(new_final_logits_bias.shape[0], 1)
self.assertEqual(new_final_logits_bias.shape[1], assert_size)
models_equal = True
for old, new in zip(old_final_logits_bias.value(), new_final_logits_bias.value()):
for p1, p2 in zip(old, new):
if tf.math.reduce_sum(tf.math.abs(p1 - p2)) > 0:
models_equal = False
self.assertTrue(models_equal)
def test_attention_outputs(self): def test_attention_outputs(self):
config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common() config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
inputs_dict["global_attention_mask"] = tf.zeros_like(inputs_dict["attention_mask"]) inputs_dict["global_attention_mask"] = tf.zeros_like(inputs_dict["attention_mask"])

View File

@@ -687,15 +687,17 @@ class TFLxmertModelTest(TFModelTesterMixin, unittest.TestCase):
assert isinstance(model.get_input_embeddings(), tf.keras.layers.Layer) assert isinstance(model.get_input_embeddings(), tf.keras.layers.Layer)
if model_class in list_lm_models: if model_class in list_lm_models:
x = model.get_output_layer_with_bias() x = model.get_output_embeddings()
assert isinstance(x, tf.keras.layers.Layer) assert isinstance(x, tf.keras.layers.Layer)
name = model.get_prefix_bias_name() name = model.get_bias()
assert isinstance(name, str) assert isinstance(name, dict)
for k, v in name.items():
assert isinstance(v, tf.Variable)
else: else:
x = model.get_output_layer_with_bias() x = model.get_output_embeddings()
assert x is None
name = model.get_prefix_bias_name()
assert x is None assert x is None
name = model.get_bias()
assert name is None
def test_saved_model_creation(self): def test_saved_model_creation(self):
# This test is too long (>30sec) and makes fail the CI # This test is too long (>30sec) and makes fail the CI

View File

@@ -38,7 +38,7 @@ class ModelTester(TFBartModelTester):
@require_tf @require_tf
class TestTFMarianCommon(TFModelTesterMixin, unittest.TestCase): class TFMarianMTModelTest(TFModelTesterMixin, unittest.TestCase):
all_model_classes = (TFMarianMTModel,) if is_tf_available() else () all_model_classes = (TFMarianMTModel,) if is_tf_available() else ()
all_generative_model_classes = (TFMarianMTModel,) if is_tf_available() else () all_generative_model_classes = (TFMarianMTModel,) if is_tf_available() else ()
model_tester_cls = ModelTester model_tester_cls = ModelTester
@@ -56,13 +56,6 @@ class TestTFMarianCommon(TFModelTesterMixin, unittest.TestCase):
# inputs_embeds not supported # inputs_embeds not supported
pass pass
def test_saved_model_with_hidden_states_output(self):
# Should be uncommented during patrick TF refactor
pass
def test_saved_model_with_attentions_output(self):
pass
def test_compile_tf_model(self): def test_compile_tf_model(self):
config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common() config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
@@ -100,15 +93,87 @@ class TestTFMarianCommon(TFModelTesterMixin, unittest.TestCase):
for model_class in self.all_model_classes: for model_class in self.all_model_classes:
model = model_class(config) model = model_class(config)
assert isinstance(model.get_input_embeddings(), tf.keras.layers.Layer) assert isinstance(model.get_input_embeddings(), tf.keras.layers.Layer)
x = model.get_output_layer_with_bias()
if model_class in self.all_generative_model_classes:
x = model.get_output_embeddings()
assert isinstance(x, tf.keras.layers.Layer)
name = model.get_bias()
assert isinstance(name, dict)
for k, v in name.items():
assert isinstance(v, tf.Variable)
else:
x = model.get_output_embeddings()
assert x is None assert x is None
name = model.get_prefix_bias_name() name = model.get_bias()
assert name is None assert name is None
def test_saved_model_creation(self): def test_saved_model_creation(self):
# This test is too long (>30sec) and makes fail the CI # This test is too long (>30sec) and makes fail the CI
pass pass
def test_resize_token_embeddings(self):
config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
def _get_word_embedding_weight(model, embedding_layer):
if hasattr(embedding_layer, "weight"):
return embedding_layer.weight
else:
# Here we build the word embeddings weights if not exists.
# And then we retry to get the attribute once built.
model(model.dummy_inputs)
if hasattr(embedding_layer, "weight"):
return embedding_layer.weight
else:
return None
for model_class in self.all_model_classes:
for size in [config.vocab_size - 10, config.vocab_size + 10, None]:
# build the embeddings
model = model_class(config=config)
old_input_embeddings = _get_word_embedding_weight(model, model.get_input_embeddings())
old_output_embeddings = _get_word_embedding_weight(model, model.get_output_embeddings())
old_final_logits_bias = model.get_bias()
# reshape the embeddings
model.resize_token_embeddings(size)
new_input_embeddings = _get_word_embedding_weight(model, model.get_input_embeddings())
new_output_embeddings = _get_word_embedding_weight(model, model.get_output_embeddings())
new_final_logits_bias = model.get_bias()
# check that the resized embeddings size matches the desired size.
assert_size = size if size is not None else config.vocab_size
self.assertEqual(new_input_embeddings.shape[0], assert_size)
# check that weights remain the same after resizing
models_equal = True
for p1, p2 in zip(old_input_embeddings.value(), new_input_embeddings.value()):
if tf.math.reduce_sum(tf.math.abs(p1 - p2)) > 0:
models_equal = False
self.assertTrue(models_equal)
if old_output_embeddings is not None and new_output_embeddings is not None:
self.assertEqual(new_output_embeddings.shape[0], assert_size)
models_equal = True
for p1, p2 in zip(old_output_embeddings.value(), new_output_embeddings.value()):
if tf.math.reduce_sum(tf.math.abs(p1 - p2)) > 0:
models_equal = False
self.assertTrue(models_equal)
if old_final_logits_bias is not None and new_final_logits_bias is not None:
old_final_logits_bias = old_final_logits_bias["final_logits_bias"]
new_final_logits_bias = new_final_logits_bias["final_logits_bias"]
self.assertEqual(new_final_logits_bias.shape[0], 1)
self.assertEqual(new_final_logits_bias.shape[1], assert_size)
models_equal = True
for old, new in zip(old_final_logits_bias.value(), new_final_logits_bias.value()):
for p1, p2 in zip(old, new):
if tf.math.reduce_sum(tf.math.abs(p1 - p2)) > 0:
models_equal = False
self.assertTrue(models_equal)
class AbstractMarianIntegrationTest(unittest.TestCase): class AbstractMarianIntegrationTest(unittest.TestCase):
maxDiff = 1000 # show more chars for failing integration tests maxDiff = 1000 # show more chars for failing integration tests

View File

@@ -36,7 +36,7 @@ class ModelTester(TFBartModelTester):
@require_tf @require_tf
class TestTFMBartCommon(TFModelTesterMixin, unittest.TestCase): class TFMBartModelTest(TFModelTesterMixin, unittest.TestCase):
all_model_classes = (TFMBartForConditionalGeneration,) if is_tf_available() else () all_model_classes = (TFMBartForConditionalGeneration,) if is_tf_available() else ()
all_generative_model_classes = (TFMBartForConditionalGeneration,) if is_tf_available() else () all_generative_model_classes = (TFMBartForConditionalGeneration,) if is_tf_available() else ()
model_tester_cls = ModelTester model_tester_cls = ModelTester
@@ -54,14 +54,6 @@ class TestTFMBartCommon(TFModelTesterMixin, unittest.TestCase):
# inputs_embeds not supported # inputs_embeds not supported
pass pass
def test_saved_model_with_hidden_states_output(self):
# Should be uncommented during patrick TF refactor
pass
def test_saved_model_with_attentions_output(self):
# Should be uncommented during patrick TF refactor
pass
def test_compile_tf_model(self): def test_compile_tf_model(self):
config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common() config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
@@ -99,15 +91,87 @@ class TestTFMBartCommon(TFModelTesterMixin, unittest.TestCase):
for model_class in self.all_model_classes: for model_class in self.all_model_classes:
model = model_class(config) model = model_class(config)
assert isinstance(model.get_input_embeddings(), tf.keras.layers.Layer) assert isinstance(model.get_input_embeddings(), tf.keras.layers.Layer)
x = model.get_output_layer_with_bias()
if model_class in self.all_generative_model_classes:
x = model.get_output_embeddings()
assert isinstance(x, tf.keras.layers.Layer)
name = model.get_bias()
assert isinstance(name, dict)
for k, v in name.items():
assert isinstance(v, tf.Variable)
else:
x = model.get_output_embeddings()
assert x is None assert x is None
name = model.get_prefix_bias_name() name = model.get_bias()
assert name is None assert name is None
def test_saved_model_creation(self): def test_saved_model_creation(self):
# This test is too long (>30sec) and makes fail the CI # This test is too long (>30sec) and makes fail the CI
pass pass
def test_resize_token_embeddings(self):
config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
def _get_word_embedding_weight(model, embedding_layer):
if hasattr(embedding_layer, "weight"):
return embedding_layer.weight
else:
# Here we build the word embeddings weights if not exists.
# And then we retry to get the attribute once built.
model(model.dummy_inputs)
if hasattr(embedding_layer, "weight"):
return embedding_layer.weight
else:
return None
for model_class in self.all_model_classes:
for size in [config.vocab_size - 10, config.vocab_size + 10, None]:
# build the embeddings
model = model_class(config=config)
old_input_embeddings = _get_word_embedding_weight(model, model.get_input_embeddings())
old_output_embeddings = _get_word_embedding_weight(model, model.get_output_embeddings())
old_final_logits_bias = model.get_bias()
# reshape the embeddings
model.resize_token_embeddings(size)
new_input_embeddings = _get_word_embedding_weight(model, model.get_input_embeddings())
new_output_embeddings = _get_word_embedding_weight(model, model.get_output_embeddings())
new_final_logits_bias = model.get_bias()
# check that the resized embeddings size matches the desired size.
assert_size = size if size is not None else config.vocab_size
self.assertEqual(new_input_embeddings.shape[0], assert_size)
# check that weights remain the same after resizing
models_equal = True
for p1, p2 in zip(old_input_embeddings.value(), new_input_embeddings.value()):
if tf.math.reduce_sum(tf.math.abs(p1 - p2)) > 0:
models_equal = False
self.assertTrue(models_equal)
if old_output_embeddings is not None and new_output_embeddings is not None:
self.assertEqual(new_output_embeddings.shape[0], assert_size)
models_equal = True
for p1, p2 in zip(old_output_embeddings.value(), new_output_embeddings.value()):
if tf.math.reduce_sum(tf.math.abs(p1 - p2)) > 0:
models_equal = False
self.assertTrue(models_equal)
if old_final_logits_bias is not None and new_final_logits_bias is not None:
old_final_logits_bias = old_final_logits_bias["final_logits_bias"]
new_final_logits_bias = new_final_logits_bias["final_logits_bias"]
self.assertEqual(new_final_logits_bias.shape[0], 1)
self.assertEqual(new_final_logits_bias.shape[1], assert_size)
models_equal = True
for old, new in zip(old_final_logits_bias.value(), new_final_logits_bias.value()):
for p1, p2 in zip(old, new):
if tf.math.reduce_sum(tf.math.abs(p1 - p2)) > 0:
models_equal = False
self.assertTrue(models_equal)
@is_pt_tf_cross_test @is_pt_tf_cross_test
@require_sentencepiece @require_sentencepiece

View File

@@ -292,15 +292,17 @@ class TFMobileBertModelTest(TFModelTesterMixin, unittest.TestCase):
assert isinstance(model.get_input_embeddings(), tf.keras.layers.Layer) assert isinstance(model.get_input_embeddings(), tf.keras.layers.Layer)
if model_class in list_lm_models: if model_class in list_lm_models:
x = model.get_output_layer_with_bias() x = model.get_output_embeddings()
assert isinstance(x, tf.keras.layers.Layer) assert isinstance(x, tf.keras.layers.Layer)
name = model.get_prefix_bias_name() name = model.get_bias()
assert isinstance(name, str) assert isinstance(name, dict)
for k, v in name.items():
assert isinstance(v, tf.Variable)
else: else:
x = model.get_output_layer_with_bias() x = model.get_output_embeddings()
assert x is None
name = model.get_prefix_bias_name()
assert x is None assert x is None
name = model.get_bias()
assert name is None
def test_saved_model_creation(self): def test_saved_model_creation(self):
# This test is too long (>30sec) and makes fail the CI # This test is too long (>30sec) and makes fail the CI

View File

@@ -228,9 +228,16 @@ class TFOpenAIGPTModelTest(TFModelTesterMixin, unittest.TestCase):
for model_class in self.all_model_classes: for model_class in self.all_model_classes:
model = model_class(config) model = model_class(config)
assert isinstance(model.get_input_embeddings(), tf.keras.layers.Layer) assert isinstance(model.get_input_embeddings(), tf.keras.layers.Layer)
x = model.get_output_layer_with_bias()
if model_class in self.all_generative_model_classes:
x = model.get_output_embeddings()
assert isinstance(x, tf.keras.layers.Layer)
name = model.get_bias()
assert name is None
else:
x = model.get_output_embeddings()
assert x is None assert x is None
name = model.get_prefix_bias_name() name = model.get_bias()
assert name is None assert name is None
def test_openai_gpt_sequence_classification_model(self): def test_openai_gpt_sequence_classification_model(self):

View File

@@ -41,7 +41,7 @@ class ModelTester(TFBartModelTester):
@require_tf @require_tf
class TestTFPegasusCommon(TFModelTesterMixin, unittest.TestCase): class TFPegasusModelTest(TFModelTesterMixin, unittest.TestCase):
all_model_classes = (TFPegasusForConditionalGeneration,) if is_tf_available() else () all_model_classes = (TFPegasusForConditionalGeneration,) if is_tf_available() else ()
all_generative_model_classes = (TFPegasusForConditionalGeneration,) if is_tf_available() else () all_generative_model_classes = (TFPegasusForConditionalGeneration,) if is_tf_available() else ()
model_tester_cls = ModelTester model_tester_cls = ModelTester
@@ -59,14 +59,6 @@ class TestTFPegasusCommon(TFModelTesterMixin, unittest.TestCase):
# inputs_embeds not supported # inputs_embeds not supported
pass pass
def test_saved_model_with_hidden_states_output(self):
# Should be uncommented during patrick TF refactor
pass
def test_saved_model_with_attentions_output(self):
# Should be uncommented during patrick TF refactor
pass
def test_compile_tf_model(self): def test_compile_tf_model(self):
config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common() config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
@@ -104,15 +96,87 @@ class TestTFPegasusCommon(TFModelTesterMixin, unittest.TestCase):
for model_class in self.all_model_classes: for model_class in self.all_model_classes:
model = model_class(config) model = model_class(config)
assert isinstance(model.get_input_embeddings(), tf.keras.layers.Layer) assert isinstance(model.get_input_embeddings(), tf.keras.layers.Layer)
x = model.get_output_layer_with_bias()
if model_class in self.all_generative_model_classes:
x = model.get_output_embeddings()
assert isinstance(x, tf.keras.layers.Layer)
name = model.get_bias()
assert isinstance(name, dict)
for k, v in name.items():
assert isinstance(v, tf.Variable)
else:
x = model.get_output_embeddings()
assert x is None assert x is None
name = model.get_prefix_bias_name() name = model.get_bias()
assert name is None assert name is None
def test_saved_model_creation(self): def test_saved_model_creation(self):
# This test is too long (>30sec) and makes fail the CI # This test is too long (>30sec) and makes fail the CI
pass pass
def test_resize_token_embeddings(self):
config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
def _get_word_embedding_weight(model, embedding_layer):
if hasattr(embedding_layer, "weight"):
return embedding_layer.weight
else:
# Here we build the word embeddings weights if not exists.
# And then we retry to get the attribute once built.
model(model.dummy_inputs)
if hasattr(embedding_layer, "weight"):
return embedding_layer.weight
else:
return None
for model_class in self.all_model_classes:
for size in [config.vocab_size - 10, config.vocab_size + 10, None]:
# build the embeddings
model = model_class(config=config)
old_input_embeddings = _get_word_embedding_weight(model, model.get_input_embeddings())
old_output_embeddings = _get_word_embedding_weight(model, model.get_output_embeddings())
old_final_logits_bias = model.get_bias()
# reshape the embeddings
model.resize_token_embeddings(size)
new_input_embeddings = _get_word_embedding_weight(model, model.get_input_embeddings())
new_output_embeddings = _get_word_embedding_weight(model, model.get_output_embeddings())
new_final_logits_bias = model.get_bias()
# check that the resized embeddings size matches the desired size.
assert_size = size if size is not None else config.vocab_size
self.assertEqual(new_input_embeddings.shape[0], assert_size)
# check that weights remain the same after resizing
models_equal = True
for p1, p2 in zip(old_input_embeddings.value(), new_input_embeddings.value()):
if tf.math.reduce_sum(tf.math.abs(p1 - p2)) > 0:
models_equal = False
self.assertTrue(models_equal)
if old_output_embeddings is not None and new_output_embeddings is not None:
self.assertEqual(new_output_embeddings.shape[0], assert_size)
models_equal = True
for p1, p2 in zip(old_output_embeddings.value(), new_output_embeddings.value()):
if tf.math.reduce_sum(tf.math.abs(p1 - p2)) > 0:
models_equal = False
self.assertTrue(models_equal)
if old_final_logits_bias is not None and new_final_logits_bias is not None:
old_final_logits_bias = old_final_logits_bias["final_logits_bias"]
new_final_logits_bias = new_final_logits_bias["final_logits_bias"]
self.assertEqual(new_final_logits_bias.shape[0], 1)
self.assertEqual(new_final_logits_bias.shape[1], assert_size)
models_equal = True
for old, new in zip(old_final_logits_bias.value(), new_final_logits_bias.value()):
for p1, p2 in zip(old, new):
if tf.math.reduce_sum(tf.math.abs(p1 - p2)) > 0:
models_equal = False
self.assertTrue(models_equal)
@is_pt_tf_cross_test @is_pt_tf_cross_test
@require_sentencepiece @require_sentencepiece

View File

@@ -289,9 +289,16 @@ class TFT5ModelTest(TFModelTesterMixin, unittest.TestCase):
for model_class in self.all_model_classes: for model_class in self.all_model_classes:
model = model_class(config) model = model_class(config)
assert isinstance(model.get_input_embeddings(), tf.keras.layers.Layer) assert isinstance(model.get_input_embeddings(), tf.keras.layers.Layer)
x = model.get_output_layer_with_bias()
if model_class in self.all_generative_model_classes:
x = model.get_output_embeddings()
assert isinstance(x, tf.keras.layers.Layer)
name = model.get_bias()
assert name is None
else:
x = model.get_output_embeddings()
assert x is None assert x is None
name = model.get_prefix_bias_name() name = model.get_bias()
assert name is None assert name is None
def test_saved_model_creation(self): def test_saved_model_creation(self):

View File

@@ -187,13 +187,20 @@ class TFTransfoXLModelTest(TFModelTesterMixin, unittest.TestCase):
def test_model_common_attributes(self): def test_model_common_attributes(self):
config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common() config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
list_other_models_with_output_ebd = [TFTransfoXLForSequenceClassification]
for model_class in self.all_model_classes: for model_class in self.all_model_classes:
model = model_class(config) model = model_class(config)
assert isinstance(model.get_input_embeddings(), tf.keras.layers.Layer) assert isinstance(model.get_input_embeddings(), tf.keras.layers.Layer)
x = model.get_output_layer_with_bias() if model_class in list_other_models_with_output_ebd:
x = model.get_output_embeddings()
assert isinstance(x, tf.keras.layers.Layer)
name = model.get_bias()
assert name is None
else:
x = model.get_output_embeddings()
assert x is None assert x is None
name = model.get_prefix_bias_name() name = model.get_bias()
assert name is None assert name is None
@slow @slow