Fix embeddings resizing in TF models (#8657)
* Resize the biases at the same time as the embeddings
* Trigger CI
* Biases are not reset anymore
* Remove get_output_embeddings + better LM model detection in generation utils
* Apply style
* First test on BERT
* Update docstring + new name
* Apply the new resizing logic to all the models
* Fix tests
* Apply style
* Update the template
* Fix naming
* Fix naming
* Apply style
* Apply style
* Remove unused import
* Revert get_output_embeddings
* Trigger CI
* Update num parameters
* Restore get_output_embeddings in TFPretrainedModel and add comments
* Style
* Add decoder resizing
* Style
* Fix tests
* Separate bias and decoder resize
* Fix tests
* Fix tests
* Apply style
* Add bias resizing in MPNet
* Trigger CI
* Apply style
This commit is contained in:
@@ -272,6 +272,17 @@ class TFAlbertModelTest(TFModelTesterMixin, unittest.TestCase):
|
||||
config_and_inputs = self.model_tester.prepare_config_and_inputs()
|
||||
self.model_tester.create_and_check_albert_for_question_answering(*config_and_inputs)
|
||||
|
||||
def test_model_common_attributes(self):
    """Check the embedding-related accessors of every ALBERT TF model class.

    Each class in ``self.all_model_classes`` is built from the common test
    config. Its input embeddings must be a Keras layer, while both
    ``get_output_layer_with_bias()`` and ``get_prefix_bias_name()`` are
    expected to return ``None``.
    """
    config, _ = self.model_tester.prepare_config_and_inputs_for_common()

    for model_class in self.all_model_classes:
        model = model_class(config)

        input_embeddings = model.get_input_embeddings()
        assert isinstance(input_embeddings, tf.keras.layers.Layer)

        output_layer = model.get_output_layer_with_bias()
        assert output_layer is None

        prefix_bias_name = model.get_prefix_bias_name()
        assert prefix_bias_name is None
|
||||
|
||||
@slow
|
||||
def test_model_from_pretrained(self):
|
||||
for model_name in TF_ALBERT_PRETRAINED_MODEL_ARCHIVE_LIST[:1]:
|
||||
|
||||
@@ -126,6 +126,17 @@ class TFBartModelTest(TFModelTesterMixin, unittest.TestCase):
|
||||
# Should be uncommented during patrick TF refactor
|
||||
pass
|
||||
|
||||
def test_model_common_attributes(self):
    """Verify the common embedding accessors on every BART TF model class.

    For each class in ``self.all_model_classes``: the input embeddings must
    be a ``tf.keras.layers.Layer``; the output layer with bias and the bias
    prefix name must both be ``None``.
    """
    config, _ = self.model_tester.prepare_config_and_inputs_for_common()

    for model_class in self.all_model_classes:
        model = model_class(config)

        embeddings = model.get_input_embeddings()
        assert isinstance(embeddings, tf.keras.layers.Layer)

        layer_with_bias = model.get_output_layer_with_bias()
        assert layer_with_bias is None

        bias_prefix = model.get_prefix_bias_name()
        assert bias_prefix is None
|
||||
|
||||
|
||||
@require_tf
|
||||
class TFBartHeadTests(unittest.TestCase):
|
||||
|
||||
@@ -331,6 +331,25 @@ class TFBertModelTest(TFModelTesterMixin, unittest.TestCase):
|
||||
model = TFBertModel.from_pretrained("jplu/tiny-tf-bert-random")
|
||||
self.assertIsNotNone(model)
|
||||
|
||||
def test_model_common_attributes(self):
    """Check the embedding/bias accessors of every BERT TF model class.

    All classes must expose a Keras layer as input embeddings. Classes
    listed in ``list_lm_models`` must additionally return a Keras layer from
    ``get_output_layer_with_bias()`` and a string from
    ``get_prefix_bias_name()``; every other class must return ``None`` from
    both accessors.
    """
    config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
    list_lm_models = [TFBertForMaskedLM, TFBertForPreTraining, TFBertLMHeadModel]

    for model_class in self.all_model_classes:
        model = model_class(config)
        assert isinstance(model.get_input_embeddings(), tf.keras.layers.Layer)

        if model_class in list_lm_models:
            x = model.get_output_layer_with_bias()
            assert isinstance(x, tf.keras.layers.Layer)
            name = model.get_prefix_bias_name()
            assert isinstance(name, str)
        else:
            x = model.get_output_layer_with_bias()
            assert x is None
            name = model.get_prefix_bias_name()
            # Previously this re-asserted `x is None`, leaving `name` unchecked.
            assert name is None
|
||||
|
||||
def test_custom_load_tf_weights(self):
|
||||
model, output_loading_info = TFBertForTokenClassification.from_pretrained(
|
||||
"jplu/tiny-tf-bert-random", output_loading_info=True
|
||||
|
||||
@@ -18,17 +18,17 @@ import unittest
|
||||
from tests.test_configuration_common import ConfigTester
|
||||
from tests.test_modeling_tf_bart import TFBartModelTester
|
||||
from tests.test_modeling_tf_common import TFModelTesterMixin
|
||||
from transformers import (
|
||||
BlenderbotConfig,
|
||||
BlenderbotSmallTokenizer,
|
||||
TFAutoModelForSeq2SeqLM,
|
||||
TFBlenderbotForConditionalGeneration,
|
||||
is_tf_available,
|
||||
)
|
||||
from transformers import BlenderbotConfig, BlenderbotSmallTokenizer, is_tf_available
|
||||
from transformers.file_utils import cached_property
|
||||
from transformers.testing_utils import is_pt_tf_cross_test, require_tf, require_tokenizers, slow
|
||||
|
||||
|
||||
if is_tf_available():
|
||||
import tensorflow as tf
|
||||
|
||||
from transformers import TFAutoModelForSeq2SeqLM, TFBlenderbotForConditionalGeneration
|
||||
|
||||
|
||||
class TFBlenderbotModelTester(TFBartModelTester):
|
||||
config_updates = dict(
|
||||
normalize_before=True,
|
||||
@@ -65,6 +65,17 @@ class TFBlenderbotModelTest(TFModelTesterMixin, unittest.TestCase):
|
||||
# Should be uncommented during patrick TF refactor
|
||||
pass
|
||||
|
||||
def test_model_common_attributes(self):
    """Verify the common embedding accessors on every Blenderbot TF model class.

    The input embeddings must be a Keras layer; ``get_output_layer_with_bias()``
    and ``get_prefix_bias_name()`` are both expected to yield ``None``.
    """
    config, _ = self.model_tester.prepare_config_and_inputs_for_common()

    for model_class in self.all_model_classes:
        model = model_class(config)

        input_embeddings = model.get_input_embeddings()
        assert isinstance(input_embeddings, tf.keras.layers.Layer)

        output_layer = model.get_output_layer_with_bias()
        assert output_layer is None

        prefix_bias_name = model.get_prefix_bias_name()
        assert prefix_bias_name is None
|
||||
|
||||
|
||||
@is_pt_tf_cross_test
|
||||
@require_tokenizers
|
||||
|
||||
@@ -592,12 +592,26 @@ class TFModelTesterMixin:
|
||||
|
||||
def test_model_common_attributes(self):
    """Check the embedding/bias accessors shared by all TF model classes.

    For every class in ``self.all_model_classes``:

    * the input embeddings must be a Keras layer or a ``TFAdaptiveEmbedding``;
    * the output embeddings must be ``None`` or a Keras layer;
    * classes found in the causal-LM, masked-LM or seq2seq-LM mappings must
      return a Keras layer from ``get_output_layer_with_bias()`` and a string
      from ``get_prefix_bias_name()``; all other classes must return ``None``
      from both.
    """
    config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
    list_lm_models = (
        list(TF_MODEL_FOR_CAUSAL_LM_MAPPING.values())
        + list(TF_MODEL_FOR_MASKED_LM_MAPPING.values())
        + list(TF_MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING.values())
    )

    for model_class in self.all_model_classes:
        model = model_class(config)
        assert isinstance(model.get_input_embeddings(), (tf.keras.layers.Layer, TFAdaptiveEmbedding))
        x = model.get_output_embeddings()
        assert x is None or isinstance(x, tf.keras.layers.Layer)

        if model_class in list_lm_models:
            x = model.get_output_layer_with_bias()
            assert isinstance(x, tf.keras.layers.Layer)
            name = model.get_prefix_bias_name()
            assert isinstance(name, str)
        else:
            x = model.get_output_layer_with_bias()
            assert x is None
            name = model.get_prefix_bias_name()
            # Previously this re-asserted `x is None`, leaving `name` unchecked.
            assert name is None
|
||||
|
||||
def test_determinism(self):
|
||||
config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
|
||||
|
||||
@@ -353,6 +353,17 @@ class TFGPT2ModelTest(TFModelTesterMixin, unittest.TestCase):
|
||||
config_and_inputs = self.model_tester.prepare_config_and_inputs()
|
||||
self.model_tester.create_and_check_gpt2_double_head(*config_and_inputs)
|
||||
|
||||
def test_model_common_attributes(self):
    """Verify the common embedding accessors on every GPT-2 TF model class.

    Input embeddings must be a ``tf.keras.layers.Layer``; the output layer
    with bias and the bias prefix name must both be ``None``.
    """
    config, _ = self.model_tester.prepare_config_and_inputs_for_common()

    for model_class in self.all_model_classes:
        model = model_class(config)

        embeddings = model.get_input_embeddings()
        assert isinstance(embeddings, tf.keras.layers.Layer)

        layer_with_bias = model.get_output_layer_with_bias()
        assert layer_with_bias is None

        bias_prefix = model.get_prefix_bias_name()
        assert bias_prefix is None
|
||||
|
||||
def test_gpt2_sequence_classification_model(self):
|
||||
config_and_inputs = self.model_tester.prepare_config_and_inputs()
|
||||
self.model_tester.create_and_check_gpt2_for_sequence_classification(*config_and_inputs)
|
||||
|
||||
@@ -678,6 +678,25 @@ class TFLxmertModelTest(TFModelTesterMixin, unittest.TestCase):
|
||||
extended_model = tf.keras.Model(inputs=[input_ids, visual_feats, visual_pos], outputs=[outputs])
|
||||
extended_model.compile(optimizer=optimizer, loss=loss, metrics=[metric])
|
||||
|
||||
def test_model_common_attributes(self):
    """Check the embedding/bias accessors of every LXMERT TF model class.

    All classes must expose a Keras layer as input embeddings. Classes
    listed in ``list_lm_models`` must return a Keras layer from
    ``get_output_layer_with_bias()`` and a string from
    ``get_prefix_bias_name()``; every other class must return ``None`` from
    both accessors.
    """
    config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
    list_lm_models = [TFLxmertForPreTraining]

    for model_class in self.all_model_classes:
        model = model_class(config)
        assert isinstance(model.get_input_embeddings(), tf.keras.layers.Layer)

        if model_class in list_lm_models:
            x = model.get_output_layer_with_bias()
            assert isinstance(x, tf.keras.layers.Layer)
            name = model.get_prefix_bias_name()
            assert isinstance(name, str)
        else:
            x = model.get_output_layer_with_bias()
            assert x is None
            name = model.get_prefix_bias_name()
            # Previously this re-asserted `x is None`, leaving `name` unchecked.
            assert name is None
|
||||
|
||||
@slow
|
||||
def test_saved_model_with_hidden_states_output(self):
|
||||
config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
|
||||
|
||||
@@ -94,6 +94,17 @@ class TestTFMarianCommon(TFModelTesterMixin, unittest.TestCase):
|
||||
extended_model = tf.keras.Model(inputs=[input_ids], outputs=[outputs])
|
||||
extended_model.compile(optimizer=optimizer, loss=loss, metrics=[metric])
|
||||
|
||||
def test_model_common_attributes(self):
    """Verify the common embedding accessors on every Marian TF model class.

    The input embeddings must be a Keras layer; ``get_output_layer_with_bias()``
    and ``get_prefix_bias_name()`` are both expected to yield ``None``.
    """
    config, _ = self.model_tester.prepare_config_and_inputs_for_common()

    for model_class in self.all_model_classes:
        model = model_class(config)

        input_embeddings = model.get_input_embeddings()
        assert isinstance(input_embeddings, tf.keras.layers.Layer)

        output_layer = model.get_output_layer_with_bias()
        assert output_layer is None

        prefix_bias_name = model.get_prefix_bias_name()
        assert prefix_bias_name is None
|
||||
|
||||
|
||||
class AbstractMarianIntegrationTest(unittest.TestCase):
|
||||
maxDiff = 1000 # show more chars for failing integration tests
|
||||
|
||||
@@ -93,6 +93,17 @@ class TestTFMBartCommon(TFModelTesterMixin, unittest.TestCase):
|
||||
extended_model = tf.keras.Model(inputs=[input_ids], outputs=[outputs])
|
||||
extended_model.compile(optimizer=optimizer, loss=loss, metrics=[metric])
|
||||
|
||||
def test_model_common_attributes(self):
    """Verify the common embedding accessors on every mBART TF model class.

    Input embeddings must be a ``tf.keras.layers.Layer``; the output layer
    with bias and the bias prefix name must both be ``None``.
    """
    config, _ = self.model_tester.prepare_config_and_inputs_for_common()

    for model_class in self.all_model_classes:
        model = model_class(config)

        embeddings = model.get_input_embeddings()
        assert isinstance(embeddings, tf.keras.layers.Layer)

        layer_with_bias = model.get_output_layer_with_bias()
        assert layer_with_bias is None

        bias_prefix = model.get_prefix_bias_name()
        assert bias_prefix is None
|
||||
|
||||
|
||||
@is_pt_tf_cross_test
|
||||
@require_sentencepiece
|
||||
|
||||
@@ -283,6 +283,25 @@ class TFMobileBertModelTest(TFModelTesterMixin, unittest.TestCase):
|
||||
config_and_inputs = self.model_tester.prepare_config_and_inputs()
|
||||
self.model_tester.create_and_check_mobilebert_for_token_classification(*config_and_inputs)
|
||||
|
||||
def test_model_common_attributes(self):
    """Check the embedding/bias accessors of every MobileBERT TF model class.

    All classes must expose a Keras layer as input embeddings. Classes
    listed in ``list_lm_models`` must return a Keras layer from
    ``get_output_layer_with_bias()`` and a string from
    ``get_prefix_bias_name()``; every other class must return ``None`` from
    both accessors.
    """
    config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
    list_lm_models = [TFMobileBertForMaskedLM, TFMobileBertForPreTraining]

    for model_class in self.all_model_classes:
        model = model_class(config)
        assert isinstance(model.get_input_embeddings(), tf.keras.layers.Layer)

        if model_class in list_lm_models:
            x = model.get_output_layer_with_bias()
            assert isinstance(x, tf.keras.layers.Layer)
            name = model.get_prefix_bias_name()
            assert isinstance(name, str)
        else:
            x = model.get_output_layer_with_bias()
            assert x is None
            name = model.get_prefix_bias_name()
            # Previously this re-asserted `x is None`, leaving `name` unchecked.
            assert name is None
|
||||
|
||||
@slow
|
||||
def test_model_from_pretrained(self):
|
||||
# for model_name in TF_MOBILEBERT_PRETRAINED_MODEL_ARCHIVE_LIST[:1]:
|
||||
|
||||
@@ -202,6 +202,17 @@ class TFOpenAIGPTModelTest(TFModelTesterMixin, unittest.TestCase):
|
||||
config_and_inputs = self.model_tester.prepare_config_and_inputs()
|
||||
self.model_tester.create_and_check_openai_gpt_double_head(*config_and_inputs)
|
||||
|
||||
def test_model_common_attributes(self):
    """Verify the common embedding accessors on every OpenAI GPT TF model class.

    The input embeddings must be a Keras layer; ``get_output_layer_with_bias()``
    and ``get_prefix_bias_name()`` are both expected to yield ``None``.
    """
    config, _ = self.model_tester.prepare_config_and_inputs_for_common()

    for model_class in self.all_model_classes:
        model = model_class(config)

        input_embeddings = model.get_input_embeddings()
        assert isinstance(input_embeddings, tf.keras.layers.Layer)

        output_layer = model.get_output_layer_with_bias()
        assert output_layer is None

        prefix_bias_name = model.get_prefix_bias_name()
        assert prefix_bias_name is None
|
||||
|
||||
@slow
|
||||
def test_model_from_pretrained(self):
|
||||
for model_name in TF_OPENAI_GPT_PRETRAINED_MODEL_ARCHIVE_LIST[:1]:
|
||||
|
||||
@@ -99,6 +99,17 @@ class TestTFPegasusCommon(TFModelTesterMixin, unittest.TestCase):
|
||||
extended_model = tf.keras.Model(inputs=[input_ids], outputs=[outputs])
|
||||
extended_model.compile(optimizer=optimizer, loss=loss, metrics=[metric])
|
||||
|
||||
def test_model_common_attributes(self):
    """Verify the common embedding accessors on every Pegasus TF model class.

    Input embeddings must be a ``tf.keras.layers.Layer``; the output layer
    with bias and the bias prefix name must both be ``None``.
    """
    config, _ = self.model_tester.prepare_config_and_inputs_for_common()

    for model_class in self.all_model_classes:
        model = model_class(config)

        embeddings = model.get_input_embeddings()
        assert isinstance(embeddings, tf.keras.layers.Layer)

        layer_with_bias = model.get_output_layer_with_bias()
        assert layer_with_bias is None

        bias_prefix = model.get_prefix_bias_name()
        assert bias_prefix is None
|
||||
|
||||
|
||||
@is_pt_tf_cross_test
|
||||
@require_sentencepiece
|
||||
|
||||
@@ -282,6 +282,17 @@ class TFT5ModelTest(TFModelTesterMixin, unittest.TestCase):
|
||||
config_and_inputs = self.model_tester.prepare_config_and_inputs()
|
||||
self.model_tester.create_and_check_t5_decoder_model_past_large_inputs(*config_and_inputs)
|
||||
|
||||
def test_model_common_attributes(self):
    """Verify the common embedding accessors on every T5 TF model class.

    The input embeddings must be a Keras layer; ``get_output_layer_with_bias()``
    and ``get_prefix_bias_name()`` are both expected to yield ``None``.
    """
    config, _ = self.model_tester.prepare_config_and_inputs_for_common()

    for model_class in self.all_model_classes:
        model = model_class(config)

        input_embeddings = model.get_input_embeddings()
        assert isinstance(input_embeddings, tf.keras.layers.Layer)

        output_layer = model.get_output_layer_with_bias()
        assert output_layer is None

        prefix_bias_name = model.get_prefix_bias_name()
        assert prefix_bias_name is None
|
||||
|
||||
@slow
|
||||
def test_model_from_pretrained(self):
|
||||
model = TFT5Model.from_pretrained("t5-small")
|
||||
|
||||
@@ -163,6 +163,17 @@ class TFTransfoXLModelTest(TFModelTesterMixin, unittest.TestCase):
|
||||
config_and_inputs = self.model_tester.prepare_config_and_inputs()
|
||||
self.model_tester.create_and_check_transfo_xl_lm_head(*config_and_inputs)
|
||||
|
||||
def test_model_common_attributes(self):
    """Verify the common embedding accessors on every Transformer-XL TF model class.

    Input embeddings must be a ``tf.keras.layers.Layer``; the output layer
    with bias and the bias prefix name must both be ``None``.
    """
    config, _ = self.model_tester.prepare_config_and_inputs_for_common()

    for model_class in self.all_model_classes:
        model = model_class(config)

        embeddings = model.get_input_embeddings()
        assert isinstance(embeddings, tf.keras.layers.Layer)

        layer_with_bias = model.get_output_layer_with_bias()
        assert layer_with_bias is None

        bias_prefix = model.get_prefix_bias_name()
        assert bias_prefix is None
|
||||
|
||||
@slow
|
||||
def test_model_from_pretrained(self):
|
||||
for model_name in TF_TRANSFO_XL_PRETRAINED_MODEL_ARCHIVE_LIST[:1]:
|
||||
|
||||
Reference in New Issue
Block a user