Fix TF s2s models (#9478)
* Fix Seq2Seq models for serving * Apply style * Fix Longformer * Fix mBart/Pegasus/Blenderbot * Apply style * Add a main intermediate layer * Apply style * Remove import * Apply tf.function to Longformer * Fix utils check_copy * Update S2S template * Fix BART + Blenderbot * Fix BlenderbotSmall * Fix BlenderbotSmall * Fix BlenderbotSmall * Fix MBart * Fix Marian * Fix Pegasus + template * Apply style * Fix common attributes test * Forgot to fix the LED test * Apply Patrick's comment on LED Decoder
This commit is contained in:
@@ -264,22 +264,8 @@ class TFBartModelTest(TFModelTesterMixin, unittest.TestCase):
|
||||
models_equal = False
|
||||
self.assertTrue(models_equal)
|
||||
|
||||
@slow
|
||||
def test_saved_model_with_hidden_states_output(self):
|
||||
# TODO(JPLU, PVP) - fix this with s2s tf-serving PR
|
||||
pass
|
||||
|
||||
@slow
|
||||
def test_saved_model_with_attentions_output(self):
|
||||
# TODO(JPLU, PVP) - fix this with s2s tf-serving PR
|
||||
pass
|
||||
|
||||
def test_saved_model_creation(self):
|
||||
# TODO(JPLU, PVP) - fix this with s2s tf-serving PR
|
||||
pass
|
||||
|
||||
def test_saved_model_creation_extended(self):
|
||||
# TODO(JPLU, PVP) - fix this with s2s tf-serving PR
|
||||
# This test is too long (>30sec) and makes fail the CI
|
||||
pass
|
||||
|
||||
|
||||
|
||||
@@ -200,22 +200,8 @@ class TFBlenderbotModelTest(TFModelTesterMixin, unittest.TestCase):
|
||||
name = model.get_bias()
|
||||
assert name is None
|
||||
|
||||
@slow
|
||||
def test_saved_model_with_hidden_states_output(self):
|
||||
# TODO(JPLU, PVP) - fix this with s2s tf-serving PR
|
||||
pass
|
||||
|
||||
@slow
|
||||
def test_saved_model_with_attentions_output(self):
|
||||
# TODO(JPLU, PVP) - fix this with s2s tf-serving PR
|
||||
pass
|
||||
|
||||
def test_saved_model_creation(self):
|
||||
# TODO(JPLU, PVP) - fix this with s2s tf-serving PR
|
||||
pass
|
||||
|
||||
def test_saved_model_creation_extended(self):
|
||||
# TODO(JPLU, PVP) - fix this with s2s tf-serving PR
|
||||
# This test is too long (>30sec) and makes fail the CI
|
||||
pass
|
||||
|
||||
def test_resize_token_embeddings(self):
|
||||
|
||||
@@ -188,28 +188,19 @@ class TFBlenderbotSmallModelTest(TFModelTesterMixin, unittest.TestCase):
|
||||
for model_class in self.all_model_classes:
|
||||
model = model_class(config)
|
||||
assert isinstance(model.get_input_embeddings(), tf.keras.layers.Layer)
|
||||
x = model.get_output_layer_with_bias()
|
||||
assert x is None
|
||||
name = model.get_prefix_bias_name()
|
||||
assert name is None
|
||||
|
||||
@slow
|
||||
def test_saved_model_with_hidden_states_output(self):
|
||||
# TODO(JPLU, PVP) - fix this with s2s tf-serving PR
|
||||
pass
|
||||
|
||||
@slow
|
||||
def test_saved_model_with_attentions_output(self):
|
||||
# TODO(JPLU, PVP) - fix this with s2s tf-serving PR
|
||||
pass
|
||||
|
||||
def test_saved_model_creation(self):
|
||||
# TODO(JPLU, PVP) - fix this with s2s tf-serving PR
|
||||
pass
|
||||
|
||||
def test_saved_model_creation_extended(self):
|
||||
# TODO(JPLU, PVP) - fix this with s2s tf-serving PR
|
||||
pass
|
||||
if model_class in self.all_generative_model_classes:
|
||||
x = model.get_output_embeddings()
|
||||
assert isinstance(x, tf.keras.layers.Layer)
|
||||
name = model.get_bias()
|
||||
assert isinstance(name, dict)
|
||||
for k, v in name.items():
|
||||
assert isinstance(v, tf.Variable)
|
||||
else:
|
||||
x = model.get_output_embeddings()
|
||||
assert x is None
|
||||
name = model.get_bias()
|
||||
assert name is None
|
||||
|
||||
def test_resize_token_embeddings(self):
|
||||
config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
|
||||
@@ -274,6 +265,10 @@ class TFBlenderbotSmallModelTest(TFModelTesterMixin, unittest.TestCase):
|
||||
models_equal = False
|
||||
self.assertTrue(models_equal)
|
||||
|
||||
def test_saved_model_creation(self):
|
||||
# This test is too long (>30sec) and makes fail the CI
|
||||
pass
|
||||
|
||||
|
||||
def _assert_tensors_equal(a, b, atol=1e-12, prefix=""):
|
||||
"""If tensors not close, or a and b arent both tensors, raise a nice Assertion error."""
|
||||
|
||||
@@ -211,36 +211,35 @@ class TFModelTesterMixin:
|
||||
def test_saved_model_with_hidden_states_output(self):
|
||||
config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
|
||||
config.output_hidden_states = True
|
||||
config.output_attentions = False
|
||||
|
||||
if hasattr(config, "use_cache"):
|
||||
config.use_cache = False
|
||||
|
||||
for model_class in self.all_model_classes:
|
||||
class_inputs_dict = self._prepare_for_class(inputs_dict, model_class)
|
||||
# A saved model is always executed in graph mode, since we merged the PR #8777
|
||||
# the booleans in graph mode are always the ones in the config, then we update
|
||||
# the use_cache property if it exists in order to have similar booleans with the inputs
|
||||
if "use_cache" in class_inputs_dict:
|
||||
config.use_cache = class_inputs_dict.pop("use_cache")
|
||||
model = model_class(config)
|
||||
num_out = len(model(class_inputs_dict))
|
||||
|
||||
with tempfile.TemporaryDirectory() as tmpdirname:
|
||||
model.save_pretrained(tmpdirname)
|
||||
saved_model_dir = os.path.join(tmpdirname, "saved_model")
|
||||
model = tf.keras.models.load_model(saved_model_dir)
|
||||
model.save_pretrained(tmpdirname, saved_model=True)
|
||||
model = tf.keras.models.load_model(os.path.join(tmpdirname, "saved_model", "1"))
|
||||
outputs = model(class_inputs_dict)
|
||||
|
||||
if self.is_encoder_decoder:
|
||||
output = outputs["encoder_hidden_states"] if isinstance(outputs, dict) else outputs[-1]
|
||||
output = outputs["encoder_hidden_states"]
|
||||
else:
|
||||
output = outputs["hidden_states"] if isinstance(outputs, dict) else outputs[-1]
|
||||
output = outputs["hidden_states"]
|
||||
|
||||
hidden_states = [t.numpy() for t in output]
|
||||
self.assertEqual(len(outputs), num_out)
|
||||
|
||||
expected_num_layers = getattr(
|
||||
self.model_tester, "expected_num_hidden_layers", self.model_tester.num_hidden_layers + 1
|
||||
)
|
||||
self.assertEqual(len(hidden_states), expected_num_layers)
|
||||
|
||||
self.assertEqual(len(output), expected_num_layers)
|
||||
self.assertListEqual(
|
||||
list(hidden_states[0].shape[-2:]),
|
||||
list(output[0].shape[-2:]),
|
||||
[self.model_tester.seq_length, self.model_tester.hidden_size],
|
||||
)
|
||||
|
||||
@@ -248,36 +247,33 @@ class TFModelTesterMixin:
|
||||
def test_saved_model_with_attentions_output(self):
|
||||
config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
|
||||
config.output_attentions = True
|
||||
config.output_hidden_states = False
|
||||
|
||||
if hasattr(config, "use_cache"):
|
||||
config.use_cache = False
|
||||
|
||||
encoder_seq_length = getattr(self.model_tester, "encoder_seq_length", self.model_tester.seq_length)
|
||||
encoder_key_length = getattr(self.model_tester, "key_length", encoder_seq_length)
|
||||
|
||||
for model_class in self.all_model_classes:
|
||||
class_inputs_dict = self._prepare_for_class(inputs_dict, model_class)
|
||||
# A saved model is always executed in graph mode, since we merged the PR #8777
|
||||
# the booleans in graph mode are always the ones in the config, then we update
|
||||
# the use_cache property if it exists in order to have similar booleans with the inputs
|
||||
if "use_cache" in class_inputs_dict:
|
||||
config.use_cache = class_inputs_dict.pop("use_cache")
|
||||
model = model_class(config)
|
||||
num_out = len(model(class_inputs_dict))
|
||||
|
||||
with tempfile.TemporaryDirectory() as tmpdirname:
|
||||
saved_model_dir = os.path.join(tmpdirname, "saved_model")
|
||||
model.save_pretrained(saved_model_dir)
|
||||
model = tf.keras.models.load_model(saved_model_dir)
|
||||
model.save_pretrained(tmpdirname, saved_model=True)
|
||||
model = tf.keras.models.load_model(os.path.join(tmpdirname, "saved_model", "1"))
|
||||
outputs = model(class_inputs_dict)
|
||||
|
||||
if self.is_encoder_decoder:
|
||||
output = outputs["encoder_attentions"] if isinstance(outputs, dict) else outputs[-1]
|
||||
output = outputs["encoder_attentions"]
|
||||
else:
|
||||
output = outputs["attentions"] if isinstance(outputs, dict) else outputs[-1]
|
||||
output = outputs["attentions"]
|
||||
|
||||
attentions = [t.numpy() for t in output]
|
||||
self.assertEqual(len(outputs), num_out)
|
||||
self.assertEqual(len(attentions), self.model_tester.num_hidden_layers)
|
||||
self.assertEqual(len(output), num_out)
|
||||
self.assertEqual(len(output), self.model_tester.num_hidden_layers)
|
||||
self.assertListEqual(
|
||||
list(attentions[0].shape[-3:]),
|
||||
list(output[0].shape[-3:]),
|
||||
[self.model_tester.num_attention_heads, encoder_seq_length, encoder_key_length],
|
||||
)
|
||||
|
||||
|
||||
@@ -352,30 +352,20 @@ class TFLEDModelTest(TFModelTesterMixin, unittest.TestCase):
|
||||
self.assertEqual(model.config.output_hidden_states, True)
|
||||
check_encoder_attentions_output(outputs)
|
||||
|
||||
@slow
|
||||
def test_saved_model_with_attentions_output(self):
|
||||
# longformer has special attentions which are not
|
||||
# compatible in graph mode
|
||||
pass
|
||||
|
||||
@slow
|
||||
def test_saved_model_with_hidden_states_output(self):
|
||||
# TODO(JPLU, PVP) this test should pass!!! PVP:
|
||||
# IMO there is a problem with the signature check.
|
||||
# Test passes for TFLEDModel, but not for TFLEDForConditionalGeneration
|
||||
# IMO the reason is that the tensor variable name cannot be changed
|
||||
# from decoder_input_ids -> input_ids, which poses a BIG restrictions
|
||||
pass
|
||||
|
||||
@slow
|
||||
def test_saved_model_creation_extended(self):
|
||||
# All the tests about building a saved model
|
||||
# fails because the Seq2Seq models uses model in a model
|
||||
# as a layer.
|
||||
# TODO(JPLU) WARNING: NEED TO BE FIXED ASAP
|
||||
pass
|
||||
|
||||
def test_saved_model_creation(self):
|
||||
# This test is too long (>30sec) and makes fail the CI
|
||||
pass
|
||||
|
||||
def test_saved_model_with_attentions_output(self):
|
||||
# This test don't pass because of the error:
|
||||
# condition [13,8,4,5], then [13,8,4,5], and else [13,8,4,6] must be broadcastable
|
||||
# This occurs line 323 in modeling_tf_led.py because the condition line 255
|
||||
# returns a tensor of shape
|
||||
# [batch_size, seq_len, self.num_heads, self.one_sided_attn_window_size * 2 + 2]
|
||||
# if is_global_attn is True and a tensor of shape
|
||||
# [batch_size, seq_len, self.num_heads, self.one_sided_attn_window_size * 2 + 1]
|
||||
# This is due to the tf.concat call line 703 that adds one dimension
|
||||
# Need to check with PVP how to properly fix this
|
||||
pass
|
||||
|
||||
|
||||
|
||||
@@ -233,22 +233,8 @@ class TFMarianModelTest(TFModelTesterMixin, unittest.TestCase):
|
||||
name = model.get_bias()
|
||||
assert name is None
|
||||
|
||||
@slow
|
||||
def test_saved_model_with_hidden_states_output(self):
|
||||
# TODO(JPLU, PVP) - fix this with s2s tf-serving PR
|
||||
pass
|
||||
|
||||
@slow
|
||||
def test_saved_model_with_attentions_output(self):
|
||||
# TODO(JPLU, PVP) - fix this with s2s tf-serving PR
|
||||
pass
|
||||
|
||||
def test_saved_model_creation(self):
|
||||
# TODO(JPLU, PVP) - fix this with s2s tf-serving PR
|
||||
pass
|
||||
|
||||
def test_saved_model_creation_extended(self):
|
||||
# TODO(JPLU, PVP) - fix this with s2s tf-serving PR
|
||||
# This test is too long (>30sec) and makes fail the CI
|
||||
pass
|
||||
|
||||
def test_resize_token_embeddings(self):
|
||||
|
||||
@@ -204,22 +204,8 @@ class TFMBartModelTest(TFModelTesterMixin, unittest.TestCase):
|
||||
name = model.get_bias()
|
||||
assert name is None
|
||||
|
||||
@slow
|
||||
def test_saved_model_with_hidden_states_output(self):
|
||||
# TODO(JPLU, PVP) - fix this with s2s tf-serving PR
|
||||
pass
|
||||
|
||||
@slow
|
||||
def test_saved_model_with_attentions_output(self):
|
||||
# TODO(JPLU, PVP) - fix this with s2s tf-serving PR
|
||||
pass
|
||||
|
||||
def test_saved_model_creation(self):
|
||||
# TODO(JPLU, PVP) - fix this with s2s tf-serving PR
|
||||
pass
|
||||
|
||||
def test_saved_model_creation_extended(self):
|
||||
# TODO(JPLU, PVP) - fix this with s2s tf-serving PR
|
||||
# This test is too long (>30sec) and makes fail the CI
|
||||
pass
|
||||
|
||||
def test_resize_token_embeddings(self):
|
||||
|
||||
@@ -231,22 +231,8 @@ class TFPegasusModelTest(TFModelTesterMixin, unittest.TestCase):
|
||||
name = model.get_bias()
|
||||
assert name is None
|
||||
|
||||
@slow
|
||||
def test_saved_model_with_hidden_states_output(self):
|
||||
# TODO(JPLU, PVP) - fix this with s2s tf-serving PR
|
||||
pass
|
||||
|
||||
@slow
|
||||
def test_saved_model_with_attentions_output(self):
|
||||
# TODO(JPLU, PVP) - fix this with s2s tf-serving PR
|
||||
pass
|
||||
|
||||
def test_saved_model_creation(self):
|
||||
# TODO(JPLU, PVP) - fix this with s2s tf-serving PR
|
||||
pass
|
||||
|
||||
def test_saved_model_creation_extended(self):
|
||||
# TODO(JPLU, PVP) - fix this with s2s tf-serving PR
|
||||
# This test is too long (>30sec) and makes fail the CI
|
||||
pass
|
||||
|
||||
def test_resize_token_embeddings(self):
|
||||
|
||||
Reference in New Issue
Block a user