From 279bc5849b6e497810cfb39bc1e991d2ff7e6679 Mon Sep 17 00:00:00 2001 From: Sean Moriarity Date: Wed, 4 May 2022 10:37:58 -0400 Subject: [PATCH] Allow saved_model export of TFCLIPModel in save_pretrained (#16886) * CLIP Serving * Add type hints per code review * Use black, flake8, and isort * Update src/transformers/models/clip/modeling_tf_clip.py Co-authored-by: Joao Gante * Rollback serving_output and add TODO * Remove irrelevant portions of failing tests * Revert "Rollback serving_output and add TODO" This reverts commit a4abfa6ba3b7875a13538dbc2ddc4eb17dfcca8d. * Rollback to original test/serving_output * Fix unused var * Apply suggestions from code review * Update formatting with black * Fix style again from rebase * Update tests/models/clip/test_modeling_tf_clip.py Co-authored-by: Yih-Dar <2521628+ydshieh@users.noreply.github.com> Co-authored-by: Joao Gante Co-authored-by: Sean Moriarity Co-authored-by: Yih-Dar <2521628+ydshieh@users.noreply.github.com> --- .../models/clip/modeling_tf_clip.py | 28 ++++- tests/models/clip/test_modeling_tf_clip.py | 109 ++++++++++++++++++ 2 files changed, 132 insertions(+), 5 deletions(-) diff --git a/src/transformers/models/clip/modeling_tf_clip.py b/src/transformers/models/clip/modeling_tf_clip.py index 5d20962001..ad26a7bfc3 100644 --- a/src/transformers/models/clip/modeling_tf_clip.py +++ b/src/transformers/models/clip/modeling_tf_clip.py @@ -551,11 +551,14 @@ class TFCLIPTextTransformer(tf.keras.layers.Layer): ) def _build_causal_attention_mask(self, batch_size, seq_length, dtype=tf.float32): - - diag = tf.constant(0.0, shape=(seq_length,), dtype=dtype) + # It is possible with an unspecified sequence length for seq_length to be + # a runtime value, which is unsupported by tf.constant. 
Per the TensorFlow + # docs, tf.fill can handle runtime dynamic shapes: + # https://www.tensorflow.org/api_docs/python/tf/fill + diag = tf.cast(tf.fill((seq_length,), 0.0), dtype) # set an additive 2D attention mask with all places being masked - to_mask = tf.constant(-10000.0, shape=(seq_length, seq_length), dtype=dtype) + to_mask = tf.cast(tf.fill((seq_length, seq_length), -10000.0), dtype) # set diagonal & lower triangular parts to 0 (i.e. the places not to be masked) # TIP: think the 2D matrix as the space of (query_seq, key_seq) @@ -1082,6 +1085,18 @@ class TFCLIPTextModel(TFCLIPPreTrainedModel): return outputs + @tf.function( + input_signature=[ + { + "input_ids": tf.TensorSpec((None, None), tf.int32, name="input_ids"), + "attention_mask": tf.TensorSpec((None, None), tf.int32, name="attention_mask"), + } + ] + ) + def serving(self, inputs: Dict[str, tf.Tensor]) -> TFBaseModelOutputWithPooling: + output = self.call(inputs) + return self.serving_output(output) + def serving_output(self, output: TFBaseModelOutputWithPooling) -> TFBaseModelOutputWithPooling: hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None @@ -1123,7 +1138,7 @@ class TFCLIPVisionModel(TFCLIPPreTrainedModel): } ] ) - def serving(self, inputs): + def serving(self, inputs: Dict[str, tf.Tensor]) -> TFBaseModelOutputWithPooling: """ Method used for serving the model. @@ -1226,7 +1241,7 @@ class TFCLIPModel(TFCLIPPreTrainedModel): } ] ) - def serving(self, inputs): + def serving(self, inputs: Dict[str, tf.Tensor]) -> TFCLIPOutput: """ Method used for serving the model. @@ -1375,4 +1390,7 @@ class TFCLIPModel(TFCLIPPreTrainedModel): return outputs def serving_output(self, output: TFCLIPOutput) -> TFCLIPOutput: + # TODO: As is, this currently fails with saved_model=True, because + # TensorFlow cannot trace through nested dataclasses. 
Reference: + # https://github.com/huggingface/transformers/pull/16886 return output diff --git a/tests/models/clip/test_modeling_tf_clip.py b/tests/models/clip/test_modeling_tf_clip.py index ea572e6a2a..797d5b73b3 100644 --- a/tests/models/clip/test_modeling_tf_clip.py +++ b/tests/models/clip/test_modeling_tf_clip.py @@ -256,6 +256,62 @@ class TFCLIPVisionModelTest(TFModelTesterMixin, unittest.TestCase): model = TFCLIPVisionModel.from_pretrained(model_name) self.assertIsNotNone(model) + @slow + def test_saved_model_creation_extended(self): + config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common() + config.output_hidden_states = True + config.output_attentions = True + + if hasattr(config, "use_cache"): + config.use_cache = True + + # in CLIP, the seq_len equals the number of patches + 1 (we add 1 for the [CLS] token) + image_size = (self.model_tester.image_size, self.model_tester.image_size) + patch_size = (self.model_tester.patch_size, self.model_tester.patch_size) + num_patches = (image_size[1] // patch_size[1]) * (image_size[0] // patch_size[0]) + seq_len = num_patches + 1 + + for model_class in self.all_model_classes: + class_inputs_dict = self._prepare_for_class(inputs_dict, model_class) + model = model_class(config) + num_out = len(model(class_inputs_dict)) + + with tempfile.TemporaryDirectory() as tmpdirname: + model.save_pretrained(tmpdirname, saved_model=True) + saved_model_dir = os.path.join(tmpdirname, "saved_model", "1") + model = tf.keras.models.load_model(saved_model_dir) + outputs = model(class_inputs_dict) + output_hidden_states = outputs["hidden_states"] + output_attentions = outputs["attentions"] + + # Check num outputs + self.assertEqual(len(outputs), num_out) + + # Check num layers + expected_num_layers = getattr( + self.model_tester, "expected_num_hidden_layers", self.model_tester.num_hidden_layers + 1 + ) + + self.assertEqual(len(output_hidden_states), expected_num_layers) + self.assertEqual(len(output_attentions), 
self.model_tester.num_hidden_layers) + + # Check attention outputs + image_size = (self.model_tester.image_size, self.model_tester.image_size) + patch_size = (self.model_tester.patch_size, self.model_tester.patch_size) + num_patches = (image_size[1] // patch_size[1]) * (image_size[0] // patch_size[0]) + seq_len = num_patches + 1 + + self.assertListEqual( + list(output_attentions[0].shape[-3:]), + [self.model_tester.num_attention_heads, seq_len, seq_len], + ) + + # Check hidden states + self.assertListEqual( + list(output_hidden_states[0].shape[-2:]), + [seq_len, self.model_tester.hidden_size], + ) + class TFCLIPTextModelTester: def __init__( @@ -367,6 +423,54 @@ class TFCLIPTextModelTest(TFModelTesterMixin, unittest.TestCase): model = TFCLIPTextModel.from_pretrained(model_name) self.assertIsNotNone(model) + @slow + def test_saved_model_creation_extended(self): + config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common() + config.output_hidden_states = True + config.output_attentions = True + + if hasattr(config, "use_cache"): + config.use_cache = True + + for model_class in self.all_model_classes: + class_inputs_dict = self._prepare_for_class(inputs_dict, model_class) + model = model_class(config) + num_out = len(model(class_inputs_dict)) + + with tempfile.TemporaryDirectory() as tmpdirname: + model.save_pretrained(tmpdirname, saved_model=True) + saved_model_dir = os.path.join(tmpdirname, "saved_model", "1") + model = tf.keras.models.load_model(saved_model_dir) + outputs = model(class_inputs_dict) + output_hidden_states = outputs["hidden_states"] + output_attentions = outputs["attentions"] + + # Check number of outputs + self.assertEqual(len(outputs), num_out) + + # Check number of layers + expected_num_layers = getattr( + self.model_tester, "expected_num_hidden_layers", self.model_tester.num_hidden_layers + 1 + ) + + # Check hidden states + self.assertEqual(len(output_hidden_states), expected_num_layers) + self.assertListEqual( + 
list(output_hidden_states[0].shape[-2:]), + [self.model_tester.seq_length, self.model_tester.hidden_size], + ) + + # Check attention outputs + self.assertEqual(len(output_attentions), self.model_tester.num_hidden_layers) + + seq_length = self.model_tester.seq_length + key_length = getattr(self.model_tester, "key_length", seq_length) + + self.assertListEqual( + list(output_attentions[0].shape[-3:]), + [self.model_tester.num_attention_heads, seq_length, key_length], + ) + class TFCLIPModelTester: def __init__(self, parent, is_training=True): @@ -502,6 +606,11 @@ class TFCLIPModelTest(TFModelTesterMixin, unittest.TestCase): model = TFCLIPModel.from_pretrained(model_name) self.assertIsNotNone(model) + @unittest.skip(reason="Currently `saved_model` doesn't work with nested outputs.") + @slow + def test_saved_model_creation_extended(self): + pass + # We will verify our results on an image of cute cats def prepare_img():