BlipModel: get_multimodal_features method (#30438)

* add_blip_get_multimodal_feautres * Fix docstring error * reimplement get_multimodal_features * fix error * recheck code quality * add new necessary tests
2024-05-01 04:01:01 +10:00
parent 9112520b15
commit 0cdb6b3f92
2 changed files with 110 additions and 0 deletions
--- a/tests/models/blip/test_modeling_blip.py
+++ b/tests/models/blip/test_modeling_blip.py
@@ -582,6 +582,63 @@ class BlipModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase):
        model = BlipModel.from_pretrained(model_name)
        self.assertIsNotNone(model)

+    def test_get_image_features(self):
+        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
+
+        keys_to_pop = ["input_ids", "attention_mask", "return_loss"]
+
+        for key in keys_to_pop:
+            inputs_dict.pop(key)
+
+        model = BlipModel(config).to(torch_device)
+        model.eval()
+        image_features = model.get_image_features(**inputs_dict)
+        self.assertEqual(
+            image_features.shape,
+            (
+                self.model_tester.batch_size,
+                model.projection_dim,
+            ),
+        )
+
+    def test_get_text_features(self):
+        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
+
+        keys_to_pop = ["pixel_values", "return_loss"]
+
+        for key in keys_to_pop:
+            inputs_dict.pop(key)
+
+        model = BlipModel(config).to(torch_device)
+        model.eval()
+        text_features = model.get_text_features(**inputs_dict)
+        self.assertEqual(
+            text_features.shape,
+            (
+                self.model_tester.batch_size,
+                model.projection_dim,
+            ),
+        )
+
+    def test_get_multimodal_features(self):
+        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
+
+        keys_to_pop = ["return_loss"]
+
+        for key in keys_to_pop:
+            inputs_dict.pop(key)
+
+        model = BlipModel(config).to(torch_device)
+        model.eval()
+        multimodal_features = model.get_multimodal_features(**inputs_dict)
+        self.assertEqual(
+            multimodal_features.shape,
+            (
+                self.model_tester.batch_size,
+                model.projection_dim,
+            ),
+        )
+
    def test_pt_tf_model_equivalence(self):
        super().test_pt_tf_model_equivalence(allow_missing_keys=True)