BlipModel: get_multimodal_features method (#30438)

* add_blip_get_multimodal_features

* Fix docstring error

* reimplement get_multimodal_features

* fix error

* recheck code quality

* add new necessary tests
This commit is contained in:
Jiarui Xu
2024-05-01 04:01:01 +10:00
committed by GitHub
parent 9112520b15
commit 0cdb6b3f92
2 changed files with 110 additions and 0 deletions

View File

@@ -582,6 +582,63 @@ class BlipModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase):
model = BlipModel.from_pretrained(model_name)
self.assertIsNotNone(model)
def test_get_image_features(self):
    """Check that `get_image_features` output has shape (batch_size, projection_dim)."""
    config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()

    # Remove the entries that are not forwarded to `get_image_features`.
    for unused_key in ("input_ids", "attention_mask", "return_loss"):
        del inputs_dict[unused_key]

    model = BlipModel(config).to(torch_device)
    model.eval()

    image_features = model.get_image_features(**inputs_dict)
    actual_shape = image_features.shape
    expected_shape = (self.model_tester.batch_size, model.projection_dim)
    self.assertEqual(actual_shape, expected_shape)
def test_get_text_features(self):
    """Check that `get_text_features` output has shape (batch_size, projection_dim)."""
    config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()

    # Remove the entries that are not forwarded to `get_text_features`.
    for unused_key in ("pixel_values", "return_loss"):
        del inputs_dict[unused_key]

    model = BlipModel(config).to(torch_device)
    model.eval()

    text_features = model.get_text_features(**inputs_dict)
    actual_shape = text_features.shape
    expected_shape = (self.model_tester.batch_size, model.projection_dim)
    self.assertEqual(actual_shape, expected_shape)
def test_get_multimodal_features(self):
    """Check that `get_multimodal_features` output has shape (batch_size, projection_dim)."""
    config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()

    # Every prepared input except `return_loss` is forwarded to the model.
    del inputs_dict["return_loss"]

    model = BlipModel(config).to(torch_device)
    model.eval()

    multimodal_features = model.get_multimodal_features(**inputs_dict)
    actual_shape = multimodal_features.shape
    expected_shape = (self.model_tester.batch_size, model.projection_dim)
    self.assertEqual(actual_shape, expected_shape)
def test_pt_tf_model_equivalence(self):
super().test_pt_tf_model_equivalence(allow_missing_keys=True)