Fix the BLIP model returning a loss even when labels are not provided. (#21811)
* Fix the BLIP model returning a loss even when labels are not provided * Fix ruff failure * Incorporate PR feedback * Incorporate PR feedback * Incorporate PR feedback * Incorporate PR feedback
This commit is contained in:
@@ -990,7 +990,7 @@ class BlipForConditionalGeneration(BlipPreTrainedModel):
|
|||||||
|
|
||||||
>>> outputs = model(**inputs)
|
>>> outputs = model(**inputs)
|
||||||
```"""
|
```"""
|
||||||
batch_size = pixel_values.shape[0]
|
|
||||||
return_dict = return_dict if return_dict is not None else self.config.use_return_dict
|
return_dict = return_dict if return_dict is not None else self.config.use_return_dict
|
||||||
|
|
||||||
vision_outputs = self.vision_model(
|
vision_outputs = self.vision_model(
|
||||||
@@ -1002,12 +1002,6 @@ class BlipForConditionalGeneration(BlipPreTrainedModel):
|
|||||||
|
|
||||||
image_embeds = vision_outputs[0]
|
image_embeds = vision_outputs[0]
|
||||||
|
|
||||||
if input_ids is None:
|
|
||||||
input_ids = torch.LongTensor([[self.decoder_input_ids] * batch_size]).to(image_embeds.device)
|
|
||||||
|
|
||||||
if labels is None:
|
|
||||||
labels = input_ids.masked_fill(input_ids == self.decoder_pad_token_id, -100)
|
|
||||||
|
|
||||||
outputs = self.text_decoder(
|
outputs = self.text_decoder(
|
||||||
input_ids=input_ids,
|
input_ids=input_ids,
|
||||||
attention_mask=attention_mask,
|
attention_mask=attention_mask,
|
||||||
|
|||||||
@@ -24,7 +24,7 @@ import numpy as np
|
|||||||
import requests
|
import requests
|
||||||
|
|
||||||
from transformers import BlipConfig, BlipTextConfig, BlipVisionConfig
|
from transformers import BlipConfig, BlipTextConfig, BlipVisionConfig
|
||||||
from transformers.testing_utils import require_torch, require_vision, slow, torch_device
|
from transformers.testing_utils import require_torch, require_torch_gpu, require_vision, slow, torch_device
|
||||||
from transformers.utils import is_torch_available, is_vision_available
|
from transformers.utils import is_torch_available, is_vision_available
|
||||||
|
|
||||||
from ...test_configuration_common import ConfigTester
|
from ...test_configuration_common import ConfigTester
|
||||||
@@ -1111,6 +1111,7 @@ class BlipModelIntegrationTest(unittest.TestCase):
|
|||||||
[30522, 1037, 3861, 1997, 1037, 2450, 3564, 2006, 1996, 3509, 2007, 2014, 3899, 102],
|
[30522, 1037, 3861, 1997, 1037, 2450, 3564, 2006, 1996, 3509, 2007, 2014, 3899, 102],
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@require_torch_gpu
|
||||||
def test_inference_image_captioning_fp16(self):
|
def test_inference_image_captioning_fp16(self):
|
||||||
model = BlipForConditionalGeneration.from_pretrained(
|
model = BlipForConditionalGeneration.from_pretrained(
|
||||||
"Salesforce/blip-image-captioning-base", torch_dtype=torch.float16
|
"Salesforce/blip-image-captioning-base", torch_dtype=torch.float16
|
||||||
|
|||||||
Reference in New Issue
Block a user