Update test_batched_inference_image_captioning_conditioned (#23391)
* fix
* fix
* fix test + add more docs

---------

Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
Co-authored-by: younesbelkada <younesbelkada@gmail.com>
This commit is contained in:
@@ -749,17 +749,20 @@ class Pix2StructIntegrationTest(unittest.TestCase):
|
||||
texts = ["A picture of", "An photography of"]
|
||||
|
||||
# image only
|
||||
inputs = processor(images=[image_1, image_2], text=texts, return_tensors="pt").to(torch_device)
|
||||
inputs = processor(images=[image_1, image_2], text=texts, return_tensors="pt", add_special_tokens=False).to(
|
||||
torch_device
|
||||
)
|
||||
|
||||
predictions = model.generate(**inputs)
|
||||
|
||||
self.assertEqual(
|
||||
processor.decode(predictions[0], skip_special_tokens=True), "A picture of a stop sign that says yes."
|
||||
processor.decode(predictions[0], skip_special_tokens=True),
|
||||
"A picture of a stop sign with a red stop sign on it.",
|
||||
)
|
||||
|
||||
self.assertEqual(
|
||||
processor.decode(predictions[1], skip_special_tokens=True),
|
||||
"An photography of the Temple Bar and a few other places.",
|
||||
"An photography of the Temple Bar and the Temple Bar.",
|
||||
)
|
||||
|
||||
def test_vqa_model(self):
|
||||
|
||||
Reference in New Issue
Block a user