Update test_batched_inference_image_captioning_conditioned (#23391)

* fix
* fix
* fix test + add more docs

---------

Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
Co-authored-by: younesbelkada <younesbelkada@gmail.com>
2023-05-16 14:49:24 +02:00
parent d765717c76
commit 21741e8c7e
2 changed files with 8 additions and 3 deletions
--- a/tests/models/pix2struct/test_modeling_pix2struct.py
+++ b/tests/models/pix2struct/test_modeling_pix2struct.py
@@ -749,17 +749,20 @@ class Pix2StructIntegrationTest(unittest.TestCase):
        texts = ["A picture of", "An photography of"]

        # image only
-        inputs = processor(images=[image_1, image_2], text=texts, return_tensors="pt").to(torch_device)
+        inputs = processor(images=[image_1, image_2], text=texts, return_tensors="pt", add_special_tokens=False).to(
+            torch_device
+        )

        predictions = model.generate(**inputs)

        self.assertEqual(
-            processor.decode(predictions[0], skip_special_tokens=True), "A picture of a stop sign that says yes."
+            processor.decode(predictions[0], skip_special_tokens=True),
+            "A picture of a stop sign with a red stop sign on it.",
        )

        self.assertEqual(
            processor.decode(predictions[1], skip_special_tokens=True),
-            "An photography of the Temple Bar and a few other places.",
+            "An photography of the Temple Bar and the Temple Bar.",
        )

    def test_vqa_model(self):