Pix2Struct: fix wrong broadcast axis of attention mask in visual encoder (#23976)

* fix wrong broadcast axis of attention mask in visual encoder
* fix slow tests

---------

Co-authored-by: younesbelkada <younesbelkada@gmail.com>
2023-06-06 00:47:29 +09:00
parent 7824fa431e
commit 44bd590a29
2 changed files with 5 additions and 5 deletions
--- a/tests/models/pix2struct/test_modeling_pix2struct.py
+++ b/tests/models/pix2struct/test_modeling_pix2struct.py
@@ -757,12 +757,12 @@ class Pix2StructIntegrationTest(unittest.TestCase):

        self.assertEqual(
            processor.decode(predictions[0], skip_special_tokens=True),
-            "A picture of a stop sign with a red stop sign on it.",
+            "A picture of a stop sign with a red stop sign",
        )

        self.assertEqual(
            processor.decode(predictions[1], skip_special_tokens=True),
-            "An photography of the Temple Bar and the Temple Bar.",
+            "An photography of the Temple Bar and other places in the city.",
        )

    def test_vqa_model(self):