From abd253103478b21faafb7c9a6e7a1a7d1effe757 Mon Sep 17 00:00:00 2001
From: Shauray Singh <39147312+shauray8@users.noreply.github.com>
Date: Wed, 27 Sep 2023 14:26:07 +0530
Subject: [PATCH] Fix padding for IDEFICS (#26396)

* fix

* fixup

* tests

* fixup
---
 .../models/idefics/processing_idefics.py      | 21 ++++++++++---------
 .../models/idefics/test_processor_idefics.py  | 19 +++++++++++++++++
 2 files changed, 30 insertions(+), 10 deletions(-)
diff --git a/src/transformers/models/idefics/processing_idefics.py b/src/transformers/models/idefics/processing_idefics.py
index c1d8485c53..e6e0a9254a 100644
--- a/src/transformers/models/idefics/processing_idefics.py
+++ b/src/transformers/models/idefics/processing_idefics.py
@@ -280,7 +280,7 @@ class IdeficsProcessor(ProcessorMixin):
             else:
                 return fake_token + image_token + fake_token
 
-        all_texts = []
+        all_prompts = []
         all_images = []
         for sample in prompts:
             # the model was trained on samples starting with <s>
@@ -321,17 +321,18 @@ class IdeficsProcessor(ProcessorMixin):
 
             image_objects = self.image_processor(image_objects, transform=transform)
 
-            text_encoding = self.tokenizer(
-                text=full_text,
-                add_special_tokens=False,
-                padding=padding,
-                truncation=truncation,
-                max_length=max_length,
-            )
-
-            all_texts.append(text_encoding["input_ids"])
+            all_prompts.append(full_text)
             all_images.append(image_objects)
 
+        text_encoding = self.tokenizer(
+            text=all_prompts,
+            add_special_tokens=False,
+            padding=padding,
+            truncation=truncation,
+            max_length=max_length,
+        )
+        all_texts = text_encoding["input_ids"]
+
         max_seq_len = max(len(x) for x in all_texts)
 
         # max_num_images has to be at least 1 even when there are no images
diff --git a/tests/models/idefics/test_processor_idefics.py b/tests/models/idefics/test_processor_idefics.py
index 4ad11d31ae..523b7a5515 100644
--- a/tests/models/idefics/test_processor_idefics.py
+++ b/tests/models/idefics/test_processor_idefics.py
@@ -141,6 +141,25 @@ class IdeficsProcessorTest(TestCasePlus):
 
         self.assertListEqual(decoded_tok, decoded_processor)
 
+    def test_tokenizer_padding(self):  # padding="max_length" vs. padding="longest" through the processor
+        image_processor = self.get_image_processor()
+        tokenizer = self.get_tokenizer(padding_side="right")
+        # NOTE(review): trailing <unk> in the expected strings is presumably right-side padding - confirm.
+        processor = IdeficsProcessor(tokenizer=tokenizer, image_processor=image_processor)
+        # Expected decodes of the last sample: [0] is compared for "longest", [1] for "max_length".
+        predicted_tokens = [
+            "<s>Describe this image.\nAssistant:<unk><unk><unk><unk><unk><unk><unk><unk><unk>",
+            "<s>Describe this image.\nAssistant:<unk><unk><unk><unk><unk><unk><unk><unk><unk><unk>",
+        ]
+        # Wrap each prompt from prepare_prompts()[2] in its own list, so each one is passed as a separate sample.
+        prompts = [[prompt] for prompt in self.prepare_prompts()[2]]
+        max_length = processor(prompts, padding="max_length", truncation=True, max_length=20)
+        longest = processor(prompts, padding="longest", truncation=True, max_length=30)
+        decoded_max_length = processor.tokenizer.decode(max_length["input_ids"][-1])
+        decoded_longest = processor.tokenizer.decode(longest["input_ids"][-1])
+        self.assertEqual(decoded_max_length, predicted_tokens[1])
+        self.assertEqual(decoded_longest, predicted_tokens[0])
+
     def test_model_input_names(self):
         image_processor = self.get_image_processor()
         tokenizer = self.get_tokenizer()