From abd253103478b21faafb7c9a6e7a1a7d1effe757 Mon Sep 17 00:00:00 2001
From: Shauray Singh <39147312+shauray8@users.noreply.github.com>
Date: Wed, 27 Sep 2023 14:26:07 +0530
Subject: [PATCH] Fix padding for IDEFICS (#26396)

* fix
* fixup
* tests
* fixup
---
Reviewer notes (text between the "---" separator and the first "diff --git"
line is commentary and is not part of the commit message):

processing_idefics.py
  - Before: inside the `for sample in prompts:` loop the tokenizer was called
    once per sample with `text=full_text`, and each result's "input_ids" was
    appended to `all_texts`.
  - After: the loop only collects `full_text` into `all_prompts`; the
    tokenizer is called once after the loop with `text=all_prompts`, and
    `all_texts` is taken from that single call's "input_ids".
  - `padding`, `truncation` and `max_length` are forwarded unchanged, but now
    to one batched call. Presumably this is what lets padding be computed
    relative to the other prompts in the batch -- confirm against the
    tokenizer's padding documentation.
  - `max_seq_len = max(len(x) for x in all_texts)` is untouched context and
    still consumes `all_texts`.

test_processor_idefics.py
  - Adds `test_tokenizer_padding`, which builds the processor with a
    tokenizer created with `padding_side="right"` and calls it twice:
    `padding="max_length", max_length=20` and `padding="longest",
    max_length=30` (both with `truncation=True`).
  - NOTE(review): both assertions decode only the last sample
    (`["input_ids"][-1]`), and the two entries of `predicted_tokens` are the
    same string, so the test expects identical decoded text for both padding
    modes -- confirm this is the intended check.

 .../models/idefics/processing_idefics.py      | 21 ++++++++++---------
 .../models/idefics/test_processor_idefics.py  | 19 +++++++++++++++++
 2 files changed, 30 insertions(+), 10 deletions(-)

diff --git a/src/transformers/models/idefics/processing_idefics.py b/src/transformers/models/idefics/processing_idefics.py
index c1d8485c53..e6e0a9254a 100644
--- a/src/transformers/models/idefics/processing_idefics.py
+++ b/src/transformers/models/idefics/processing_idefics.py
@@ -280,7 +280,7 @@ class IdeficsProcessor(ProcessorMixin):
             else:
                 return fake_token + image_token + fake_token
 
-        all_texts = []
+        all_prompts = []
         all_images = []
         for sample in prompts:
             # the model was trained on samples starting with
@@ -321,17 +321,18 @@ class IdeficsProcessor(ProcessorMixin):
 
             image_objects = self.image_processor(image_objects, transform=transform)
 
-            text_encoding = self.tokenizer(
-                text=full_text,
-                add_special_tokens=False,
-                padding=padding,
-                truncation=truncation,
-                max_length=max_length,
-            )
-
-            all_texts.append(text_encoding["input_ids"])
+            all_prompts.append(full_text)
             all_images.append(image_objects)
 
+        text_encoding = self.tokenizer(
+            text=all_prompts,
+            add_special_tokens=False,
+            padding=padding,
+            truncation=truncation,
+            max_length=max_length,
+        )
+        all_texts = text_encoding["input_ids"]
+
         max_seq_len = max(len(x) for x in all_texts)
 
         # max_num_images has to be at least 1 even when there are no images
diff --git a/tests/models/idefics/test_processor_idefics.py b/tests/models/idefics/test_processor_idefics.py
index 4ad11d31ae..523b7a5515 100644
--- a/tests/models/idefics/test_processor_idefics.py
+++ b/tests/models/idefics/test_processor_idefics.py
@@ -141,6 +141,25 @@ class IdeficsProcessorTest(TestCasePlus):
 
         self.assertListEqual(decoded_tok, decoded_processor)
 
+    def test_tokenizer_padding(self):
+        image_processor = self.get_image_processor()
+        tokenizer = self.get_tokenizer(padding_side="right")
+
+        processor = IdeficsProcessor(tokenizer=tokenizer, image_processor=image_processor)
+
+        predicted_tokens = [
+            "Describe this image.\nAssistant:",
+            "Describe this image.\nAssistant:",
+        ]
+
+        prompts = [[prompt] for prompt in self.prepare_prompts()[2]]
+        max_length = processor(prompts, padding="max_length", truncation=True, max_length=20)
+        longest = processor(prompts, padding="longest", truncation=True, max_length=30)
+        decoded_max_length = processor.tokenizer.decode(max_length["input_ids"][-1])
+        decoded_longest = processor.tokenizer.decode(longest["input_ids"][-1])
+        self.assertEqual(decoded_max_length, predicted_tokens[1])
+        self.assertEqual(decoded_longest, predicted_tokens[0])
+
     def test_model_input_names(self):
         image_processor = self.get_image_processor()
         tokenizer = self.get_tokenizer()