From be37d34f44ff1bc928e59ffb8a30adecab8835a8 Mon Sep 17 00:00:00 2001 From: Yoni Gozlan <74535834+yonigozlan@users.noreply.github.com> Date: Tue, 25 Mar 2025 17:32:17 -0400 Subject: [PATCH] =?UTF-8?q?=F0=9F=9A=A8Deprecate=20legacy=20argument=20for?= =?UTF-8?q?=20image-text-to-text=20models=20and=20adopt=20new=20behavior?= =?UTF-8?q?=20by=20default=20(#36307)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * deprecate legacy argument and adopt new behavior by default * revert back modification git --- src/transformers/models/donut/processing_donut.py | 13 +------------ src/transformers/models/git/processing_git.py | 12 ------------ .../models/pix2struct/processing_pix2struct.py | 11 +---------- 3 files changed, 2 insertions(+), 34 deletions(-) diff --git a/src/transformers/models/donut/processing_donut.py b/src/transformers/models/donut/processing_donut.py index 04ddf901c6..689aa5122f 100644 --- a/src/transformers/models/donut/processing_donut.py +++ b/src/transformers/models/donut/processing_donut.py @@ -88,17 +88,6 @@ class DonutProcessor(ProcessorMixin): [`~DonutProcessor.as_target_processor`] this method forwards all its arguments to DonutTokenizer's [`~DonutTokenizer.__call__`]. Please refer to the docstring of the above two methods for more information. """ - # For backward compatibility - legacy = kwargs.pop("legacy", True) - if legacy: - # With `add_special_tokens=True`, the performance of donut are degraded when working with both images and text. - logger.warning_once( - "Legacy behavior is being used. The current behavior will be deprecated in version 5.0.0. " - "In the new behavior, if both images and text are provided, the default value of `add_special_tokens` " - "will be changed to `False` when calling the tokenizer if `add_special_tokens` is unset. " - "To test the new behavior, set `legacy=False`as a processor call argument." - ) - if self._in_target_context_manager: return self.current_processor(images, text, **kwargs) @@ -114,7 +103,7 @@ class DonutProcessor(ProcessorMixin): if images is not None: inputs = self.image_processor(images, **output_kwargs["images_kwargs"]) if text is not None: - if not legacy and images is not None: + if images is not None: output_kwargs["text_kwargs"].setdefault("add_special_tokens", False) encodings = self.tokenizer(text, **output_kwargs["text_kwargs"]) diff --git a/src/transformers/models/git/processing_git.py b/src/transformers/models/git/processing_git.py index 2f1b35cb7c..98cc3b83cf 100644 --- a/src/transformers/models/git/processing_git.py +++ b/src/transformers/models/git/processing_git.py @@ -95,15 +95,6 @@ class GitProcessor(ProcessorMixin): `None`). - **pixel_values** -- Pixel values to be fed to a model. Returned when `images` is not `None`. """ - legacy = kwargs.pop("legacy", True) - if legacy: - logger.warning_once( - "Legacy behavior is being used. The current behavior will be deprecated in version 5.0.0. " - "In the new behavior, if both images and text are provided, the last token (EOS token) " - "of the input_ids and attention_mask tensors will be removed. " - "To test the new behavior, set `legacy=False`as a processor call argument." - ) - if text is None and images is None: raise ValueError("You have to specify either text or images. Both cannot be none.") @@ -123,9 +114,6 @@ class GitProcessor(ProcessorMixin): if images is not None: image_features = self.image_processor(images, **output_kwargs["images_kwargs"]) data.update(image_features) - if not legacy: - data["input_ids"] = data["input_ids"][:, :-1] - data["attention_mask"] = data["attention_mask"][:, :-1] return BatchFeature(data=data, tensor_type=output_kwargs["common_kwargs"].get("return_tensors")) diff --git a/src/transformers/models/pix2struct/processing_pix2struct.py b/src/transformers/models/pix2struct/processing_pix2struct.py index ac9802dac8..f9b1fcc440 100644 --- a/src/transformers/models/pix2struct/processing_pix2struct.py +++ b/src/transformers/models/pix2struct/processing_pix2struct.py @@ -89,15 +89,6 @@ class Pix2StructProcessor(ProcessorMixin): Please refer to the docstring of the above two methods for more information. """ - legacy = kwargs.pop("legacy", True) - if legacy: - logger.warning_once( - "Legacy behavior is being used. The current behavior will be deprecated in version 5.0.0. " - "In the new behavior, If both images and text are provided, image_processor is not a VQA processor, and `add_special_tokens` is unset, " - "the default value of `add_special_tokens` will be changed to `False` when calling the tokenizer. " - "To test the new behavior, set `legacy=False`as a processor call argument." - ) - if images is None and text is None: raise ValueError("You have to specify either images or text.") @@ -126,7 +117,7 @@ class Pix2StructProcessor(ProcessorMixin): if text is not None and not self.image_processor.is_vqa: output_kwargs["text_kwargs"]["add_special_tokens"] = ( - add_special_tokens if add_special_tokens is not None else legacy + add_special_tokens if add_special_tokens is not None else False ) text_encoding = self.tokenizer(text=text, **output_kwargs["text_kwargs"])