🚨Deprecate legacy argument for image-text-to-text models and adopt new behavior by default (#36307)
* Deprecate the legacy argument and adopt the new behavior by default
* Revert the modification for GIT (GitProcessor)
This commit is contained in:
@@ -88,17 +88,6 @@ class DonutProcessor(ProcessorMixin):
|
|||||||
[`~DonutProcessor.as_target_processor`] this method forwards all its arguments to DonutTokenizer's
|
[`~DonutProcessor.as_target_processor`] this method forwards all its arguments to DonutTokenizer's
|
||||||
[`~DonutTokenizer.__call__`]. Please refer to the docstring of the above two methods for more information.
|
[`~DonutTokenizer.__call__`]. Please refer to the docstring of the above two methods for more information.
|
||||||
"""
|
"""
|
||||||
# For backward compatibility
|
|
||||||
legacy = kwargs.pop("legacy", True)
|
|
||||||
if legacy:
|
|
||||||
# With `add_special_tokens=True`, the performance of donut are degraded when working with both images and text.
|
|
||||||
logger.warning_once(
|
|
||||||
"Legacy behavior is being used. The current behavior will be deprecated in version 5.0.0. "
|
|
||||||
"In the new behavior, if both images and text are provided, the default value of `add_special_tokens` "
|
|
||||||
"will be changed to `False` when calling the tokenizer if `add_special_tokens` is unset. "
|
|
||||||
"To test the new behavior, set `legacy=False`as a processor call argument."
|
|
||||||
)
|
|
||||||
|
|
||||||
if self._in_target_context_manager:
|
if self._in_target_context_manager:
|
||||||
return self.current_processor(images, text, **kwargs)
|
return self.current_processor(images, text, **kwargs)
|
||||||
|
|
||||||
@@ -114,7 +103,7 @@ class DonutProcessor(ProcessorMixin):
|
|||||||
if images is not None:
|
if images is not None:
|
||||||
inputs = self.image_processor(images, **output_kwargs["images_kwargs"])
|
inputs = self.image_processor(images, **output_kwargs["images_kwargs"])
|
||||||
if text is not None:
|
if text is not None:
|
||||||
if not legacy and images is not None:
|
if images is not None:
|
||||||
output_kwargs["text_kwargs"].setdefault("add_special_tokens", False)
|
output_kwargs["text_kwargs"].setdefault("add_special_tokens", False)
|
||||||
encodings = self.tokenizer(text, **output_kwargs["text_kwargs"])
|
encodings = self.tokenizer(text, **output_kwargs["text_kwargs"])
|
||||||
|
|
||||||
|
|||||||
@@ -95,15 +95,6 @@ class GitProcessor(ProcessorMixin):
|
|||||||
`None`).
|
`None`).
|
||||||
- **pixel_values** -- Pixel values to be fed to a model. Returned when `images` is not `None`.
|
- **pixel_values** -- Pixel values to be fed to a model. Returned when `images` is not `None`.
|
||||||
"""
|
"""
|
||||||
legacy = kwargs.pop("legacy", True)
|
|
||||||
if legacy:
|
|
||||||
logger.warning_once(
|
|
||||||
"Legacy behavior is being used. The current behavior will be deprecated in version 5.0.0. "
|
|
||||||
"In the new behavior, if both images and text are provided, the last token (EOS token) "
|
|
||||||
"of the input_ids and attention_mask tensors will be removed. "
|
|
||||||
"To test the new behavior, set `legacy=False`as a processor call argument."
|
|
||||||
)
|
|
||||||
|
|
||||||
if text is None and images is None:
|
if text is None and images is None:
|
||||||
raise ValueError("You have to specify either text or images. Both cannot be none.")
|
raise ValueError("You have to specify either text or images. Both cannot be none.")
|
||||||
|
|
||||||
@@ -123,9 +114,6 @@ class GitProcessor(ProcessorMixin):
|
|||||||
if images is not None:
|
if images is not None:
|
||||||
image_features = self.image_processor(images, **output_kwargs["images_kwargs"])
|
image_features = self.image_processor(images, **output_kwargs["images_kwargs"])
|
||||||
data.update(image_features)
|
data.update(image_features)
|
||||||
if not legacy:
|
|
||||||
data["input_ids"] = data["input_ids"][:, :-1]
|
|
||||||
data["attention_mask"] = data["attention_mask"][:, :-1]
|
|
||||||
|
|
||||||
return BatchFeature(data=data, tensor_type=output_kwargs["common_kwargs"].get("return_tensors"))
|
return BatchFeature(data=data, tensor_type=output_kwargs["common_kwargs"].get("return_tensors"))
|
||||||
|
|
||||||
|
|||||||
@@ -89,15 +89,6 @@ class Pix2StructProcessor(ProcessorMixin):
|
|||||||
|
|
||||||
Please refer to the docstring of the above two methods for more information.
|
Please refer to the docstring of the above two methods for more information.
|
||||||
"""
|
"""
|
||||||
legacy = kwargs.pop("legacy", True)
|
|
||||||
if legacy:
|
|
||||||
logger.warning_once(
|
|
||||||
"Legacy behavior is being used. The current behavior will be deprecated in version 5.0.0. "
|
|
||||||
"In the new behavior, If both images and text are provided, image_processor is not a VQA processor, and `add_special_tokens` is unset, "
|
|
||||||
"the default value of `add_special_tokens` will be changed to `False` when calling the tokenizer. "
|
|
||||||
"To test the new behavior, set `legacy=False`as a processor call argument."
|
|
||||||
)
|
|
||||||
|
|
||||||
if images is None and text is None:
|
if images is None and text is None:
|
||||||
raise ValueError("You have to specify either images or text.")
|
raise ValueError("You have to specify either images or text.")
|
||||||
|
|
||||||
@@ -126,7 +117,7 @@ class Pix2StructProcessor(ProcessorMixin):
|
|||||||
|
|
||||||
if text is not None and not self.image_processor.is_vqa:
|
if text is not None and not self.image_processor.is_vqa:
|
||||||
output_kwargs["text_kwargs"]["add_special_tokens"] = (
|
output_kwargs["text_kwargs"]["add_special_tokens"] = (
|
||||||
add_special_tokens if add_special_tokens is not None else legacy
|
add_special_tokens if add_special_tokens is not None else False
|
||||||
)
|
)
|
||||||
text_encoding = self.tokenizer(text=text, **output_kwargs["text_kwargs"])
|
text_encoding = self.tokenizer(text=text, **output_kwargs["text_kwargs"])
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user