From e40bb4845e0eefb52ec1e9cac9c2446ab36aef81 Mon Sep 17 00:00:00 2001 From: Raushan Turganbay Date: Thu, 19 Sep 2024 09:56:52 +0200 Subject: [PATCH] Load and save video-processor from separate folder (#33562) * load and save from video-processor folder * Update src/transformers/models/llava_onevision/processing_llava_onevision.py Co-authored-by: amyeroberts <22614925+amyeroberts@users.noreply.github.com> --------- Co-authored-by: amyeroberts <22614925+amyeroberts@users.noreply.github.com> --- .../image_processing_llava_onevision.py | 1 + .../processing_llava_onevision.py | 53 ++++++++++++++++++- .../test_processing_llava_onevision.py | 21 ++++---- tests/test_processing_common.py | 8 +++ 4 files changed, 71 insertions(+), 12 deletions(-) diff --git a/src/transformers/models/llava_onevision/image_processing_llava_onevision.py b/src/transformers/models/llava_onevision/image_processing_llava_onevision.py index 3dddcdd148..2047557208 100644 --- a/src/transformers/models/llava_onevision/image_processing_llava_onevision.py +++ b/src/transformers/models/llava_onevision/image_processing_llava_onevision.py @@ -621,6 +621,7 @@ class LlavaOnevisionImageProcessor(BaseImageProcessor): """ do_resize = do_resize if do_resize is not None else self.do_resize size = size if size is not None else self.size + size = get_size_dict(size, default_to_square=False) image_grid_pinpoints = image_grid_pinpoints if image_grid_pinpoints is not None else self.image_grid_pinpoints resample = resample if resample is not None else self.resample do_rescale = do_rescale if do_rescale is not None else self.do_rescale diff --git a/src/transformers/models/llava_onevision/processing_llava_onevision.py b/src/transformers/models/llava_onevision/processing_llava_onevision.py index e050ec3f31..d4ae02e0bb 100644 --- a/src/transformers/models/llava_onevision/processing_llava_onevision.py +++ b/src/transformers/models/llava_onevision/processing_llava_onevision.py @@ -17,6 +17,7 @@ Processor class for LLaVa-Onevision. """ import math +import os import sys from typing import Iterable, List, Union @@ -34,6 +35,11 @@ from ...processing_utils import ( ProcessorMixin, ) from ...tokenization_utils_base import PreTokenizedInput, TextInput +from ...utils import logging +from ..auto import AutoImageProcessor + + +logger = logging.get_logger(__name__) class LlavaOnevisionProcessorKwargs(ProcessingKwargs, total=False): @@ -96,7 +102,7 @@ class LlavaOnevisionProcessor(ProcessorMixin): chat_template=None, image_token="", video_token="