diff --git a/docs/source/en/model_doc/pvt.md b/docs/source/en/model_doc/pvt.md index d4c80445bf..daa4806bc3 100644 --- a/docs/source/en/model_doc/pvt.md +++ b/docs/source/en/model_doc/pvt.md @@ -64,6 +64,11 @@ This model was contributed by [Xrenya](https://huggingface.co/Xrenya). The origi [[autodoc]] PvtImageProcessor - preprocess +## PvtImageProcessorFast + +[[autodoc]] PvtImageProcessorFast + - preprocess + ## PvtForImageClassification [[autodoc]] PvtForImageClassification diff --git a/src/transformers/models/auto/image_processing_auto.py b/src/transformers/models/auto/image_processing_auto.py index 0ee196aad1..01164f3a20 100644 --- a/src/transformers/models/auto/image_processing_auto.py +++ b/src/transformers/models/auto/image_processing_auto.py @@ -133,8 +133,8 @@ else: ("pixtral", ("PixtralImageProcessor", "PixtralImageProcessorFast")), ("poolformer", ("PoolFormerImageProcessor",)), ("prompt_depth_anything", ("PromptDepthAnythingImageProcessor",)), - ("pvt", ("PvtImageProcessor",)), - ("pvt_v2", ("PvtImageProcessor",)), + ("pvt", ("PvtImageProcessor", "PvtImageProcessorFast")), + ("pvt_v2", ("PvtImageProcessor", "PvtImageProcessorFast")), ("qwen2_5_vl", ("Qwen2VLImageProcessor", "Qwen2VLImageProcessorFast")), ("qwen2_vl", ("Qwen2VLImageProcessor", "Qwen2VLImageProcessorFast")), ("regnet", ("ConvNextImageProcessor", "ConvNextImageProcessorFast")), diff --git a/src/transformers/models/pvt/__init__.py b/src/transformers/models/pvt/__init__.py index 3a7448336f..371478776d 100644 --- a/src/transformers/models/pvt/__init__.py +++ b/src/transformers/models/pvt/__init__.py @@ -20,6 +20,7 @@ from ...utils.import_utils import define_import_structure if TYPE_CHECKING: from .configuration_pvt import * from .image_processing_pvt import * + from .image_processing_pvt_fast import * from .modeling_pvt import * else: import sys diff --git a/src/transformers/models/pvt/image_processing_pvt_fast.py b/src/transformers/models/pvt/image_processing_pvt_fast.py new file mode 100644 index 0000000000..a371c8b32b --- /dev/null +++ b/src/transformers/models/pvt/image_processing_pvt_fast.py @@ -0,0 +1,44 @@ +# coding=utf-8 +# Copyright 2025 The HuggingFace Inc. team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Fast Image processor class for Pvt.""" + +from ...image_processing_utils_fast import ( + BASE_IMAGE_PROCESSOR_FAST_DOCSTRING, + BaseImageProcessorFast, +) +from ...image_utils import IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD, PILImageResampling +from ...utils import add_start_docstrings + + +@add_start_docstrings( + "Constructs a fast Pvt image processor.", + BASE_IMAGE_PROCESSOR_FAST_DOCSTRING, +) +class PvtImageProcessorFast(BaseImageProcessorFast): + resample = PILImageResampling.BILINEAR + image_mean = IMAGENET_DEFAULT_MEAN + image_std = IMAGENET_DEFAULT_STD + size = {"height": 224, "width": 224} + default_to_square = True + crop_size = None + do_resize = True + do_center_crop = None + do_rescale = True + do_normalize = True + do_convert_rgb = None + model_input_names = ["pixel_values"] + + +__all__ = ["PvtImageProcessorFast"] diff --git a/tests/models/pvt/test_image_processing_pvt.py b/tests/models/pvt/test_image_processing_pvt.py index a04ce7d025..92e800cb1b 100644 --- a/tests/models/pvt/test_image_processing_pvt.py +++ b/tests/models/pvt/test_image_processing_pvt.py @@ -16,7 +16,7 @@ import unittest from transformers.testing_utils import require_torch, require_vision -from transformers.utils import is_vision_available +from transformers.utils import is_torchvision_available, is_vision_available from ...test_image_processing_common import ImageProcessingTestMixin, prepare_image_inputs @@ -24,6 +24,9 @@ from ...test_image_processing_common import ImageProcessingTestMixin, prepare_im if is_vision_available(): from transformers import PvtImageProcessor + if is_torchvision_available(): + from transformers import PvtImageProcessorFast + class PvtImageProcessingTester: def __init__( @@ -81,6 +84,7 @@ class PvtImageProcessingTester: @require_vision class PvtImageProcessingTest(ImageProcessingTestMixin, unittest.TestCase): image_processing_class = PvtImageProcessor if is_vision_available() else None + fast_image_processing_class = PvtImageProcessorFast if is_torchvision_available() else None def setUp(self): super().setUp() @@ -91,16 +95,18 @@ class PvtImageProcessingTest(ImageProcessingTestMixin, unittest.TestCase): return self.image_processor_tester.prepare_image_processor_dict() def test_image_processor_properties(self): - image_processing = self.image_processing_class(**self.image_processor_dict) - self.assertTrue(hasattr(image_processing, "image_mean")) - self.assertTrue(hasattr(image_processing, "image_std")) - self.assertTrue(hasattr(image_processing, "do_normalize")) - self.assertTrue(hasattr(image_processing, "do_resize")) - self.assertTrue(hasattr(image_processing, "size")) + for image_processing_class in self.image_processor_list: + image_processing = image_processing_class(**self.image_processor_dict) + self.assertTrue(hasattr(image_processing, "image_mean")) + self.assertTrue(hasattr(image_processing, "image_std")) + self.assertTrue(hasattr(image_processing, "do_normalize")) + self.assertTrue(hasattr(image_processing, "do_resize")) + self.assertTrue(hasattr(image_processing, "size")) def test_image_processor_from_dict_with_kwargs(self): - image_processor = self.image_processing_class.from_dict(self.image_processor_dict) - self.assertEqual(image_processor.size, {"height": 18, "width": 18}) + for image_processing_class in self.image_processor_list: + image_processor = image_processing_class.from_dict(self.image_processor_dict) + self.assertEqual(image_processor.size, {"height": 18, "width": 18}) - image_processor = self.image_processing_class.from_dict(self.image_processor_dict, size=42) - self.assertEqual(image_processor.size, {"height": 42, "width": 42}) + image_processor = image_processing_class.from_dict(self.image_processor_dict, size=42) + self.assertEqual(image_processor.size, {"height": 42, "width": 42})