Update kwargs validation for preprocess with decorator (#32024)
* BLIP preprocess
* BIT preprocess
* BRIDGETOWER preprocess
* CHAMELEON preprocess
* CHINESE_CLIP preprocess
* CONVNEXT preprocess
* DEIT preprocess
* DONUT preprocess
* DPT preprocess
* FLAVA preprocess
* EFFICIENTNET preprocess
* FUYU preprocess
* GLPN preprocess
* IMAGEGPT preprocess
* INSTRUCTBLIPVIDEO preprocess
* VIVIT preprocess
* ZOEDEPTH preprocess
* VITMATTE preprocess
* VIT preprocess
* VILT preprocess
* VIDEOMAE preprocess
* VIDEOLLAVA
* TVP processing
* TVP fixup
* SWIN2SR preprocess
* SIGLIP preprocess
* SAM preprocess
* RT-DETR preprocess
* PVT preprocess
* POOLFORMER preprocess
* PERCEIVER preprocess
* OWLVIT preprocess
* OWLV2 preprocess
* NOUGAT preprocess
* MOBILEVIT preprocess
* MOBILENETV2 preprocess
* MOBILENETV1 preprocess
* LEVIT preprocess
* LAYOUTLMV2 preprocess
* LAYOUTLMV3 preprocess
* Add test
* Update tests
This commit is contained in:
committed by
GitHub
parent
e85d86398a
commit
fb66ef8147
@@ -36,10 +36,9 @@ from ...image_utils import (
|
||||
make_list_of_images,
|
||||
to_numpy_array,
|
||||
valid_images,
|
||||
validate_kwargs,
|
||||
validate_preprocess_arguments,
|
||||
)
|
||||
from ...utils import TensorType, is_vision_available, logging
|
||||
from ...utils import TensorType, filter_out_non_signature_kwargs, is_vision_available, logging
|
||||
|
||||
|
||||
logger = logging.get_logger(__name__)
|
||||
@@ -122,23 +121,6 @@ class BitImageProcessor(BaseImageProcessor):
|
||||
self.image_mean = image_mean if image_mean is not None else OPENAI_CLIP_MEAN
|
||||
self.image_std = image_std if image_std is not None else OPENAI_CLIP_STD
|
||||
self.do_convert_rgb = do_convert_rgb
|
||||
self._valid_processor_keys = [
|
||||
"images",
|
||||
"do_resize",
|
||||
"size",
|
||||
"resample",
|
||||
"do_center_crop",
|
||||
"crop_size",
|
||||
"do_rescale",
|
||||
"rescale_factor",
|
||||
"do_normalize",
|
||||
"image_mean",
|
||||
"image_std",
|
||||
"do_convert_rgb",
|
||||
"return_tensors",
|
||||
"data_format",
|
||||
"input_data_format",
|
||||
]
|
||||
|
||||
# Copied from transformers.models.clip.image_processing_clip.CLIPImageProcessor.resize
|
||||
def resize(
|
||||
@@ -190,6 +172,7 @@ class BitImageProcessor(BaseImageProcessor):
|
||||
**kwargs,
|
||||
)
|
||||
|
||||
@filter_out_non_signature_kwargs()
|
||||
def preprocess(
|
||||
self,
|
||||
images: ImageInput,
|
||||
@@ -207,7 +190,6 @@ class BitImageProcessor(BaseImageProcessor):
|
||||
return_tensors: Optional[Union[str, TensorType]] = None,
|
||||
data_format: Optional[ChannelDimension] = ChannelDimension.FIRST,
|
||||
input_data_format: Optional[Union[str, ChannelDimension]] = None,
|
||||
**kwargs,
|
||||
) -> PIL.Image.Image:
|
||||
"""
|
||||
Preprocess an image or batch of images.
|
||||
@@ -274,8 +256,6 @@ class BitImageProcessor(BaseImageProcessor):
|
||||
image_std = image_std if image_std is not None else self.image_std
|
||||
do_convert_rgb = do_convert_rgb if do_convert_rgb is not None else self.do_convert_rgb
|
||||
|
||||
validate_kwargs(captured_kwargs=kwargs.keys(), valid_processor_keys=self._valid_processor_keys)
|
||||
|
||||
images = make_list_of_images(images)
|
||||
|
||||
if not valid_images(images):
|
||||
|
||||
@@ -31,10 +31,9 @@ from ...image_utils import (
|
||||
make_list_of_images,
|
||||
to_numpy_array,
|
||||
valid_images,
|
||||
validate_kwargs,
|
||||
validate_preprocess_arguments,
|
||||
)
|
||||
from ...utils import TensorType, is_vision_available, logging
|
||||
from ...utils import TensorType, filter_out_non_signature_kwargs, is_vision_available, logging
|
||||
|
||||
|
||||
if is_vision_available():
|
||||
@@ -107,21 +106,6 @@ class BlipImageProcessor(BaseImageProcessor):
|
||||
self.image_mean = image_mean if image_mean is not None else OPENAI_CLIP_MEAN
|
||||
self.image_std = image_std if image_std is not None else OPENAI_CLIP_STD
|
||||
self.do_convert_rgb = do_convert_rgb
|
||||
self._valid_processor_keys = [
|
||||
"images",
|
||||
"do_resize",
|
||||
"size",
|
||||
"resample",
|
||||
"do_rescale",
|
||||
"rescale_factor",
|
||||
"do_normalize",
|
||||
"image_mean",
|
||||
"image_std",
|
||||
"do_convert_rgb",
|
||||
"return_tensors",
|
||||
"data_format",
|
||||
"input_data_format",
|
||||
]
|
||||
|
||||
# Copied from transformers.models.vit.image_processing_vit.ViTImageProcessor.resize with PILImageResampling.BILINEAR->PILImageResampling.BICUBIC
|
||||
def resize(
|
||||
@@ -172,6 +156,7 @@ class BlipImageProcessor(BaseImageProcessor):
|
||||
**kwargs,
|
||||
)
|
||||
|
||||
@filter_out_non_signature_kwargs()
|
||||
def preprocess(
|
||||
self,
|
||||
images: ImageInput,
|
||||
@@ -187,7 +172,6 @@ class BlipImageProcessor(BaseImageProcessor):
|
||||
do_convert_rgb: bool = None,
|
||||
data_format: ChannelDimension = ChannelDimension.FIRST,
|
||||
input_data_format: Optional[Union[str, ChannelDimension]] = None,
|
||||
**kwargs,
|
||||
) -> PIL.Image.Image:
|
||||
"""
|
||||
Preprocess an image or batch of images.
|
||||
@@ -250,8 +234,6 @@ class BlipImageProcessor(BaseImageProcessor):
|
||||
|
||||
images = make_list_of_images(images)
|
||||
|
||||
validate_kwargs(captured_kwargs=kwargs.keys(), valid_processor_keys=self._valid_processor_keys)
|
||||
|
||||
if not valid_images(images):
|
||||
raise ValueError(
|
||||
"Invalid image type. Must be of type PIL.Image.Image, numpy.ndarray, "
|
||||
|
||||
@@ -32,10 +32,9 @@ from ...image_utils import (
|
||||
is_scaled_image,
|
||||
to_numpy_array,
|
||||
valid_images,
|
||||
validate_kwargs,
|
||||
validate_preprocess_arguments,
|
||||
)
|
||||
from ...utils import TensorType, is_vision_available, logging
|
||||
from ...utils import TensorType, filter_out_non_signature_kwargs, is_vision_available, logging
|
||||
|
||||
|
||||
if is_vision_available():
|
||||
@@ -205,24 +204,6 @@ class BridgeTowerImageProcessor(BaseImageProcessor):
|
||||
self.do_pad = do_pad
|
||||
self.do_center_crop = do_center_crop
|
||||
self.crop_size = crop_size
|
||||
self._valid_processor_keys = [
|
||||
"images",
|
||||
"do_resize",
|
||||
"size",
|
||||
"size_divisor",
|
||||
"resample",
|
||||
"do_rescale",
|
||||
"rescale_factor",
|
||||
"do_normalize",
|
||||
"image_mean",
|
||||
"image_std",
|
||||
"do_pad",
|
||||
"do_center_crop",
|
||||
"crop_size",
|
||||
"return_tensors",
|
||||
"data_format",
|
||||
"input_data_format",
|
||||
]
|
||||
|
||||
# Copied from transformers.models.vilt.image_processing_vilt.ViltImageProcessor.resize
|
||||
def resize(
|
||||
@@ -389,6 +370,7 @@ class BridgeTowerImageProcessor(BaseImageProcessor):
|
||||
|
||||
return BatchFeature(data=data, tensor_type=return_tensors)
|
||||
|
||||
@filter_out_non_signature_kwargs()
|
||||
def preprocess(
|
||||
self,
|
||||
images: ImageInput,
|
||||
@@ -407,7 +389,6 @@ class BridgeTowerImageProcessor(BaseImageProcessor):
|
||||
return_tensors: Optional[Union[str, TensorType]] = None,
|
||||
data_format: ChannelDimension = ChannelDimension.FIRST,
|
||||
input_data_format: Optional[Union[str, ChannelDimension]] = None,
|
||||
**kwargs,
|
||||
) -> PIL.Image.Image:
|
||||
"""
|
||||
Preprocess an image or batch of images.
|
||||
@@ -484,8 +465,6 @@ class BridgeTowerImageProcessor(BaseImageProcessor):
|
||||
size = size if size is not None else self.size
|
||||
size = get_size_dict(size, default_to_square=False)
|
||||
|
||||
validate_kwargs(captured_kwargs=kwargs.keys(), valid_processor_keys=self._valid_processor_keys)
|
||||
|
||||
if not is_batched(images):
|
||||
images = [images]
|
||||
|
||||
|
||||
@@ -33,10 +33,9 @@ from ...image_utils import (
|
||||
is_valid_image,
|
||||
to_numpy_array,
|
||||
valid_images,
|
||||
validate_kwargs,
|
||||
validate_preprocess_arguments,
|
||||
)
|
||||
from ...utils import TensorType, is_vision_available, logging
|
||||
from ...utils import TensorType, filter_out_non_signature_kwargs, is_vision_available, logging
|
||||
|
||||
|
||||
logger = logging.get_logger(__name__)
|
||||
@@ -141,23 +140,6 @@ class ChameleonImageProcessor(BaseImageProcessor):
|
||||
self.image_mean = image_mean if image_mean is not None else [1.0, 1.0, 1.0]
|
||||
self.image_std = image_std if image_std is not None else [1.0, 1.0, 1.0]
|
||||
self.do_convert_rgb = do_convert_rgb
|
||||
self._valid_processor_keys = [
|
||||
"images",
|
||||
"do_resize",
|
||||
"size",
|
||||
"resample",
|
||||
"do_center_crop",
|
||||
"crop_size",
|
||||
"do_rescale",
|
||||
"rescale_factor",
|
||||
"do_normalize",
|
||||
"image_mean",
|
||||
"image_std",
|
||||
"do_convert_rgb",
|
||||
"return_tensors",
|
||||
"data_format",
|
||||
"input_data_format",
|
||||
]
|
||||
|
||||
# Copied from transformers.models.clip.image_processing_clip.CLIPImageProcessor.resize
|
||||
def resize(
|
||||
@@ -209,6 +191,7 @@ class ChameleonImageProcessor(BaseImageProcessor):
|
||||
**kwargs,
|
||||
)
|
||||
|
||||
@filter_out_non_signature_kwargs()
|
||||
def preprocess(
|
||||
self,
|
||||
images: ImageInput,
|
||||
@@ -226,7 +209,6 @@ class ChameleonImageProcessor(BaseImageProcessor):
|
||||
return_tensors: Optional[Union[str, TensorType]] = None,
|
||||
data_format: Optional[ChannelDimension] = ChannelDimension.FIRST,
|
||||
input_data_format: Optional[Union[str, ChannelDimension]] = None,
|
||||
**kwargs,
|
||||
) -> PIL.Image.Image:
|
||||
"""
|
||||
Preprocess an image or batch of images.
|
||||
@@ -293,8 +275,6 @@ class ChameleonImageProcessor(BaseImageProcessor):
|
||||
image_std = image_std if image_std is not None else self.image_std
|
||||
do_convert_rgb = do_convert_rgb if do_convert_rgb is not None else self.do_convert_rgb
|
||||
|
||||
validate_kwargs(captured_kwargs=kwargs.keys(), valid_processor_keys=self._valid_processor_keys)
|
||||
|
||||
images = make_batched_images(images)
|
||||
|
||||
if not valid_images(images):
|
||||
|
||||
@@ -36,10 +36,9 @@ from ...image_utils import (
|
||||
make_list_of_images,
|
||||
to_numpy_array,
|
||||
valid_images,
|
||||
validate_kwargs,
|
||||
validate_preprocess_arguments,
|
||||
)
|
||||
from ...utils import TensorType, is_vision_available, logging
|
||||
from ...utils import TensorType, filter_out_non_signature_kwargs, is_vision_available, logging
|
||||
|
||||
|
||||
logger = logging.get_logger(__name__)
|
||||
@@ -122,23 +121,6 @@ class ChineseCLIPImageProcessor(BaseImageProcessor):
|
||||
self.image_mean = image_mean if image_mean is not None else OPENAI_CLIP_MEAN
|
||||
self.image_std = image_std if image_std is not None else OPENAI_CLIP_STD
|
||||
self.do_convert_rgb = do_convert_rgb
|
||||
self._valid_processor_keys = [
|
||||
"images",
|
||||
"do_resize",
|
||||
"size",
|
||||
"resample",
|
||||
"do_center_crop",
|
||||
"crop_size",
|
||||
"do_rescale",
|
||||
"rescale_factor",
|
||||
"do_normalize",
|
||||
"image_mean",
|
||||
"image_std",
|
||||
"do_convert_rgb",
|
||||
"return_tensors",
|
||||
"data_format",
|
||||
"input_data_format",
|
||||
]
|
||||
|
||||
def resize(
|
||||
self,
|
||||
@@ -179,6 +161,7 @@ class ChineseCLIPImageProcessor(BaseImageProcessor):
|
||||
**kwargs,
|
||||
)
|
||||
|
||||
@filter_out_non_signature_kwargs()
|
||||
def preprocess(
|
||||
self,
|
||||
images: ImageInput,
|
||||
@@ -196,7 +179,6 @@ class ChineseCLIPImageProcessor(BaseImageProcessor):
|
||||
return_tensors: Optional[Union[str, TensorType]] = None,
|
||||
data_format: Optional[ChannelDimension] = ChannelDimension.FIRST,
|
||||
input_data_format: Optional[Union[str, ChannelDimension]] = None,
|
||||
**kwargs,
|
||||
) -> PIL.Image.Image:
|
||||
"""
|
||||
Preprocess an image or batch of images.
|
||||
@@ -265,8 +247,6 @@ class ChineseCLIPImageProcessor(BaseImageProcessor):
|
||||
|
||||
images = make_list_of_images(images)
|
||||
|
||||
validate_kwargs(captured_kwargs=kwargs.keys(), valid_processor_keys=self._valid_processor_keys)
|
||||
|
||||
if not valid_images(images):
|
||||
raise ValueError(
|
||||
"Invalid image type. Must be of type PIL.Image.Image, numpy.ndarray, "
|
||||
|
||||
@@ -36,10 +36,9 @@ from ...image_utils import (
|
||||
make_list_of_images,
|
||||
to_numpy_array,
|
||||
valid_images,
|
||||
validate_kwargs,
|
||||
validate_preprocess_arguments,
|
||||
)
|
||||
from ...utils import TensorType, is_vision_available, logging
|
||||
from ...utils import TensorType, filter_out_non_signature_kwargs, is_vision_available, logging
|
||||
|
||||
|
||||
if is_vision_available():
|
||||
@@ -114,21 +113,6 @@ class ConvNextImageProcessor(BaseImageProcessor):
|
||||
self.do_normalize = do_normalize
|
||||
self.image_mean = image_mean if image_mean is not None else IMAGENET_STANDARD_MEAN
|
||||
self.image_std = image_std if image_std is not None else IMAGENET_STANDARD_STD
|
||||
self._valid_processor_keys = [
|
||||
"images",
|
||||
"do_resize",
|
||||
"size",
|
||||
"crop_pct",
|
||||
"resample",
|
||||
"do_rescale",
|
||||
"rescale_factor",
|
||||
"do_normalize",
|
||||
"image_mean",
|
||||
"image_std",
|
||||
"return_tensors",
|
||||
"data_format",
|
||||
"input_data_format",
|
||||
]
|
||||
|
||||
def resize(
|
||||
self,
|
||||
@@ -199,6 +183,7 @@ class ConvNextImageProcessor(BaseImageProcessor):
|
||||
**kwargs,
|
||||
)
|
||||
|
||||
@filter_out_non_signature_kwargs()
|
||||
def preprocess(
|
||||
self,
|
||||
images: ImageInput,
|
||||
@@ -214,7 +199,6 @@ class ConvNextImageProcessor(BaseImageProcessor):
|
||||
return_tensors: Optional[Union[str, TensorType]] = None,
|
||||
data_format: ChannelDimension = ChannelDimension.FIRST,
|
||||
input_data_format: Optional[Union[str, ChannelDimension]] = None,
|
||||
**kwargs,
|
||||
) -> PIL.Image.Image:
|
||||
"""
|
||||
Preprocess an image or batch of images.
|
||||
@@ -276,8 +260,6 @@ class ConvNextImageProcessor(BaseImageProcessor):
|
||||
size = size if size is not None else self.size
|
||||
size = get_size_dict(size, default_to_square=False)
|
||||
|
||||
validate_kwargs(captured_kwargs=kwargs.keys(), valid_processor_keys=self._valid_processor_keys)
|
||||
|
||||
images = make_list_of_images(images)
|
||||
|
||||
if not valid_images(images):
|
||||
|
||||
@@ -31,10 +31,9 @@ from ...image_utils import (
|
||||
make_list_of_images,
|
||||
to_numpy_array,
|
||||
valid_images,
|
||||
validate_kwargs,
|
||||
validate_preprocess_arguments,
|
||||
)
|
||||
from ...utils import TensorType, is_vision_available, logging
|
||||
from ...utils import TensorType, filter_out_non_signature_kwargs, is_vision_available, logging
|
||||
|
||||
|
||||
if is_vision_available():
|
||||
@@ -110,22 +109,6 @@ class DeiTImageProcessor(BaseImageProcessor):
|
||||
self.do_normalize = do_normalize
|
||||
self.image_mean = image_mean if image_mean is not None else IMAGENET_STANDARD_MEAN
|
||||
self.image_std = image_std if image_std is not None else IMAGENET_STANDARD_STD
|
||||
self._valid_processor_keys = [
|
||||
"images",
|
||||
"do_resize",
|
||||
"size",
|
||||
"resample",
|
||||
"do_center_crop",
|
||||
"crop_size",
|
||||
"do_rescale",
|
||||
"rescale_factor",
|
||||
"do_normalize",
|
||||
"image_mean",
|
||||
"image_std",
|
||||
"return_tensors",
|
||||
"data_format",
|
||||
"input_data_format",
|
||||
]
|
||||
|
||||
# Copied from transformers.models.vit.image_processing_vit.ViTImageProcessor.resize with PILImageResampling.BILINEAR->PILImageResampling.BICUBIC
|
||||
def resize(
|
||||
@@ -176,6 +159,7 @@ class DeiTImageProcessor(BaseImageProcessor):
|
||||
**kwargs,
|
||||
)
|
||||
|
||||
@filter_out_non_signature_kwargs()
|
||||
def preprocess(
|
||||
self,
|
||||
images: ImageInput,
|
||||
@@ -192,7 +176,6 @@ class DeiTImageProcessor(BaseImageProcessor):
|
||||
return_tensors: Optional[Union[str, TensorType]] = None,
|
||||
data_format: ChannelDimension = ChannelDimension.FIRST,
|
||||
input_data_format: Optional[Union[str, ChannelDimension]] = None,
|
||||
**kwargs,
|
||||
) -> PIL.Image.Image:
|
||||
"""
|
||||
Preprocess an image or batch of images.
|
||||
@@ -257,8 +240,6 @@ class DeiTImageProcessor(BaseImageProcessor):
|
||||
|
||||
images = make_list_of_images(images)
|
||||
|
||||
validate_kwargs(captured_kwargs=kwargs.keys(), valid_processor_keys=self._valid_processor_keys)
|
||||
|
||||
if not valid_images(images):
|
||||
raise ValueError(
|
||||
"Invalid image type. Must be of type PIL.Image.Image, numpy.ndarray, "
|
||||
|
||||
@@ -37,10 +37,9 @@ from ...image_utils import (
|
||||
make_list_of_images,
|
||||
to_numpy_array,
|
||||
valid_images,
|
||||
validate_kwargs,
|
||||
validate_preprocess_arguments,
|
||||
)
|
||||
from ...utils import TensorType, logging
|
||||
from ...utils import TensorType, filter_out_non_signature_kwargs, logging
|
||||
from ...utils.import_utils import is_vision_available
|
||||
|
||||
|
||||
@@ -124,24 +123,6 @@ class DonutImageProcessor(BaseImageProcessor):
|
||||
self.do_normalize = do_normalize
|
||||
self.image_mean = image_mean if image_mean is not None else IMAGENET_STANDARD_MEAN
|
||||
self.image_std = image_std if image_std is not None else IMAGENET_STANDARD_STD
|
||||
self._valid_processor_keys = [
|
||||
"images",
|
||||
"do_resize",
|
||||
"size",
|
||||
"resample",
|
||||
"do_thumbnail",
|
||||
"do_align_long_axis",
|
||||
"do_pad",
|
||||
"random_padding",
|
||||
"do_rescale",
|
||||
"rescale_factor",
|
||||
"do_normalize",
|
||||
"image_mean",
|
||||
"image_std",
|
||||
"return_tensors",
|
||||
"data_format",
|
||||
"input_data_format",
|
||||
]
|
||||
|
||||
def align_long_axis(
|
||||
self,
|
||||
@@ -314,6 +295,7 @@ class DonutImageProcessor(BaseImageProcessor):
|
||||
)
|
||||
return resized_image
|
||||
|
||||
@filter_out_non_signature_kwargs()
|
||||
def preprocess(
|
||||
self,
|
||||
images: ImageInput,
|
||||
@@ -332,7 +314,6 @@ class DonutImageProcessor(BaseImageProcessor):
|
||||
return_tensors: Optional[Union[str, TensorType]] = None,
|
||||
data_format: Optional[ChannelDimension] = ChannelDimension.FIRST,
|
||||
input_data_format: Optional[Union[str, ChannelDimension]] = None,
|
||||
**kwargs,
|
||||
) -> PIL.Image.Image:
|
||||
"""
|
||||
Preprocess an image or batch of images.
|
||||
@@ -407,8 +388,6 @@ class DonutImageProcessor(BaseImageProcessor):
|
||||
|
||||
images = make_list_of_images(images)
|
||||
|
||||
validate_kwargs(captured_kwargs=kwargs.keys(), valid_processor_keys=self._valid_processor_keys)
|
||||
|
||||
if not valid_images(images):
|
||||
raise ValueError(
|
||||
"Invalid image type. Must be of type PIL.Image.Image, numpy.ndarray, "
|
||||
|
||||
@@ -35,10 +35,9 @@ from ...image_utils import (
|
||||
make_list_of_images,
|
||||
to_numpy_array,
|
||||
valid_images,
|
||||
validate_kwargs,
|
||||
validate_preprocess_arguments,
|
||||
)
|
||||
from ...utils import TensorType, is_vision_available, logging
|
||||
from ...utils import TensorType, filter_out_non_signature_kwargs, is_vision_available, logging
|
||||
|
||||
|
||||
if is_torch_available():
|
||||
@@ -165,24 +164,6 @@ class DPTImageProcessor(BaseImageProcessor):
|
||||
self.image_std = image_std if image_std is not None else IMAGENET_STANDARD_STD
|
||||
self.do_pad = do_pad
|
||||
self.size_divisor = size_divisor
|
||||
self._valid_processor_keys = [
|
||||
"images",
|
||||
"do_resize",
|
||||
"size",
|
||||
"keep_aspect_ratio",
|
||||
"ensure_multiple_of",
|
||||
"resample",
|
||||
"do_rescale",
|
||||
"rescale_factor",
|
||||
"do_normalize",
|
||||
"image_mean",
|
||||
"image_std",
|
||||
"do_pad",
|
||||
"size_divisor",
|
||||
"return_tensors",
|
||||
"data_format",
|
||||
"input_data_format",
|
||||
]
|
||||
|
||||
def resize(
|
||||
self,
|
||||
@@ -284,6 +265,7 @@ class DPTImageProcessor(BaseImageProcessor):
|
||||
|
||||
return pad(image, ((pad_size_left, pad_size_right), (pad_size_top, pad_size_bottom)), data_format=data_format)
|
||||
|
||||
@filter_out_non_signature_kwargs()
|
||||
def preprocess(
|
||||
self,
|
||||
images: ImageInput,
|
||||
@@ -302,7 +284,6 @@ class DPTImageProcessor(BaseImageProcessor):
|
||||
return_tensors: Optional[Union[str, TensorType]] = None,
|
||||
data_format: ChannelDimension = ChannelDimension.FIRST,
|
||||
input_data_format: Optional[Union[str, ChannelDimension]] = None,
|
||||
**kwargs,
|
||||
) -> PIL.Image.Image:
|
||||
"""
|
||||
Preprocess an image or batch of images.
|
||||
@@ -369,8 +350,6 @@ class DPTImageProcessor(BaseImageProcessor):
|
||||
|
||||
images = make_list_of_images(images)
|
||||
|
||||
validate_kwargs(captured_kwargs=kwargs.keys(), valid_processor_keys=self._valid_processor_keys)
|
||||
|
||||
if not valid_images(images):
|
||||
raise ValueError(
|
||||
"Invalid image type. Must be of type PIL.Image.Image, numpy.ndarray, "
|
||||
|
||||
@@ -31,10 +31,9 @@ from ...image_utils import (
|
||||
make_list_of_images,
|
||||
to_numpy_array,
|
||||
valid_images,
|
||||
validate_kwargs,
|
||||
validate_preprocess_arguments,
|
||||
)
|
||||
from ...utils import TensorType, is_vision_available, logging
|
||||
from ...utils import TensorType, filter_out_non_signature_kwargs, is_vision_available, logging
|
||||
|
||||
|
||||
if is_vision_available():
|
||||
@@ -119,24 +118,6 @@ class EfficientNetImageProcessor(BaseImageProcessor):
|
||||
self.image_mean = image_mean if image_mean is not None else IMAGENET_STANDARD_MEAN
|
||||
self.image_std = image_std if image_std is not None else IMAGENET_STANDARD_STD
|
||||
self.include_top = include_top
|
||||
self._valid_processor_keys = [
|
||||
"images",
|
||||
"do_resize",
|
||||
"size",
|
||||
"resample",
|
||||
"do_center_crop",
|
||||
"crop_size",
|
||||
"do_rescale",
|
||||
"rescale_factor",
|
||||
"rescale_offset",
|
||||
"do_normalize",
|
||||
"image_mean",
|
||||
"image_std",
|
||||
"include_top",
|
||||
"return_tensors",
|
||||
"data_format",
|
||||
"input_data_format",
|
||||
]
|
||||
|
||||
# Copied from transformers.models.vit.image_processing_vit.ViTImageProcessor.resize with PILImageResampling.BILINEAR->PILImageResampling.NEAREST
|
||||
def resize(
|
||||
@@ -227,6 +208,7 @@ class EfficientNetImageProcessor(BaseImageProcessor):
|
||||
|
||||
return rescaled_image
|
||||
|
||||
@filter_out_non_signature_kwargs()
|
||||
def preprocess(
|
||||
self,
|
||||
images: ImageInput,
|
||||
@@ -245,7 +227,6 @@ class EfficientNetImageProcessor(BaseImageProcessor):
|
||||
return_tensors: Optional[Union[str, TensorType]] = None,
|
||||
data_format: ChannelDimension = ChannelDimension.FIRST,
|
||||
input_data_format: Optional[Union[str, ChannelDimension]] = None,
|
||||
**kwargs,
|
||||
) -> PIL.Image.Image:
|
||||
"""
|
||||
Preprocess an image or batch of images.
|
||||
@@ -316,8 +297,6 @@ class EfficientNetImageProcessor(BaseImageProcessor):
|
||||
|
||||
images = make_list_of_images(images)
|
||||
|
||||
validate_kwargs(captured_kwargs=kwargs.keys(), valid_processor_keys=self._valid_processor_keys)
|
||||
|
||||
if not valid_images(images):
|
||||
raise ValueError(
|
||||
"Invalid image type. Must be of type PIL.Image.Image, numpy.ndarray, "
|
||||
|
||||
@@ -34,10 +34,9 @@ from ...image_utils import (
|
||||
make_list_of_images,
|
||||
to_numpy_array,
|
||||
valid_images,
|
||||
validate_kwargs,
|
||||
validate_preprocess_arguments,
|
||||
)
|
||||
from ...utils import TensorType, is_vision_available, logging
|
||||
from ...utils import TensorType, filter_out_non_signature_kwargs, is_vision_available, logging
|
||||
|
||||
|
||||
if is_vision_available():
|
||||
@@ -302,41 +301,6 @@ class FlavaImageProcessor(BaseImageProcessor):
|
||||
self.codebook_image_mean = codebook_image_mean
|
||||
self.codebook_image_mean = codebook_image_mean if codebook_image_mean is not None else FLAVA_CODEBOOK_MEAN
|
||||
self.codebook_image_std = codebook_image_std if codebook_image_std is not None else FLAVA_CODEBOOK_STD
|
||||
self._valid_processor_keys = [
|
||||
"images",
|
||||
"do_resize",
|
||||
"size",
|
||||
"resample",
|
||||
"do_center_crop",
|
||||
"crop_size",
|
||||
"do_rescale",
|
||||
"rescale_factor",
|
||||
"do_normalize",
|
||||
"image_mean",
|
||||
"image_std",
|
||||
"return_image_mask",
|
||||
"input_size_patches",
|
||||
"total_mask_patches",
|
||||
"mask_group_min_patches",
|
||||
"mask_group_max_patches",
|
||||
"mask_group_min_aspect_ratio",
|
||||
"mask_group_max_aspect_ratio",
|
||||
"return_codebook_pixels",
|
||||
"codebook_do_resize",
|
||||
"codebook_size",
|
||||
"codebook_resample",
|
||||
"codebook_do_center_crop",
|
||||
"codebook_crop_size",
|
||||
"codebook_do_rescale",
|
||||
"codebook_rescale_factor",
|
||||
"codebook_do_map_pixels",
|
||||
"codebook_do_normalize",
|
||||
"codebook_image_mean",
|
||||
"codebook_image_std",
|
||||
"return_tensors",
|
||||
"data_format",
|
||||
"input_data_format",
|
||||
]
|
||||
|
||||
@classmethod
|
||||
def from_dict(cls, image_processor_dict: Dict[str, Any], **kwargs):
|
||||
@@ -486,6 +450,7 @@ class FlavaImageProcessor(BaseImageProcessor):
|
||||
image = to_channel_dimension_format(image, data_format, input_channel_dim=input_data_format)
|
||||
return image
|
||||
|
||||
@filter_out_non_signature_kwargs()
|
||||
def preprocess(
|
||||
self,
|
||||
images: ImageInput,
|
||||
@@ -523,7 +488,6 @@ class FlavaImageProcessor(BaseImageProcessor):
|
||||
return_tensors: Optional[Union[str, TensorType]] = None,
|
||||
data_format: ChannelDimension = ChannelDimension.FIRST,
|
||||
input_data_format: Optional[Union[str, ChannelDimension]] = None,
|
||||
**kwargs,
|
||||
) -> PIL.Image.Image:
|
||||
"""
|
||||
Preprocess an image or batch of images.
|
||||
@@ -672,8 +636,6 @@ class FlavaImageProcessor(BaseImageProcessor):
|
||||
|
||||
images = make_list_of_images(images)
|
||||
|
||||
validate_kwargs(captured_kwargs=kwargs.keys(), valid_processor_keys=self._valid_processor_keys)
|
||||
|
||||
if not valid_images(images):
|
||||
raise ValueError(
|
||||
"Invalid image type. Must be of type PIL.Image.Image, numpy.ndarray, "
|
||||
|
||||
@@ -39,6 +39,7 @@ from ...image_utils import (
|
||||
)
|
||||
from ...utils import (
|
||||
TensorType,
|
||||
filter_out_non_signature_kwargs,
|
||||
is_torch_available,
|
||||
is_torch_device,
|
||||
is_torch_dtype,
|
||||
@@ -261,24 +262,6 @@ class FuyuImageProcessor(BaseImageProcessor):
|
||||
self.do_rescale = do_rescale
|
||||
self.rescale_factor = rescale_factor
|
||||
self.patch_size = patch_size if patch_size is not None else {"height": 30, "width": 30}
|
||||
self._valid_processor_keys = [
|
||||
"images",
|
||||
"do_resize",
|
||||
"size",
|
||||
"resample",
|
||||
"do_pad",
|
||||
"padding_value",
|
||||
"padding_mode",
|
||||
"do_normalize",
|
||||
"image_mean",
|
||||
"image_std",
|
||||
"do_rescale",
|
||||
"rescale_factor",
|
||||
"patch_size",
|
||||
"return_tensors",
|
||||
"data_format",
|
||||
"input_data_format",
|
||||
]
|
||||
|
||||
def resize(
|
||||
self,
|
||||
@@ -376,6 +359,7 @@ class FuyuImageProcessor(BaseImageProcessor):
|
||||
)
|
||||
return padded_image
|
||||
|
||||
@filter_out_non_signature_kwargs()
|
||||
def preprocess(
|
||||
self,
|
||||
images,
|
||||
|
||||
@@ -30,10 +30,9 @@ from ...image_utils import (
|
||||
make_list_of_images,
|
||||
to_numpy_array,
|
||||
valid_images,
|
||||
validate_kwargs,
|
||||
validate_preprocess_arguments,
|
||||
)
|
||||
from ...utils import TensorType, logging
|
||||
from ...utils import TensorType, filter_out_non_signature_kwargs, logging
|
||||
|
||||
|
||||
logger = logging.get_logger(__name__)
|
||||
@@ -72,16 +71,6 @@ class GLPNImageProcessor(BaseImageProcessor):
|
||||
self.size_divisor = size_divisor
|
||||
self.resample = resample
|
||||
super().__init__(**kwargs)
|
||||
self._valid_processor_keys = [
|
||||
"images",
|
||||
"do_resize",
|
||||
"size_divisor",
|
||||
"resample",
|
||||
"do_rescale",
|
||||
"return_tensors",
|
||||
"data_format",
|
||||
"input_data_format",
|
||||
]
|
||||
|
||||
def resize(
|
||||
self,
|
||||
@@ -133,6 +122,7 @@ class GLPNImageProcessor(BaseImageProcessor):
|
||||
)
|
||||
return image
|
||||
|
||||
@filter_out_non_signature_kwargs()
|
||||
def preprocess(
|
||||
self,
|
||||
images: Union["PIL.Image.Image", TensorType, List["PIL.Image.Image"], List[TensorType]],
|
||||
@@ -143,7 +133,6 @@ class GLPNImageProcessor(BaseImageProcessor):
|
||||
return_tensors: Optional[Union[TensorType, str]] = None,
|
||||
data_format: ChannelDimension = ChannelDimension.FIRST,
|
||||
input_data_format: Optional[Union[str, ChannelDimension]] = None,
|
||||
**kwargs,
|
||||
) -> BatchFeature:
|
||||
"""
|
||||
Preprocess the given images.
|
||||
@@ -187,8 +176,6 @@ class GLPNImageProcessor(BaseImageProcessor):
|
||||
|
||||
images = make_list_of_images(images)
|
||||
|
||||
validate_kwargs(captured_kwargs=kwargs.keys(), valid_processor_keys=self._valid_processor_keys)
|
||||
|
||||
if not valid_images(images):
|
||||
raise ValueError(
|
||||
"Invalid image type. Must be of type PIL.Image.Image, numpy.ndarray, "
|
||||
|
||||
@@ -29,10 +29,9 @@ from ...image_utils import (
|
||||
make_list_of_images,
|
||||
to_numpy_array,
|
||||
valid_images,
|
||||
validate_kwargs,
|
||||
validate_preprocess_arguments,
|
||||
)
|
||||
from ...utils import TensorType, is_vision_available, logging
|
||||
from ...utils import TensorType, filter_out_non_signature_kwargs, is_vision_available, logging
|
||||
|
||||
|
||||
if is_vision_available():
|
||||
@@ -103,18 +102,6 @@ class ImageGPTImageProcessor(BaseImageProcessor):
|
||||
self.resample = resample
|
||||
self.do_normalize = do_normalize
|
||||
self.do_color_quantize = do_color_quantize
|
||||
self._valid_processor_keys = [
|
||||
"images",
|
||||
"do_resize",
|
||||
"size",
|
||||
"resample",
|
||||
"do_normalize",
|
||||
"do_color_quantize",
|
||||
"clusters",
|
||||
"return_tensors",
|
||||
"data_format",
|
||||
"input_data_format",
|
||||
]
|
||||
|
||||
# Copied from transformers.models.vit.image_processing_vit.ViTImageProcessor.resize
|
||||
def resize(
|
||||
@@ -186,6 +173,7 @@ class ImageGPTImageProcessor(BaseImageProcessor):
|
||||
image = image - 1
|
||||
return image
|
||||
|
||||
@filter_out_non_signature_kwargs()
|
||||
def preprocess(
|
||||
self,
|
||||
images: ImageInput,
|
||||
@@ -198,7 +186,6 @@ class ImageGPTImageProcessor(BaseImageProcessor):
|
||||
return_tensors: Optional[Union[str, TensorType]] = None,
|
||||
data_format: Optional[Union[str, ChannelDimension]] = ChannelDimension.FIRST,
|
||||
input_data_format: Optional[Union[str, ChannelDimension]] = None,
|
||||
**kwargs,
|
||||
) -> PIL.Image.Image:
|
||||
"""
|
||||
Preprocess an image or batch of images.
|
||||
@@ -251,8 +238,6 @@ class ImageGPTImageProcessor(BaseImageProcessor):
|
||||
|
||||
images = make_list_of_images(images)
|
||||
|
||||
validate_kwargs(captured_kwargs=kwargs.keys(), valid_processor_keys=self._valid_processor_keys)
|
||||
|
||||
if not valid_images(images):
|
||||
raise ValueError(
|
||||
"Invalid image type. Must be of type PIL.Image.Image, numpy.ndarray, "
|
||||
|
||||
@@ -35,10 +35,9 @@ from ...image_utils import (
|
||||
is_valid_image,
|
||||
to_numpy_array,
|
||||
valid_images,
|
||||
validate_kwargs,
|
||||
validate_preprocess_arguments,
|
||||
)
|
||||
from ...utils import TensorType, is_vision_available, logging
|
||||
from ...utils import TensorType, filter_out_non_signature_kwargs, is_vision_available, logging
|
||||
|
||||
|
||||
if is_vision_available():
|
||||
@@ -128,21 +127,6 @@ class InstructBlipVideoImageProcessor(BaseImageProcessor):
|
||||
self.image_mean = image_mean if image_mean is not None else OPENAI_CLIP_MEAN
|
||||
self.image_std = image_std if image_std is not None else OPENAI_CLIP_STD
|
||||
self.do_convert_rgb = do_convert_rgb
|
||||
self._valid_processor_keys = [
|
||||
"images",
|
||||
"do_resize",
|
||||
"size",
|
||||
"resample",
|
||||
"do_rescale",
|
||||
"rescale_factor",
|
||||
"do_normalize",
|
||||
"image_mean",
|
||||
"image_std",
|
||||
"do_convert_rgb",
|
||||
"return_tensors",
|
||||
"data_format",
|
||||
"input_data_format",
|
||||
]
|
||||
|
||||
# Copied from transformers.models.vit.image_processing_vit.ViTImageProcessor.resize with PILImageResampling.BILINEAR->PILImageResampling.BICUBIC
|
||||
def resize(
|
||||
@@ -195,6 +179,7 @@ class InstructBlipVideoImageProcessor(BaseImageProcessor):
|
||||
)
|
||||
|
||||
# Ignore copy
|
||||
@filter_out_non_signature_kwargs()
|
||||
def preprocess(
|
||||
self,
|
||||
images: VideoInput = None,
|
||||
@@ -210,7 +195,6 @@ class InstructBlipVideoImageProcessor(BaseImageProcessor):
|
||||
do_convert_rgb: bool = None,
|
||||
data_format: ChannelDimension = ChannelDimension.FIRST,
|
||||
input_data_format: Optional[Union[str, ChannelDimension]] = None,
|
||||
**kwargs,
|
||||
) -> PIL.Image.Image:
|
||||
"""
|
||||
Preprocess a video or batch of images/videos.
|
||||
@@ -272,7 +256,6 @@ class InstructBlipVideoImageProcessor(BaseImageProcessor):
|
||||
size = get_size_dict(size, default_to_square=False)
|
||||
|
||||
videos = make_batched_videos(images)
|
||||
validate_kwargs(captured_kwargs=kwargs.keys(), valid_processor_keys=self._valid_processor_keys)
|
||||
|
||||
validate_preprocess_arguments(
|
||||
do_rescale=do_rescale,
|
||||
|
||||
@@ -28,10 +28,16 @@ from ...image_utils import (
|
||||
make_list_of_images,
|
||||
to_numpy_array,
|
||||
valid_images,
|
||||
validate_kwargs,
|
||||
validate_preprocess_arguments,
|
||||
)
|
||||
from ...utils import TensorType, is_pytesseract_available, is_vision_available, logging, requires_backends
|
||||
from ...utils import (
|
||||
TensorType,
|
||||
filter_out_non_signature_kwargs,
|
||||
is_pytesseract_available,
|
||||
is_vision_available,
|
||||
logging,
|
||||
requires_backends,
|
||||
)
|
||||
|
||||
|
||||
if is_vision_available():
|
||||
@@ -138,18 +144,6 @@ class LayoutLMv2ImageProcessor(BaseImageProcessor):
|
||||
self.apply_ocr = apply_ocr
|
||||
self.ocr_lang = ocr_lang
|
||||
self.tesseract_config = tesseract_config
|
||||
self._valid_processor_keys = [
|
||||
"images",
|
||||
"do_resize",
|
||||
"size",
|
||||
"resample",
|
||||
"apply_ocr",
|
||||
"ocr_lang",
|
||||
"tesseract_config",
|
||||
"return_tensors",
|
||||
"data_format",
|
||||
"input_data_format",
|
||||
]
|
||||
|
||||
# Copied from transformers.models.vit.image_processing_vit.ViTImageProcessor.resize
|
||||
def resize(
|
||||
@@ -200,6 +194,7 @@ class LayoutLMv2ImageProcessor(BaseImageProcessor):
|
||||
**kwargs,
|
||||
)
|
||||
|
||||
@filter_out_non_signature_kwargs()
|
||||
def preprocess(
|
||||
self,
|
||||
images: ImageInput,
|
||||
@@ -212,7 +207,6 @@ class LayoutLMv2ImageProcessor(BaseImageProcessor):
|
||||
return_tensors: Optional[Union[str, TensorType]] = None,
|
||||
data_format: ChannelDimension = ChannelDimension.FIRST,
|
||||
input_data_format: Optional[Union[str, ChannelDimension]] = None,
|
||||
**kwargs,
|
||||
) -> PIL.Image.Image:
|
||||
"""
|
||||
Preprocess an image or batch of images.
|
||||
@@ -257,8 +251,6 @@ class LayoutLMv2ImageProcessor(BaseImageProcessor):
|
||||
|
||||
images = make_list_of_images(images)
|
||||
|
||||
validate_kwargs(captured_kwargs=kwargs.keys(), valid_processor_keys=self._valid_processor_keys)
|
||||
|
||||
if not valid_images(images):
|
||||
raise ValueError(
|
||||
"Invalid image type. Must be of type PIL.Image.Image, numpy.ndarray, "
|
||||
|
||||
@@ -31,10 +31,16 @@ from ...image_utils import (
|
||||
make_list_of_images,
|
||||
to_numpy_array,
|
||||
valid_images,
|
||||
validate_kwargs,
|
||||
validate_preprocess_arguments,
|
||||
)
|
||||
from ...utils import TensorType, is_pytesseract_available, is_vision_available, logging, requires_backends
|
||||
from ...utils import (
|
||||
TensorType,
|
||||
filter_out_non_signature_kwargs,
|
||||
is_pytesseract_available,
|
||||
is_vision_available,
|
||||
logging,
|
||||
requires_backends,
|
||||
)
|
||||
|
||||
|
||||
if is_vision_available():
|
||||
@@ -165,23 +171,6 @@ class LayoutLMv3ImageProcessor(BaseImageProcessor):
|
||||
self.apply_ocr = apply_ocr
|
||||
self.ocr_lang = ocr_lang
|
||||
self.tesseract_config = tesseract_config
|
||||
self._valid_processor_keys = [
|
||||
"images",
|
||||
"do_resize",
|
||||
"size",
|
||||
"resample",
|
||||
"do_rescale",
|
||||
"rescale_factor",
|
||||
"do_normalize",
|
||||
"image_mean",
|
||||
"image_std",
|
||||
"apply_ocr",
|
||||
"ocr_lang",
|
||||
"tesseract_config",
|
||||
"return_tensors",
|
||||
"data_format",
|
||||
"input_data_format",
|
||||
]
|
||||
|
||||
# Copied from transformers.models.vit.image_processing_vit.ViTImageProcessor.resize
|
||||
def resize(
|
||||
@@ -232,6 +221,7 @@ class LayoutLMv3ImageProcessor(BaseImageProcessor):
|
||||
**kwargs,
|
||||
)
|
||||
|
||||
@filter_out_non_signature_kwargs()
|
||||
def preprocess(
|
||||
self,
|
||||
images: ImageInput,
|
||||
@@ -249,7 +239,6 @@ class LayoutLMv3ImageProcessor(BaseImageProcessor):
|
||||
return_tensors: Optional[Union[str, TensorType]] = None,
|
||||
data_format: ChannelDimension = ChannelDimension.FIRST,
|
||||
input_data_format: Optional[Union[str, ChannelDimension]] = None,
|
||||
**kwargs,
|
||||
) -> PIL.Image.Image:
|
||||
"""
|
||||
Preprocess an image or batch of images.
|
||||
@@ -316,8 +305,6 @@ class LayoutLMv3ImageProcessor(BaseImageProcessor):
|
||||
tesseract_config = tesseract_config if tesseract_config is not None else self.tesseract_config
|
||||
images = make_list_of_images(images)
|
||||
|
||||
validate_kwargs(captured_kwargs=kwargs.keys(), valid_processor_keys=self._valid_processor_keys)
|
||||
|
||||
if not valid_images(images):
|
||||
raise ValueError(
|
||||
"Invalid image type. Must be of type PIL.Image.Image, numpy.ndarray, "
|
||||
|
||||
@@ -35,10 +35,9 @@ from ...image_utils import (
|
||||
make_list_of_images,
|
||||
to_numpy_array,
|
||||
valid_images,
|
||||
validate_kwargs,
|
||||
validate_preprocess_arguments,
|
||||
)
|
||||
from ...utils import TensorType, logging
|
||||
from ...utils import TensorType, filter_out_non_signature_kwargs, logging
|
||||
|
||||
|
||||
logger = logging.get_logger(__name__)
|
||||
@@ -116,22 +115,6 @@ class LevitImageProcessor(BaseImageProcessor):
|
||||
self.do_normalize = do_normalize
|
||||
self.image_mean = image_mean if image_mean is not None else IMAGENET_DEFAULT_MEAN
|
||||
self.image_std = image_std if image_std is not None else IMAGENET_DEFAULT_STD
|
||||
self._valid_processor_keys = [
|
||||
"images",
|
||||
"do_resize",
|
||||
"size",
|
||||
"resample",
|
||||
"do_center_crop",
|
||||
"crop_size",
|
||||
"do_rescale",
|
||||
"rescale_factor",
|
||||
"do_normalize",
|
||||
"image_mean",
|
||||
"image_std",
|
||||
"return_tensors",
|
||||
"data_format",
|
||||
"input_data_format",
|
||||
]
|
||||
|
||||
def resize(
|
||||
self,
|
||||
@@ -188,6 +171,7 @@ class LevitImageProcessor(BaseImageProcessor):
|
||||
**kwargs,
|
||||
)
|
||||
|
||||
@filter_out_non_signature_kwargs()
|
||||
def preprocess(
|
||||
self,
|
||||
images: ImageInput,
|
||||
@@ -204,7 +188,6 @@ class LevitImageProcessor(BaseImageProcessor):
|
||||
return_tensors: Optional[TensorType] = None,
|
||||
data_format: ChannelDimension = ChannelDimension.FIRST,
|
||||
input_data_format: Optional[Union[str, ChannelDimension]] = None,
|
||||
**kwargs,
|
||||
) -> BatchFeature:
|
||||
"""
|
||||
Preprocess an image or batch of images to be used as input to a LeViT model.
|
||||
@@ -271,8 +254,6 @@ class LevitImageProcessor(BaseImageProcessor):
|
||||
crop_size = get_size_dict(crop_size, param_name="crop_size")
|
||||
images = make_list_of_images(images)
|
||||
|
||||
validate_kwargs(captured_kwargs=kwargs.keys(), valid_processor_keys=self._valid_processor_keys)
|
||||
|
||||
if not valid_images(images):
|
||||
raise ValueError(
|
||||
"Invalid image type. Must be of type PIL.Image.Image, numpy.ndarray, "
|
||||
|
||||
@@ -35,10 +35,9 @@ from ...image_utils import (
|
||||
make_list_of_images,
|
||||
to_numpy_array,
|
||||
valid_images,
|
||||
validate_kwargs,
|
||||
validate_preprocess_arguments,
|
||||
)
|
||||
from ...utils import TensorType, logging
|
||||
from ...utils import TensorType, filter_out_non_signature_kwargs, logging
|
||||
|
||||
|
||||
logger = logging.get_logger(__name__)
|
||||
@@ -114,22 +113,6 @@ class MobileNetV1ImageProcessor(BaseImageProcessor):
|
||||
self.do_normalize = do_normalize
|
||||
self.image_mean = image_mean if image_mean is not None else IMAGENET_STANDARD_MEAN
|
||||
self.image_std = image_std if image_std is not None else IMAGENET_STANDARD_STD
|
||||
self._valid_processor_keys = [
|
||||
"images",
|
||||
"do_resize",
|
||||
"size",
|
||||
"resample",
|
||||
"do_center_crop",
|
||||
"crop_size",
|
||||
"do_rescale",
|
||||
"rescale_factor",
|
||||
"do_normalize",
|
||||
"image_mean",
|
||||
"image_std",
|
||||
"return_tensors",
|
||||
"data_format",
|
||||
"input_data_format",
|
||||
]
|
||||
|
||||
# Copied from transformers.models.clip.image_processing_clip.CLIPImageProcessor.resize
|
||||
def resize(
|
||||
@@ -181,6 +164,7 @@ class MobileNetV1ImageProcessor(BaseImageProcessor):
|
||||
**kwargs,
|
||||
)
|
||||
|
||||
@filter_out_non_signature_kwargs()
|
||||
def preprocess(
|
||||
self,
|
||||
images: ImageInput,
|
||||
@@ -197,7 +181,6 @@ class MobileNetV1ImageProcessor(BaseImageProcessor):
|
||||
return_tensors: Optional[Union[str, TensorType]] = None,
|
||||
data_format: Union[str, ChannelDimension] = ChannelDimension.FIRST,
|
||||
input_data_format: Optional[Union[str, ChannelDimension]] = None,
|
||||
**kwargs,
|
||||
):
|
||||
"""
|
||||
Preprocess an image or batch of images.
|
||||
@@ -262,8 +245,6 @@ class MobileNetV1ImageProcessor(BaseImageProcessor):
|
||||
|
||||
images = make_list_of_images(images)
|
||||
|
||||
validate_kwargs(captured_kwargs=kwargs.keys(), valid_processor_keys=self._valid_processor_keys)
|
||||
|
||||
if not valid_images(images):
|
||||
raise ValueError(
|
||||
"Invalid image type. Must be of type PIL.Image.Image, numpy.ndarray, "
|
||||
|
||||
@@ -35,10 +35,9 @@ from ...image_utils import (
|
||||
make_list_of_images,
|
||||
to_numpy_array,
|
||||
valid_images,
|
||||
validate_kwargs,
|
||||
validate_preprocess_arguments,
|
||||
)
|
||||
from ...utils import TensorType, is_torch_available, is_torch_tensor, logging
|
||||
from ...utils import TensorType, filter_out_non_signature_kwargs, is_torch_available, is_torch_tensor, logging
|
||||
|
||||
|
||||
if is_torch_available():
|
||||
@@ -118,22 +117,6 @@ class MobileNetV2ImageProcessor(BaseImageProcessor):
|
||||
self.do_normalize = do_normalize
|
||||
self.image_mean = image_mean if image_mean is not None else IMAGENET_STANDARD_MEAN
|
||||
self.image_std = image_std if image_std is not None else IMAGENET_STANDARD_STD
|
||||
self._valid_processor_keys = [
|
||||
"images",
|
||||
"do_resize",
|
||||
"size",
|
||||
"resample",
|
||||
"do_center_crop",
|
||||
"crop_size",
|
||||
"do_rescale",
|
||||
"rescale_factor",
|
||||
"do_normalize",
|
||||
"image_mean",
|
||||
"image_std",
|
||||
"return_tensors",
|
||||
"data_format",
|
||||
"input_data_format",
|
||||
]
|
||||
|
||||
# Copied from transformers.models.mobilenet_v1.image_processing_mobilenet_v1.MobileNetV1ImageProcessor.resize
|
||||
def resize(
|
||||
@@ -185,6 +168,7 @@ class MobileNetV2ImageProcessor(BaseImageProcessor):
|
||||
**kwargs,
|
||||
)
|
||||
|
||||
@filter_out_non_signature_kwargs()
|
||||
def preprocess(
|
||||
self,
|
||||
images: ImageInput,
|
||||
@@ -201,7 +185,6 @@ class MobileNetV2ImageProcessor(BaseImageProcessor):
|
||||
return_tensors: Optional[Union[str, TensorType]] = None,
|
||||
data_format: Union[str, ChannelDimension] = ChannelDimension.FIRST,
|
||||
input_data_format: Optional[Union[str, ChannelDimension]] = None,
|
||||
**kwargs,
|
||||
):
|
||||
"""
|
||||
Preprocess an image or batch of images.
|
||||
@@ -266,8 +249,6 @@ class MobileNetV2ImageProcessor(BaseImageProcessor):
|
||||
|
||||
images = make_list_of_images(images)
|
||||
|
||||
validate_kwargs(captured_kwargs=kwargs.keys(), valid_processor_keys=self._valid_processor_keys)
|
||||
|
||||
if not valid_images(images):
|
||||
raise ValueError(
|
||||
"Invalid image type. Must be of type PIL.Image.Image, numpy.ndarray, "
|
||||
|
||||
@@ -29,10 +29,16 @@ from ...image_utils import (
|
||||
make_list_of_images,
|
||||
to_numpy_array,
|
||||
valid_images,
|
||||
validate_kwargs,
|
||||
validate_preprocess_arguments,
|
||||
)
|
||||
from ...utils import TensorType, is_torch_available, is_torch_tensor, is_vision_available, logging
|
||||
from ...utils import (
|
||||
TensorType,
|
||||
filter_out_non_signature_kwargs,
|
||||
is_torch_available,
|
||||
is_torch_tensor,
|
||||
is_vision_available,
|
||||
logging,
|
||||
)
|
||||
|
||||
|
||||
if is_vision_available():
|
||||
@@ -105,21 +111,6 @@ class MobileViTImageProcessor(BaseImageProcessor):
|
||||
self.do_center_crop = do_center_crop
|
||||
self.crop_size = crop_size
|
||||
self.do_flip_channel_order = do_flip_channel_order
|
||||
self._valid_processor_keys = [
|
||||
"images",
|
||||
"segmentation_maps",
|
||||
"do_resize",
|
||||
"size",
|
||||
"resample",
|
||||
"do_rescale",
|
||||
"rescale_factor",
|
||||
"do_center_crop",
|
||||
"crop_size",
|
||||
"do_flip_channel_order",
|
||||
"return_tensors",
|
||||
"data_format",
|
||||
"input_data_format",
|
||||
]
|
||||
|
||||
# Copied from transformers.models.mobilenet_v1.image_processing_mobilenet_v1.MobileNetV1ImageProcessor.resize with PILImageResampling.BICUBIC->PILImageResampling.BILINEAR
|
||||
def resize(
|
||||
@@ -306,6 +297,7 @@ class MobileViTImageProcessor(BaseImageProcessor):
|
||||
segmentation_map = segmentation_map.astype(np.int64)
|
||||
return segmentation_map
|
||||
|
||||
@filter_out_non_signature_kwargs()
|
||||
def preprocess(
|
||||
self,
|
||||
images: ImageInput,
|
||||
@@ -321,7 +313,6 @@ class MobileViTImageProcessor(BaseImageProcessor):
|
||||
return_tensors: Optional[Union[str, TensorType]] = None,
|
||||
data_format: ChannelDimension = ChannelDimension.FIRST,
|
||||
input_data_format: Optional[Union[str, ChannelDimension]] = None,
|
||||
**kwargs,
|
||||
) -> PIL.Image.Image:
|
||||
"""
|
||||
Preprocess an image or batch of images.
|
||||
@@ -383,8 +374,6 @@ class MobileViTImageProcessor(BaseImageProcessor):
|
||||
|
||||
images = make_list_of_images(images)
|
||||
|
||||
validate_kwargs(captured_kwargs=kwargs.keys(), valid_processor_keys=self._valid_processor_keys)
|
||||
|
||||
if segmentation_maps is not None:
|
||||
segmentation_maps = make_list_of_images(segmentation_maps, expected_ndims=2)
|
||||
|
||||
|
||||
@@ -38,10 +38,9 @@ from ...image_utils import (
|
||||
make_list_of_images,
|
||||
to_numpy_array,
|
||||
valid_images,
|
||||
validate_kwargs,
|
||||
validate_preprocess_arguments,
|
||||
)
|
||||
from ...utils import TensorType, logging
|
||||
from ...utils import TensorType, filter_out_non_signature_kwargs, logging
|
||||
from ...utils.import_utils import is_cv2_available, is_vision_available
|
||||
|
||||
|
||||
@@ -126,24 +125,6 @@ class NougatImageProcessor(BaseImageProcessor):
|
||||
self.do_normalize = do_normalize
|
||||
self.image_mean = image_mean if image_mean is not None else IMAGENET_DEFAULT_MEAN
|
||||
self.image_std = image_std if image_std is not None else IMAGENET_DEFAULT_STD
|
||||
self._valid_processor_keys = [
|
||||
"images",
|
||||
"do_crop_margin",
|
||||
"do_resize",
|
||||
"size",
|
||||
"resample",
|
||||
"do_thumbnail",
|
||||
"do_align_long_axis",
|
||||
"do_pad",
|
||||
"do_rescale",
|
||||
"rescale_factor",
|
||||
"do_normalize",
|
||||
"image_mean",
|
||||
"image_std",
|
||||
"return_tensors",
|
||||
"data_format",
|
||||
"input_data_format",
|
||||
]
|
||||
|
||||
def python_find_non_zero(self, image: np.array):
|
||||
"""This is a reimplementation of a findNonZero function equivalent to cv2."""
|
||||
@@ -375,6 +356,7 @@ class NougatImageProcessor(BaseImageProcessor):
|
||||
)
|
||||
return resized_image
|
||||
|
||||
@filter_out_non_signature_kwargs()
|
||||
def preprocess(
|
||||
self,
|
||||
images: ImageInput,
|
||||
@@ -393,7 +375,6 @@ class NougatImageProcessor(BaseImageProcessor):
|
||||
return_tensors: Optional[Union[str, TensorType]] = None,
|
||||
data_format: Optional[ChannelDimension] = ChannelDimension.FIRST,
|
||||
input_data_format: Optional[Union[str, ChannelDimension]] = None,
|
||||
**kwargs,
|
||||
) -> PIL.Image.Image:
|
||||
"""
|
||||
Preprocess an image or batch of images.
|
||||
@@ -461,8 +442,6 @@ class NougatImageProcessor(BaseImageProcessor):
|
||||
|
||||
images = make_list_of_images(images)
|
||||
|
||||
validate_kwargs(captured_kwargs=kwargs.keys(), valid_processor_keys=self._valid_processor_keys)
|
||||
|
||||
if not valid_images(images):
|
||||
raise ValueError(
|
||||
"Invalid image type. Must be of type PIL.Image.Image, numpy.ndarray, "
|
||||
|
||||
@@ -37,11 +37,11 @@ from ...image_utils import (
|
||||
make_list_of_images,
|
||||
to_numpy_array,
|
||||
valid_images,
|
||||
validate_kwargs,
|
||||
validate_preprocess_arguments,
|
||||
)
|
||||
from ...utils import (
|
||||
TensorType,
|
||||
filter_out_non_signature_kwargs,
|
||||
is_scipy_available,
|
||||
is_torch_available,
|
||||
is_vision_available,
|
||||
@@ -233,20 +233,6 @@ class Owlv2ImageProcessor(BaseImageProcessor):
|
||||
self.do_normalize = do_normalize
|
||||
self.image_mean = image_mean if image_mean is not None else OPENAI_CLIP_MEAN
|
||||
self.image_std = image_std if image_std is not None else OPENAI_CLIP_STD
|
||||
self._valid_processor_keys = [
|
||||
"images",
|
||||
"do_pad",
|
||||
"do_resize",
|
||||
"size",
|
||||
"do_rescale",
|
||||
"rescale_factor",
|
||||
"do_normalize",
|
||||
"image_mean",
|
||||
"image_std",
|
||||
"return_tensors",
|
||||
"data_format",
|
||||
"input_data_format",
|
||||
]
|
||||
|
||||
def pad(
|
||||
self,
|
||||
@@ -346,6 +332,7 @@ class Owlv2ImageProcessor(BaseImageProcessor):
|
||||
)
|
||||
return image
|
||||
|
||||
@filter_out_non_signature_kwargs()
|
||||
def preprocess(
|
||||
self,
|
||||
images: ImageInput,
|
||||
@@ -360,7 +347,6 @@ class Owlv2ImageProcessor(BaseImageProcessor):
|
||||
return_tensors: Optional[Union[str, TensorType]] = None,
|
||||
data_format: ChannelDimension = ChannelDimension.FIRST,
|
||||
input_data_format: Optional[Union[str, ChannelDimension]] = None,
|
||||
**kwargs,
|
||||
) -> PIL.Image.Image:
|
||||
"""
|
||||
Preprocess an image or batch of images.
|
||||
@@ -416,8 +402,6 @@ class Owlv2ImageProcessor(BaseImageProcessor):
|
||||
|
||||
images = make_list_of_images(images)
|
||||
|
||||
validate_kwargs(captured_kwargs=kwargs.keys(), valid_processor_keys=self._valid_processor_keys)
|
||||
|
||||
if not valid_images(images):
|
||||
raise ValueError(
|
||||
"Invalid image type. Must be of type PIL.Image.Image, numpy.ndarray, "
|
||||
|
||||
@@ -38,10 +38,9 @@ from ...image_utils import (
|
||||
make_list_of_images,
|
||||
to_numpy_array,
|
||||
valid_images,
|
||||
validate_kwargs,
|
||||
validate_preprocess_arguments,
|
||||
)
|
||||
from ...utils import TensorType, is_torch_available, logging
|
||||
from ...utils import TensorType, filter_out_non_signature_kwargs, is_torch_available, logging
|
||||
|
||||
|
||||
if is_torch_available():
|
||||
@@ -167,22 +166,6 @@ class OwlViTImageProcessor(BaseImageProcessor):
|
||||
self.do_normalize = do_normalize
|
||||
self.image_mean = image_mean if image_mean is not None else OPENAI_CLIP_MEAN
|
||||
self.image_std = image_std if image_std is not None else OPENAI_CLIP_STD
|
||||
self._valid_processor_keys = [
|
||||
"images",
|
||||
"do_resize",
|
||||
"size",
|
||||
"resample",
|
||||
"do_center_crop",
|
||||
"crop_size",
|
||||
"do_rescale",
|
||||
"rescale_factor",
|
||||
"do_normalize",
|
||||
"image_mean",
|
||||
"image_std",
|
||||
"return_tensors",
|
||||
"data_format",
|
||||
"input_data_format",
|
||||
]
|
||||
|
||||
def resize(
|
||||
self,
|
||||
@@ -285,6 +268,7 @@ class OwlViTImageProcessor(BaseImageProcessor):
|
||||
"""
|
||||
return rescale(image, rescale_factor, data_format=data_format, input_data_format=input_data_format)
|
||||
|
||||
@filter_out_non_signature_kwargs()
|
||||
def preprocess(
|
||||
self,
|
||||
images: ImageInput,
|
||||
@@ -301,7 +285,6 @@ class OwlViTImageProcessor(BaseImageProcessor):
|
||||
return_tensors: Optional[Union[TensorType, str]] = None,
|
||||
data_format: Union[str, ChannelDimension] = ChannelDimension.FIRST,
|
||||
input_data_format: Optional[Union[str, ChannelDimension]] = None,
|
||||
**kwargs,
|
||||
) -> BatchFeature:
|
||||
"""
|
||||
Prepares an image or batch of images for the model.
|
||||
@@ -373,7 +356,6 @@ class OwlViTImageProcessor(BaseImageProcessor):
|
||||
"Invalid image type. Must be of type PIL.Image.Image, numpy.ndarray, "
|
||||
"torch.Tensor, tf.Tensor or jax.ndarray."
|
||||
)
|
||||
validate_kwargs(captured_kwargs=kwargs.keys(), valid_processor_keys=self._valid_processor_keys)
|
||||
|
||||
validate_preprocess_arguments(
|
||||
do_rescale=do_rescale,
|
||||
|
||||
@@ -32,10 +32,9 @@ from ...image_utils import (
|
||||
make_list_of_images,
|
||||
to_numpy_array,
|
||||
valid_images,
|
||||
validate_kwargs,
|
||||
validate_preprocess_arguments,
|
||||
)
|
||||
from ...utils import TensorType, is_vision_available, logging
|
||||
from ...utils import TensorType, filter_out_non_signature_kwargs, is_vision_available, logging
|
||||
|
||||
|
||||
if is_vision_available():
|
||||
@@ -114,22 +113,6 @@ class PerceiverImageProcessor(BaseImageProcessor):
|
||||
self.do_normalize = do_normalize
|
||||
self.image_mean = image_mean if image_mean is not None else IMAGENET_DEFAULT_MEAN
|
||||
self.image_std = image_std if image_std is not None else IMAGENET_DEFAULT_STD
|
||||
self._valid_processor_keys = [
|
||||
"images",
|
||||
"do_center_crop",
|
||||
"crop_size",
|
||||
"do_resize",
|
||||
"size",
|
||||
"resample",
|
||||
"do_rescale",
|
||||
"rescale_factor",
|
||||
"do_normalize",
|
||||
"image_mean",
|
||||
"image_std",
|
||||
"return_tensors",
|
||||
"data_format",
|
||||
"input_data_format",
|
||||
]
|
||||
|
||||
def center_crop(
|
||||
self,
|
||||
@@ -224,6 +207,7 @@ class PerceiverImageProcessor(BaseImageProcessor):
|
||||
**kwargs,
|
||||
)
|
||||
|
||||
@filter_out_non_signature_kwargs()
|
||||
def preprocess(
|
||||
self,
|
||||
images: ImageInput,
|
||||
@@ -240,7 +224,6 @@ class PerceiverImageProcessor(BaseImageProcessor):
|
||||
return_tensors: Optional[Union[str, TensorType]] = None,
|
||||
data_format: ChannelDimension = ChannelDimension.FIRST,
|
||||
input_data_format: Optional[Union[str, ChannelDimension]] = None,
|
||||
**kwargs,
|
||||
) -> PIL.Image.Image:
|
||||
"""
|
||||
Preprocess an image or batch of images.
|
||||
@@ -303,8 +286,6 @@ class PerceiverImageProcessor(BaseImageProcessor):
|
||||
|
||||
images = make_list_of_images(images)
|
||||
|
||||
validate_kwargs(captured_kwargs=kwargs.keys(), valid_processor_keys=self._valid_processor_keys)
|
||||
|
||||
if not valid_images(images):
|
||||
raise ValueError(
|
||||
"Invalid image type. Must be of type PIL.Image.Image, numpy.ndarray, "
|
||||
|
||||
@@ -35,10 +35,9 @@ from ...image_utils import (
|
||||
make_list_of_images,
|
||||
to_numpy_array,
|
||||
valid_images,
|
||||
validate_kwargs,
|
||||
validate_preprocess_arguments,
|
||||
)
|
||||
from ...utils import TensorType, is_vision_available, logging
|
||||
from ...utils import TensorType, filter_out_non_signature_kwargs, is_vision_available, logging
|
||||
|
||||
|
||||
if is_vision_available():
|
||||
@@ -133,23 +132,6 @@ class PoolFormerImageProcessor(BaseImageProcessor):
|
||||
self.do_normalize = do_normalize
|
||||
self.image_mean = image_mean if image_mean is not None else IMAGENET_DEFAULT_MEAN
|
||||
self.image_std = image_std if image_std is not None else IMAGENET_DEFAULT_STD
|
||||
self._valid_processor_keys = [
|
||||
"images",
|
||||
"do_resize",
|
||||
"size",
|
||||
"crop_pct",
|
||||
"resample",
|
||||
"do_center_crop",
|
||||
"crop_size",
|
||||
"do_rescale",
|
||||
"rescale_factor",
|
||||
"do_normalize",
|
||||
"image_mean",
|
||||
"image_std",
|
||||
"return_tensors",
|
||||
"data_format",
|
||||
"input_data_format",
|
||||
]
|
||||
|
||||
def resize(
|
||||
self,
|
||||
@@ -227,6 +209,7 @@ class PoolFormerImageProcessor(BaseImageProcessor):
|
||||
**kwargs,
|
||||
)
|
||||
|
||||
@filter_out_non_signature_kwargs()
|
||||
def preprocess(
|
||||
self,
|
||||
images: ImageInput,
|
||||
@@ -244,7 +227,6 @@ class PoolFormerImageProcessor(BaseImageProcessor):
|
||||
return_tensors: Optional[Union[str, TensorType]] = None,
|
||||
data_format: ChannelDimension = ChannelDimension.FIRST,
|
||||
input_data_format: Optional[Union[str, ChannelDimension]] = None,
|
||||
**kwargs,
|
||||
) -> PIL.Image.Image:
|
||||
"""
|
||||
Preprocess an image or batch of images.
|
||||
@@ -311,8 +293,6 @@ class PoolFormerImageProcessor(BaseImageProcessor):
|
||||
|
||||
images = make_list_of_images(images)
|
||||
|
||||
validate_kwargs(captured_kwargs=kwargs.keys(), valid_processor_keys=self._valid_processor_keys)
|
||||
|
||||
if not valid_images(images):
|
||||
raise ValueError(
|
||||
"Invalid image type. Must be of type PIL.Image.Image, numpy.ndarray, "
|
||||
|
||||
@@ -31,10 +31,9 @@ from ...image_utils import (
|
||||
make_list_of_images,
|
||||
to_numpy_array,
|
||||
valid_images,
|
||||
validate_kwargs,
|
||||
validate_preprocess_arguments,
|
||||
)
|
||||
from ...utils import TensorType, logging
|
||||
from ...utils import TensorType, filter_out_non_signature_kwargs, logging
|
||||
|
||||
|
||||
logger = logging.get_logger(__name__)
|
||||
@@ -96,20 +95,6 @@ class PvtImageProcessor(BaseImageProcessor):
|
||||
self.rescale_factor = rescale_factor
|
||||
self.image_mean = image_mean if image_mean is not None else IMAGENET_DEFAULT_MEAN
|
||||
self.image_std = image_std if image_std is not None else IMAGENET_DEFAULT_STD
|
||||
self._valid_processor_keys = [
|
||||
"images",
|
||||
"do_resize",
|
||||
"size",
|
||||
"resample",
|
||||
"do_rescale",
|
||||
"rescale_factor",
|
||||
"do_normalize",
|
||||
"image_mean",
|
||||
"image_std",
|
||||
"return_tensors",
|
||||
"data_format",
|
||||
"input_data_format",
|
||||
]
|
||||
|
||||
# Copied from transformers.models.vit.image_processing_vit.ViTImageProcessor.resize
|
||||
def resize(
|
||||
@@ -160,6 +145,7 @@ class PvtImageProcessor(BaseImageProcessor):
|
||||
**kwargs,
|
||||
)
|
||||
|
||||
@filter_out_non_signature_kwargs()
|
||||
def preprocess(
|
||||
self,
|
||||
images: ImageInput,
|
||||
@@ -174,7 +160,6 @@ class PvtImageProcessor(BaseImageProcessor):
|
||||
return_tensors: Optional[Union[str, TensorType]] = None,
|
||||
data_format: Union[str, ChannelDimension] = ChannelDimension.FIRST,
|
||||
input_data_format: Optional[Union[str, ChannelDimension]] = None,
|
||||
**kwargs,
|
||||
):
|
||||
"""
|
||||
Preprocess an image or batch of images.
|
||||
@@ -233,8 +218,6 @@ class PvtImageProcessor(BaseImageProcessor):
|
||||
|
||||
images = make_list_of_images(images)
|
||||
|
||||
validate_kwargs(captured_kwargs=kwargs.keys(), valid_processor_keys=self._valid_processor_keys)
|
||||
|
||||
if not valid_images(images):
|
||||
raise ValueError(
|
||||
"Invalid image type. Must be of type PIL.Image.Image, numpy.ndarray, "
|
||||
|
||||
@@ -45,10 +45,10 @@ from ...image_utils import (
|
||||
to_numpy_array,
|
||||
valid_images,
|
||||
validate_annotations,
|
||||
validate_kwargs,
|
||||
validate_preprocess_arguments,
|
||||
)
|
||||
from ...utils import (
|
||||
filter_out_non_signature_kwargs,
|
||||
is_flax_available,
|
||||
is_jax_tensor,
|
||||
is_tf_available,
|
||||
@@ -471,27 +471,6 @@ class RTDetrImageProcessor(BaseImageProcessor):
|
||||
self.image_std = image_std if image_std is not None else IMAGENET_DEFAULT_STD
|
||||
self.do_pad = do_pad
|
||||
self.pad_size = pad_size
|
||||
self._valid_processor_keys = [
|
||||
"images",
|
||||
"annotations",
|
||||
"return_segmentation_masks",
|
||||
"masks_path",
|
||||
"do_resize",
|
||||
"size",
|
||||
"resample",
|
||||
"do_rescale",
|
||||
"rescale_factor",
|
||||
"do_normalize",
|
||||
"do_convert_annotations",
|
||||
"image_mean",
|
||||
"image_std",
|
||||
"do_pad",
|
||||
"pad_size",
|
||||
"format",
|
||||
"return_tensors",
|
||||
"data_format",
|
||||
"input_data_format",
|
||||
]
|
||||
|
||||
def prepare_annotation(
|
||||
self,
|
||||
@@ -800,6 +779,7 @@ class RTDetrImageProcessor(BaseImageProcessor):
|
||||
|
||||
return encoded_inputs
|
||||
|
||||
@filter_out_non_signature_kwargs()
|
||||
def preprocess(
|
||||
self,
|
||||
images: ImageInput,
|
||||
@@ -821,7 +801,6 @@ class RTDetrImageProcessor(BaseImageProcessor):
|
||||
data_format: Union[str, ChannelDimension] = ChannelDimension.FIRST,
|
||||
input_data_format: Optional[Union[str, ChannelDimension]] = None,
|
||||
pad_size: Optional[Dict[str, int]] = None,
|
||||
**kwargs,
|
||||
) -> BatchFeature:
|
||||
"""
|
||||
Preprocess an image or a batch of images so that it can be used by the model.
|
||||
@@ -920,7 +899,6 @@ class RTDetrImageProcessor(BaseImageProcessor):
|
||||
"Invalid image type. Must be of type PIL.Image.Image, numpy.ndarray, "
|
||||
"torch.Tensor, tf.Tensor or jax.ndarray."
|
||||
)
|
||||
validate_kwargs(captured_kwargs=kwargs.keys(), valid_processor_keys=self._valid_processor_keys)
|
||||
|
||||
# Here, the pad() method pads to the maximum of (width, height). It does not need to be validated.
|
||||
|
||||
|
||||
@@ -35,11 +35,11 @@ from ...image_utils import (
|
||||
make_list_of_images,
|
||||
to_numpy_array,
|
||||
valid_images,
|
||||
validate_kwargs,
|
||||
validate_preprocess_arguments,
|
||||
)
|
||||
from ...utils import (
|
||||
TensorType,
|
||||
filter_out_non_signature_kwargs,
|
||||
is_tf_available,
|
||||
is_torch_available,
|
||||
is_torchvision_available,
|
||||
@@ -162,26 +162,6 @@ class SamImageProcessor(BaseImageProcessor):
|
||||
self.pad_size = pad_size
|
||||
self.mask_pad_size = mask_pad_size
|
||||
self.do_convert_rgb = do_convert_rgb
|
||||
self._valid_processor_keys = [
|
||||
"images",
|
||||
"segmentation_maps",
|
||||
"do_resize",
|
||||
"size",
|
||||
"mask_size",
|
||||
"resample",
|
||||
"do_rescale",
|
||||
"rescale_factor",
|
||||
"do_normalize",
|
||||
"image_mean",
|
||||
"image_std",
|
||||
"do_pad",
|
||||
"pad_size",
|
||||
"mask_pad_size",
|
||||
"do_convert_rgb",
|
||||
"return_tensors",
|
||||
"data_format",
|
||||
"input_data_format",
|
||||
]
|
||||
|
||||
def pad_image(
|
||||
self,
|
||||
@@ -409,6 +389,7 @@ class SamImageProcessor(BaseImageProcessor):
|
||||
|
||||
return segmentation_map, original_size
|
||||
|
||||
@filter_out_non_signature_kwargs()
|
||||
def preprocess(
|
||||
self,
|
||||
images: ImageInput,
|
||||
@@ -429,7 +410,6 @@ class SamImageProcessor(BaseImageProcessor):
|
||||
return_tensors: Optional[Union[str, TensorType]] = None,
|
||||
data_format: ChannelDimension = ChannelDimension.FIRST,
|
||||
input_data_format: Optional[Union[str, ChannelDimension]] = None,
|
||||
**kwargs,
|
||||
):
|
||||
"""
|
||||
Preprocess an image or batch of images.
|
||||
@@ -513,8 +493,6 @@ class SamImageProcessor(BaseImageProcessor):
|
||||
|
||||
images = make_list_of_images(images)
|
||||
|
||||
validate_kwargs(captured_kwargs=kwargs.keys(), valid_processor_keys=self._valid_processor_keys)
|
||||
|
||||
if not valid_images(images):
|
||||
raise ValueError(
|
||||
"Invalid image type. Must be of type PIL.Image.Image, numpy.ndarray, "
|
||||
|
||||
@@ -33,10 +33,9 @@ from ...image_utils import (
|
||||
make_list_of_images,
|
||||
to_numpy_array,
|
||||
valid_images,
|
||||
validate_kwargs,
|
||||
validate_preprocess_arguments,
|
||||
)
|
||||
from ...utils import TensorType, is_vision_available, logging
|
||||
from ...utils import TensorType, filter_out_non_signature_kwargs, is_vision_available, logging
|
||||
|
||||
|
||||
logger = logging.get_logger(__name__)
|
||||
@@ -107,22 +106,8 @@ class SiglipImageProcessor(BaseImageProcessor):
|
||||
self.image_mean = image_mean
|
||||
self.image_std = image_std
|
||||
self.do_convert_rgb = do_convert_rgb
|
||||
self._valid_processor_keys = [
|
||||
"images",
|
||||
"do_resize",
|
||||
"size",
|
||||
"resample",
|
||||
"do_rescale",
|
||||
"rescale_factor",
|
||||
"do_normalize",
|
||||
"image_mean",
|
||||
"image_std",
|
||||
"return_tensors",
|
||||
"data_format",
|
||||
"input_data_format",
|
||||
"do_convert_rgb",
|
||||
]
|
||||
|
||||
@filter_out_non_signature_kwargs()
|
||||
def preprocess(
|
||||
self,
|
||||
images: ImageInput,
|
||||
@@ -138,7 +123,6 @@ class SiglipImageProcessor(BaseImageProcessor):
|
||||
data_format: Optional[ChannelDimension] = ChannelDimension.FIRST,
|
||||
input_data_format: Optional[Union[str, ChannelDimension]] = None,
|
||||
do_convert_rgb: bool = None,
|
||||
**kwargs,
|
||||
) -> PIL.Image.Image:
|
||||
"""
|
||||
Preprocess an image or batch of images.
|
||||
@@ -199,8 +183,6 @@ class SiglipImageProcessor(BaseImageProcessor):
|
||||
|
||||
images = make_list_of_images(images)
|
||||
|
||||
validate_kwargs(captured_kwargs=kwargs.keys(), valid_processor_keys=self._valid_processor_keys)
|
||||
|
||||
if not valid_images(images):
|
||||
raise ValueError(
|
||||
"Invalid image type. Must be of type PIL.Image.Image, numpy.ndarray, "
|
||||
|
||||
@@ -28,10 +28,9 @@ from ...image_utils import (
|
||||
make_list_of_images,
|
||||
to_numpy_array,
|
||||
valid_images,
|
||||
validate_kwargs,
|
||||
validate_preprocess_arguments,
|
||||
)
|
||||
from ...utils import TensorType, logging
|
||||
from ...utils import TensorType, filter_out_non_signature_kwargs, logging
|
||||
|
||||
|
||||
logger = logging.get_logger(__name__)
|
||||
@@ -66,16 +65,6 @@ class Swin2SRImageProcessor(BaseImageProcessor):
|
||||
self.rescale_factor = rescale_factor
|
||||
self.do_pad = do_pad
|
||||
self.pad_size = pad_size
|
||||
self._valid_processor_keys = [
|
||||
"images",
|
||||
"do_rescale",
|
||||
"rescale_factor",
|
||||
"do_pad",
|
||||
"pad_size",
|
||||
"return_tensors",
|
||||
"data_format",
|
||||
"input_data_format",
|
||||
]
|
||||
|
||||
def pad(
|
||||
self,
|
||||
@@ -118,6 +107,7 @@ class Swin2SRImageProcessor(BaseImageProcessor):
|
||||
input_data_format=input_data_format,
|
||||
)
|
||||
|
||||
@filter_out_non_signature_kwargs()
|
||||
def preprocess(
|
||||
self,
|
||||
images: ImageInput,
|
||||
@@ -128,7 +118,6 @@ class Swin2SRImageProcessor(BaseImageProcessor):
|
||||
return_tensors: Optional[Union[str, TensorType]] = None,
|
||||
data_format: Union[str, ChannelDimension] = ChannelDimension.FIRST,
|
||||
input_data_format: Optional[Union[str, ChannelDimension]] = None,
|
||||
**kwargs,
|
||||
):
|
||||
"""
|
||||
Preprocess an image or batch of images.
|
||||
@@ -172,8 +161,6 @@ class Swin2SRImageProcessor(BaseImageProcessor):
|
||||
|
||||
images = make_list_of_images(images)
|
||||
|
||||
validate_kwargs(captured_kwargs=kwargs.keys(), valid_processor_keys=self._valid_processor_keys)
|
||||
|
||||
if not valid_images(images):
|
||||
raise ValueError(
|
||||
"Invalid image type. Must be of type PIL.Image.Image, numpy.ndarray, "
|
||||
|
||||
@@ -36,10 +36,9 @@ from ...image_utils import (
|
||||
is_valid_image,
|
||||
to_numpy_array,
|
||||
valid_images,
|
||||
validate_kwargs,
|
||||
validate_preprocess_arguments,
|
||||
)
|
||||
from ...utils import TensorType, is_vision_available, logging
|
||||
from ...utils import TensorType, filter_out_non_signature_kwargs, is_vision_available, logging
|
||||
|
||||
|
||||
if is_vision_available():
|
||||
@@ -173,27 +172,6 @@ class TvpImageProcessor(BaseImageProcessor):
|
||||
self.do_flip_channel_order = do_flip_channel_order
|
||||
self.image_mean = image_mean if image_mean is not None else IMAGENET_STANDARD_MEAN
|
||||
self.image_std = image_std if image_std is not None else IMAGENET_STANDARD_STD
|
||||
self._valid_processor_keys = [
|
||||
"videos",
|
||||
"do_resize",
|
||||
"size",
|
||||
"resample",
|
||||
"do_center_crop",
|
||||
"crop_size",
|
||||
"do_rescale",
|
||||
"rescale_factor",
|
||||
"do_pad",
|
||||
"pad_size",
|
||||
"constant_values",
|
||||
"pad_mode",
|
||||
"do_normalize",
|
||||
"do_flip_channel_order",
|
||||
"image_mean",
|
||||
"image_std",
|
||||
"return_tensors",
|
||||
"data_format",
|
||||
"input_data_format",
|
||||
]
|
||||
|
||||
def resize(
|
||||
self,
|
||||
@@ -358,6 +336,7 @@ class TvpImageProcessor(BaseImageProcessor):
|
||||
|
||||
return image
|
||||
|
||||
@filter_out_non_signature_kwargs()
|
||||
def preprocess(
|
||||
self,
|
||||
videos: Union[ImageInput, List[ImageInput], List[List[ImageInput]]],
|
||||
@@ -379,7 +358,6 @@ class TvpImageProcessor(BaseImageProcessor):
|
||||
return_tensors: Optional[Union[str, TensorType]] = None,
|
||||
data_format: ChannelDimension = ChannelDimension.FIRST,
|
||||
input_data_format: Optional[Union[str, ChannelDimension]] = None,
|
||||
**kwargs,
|
||||
) -> PIL.Image.Image:
|
||||
"""
|
||||
Preprocess an image or batch of images.
|
||||
@@ -459,8 +437,6 @@ class TvpImageProcessor(BaseImageProcessor):
|
||||
crop_size = crop_size if crop_size is not None else self.crop_size
|
||||
crop_size = get_size_dict(crop_size, param_name="crop_size")
|
||||
|
||||
validate_kwargs(captured_kwargs=kwargs.keys(), valid_processor_keys=self._valid_processor_keys)
|
||||
|
||||
if not valid_images(videos):
|
||||
raise ValueError(
|
||||
"Invalid image type. Must be of type PIL.Image.Image, numpy.ndarray, "
|
||||
|
||||
@@ -38,10 +38,9 @@ from ...image_utils import (
|
||||
make_list_of_images,
|
||||
to_numpy_array,
|
||||
valid_images,
|
||||
validate_kwargs,
|
||||
validate_preprocess_arguments,
|
||||
)
|
||||
from ...utils import TensorType, is_vision_available, logging
|
||||
from ...utils import TensorType, filter_out_non_signature_kwargs, is_vision_available, logging
|
||||
|
||||
|
||||
logger = logging.get_logger(__name__)
|
||||
@@ -140,24 +139,6 @@ class VideoLlavaImageProcessor(BaseImageProcessor):
|
||||
self.image_mean = image_mean if image_mean is not None else OPENAI_CLIP_MEAN
|
||||
self.image_std = image_std if image_std is not None else OPENAI_CLIP_STD
|
||||
self.do_convert_rgb = do_convert_rgb
|
||||
self._valid_processor_keys = [
|
||||
"images",
|
||||
"videos",
|
||||
"do_resize",
|
||||
"size",
|
||||
"resample",
|
||||
"do_center_crop",
|
||||
"crop_size",
|
||||
"do_rescale",
|
||||
"rescale_factor",
|
||||
"do_normalize",
|
||||
"image_mean",
|
||||
"image_std",
|
||||
"do_convert_rgb",
|
||||
"return_tensors",
|
||||
"data_format",
|
||||
"input_data_format",
|
||||
]
|
||||
|
||||
def resize(
|
||||
self,
|
||||
@@ -208,6 +189,7 @@ class VideoLlavaImageProcessor(BaseImageProcessor):
|
||||
**kwargs,
|
||||
)
|
||||
|
||||
@filter_out_non_signature_kwargs()
|
||||
def preprocess(
|
||||
self,
|
||||
images: List[ImageInput] = None,
|
||||
@@ -226,7 +208,6 @@ class VideoLlavaImageProcessor(BaseImageProcessor):
|
||||
return_tensors: Optional[Union[str, TensorType]] = None,
|
||||
data_format: Optional[ChannelDimension] = ChannelDimension.FIRST,
|
||||
input_data_format: Optional[Union[str, ChannelDimension]] = None,
|
||||
**kwargs,
|
||||
) -> PIL.Image.Image:
|
||||
"""
|
||||
Preprocess an image or batch of images.
|
||||
@@ -301,8 +282,6 @@ class VideoLlavaImageProcessor(BaseImageProcessor):
|
||||
if videos is not None:
|
||||
videos = make_batched_videos(videos)
|
||||
|
||||
validate_kwargs(captured_kwargs=kwargs.keys(), valid_processor_keys=self._valid_processor_keys)
|
||||
|
||||
if (videos is not None and not valid_images(videos)) or (images is not None and not valid_images(images)):
|
||||
raise ValueError(
|
||||
"Invalid input type. Must be of type PIL.Image.Image, numpy.ndarray, "
|
||||
|
||||
@@ -35,10 +35,9 @@ from ...image_utils import (
|
||||
is_valid_image,
|
||||
to_numpy_array,
|
||||
valid_images,
|
||||
validate_kwargs,
|
||||
validate_preprocess_arguments,
|
||||
)
|
||||
from ...utils import TensorType, is_vision_available, logging
|
||||
from ...utils import TensorType, filter_out_non_signature_kwargs, is_vision_available, logging
|
||||
|
||||
|
||||
if is_vision_available():
|
||||
@@ -131,22 +130,6 @@ class VideoMAEImageProcessor(BaseImageProcessor):
|
||||
self.do_normalize = do_normalize
|
||||
self.image_mean = image_mean if image_mean is not None else IMAGENET_STANDARD_MEAN
|
||||
self.image_std = image_std if image_std is not None else IMAGENET_STANDARD_STD
|
||||
self._valid_processor_keys = [
|
||||
"videos",
|
||||
"do_resize",
|
||||
"size",
|
||||
"resample",
|
||||
"do_center_crop",
|
||||
"crop_size",
|
||||
"do_rescale",
|
||||
"rescale_factor",
|
||||
"do_normalize",
|
||||
"image_mean",
|
||||
"image_std",
|
||||
"return_tensors",
|
||||
"data_format",
|
||||
"input_data_format",
|
||||
]
|
||||
|
||||
def resize(
|
||||
self,
|
||||
@@ -249,6 +232,7 @@ class VideoMAEImageProcessor(BaseImageProcessor):
|
||||
image = to_channel_dimension_format(image, data_format, input_channel_dim=input_data_format)
|
||||
return image
|
||||
|
||||
@filter_out_non_signature_kwargs()
|
||||
def preprocess(
|
||||
self,
|
||||
videos: ImageInput,
|
||||
@@ -265,7 +249,6 @@ class VideoMAEImageProcessor(BaseImageProcessor):
|
||||
return_tensors: Optional[Union[str, TensorType]] = None,
|
||||
data_format: ChannelDimension = ChannelDimension.FIRST,
|
||||
input_data_format: Optional[Union[str, ChannelDimension]] = None,
|
||||
**kwargs,
|
||||
) -> PIL.Image.Image:
|
||||
"""
|
||||
Preprocess an image or batch of images.
|
||||
@@ -328,8 +311,6 @@ class VideoMAEImageProcessor(BaseImageProcessor):
|
||||
crop_size = crop_size if crop_size is not None else self.crop_size
|
||||
crop_size = get_size_dict(crop_size, param_name="crop_size")
|
||||
|
||||
validate_kwargs(captured_kwargs=kwargs.keys(), valid_processor_keys=self._valid_processor_keys)
|
||||
|
||||
if not valid_images(videos):
|
||||
raise ValueError(
|
||||
"Invalid image type. Must be of type PIL.Image.Image, numpy.ndarray, "
|
||||
|
||||
@@ -32,10 +32,9 @@ from ...image_utils import (
|
||||
make_list_of_images,
|
||||
to_numpy_array,
|
||||
valid_images,
|
||||
validate_kwargs,
|
||||
validate_preprocess_arguments,
|
||||
)
|
||||
from ...utils import TensorType, is_vision_available, logging
|
||||
from ...utils import TensorType, filter_out_non_signature_kwargs, is_vision_available, logging
|
||||
|
||||
|
||||
if is_vision_available():
|
||||
@@ -192,22 +191,6 @@ class ViltImageProcessor(BaseImageProcessor):
|
||||
self.image_mean = image_mean if image_mean is not None else IMAGENET_STANDARD_MEAN
|
||||
self.image_std = image_std if image_std is not None else IMAGENET_STANDARD_STD
|
||||
self.do_pad = do_pad
|
||||
self._valid_processor_keys = [
|
||||
"images",
|
||||
"do_resize",
|
||||
"size",
|
||||
"size_divisor",
|
||||
"resample",
|
||||
"do_rescale",
|
||||
"rescale_factor",
|
||||
"do_normalize",
|
||||
"image_mean",
|
||||
"image_std",
|
||||
"do_pad",
|
||||
"return_tensors",
|
||||
"data_format",
|
||||
"input_data_format",
|
||||
]
|
||||
|
||||
@classmethod
|
||||
def from_dict(cls, image_processor_dict: Dict[str, Any], **kwargs):
|
||||
@@ -351,6 +334,7 @@ class ViltImageProcessor(BaseImageProcessor):
|
||||
|
||||
return BatchFeature(data=data, tensor_type=return_tensors)
|
||||
|
||||
@filter_out_non_signature_kwargs()
|
||||
def preprocess(
|
||||
self,
|
||||
images: ImageInput,
|
||||
@@ -367,7 +351,6 @@ class ViltImageProcessor(BaseImageProcessor):
|
||||
return_tensors: Optional[Union[str, TensorType]] = None,
|
||||
data_format: ChannelDimension = ChannelDimension.FIRST,
|
||||
input_data_format: Optional[Union[str, ChannelDimension]] = None,
|
||||
**kwargs,
|
||||
) -> PIL.Image.Image:
|
||||
"""
|
||||
Preprocess an image or batch of images.
|
||||
@@ -433,8 +416,6 @@ class ViltImageProcessor(BaseImageProcessor):
|
||||
|
||||
images = make_list_of_images(images)
|
||||
|
||||
validate_kwargs(captured_kwargs=kwargs.keys(), valid_processor_keys=self._valid_processor_keys)
|
||||
|
||||
if not valid_images(images):
|
||||
raise ValueError(
|
||||
"Invalid image type. Must be of type PIL.Image.Image, numpy.ndarray, "
|
||||
|
||||
@@ -31,10 +31,9 @@ from ...image_utils import (
|
||||
make_list_of_images,
|
||||
to_numpy_array,
|
||||
valid_images,
|
||||
validate_kwargs,
|
||||
validate_preprocess_arguments,
|
||||
)
|
||||
from ...utils import TensorType, logging
|
||||
from ...utils import TensorType, filter_out_non_signature_kwargs, logging
|
||||
|
||||
|
||||
logger = logging.get_logger(__name__)
|
||||
@@ -96,20 +95,6 @@ class ViTImageProcessor(BaseImageProcessor):
|
||||
self.rescale_factor = rescale_factor
|
||||
self.image_mean = image_mean if image_mean is not None else IMAGENET_STANDARD_MEAN
|
||||
self.image_std = image_std if image_std is not None else IMAGENET_STANDARD_STD
|
||||
self._valid_processor_keys = [
|
||||
"images",
|
||||
"do_resize",
|
||||
"size",
|
||||
"resample",
|
||||
"do_rescale",
|
||||
"rescale_factor",
|
||||
"do_normalize",
|
||||
"image_mean",
|
||||
"image_std",
|
||||
"return_tensors",
|
||||
"data_format",
|
||||
"input_data_format",
|
||||
]
|
||||
|
||||
def resize(
|
||||
self,
|
||||
@@ -159,6 +144,7 @@ class ViTImageProcessor(BaseImageProcessor):
|
||||
**kwargs,
|
||||
)
|
||||
|
||||
@filter_out_non_signature_kwargs()
|
||||
def preprocess(
|
||||
self,
|
||||
images: ImageInput,
|
||||
@@ -173,7 +159,6 @@ class ViTImageProcessor(BaseImageProcessor):
|
||||
return_tensors: Optional[Union[str, TensorType]] = None,
|
||||
data_format: Union[str, ChannelDimension] = ChannelDimension.FIRST,
|
||||
input_data_format: Optional[Union[str, ChannelDimension]] = None,
|
||||
**kwargs,
|
||||
):
|
||||
"""
|
||||
Preprocess an image or batch of images.
|
||||
@@ -232,8 +217,6 @@ class ViTImageProcessor(BaseImageProcessor):
|
||||
|
||||
images = make_list_of_images(images)
|
||||
|
||||
validate_kwargs(captured_kwargs=kwargs.keys(), valid_processor_keys=self._valid_processor_keys)
|
||||
|
||||
if not valid_images(images):
|
||||
raise ValueError(
|
||||
"Invalid image type. Must be of type PIL.Image.Image, numpy.ndarray, "
|
||||
|
||||
@@ -31,10 +31,9 @@ from ...image_utils import (
|
||||
make_list_of_images,
|
||||
to_numpy_array,
|
||||
valid_images,
|
||||
validate_kwargs,
|
||||
validate_preprocess_arguments,
|
||||
)
|
||||
from ...utils import TensorType, logging
|
||||
from ...utils import TensorType, filter_out_non_signature_kwargs, logging
|
||||
|
||||
|
||||
logger = logging.get_logger(__name__)
|
||||
@@ -88,20 +87,6 @@ class VitMatteImageProcessor(BaseImageProcessor):
|
||||
self.image_mean = image_mean if image_mean is not None else IMAGENET_STANDARD_MEAN
|
||||
self.image_std = image_std if image_std is not None else IMAGENET_STANDARD_STD
|
||||
self.size_divisibility = size_divisibility
|
||||
self._valid_processor_keys = [
|
||||
"images",
|
||||
"trimaps",
|
||||
"do_rescale",
|
||||
"rescale_factor",
|
||||
"do_normalize",
|
||||
"image_mean",
|
||||
"image_std",
|
||||
"do_pad",
|
||||
"size_divisibility",
|
||||
"return_tensors",
|
||||
"data_format",
|
||||
"input_data_format",
|
||||
]
|
||||
|
||||
def pad_image(
|
||||
self,
|
||||
@@ -144,6 +129,7 @@ class VitMatteImageProcessor(BaseImageProcessor):
|
||||
|
||||
return image
|
||||
|
||||
@filter_out_non_signature_kwargs()
|
||||
def preprocess(
|
||||
self,
|
||||
images: ImageInput,
|
||||
@@ -158,7 +144,6 @@ class VitMatteImageProcessor(BaseImageProcessor):
|
||||
return_tensors: Optional[Union[str, TensorType]] = None,
|
||||
data_format: Union[str, ChannelDimension] = ChannelDimension.FIRST,
|
||||
input_data_format: Optional[Union[str, ChannelDimension]] = None,
|
||||
**kwargs,
|
||||
):
|
||||
"""
|
||||
Preprocess an image or batch of images.
|
||||
@@ -213,8 +198,6 @@ class VitMatteImageProcessor(BaseImageProcessor):
|
||||
images = make_list_of_images(images)
|
||||
trimaps = make_list_of_images(trimaps, expected_ndims=2)
|
||||
|
||||
validate_kwargs(captured_kwargs=kwargs.keys(), valid_processor_keys=self._valid_processor_keys)
|
||||
|
||||
if not valid_images(trimaps):
|
||||
raise ValueError(
|
||||
"Invalid trimap type. Must be of type PIL.Image.Image, numpy.ndarray, "
|
||||
|
||||
@@ -39,10 +39,9 @@ from ...image_utils import (
|
||||
is_valid_image,
|
||||
to_numpy_array,
|
||||
valid_images,
|
||||
validate_kwargs,
|
||||
validate_preprocess_arguments,
|
||||
)
|
||||
from ...utils import logging
|
||||
from ...utils import filter_out_non_signature_kwargs, logging
|
||||
|
||||
|
||||
if is_vision_available():
|
||||
@@ -139,23 +138,6 @@ class VivitImageProcessor(BaseImageProcessor):
|
||||
self.do_normalize = do_normalize
|
||||
self.image_mean = image_mean if image_mean is not None else IMAGENET_STANDARD_MEAN
|
||||
self.image_std = image_std if image_std is not None else IMAGENET_STANDARD_STD
|
||||
self._valid_processor_keys = [
|
||||
"videos",
|
||||
"do_resize",
|
||||
"size",
|
||||
"resample",
|
||||
"do_center_crop",
|
||||
"crop_size",
|
||||
"do_rescale",
|
||||
"rescale_factor",
|
||||
"offset",
|
||||
"do_normalize",
|
||||
"image_mean",
|
||||
"image_std",
|
||||
"return_tensors",
|
||||
"data_format",
|
||||
"input_data_format",
|
||||
]
|
||||
|
||||
def resize(
|
||||
self,
|
||||
@@ -304,6 +286,7 @@ class VivitImageProcessor(BaseImageProcessor):
|
||||
image = to_channel_dimension_format(image, data_format, input_channel_dim=input_data_format)
|
||||
return image
|
||||
|
||||
@filter_out_non_signature_kwargs()
|
||||
def preprocess(
|
||||
self,
|
||||
videos: ImageInput,
|
||||
@@ -321,7 +304,6 @@ class VivitImageProcessor(BaseImageProcessor):
|
||||
return_tensors: Optional[Union[str, TensorType]] = None,
|
||||
data_format: ChannelDimension = ChannelDimension.FIRST,
|
||||
input_data_format: Optional[Union[str, ChannelDimension]] = None,
|
||||
**kwargs,
|
||||
) -> PIL.Image.Image:
|
||||
"""
|
||||
Preprocess an image or batch of images.
|
||||
@@ -387,8 +369,6 @@ class VivitImageProcessor(BaseImageProcessor):
|
||||
crop_size = crop_size if crop_size is not None else self.crop_size
|
||||
crop_size = get_size_dict(crop_size, param_name="crop_size")
|
||||
|
||||
validate_kwargs(captured_kwargs=kwargs.keys(), valid_processor_keys=self._valid_processor_keys)
|
||||
|
||||
if not valid_images(videos):
|
||||
raise ValueError(
|
||||
"Invalid image type. Must be of type PIL.Image.Image, numpy.ndarray, "
|
||||
|
||||
@@ -35,7 +35,14 @@ from ...image_utils import (
|
||||
valid_images,
|
||||
validate_preprocess_arguments,
|
||||
)
|
||||
from ...utils import TensorType, is_torch_available, is_vision_available, logging, requires_backends
|
||||
from ...utils import (
|
||||
TensorType,
|
||||
filter_out_non_signature_kwargs,
|
||||
is_torch_available,
|
||||
is_vision_available,
|
||||
logging,
|
||||
requires_backends,
|
||||
)
|
||||
|
||||
|
||||
if is_vision_available():
|
||||
@@ -164,24 +171,6 @@ class ZoeDepthImageProcessor(BaseImageProcessor):
|
||||
self.ensure_multiple_of = ensure_multiple_of
|
||||
self.resample = resample
|
||||
|
||||
self._valid_processor_keys = [
|
||||
"images",
|
||||
"do_resize",
|
||||
"size",
|
||||
"keep_aspect_ratio",
|
||||
"ensure_multiple_of",
|
||||
"resample",
|
||||
"do_rescale",
|
||||
"rescale_factor",
|
||||
"do_normalize",
|
||||
"image_mean",
|
||||
"image_std",
|
||||
"do_pad",
|
||||
"return_tensors",
|
||||
"data_format",
|
||||
"input_data_format",
|
||||
]
|
||||
|
||||
def resize(
|
||||
self,
|
||||
image: np.ndarray,
|
||||
@@ -301,6 +290,7 @@ class ZoeDepthImageProcessor(BaseImageProcessor):
|
||||
input_data_format=input_data_format,
|
||||
)
|
||||
|
||||
@filter_out_non_signature_kwargs()
|
||||
def preprocess(
|
||||
self,
|
||||
images: ImageInput,
|
||||
|
||||
@@ -816,6 +816,9 @@ def filter_out_non_signature_kwargs(extra: Optional[list] = None):
|
||||
is_instance_method = "self" in function_named_args
|
||||
is_class_method = "cls" in function_named_args
|
||||
|
||||
# Mark function as decorated
|
||||
func._filter_out_non_signature_kwargs = True
|
||||
|
||||
@wraps(func)
|
||||
def wrapper(*args, **kwargs):
|
||||
valid_kwargs = {}
|
||||
|
||||
@@ -43,6 +43,7 @@ class BlipImageProcessingTester(unittest.TestCase):
|
||||
image_std=[0.26862954, 0.26130258, 0.27577711],
|
||||
do_convert_rgb=True,
|
||||
):
|
||||
super().__init__()
|
||||
size = size if size is not None else {"height": 20, "width": 20}
|
||||
self.parent = parent
|
||||
self.batch_size = batch_size
|
||||
|
||||
@@ -50,6 +50,7 @@ class BridgeTowerImageProcessingTester(unittest.TestCase):
|
||||
max_resolution=400,
|
||||
num_channels=3,
|
||||
):
|
||||
super().__init__()
|
||||
self.parent = parent
|
||||
self.do_resize = do_resize
|
||||
self.size = size if size is not None else {"shortest_edge": 288}
|
||||
|
||||
@@ -50,6 +50,7 @@ class ChameleonImageProcessingTester(unittest.TestCase):
|
||||
image_std=[1.0, 1.0, 1.0],
|
||||
do_convert_rgb=True,
|
||||
):
|
||||
super().__init__()
|
||||
size = size if size is not None else {"shortest_edge": 18}
|
||||
crop_size = crop_size if crop_size is not None else {"height": 18, "width": 18}
|
||||
self.parent = parent
|
||||
|
||||
@@ -44,6 +44,7 @@ class ChineseCLIPImageProcessingTester(unittest.TestCase):
|
||||
image_std=[0.26862954, 0.26130258, 0.27577711],
|
||||
do_convert_rgb=True,
|
||||
):
|
||||
super().__init__()
|
||||
size = size if size is not None else {"height": 224, "width": 224}
|
||||
crop_size = crop_size if crop_size is not None else {"height": 18, "width": 18}
|
||||
self.parent = parent
|
||||
|
||||
@@ -42,6 +42,7 @@ class ConvNextImageProcessingTester(unittest.TestCase):
|
||||
image_mean=[0.5, 0.5, 0.5],
|
||||
image_std=[0.5, 0.5, 0.5],
|
||||
):
|
||||
super().__init__()
|
||||
size = size if size is not None else {"shortest_edge": 20}
|
||||
self.parent = parent
|
||||
self.batch_size = batch_size
|
||||
|
||||
@@ -43,6 +43,7 @@ class DeiTImageProcessingTester(unittest.TestCase):
|
||||
image_mean=[0.5, 0.5, 0.5],
|
||||
image_std=[0.5, 0.5, 0.5],
|
||||
):
|
||||
super().__init__()
|
||||
size = size if size is not None else {"height": 20, "width": 20}
|
||||
crop_size = crop_size if crop_size is not None else {"height": 18, "width": 18}
|
||||
|
||||
|
||||
@@ -51,6 +51,7 @@ class DonutImageProcessingTester(unittest.TestCase):
|
||||
image_mean=[0.5, 0.5, 0.5],
|
||||
image_std=[0.5, 0.5, 0.5],
|
||||
):
|
||||
super().__init__()
|
||||
self.parent = parent
|
||||
self.batch_size = batch_size
|
||||
self.num_channels = num_channels
|
||||
|
||||
@@ -43,6 +43,7 @@ class DPTImageProcessingTester(unittest.TestCase):
|
||||
image_mean=[0.5, 0.5, 0.5],
|
||||
image_std=[0.5, 0.5, 0.5],
|
||||
):
|
||||
super().__init__()
|
||||
size = size if size is not None else {"height": 18, "width": 18}
|
||||
self.parent = parent
|
||||
self.batch_size = batch_size
|
||||
|
||||
@@ -43,6 +43,7 @@ class EfficientNetImageProcessorTester(unittest.TestCase):
|
||||
image_mean=[0.5, 0.5, 0.5],
|
||||
image_std=[0.5, 0.5, 0.5],
|
||||
):
|
||||
super().__init__()
|
||||
size = size if size is not None else {"height": 18, "width": 18}
|
||||
self.parent = parent
|
||||
self.batch_size = batch_size
|
||||
|
||||
@@ -76,6 +76,7 @@ class FlavaImageProcessingTester(unittest.TestCase):
|
||||
codebook_image_mean=FLAVA_CODEBOOK_MEAN,
|
||||
codebook_image_std=FLAVA_CODEBOOK_STD,
|
||||
):
|
||||
super().__init__()
|
||||
size = size if size is not None else {"height": 224, "width": 224}
|
||||
crop_size = crop_size if crop_size is not None else {"height": 224, "width": 224}
|
||||
codebook_size = codebook_size if codebook_size is not None else {"height": 112, "width": 112}
|
||||
|
||||
@@ -46,6 +46,7 @@ class GLPNImageProcessingTester(unittest.TestCase):
|
||||
size_divisor=32,
|
||||
do_rescale=True,
|
||||
):
|
||||
super().__init__()
|
||||
self.parent = parent
|
||||
self.batch_size = batch_size
|
||||
self.num_channels = num_channels
|
||||
|
||||
@@ -51,6 +51,7 @@ class ImageGPTImageProcessingTester(unittest.TestCase):
|
||||
size=None,
|
||||
do_normalize=True,
|
||||
):
|
||||
super().__init__()
|
||||
size = size if size is not None else {"height": 18, "width": 18}
|
||||
self.parent = parent
|
||||
self.batch_size = batch_size
|
||||
|
||||
@@ -50,6 +50,7 @@ class InstructBlipVideoProcessingTester(unittest.TestCase):
|
||||
do_convert_rgb=True,
|
||||
frames=4,
|
||||
):
|
||||
super().__init__()
|
||||
size = size if size is not None else {"height": 18, "width": 18}
|
||||
self.parent = parent
|
||||
self.batch_size = batch_size
|
||||
|
||||
@@ -41,6 +41,7 @@ class LayoutLMv2ImageProcessingTester(unittest.TestCase):
|
||||
size=None,
|
||||
apply_ocr=True,
|
||||
):
|
||||
super().__init__()
|
||||
size = size if size is not None else {"height": 18, "width": 18}
|
||||
self.parent = parent
|
||||
self.batch_size = batch_size
|
||||
|
||||
@@ -41,6 +41,7 @@ class LayoutLMv3ImageProcessingTester(unittest.TestCase):
|
||||
size=None,
|
||||
apply_ocr=True,
|
||||
):
|
||||
super().__init__()
|
||||
size = size if size is not None else {"height": 18, "width": 18}
|
||||
self.parent = parent
|
||||
self.batch_size = batch_size
|
||||
|
||||
@@ -43,6 +43,7 @@ class LevitImageProcessingTester(unittest.TestCase):
|
||||
image_mean=[0.5, 0.5, 0.5],
|
||||
image_std=[0.5, 0.5, 0.5],
|
||||
):
|
||||
super().__init__()
|
||||
size = size if size is not None else {"shortest_edge": 18}
|
||||
crop_size = crop_size if crop_size is not None else {"height": 18, "width": 18}
|
||||
self.parent = parent
|
||||
|
||||
@@ -40,6 +40,7 @@ class MobileNetV1ImageProcessingTester(unittest.TestCase):
|
||||
do_center_crop=True,
|
||||
crop_size=None,
|
||||
):
|
||||
super().__init__()
|
||||
size = size if size is not None else {"shortest_edge": 20}
|
||||
crop_size = crop_size if crop_size is not None else {"height": 18, "width": 18}
|
||||
self.parent = parent
|
||||
|
||||
@@ -40,6 +40,7 @@ class MobileNetV2ImageProcessingTester(unittest.TestCase):
|
||||
do_center_crop=True,
|
||||
crop_size=None,
|
||||
):
|
||||
super().__init__()
|
||||
size = size if size is not None else {"shortest_edge": 20}
|
||||
crop_size = crop_size if crop_size is not None else {"height": 18, "width": 18}
|
||||
self.parent = parent
|
||||
|
||||
@@ -48,6 +48,7 @@ class MobileViTImageProcessingTester(unittest.TestCase):
|
||||
crop_size=None,
|
||||
do_flip_channel_order=True,
|
||||
):
|
||||
super().__init__()
|
||||
size = size if size is not None else {"shortest_edge": 20}
|
||||
crop_size = crop_size if crop_size is not None else {"height": 18, "width": 18}
|
||||
self.parent = parent
|
||||
|
||||
@@ -53,6 +53,7 @@ class NougatImageProcessingTester(unittest.TestCase):
|
||||
image_mean=[0.5, 0.5, 0.5],
|
||||
image_std=[0.5, 0.5, 0.5],
|
||||
):
|
||||
super().__init__()
|
||||
size = size if size is not None else {"height": 20, "width": 20}
|
||||
self.parent = parent
|
||||
self.batch_size = batch_size
|
||||
|
||||
@@ -47,6 +47,7 @@ class Owlv2ImageProcessingTester(unittest.TestCase):
|
||||
image_std=[0.26862954, 0.26130258, 0.27577711],
|
||||
do_convert_rgb=True,
|
||||
):
|
||||
super().__init__()
|
||||
self.parent = parent
|
||||
self.batch_size = batch_size
|
||||
self.num_channels = num_channels
|
||||
|
||||
@@ -44,6 +44,7 @@ class OwlViTImageProcessingTester(unittest.TestCase):
|
||||
image_std=[0.26862954, 0.26130258, 0.27577711],
|
||||
do_convert_rgb=True,
|
||||
):
|
||||
super().__init__()
|
||||
self.parent = parent
|
||||
self.batch_size = batch_size
|
||||
self.num_channels = num_channels
|
||||
|
||||
@@ -41,6 +41,7 @@ class PoolFormerImageProcessingTester(unittest.TestCase):
|
||||
image_mean=[0.5, 0.5, 0.5],
|
||||
image_std=[0.5, 0.5, 0.5],
|
||||
):
|
||||
super().__init__()
|
||||
size = size if size is not None else {"shortest_edge": 30}
|
||||
crop_size = crop_size if crop_size is not None else {"height": 30, "width": 30}
|
||||
self.parent = parent
|
||||
|
||||
@@ -41,6 +41,7 @@ class PvtImageProcessingTester(unittest.TestCase):
|
||||
image_mean=[0.485, 0.456, 0.406],
|
||||
image_std=[0.229, 0.224, 0.225],
|
||||
):
|
||||
super().__init__()
|
||||
size = size if size is not None else {"height": 18, "width": 18}
|
||||
self.parent = parent
|
||||
self.batch_size = batch_size
|
||||
|
||||
@@ -45,6 +45,7 @@ class RTDetrImageProcessingTester(unittest.TestCase):
|
||||
do_pad=False,
|
||||
return_tensors="pt",
|
||||
):
|
||||
super().__init__()
|
||||
self.parent = parent
|
||||
self.batch_size = batch_size
|
||||
self.num_channels = num_channels
|
||||
|
||||
@@ -43,6 +43,7 @@ class SiglipImageProcessingTester(unittest.TestCase):
|
||||
image_mean=[0.5, 0.5, 0.5],
|
||||
image_std=[0.5, 0.5, 0.5],
|
||||
):
|
||||
super().__init__()
|
||||
size = size if size is not None else {"height": 18, "width": 18}
|
||||
self.parent = parent
|
||||
self.batch_size = batch_size
|
||||
|
||||
@@ -48,6 +48,7 @@ class Swin2SRImageProcessingTester(unittest.TestCase):
|
||||
do_pad=True,
|
||||
pad_size=8,
|
||||
):
|
||||
super().__init__()
|
||||
self.parent = parent
|
||||
self.batch_size = batch_size
|
||||
self.num_channels = num_channels
|
||||
|
||||
@@ -58,6 +58,7 @@ class TvpImageProcessingTester(unittest.TestCase):
|
||||
num_channels=3,
|
||||
num_frames=2,
|
||||
):
|
||||
super().__init__()
|
||||
self.do_resize = do_resize
|
||||
self.size = size
|
||||
self.do_center_crop = do_center_crop
|
||||
|
||||
@@ -52,6 +52,7 @@ class VideoLlavaImageProcessingTester(unittest.TestCase):
|
||||
image_std=OPENAI_CLIP_STD,
|
||||
do_convert_rgb=True,
|
||||
):
|
||||
super().__init__()
|
||||
size = size if size is not None else {"shortest_edge": 20}
|
||||
crop_size = crop_size if crop_size is not None else {"height": 18, "width": 18}
|
||||
self.parent = parent
|
||||
|
||||
@@ -50,6 +50,7 @@ class VideoMAEImageProcessingTester(unittest.TestCase):
|
||||
image_std=[0.5, 0.5, 0.5],
|
||||
crop_size=None,
|
||||
):
|
||||
super().__init__()
|
||||
size = size if size is not None else {"shortest_edge": 18}
|
||||
crop_size = crop_size if crop_size is not None else {"height": 18, "width": 18}
|
||||
|
||||
|
||||
@@ -46,6 +46,7 @@ class ViltImageProcessingTester(unittest.TestCase):
|
||||
image_mean=[0.5, 0.5, 0.5],
|
||||
image_std=[0.5, 0.5, 0.5],
|
||||
):
|
||||
super().__init__()
|
||||
size = size if size is not None else {"shortest_edge": 30}
|
||||
self.parent = parent
|
||||
self.batch_size = batch_size
|
||||
|
||||
@@ -44,6 +44,7 @@ class ViTImageProcessingTester(unittest.TestCase):
|
||||
image_mean=[0.5, 0.5, 0.5],
|
||||
image_std=[0.5, 0.5, 0.5],
|
||||
):
|
||||
super().__init__()
|
||||
size = size if size is not None else {"height": 18, "width": 18}
|
||||
self.parent = parent
|
||||
self.batch_size = batch_size
|
||||
|
||||
@@ -15,6 +15,7 @@
|
||||
|
||||
|
||||
import unittest
|
||||
import warnings
|
||||
|
||||
import numpy as np
|
||||
|
||||
@@ -51,6 +52,7 @@ class VitMatteImageProcessingTester(unittest.TestCase):
|
||||
image_mean=[0.5, 0.5, 0.5],
|
||||
image_std=[0.5, 0.5, 0.5],
|
||||
):
|
||||
super().__init__()
|
||||
self.parent = parent
|
||||
self.batch_size = batch_size
|
||||
self.num_channels = num_channels
|
||||
@@ -197,3 +199,20 @@ class VitMatteImageProcessingTest(ImageProcessingTestMixin, unittest.TestCase):
|
||||
image = np.random.randn(3, 249, 512)
|
||||
images = image_processing.pad_image(image)
|
||||
assert images.shape == (3, 256, 512)
|
||||
|
||||
def test_image_processor_preprocess_arguments(self):
|
||||
# VitMatte requires an additional trimap input for the image processor,
|
||||
# which is why we override the original common test
|
||||
|
||||
for image_processing_class in self.image_processor_list:
|
||||
image_processor = image_processing_class(**self.image_processor_dict)
|
||||
image = self.image_processor_tester.prepare_image_inputs()[0]
|
||||
trimap = np.random.randint(0, 3, size=image.size[::-1])
|
||||
|
||||
with warnings.catch_warnings(record=True) as raised_warnings:
|
||||
warnings.simplefilter("always")
|
||||
image_processor(image, trimaps=trimap, extra_argument=True)
|
||||
|
||||
messages = " ".join([str(w.message) for w in raised_warnings])
|
||||
self.assertGreaterEqual(len(raised_warnings), 1)
|
||||
self.assertIn("extra_argument", messages)
|
||||
|
||||
@@ -50,6 +50,7 @@ class VivitImageProcessingTester(unittest.TestCase):
|
||||
image_std=[0.5, 0.5, 0.5],
|
||||
crop_size=None,
|
||||
):
|
||||
super().__init__()
|
||||
size = size if size is not None else {"shortest_edge": 18}
|
||||
crop_size = crop_size if crop_size is not None else {"height": 18, "width": 18}
|
||||
|
||||
|
||||
@@ -46,6 +46,7 @@ class ZoeDepthImageProcessingTester(unittest.TestCase):
|
||||
image_std=[0.5, 0.5, 0.5],
|
||||
do_pad=False,
|
||||
):
|
||||
super().__init__()
|
||||
size = size if size is not None else {"height": 18, "width": 18}
|
||||
self.parent = parent
|
||||
self.batch_size = batch_size
|
||||
|
||||
@@ -19,6 +19,7 @@ import os
|
||||
import pathlib
|
||||
import tempfile
|
||||
import time
|
||||
import warnings
|
||||
|
||||
import numpy as np
|
||||
import requests
|
||||
@@ -425,8 +426,12 @@ class ImageProcessingTestMixin:
|
||||
)
|
||||
|
||||
def test_image_processor_preprocess_arguments(self):
|
||||
is_tested = False
|
||||
|
||||
for image_processing_class in self.image_processor_list:
|
||||
image_processor = image_processing_class(**self.image_processor_dict)
|
||||
|
||||
# validation done by _valid_processor_keys attribute
|
||||
if hasattr(image_processor, "_valid_processor_keys") and hasattr(image_processor, "preprocess"):
|
||||
preprocess_parameter_names = inspect.getfullargspec(image_processor.preprocess).args
|
||||
preprocess_parameter_names.remove("self")
|
||||
@@ -434,6 +439,28 @@ class ImageProcessingTestMixin:
|
||||
valid_processor_keys = image_processor._valid_processor_keys
|
||||
valid_processor_keys.sort()
|
||||
self.assertEqual(preprocess_parameter_names, valid_processor_keys)
|
||||
is_tested = True
|
||||
|
||||
# validation done by @filter_out_non_signature_kwargs decorator
|
||||
if hasattr(image_processor.preprocess, "_filter_out_non_signature_kwargs"):
|
||||
if hasattr(self.image_processor_tester, "prepare_image_inputs"):
|
||||
inputs = self.image_processor_tester.prepare_image_inputs()
|
||||
elif hasattr(self.image_processor_tester, "prepare_video_inputs"):
|
||||
inputs = self.image_processor_tester.prepare_video_inputs()
|
||||
else:
|
||||
self.skipTest(reason="No valid input preparation method found")
|
||||
|
||||
with warnings.catch_warnings(record=True) as raised_warnings:
|
||||
warnings.simplefilter("always")
|
||||
image_processor(inputs, extra_argument=True)
|
||||
|
||||
messages = " ".join([str(w.message) for w in raised_warnings])
|
||||
self.assertGreaterEqual(len(raised_warnings), 1)
|
||||
self.assertIn("extra_argument", messages)
|
||||
is_tested = True
|
||||
|
||||
if not is_tested:
|
||||
self.skipTest(reason="No validation found for `preprocess` method")
|
||||
|
||||
|
||||
class AnnotationFormatTestMixin:
|
||||
|
||||
Reference in New Issue
Block a user