[processor] clean up mulitmodal tests (#37362)

* clkea up mulitmodal processor tests

* fixup

* fix tests

* fix one last test

* forgot
This commit is contained in:
Raushan Turganbay
2025-04-11 13:32:19 +02:00
committed by GitHub
parent 3c39c07939
commit a563999a02
30 changed files with 304 additions and 817 deletions

View File

@@ -31,12 +31,16 @@ from ...image_utils import (
PILImageResampling,
get_image_size,
infer_channel_dimension_format,
is_scaled_image,
make_flat_list_of_images,
to_numpy_array,
valid_images,
validate_preprocess_arguments,
)
from ...utils import TensorType
from ...utils import TensorType, logging
logger = logging.get_logger(__name__)
def divide_to_patches(image: np.array, patch_size: int, input_data_format) -> List[np.array]:
@@ -104,6 +108,12 @@ class AriaImageProcessor(BaseImageProcessor):
Whether to split the image.
do_convert_rgb (`bool`, *optional*, defaults to `True`):
Whether to convert the image to RGB.
do_rescale (`bool`, *optional*, defaults to `True`):
Whether to rescale the image by the specified scale `rescale_factor`. Can be overridden by `do_rescale` in
the `preprocess` method.
rescale_factor (`int` or `float`, *optional*, defaults to `1/255`):
Scale factor to use if rescaling the image. Can be overridden by `rescale_factor` in the `preprocess`
method.
do_normalize (`bool`, *optional*, defaults to `True`):
Whether to normalize the image.
resample (PILImageResampling, *optional*, defaults to `BICUBIC`):
@@ -121,6 +131,8 @@ class AriaImageProcessor(BaseImageProcessor):
split_resolutions: Optional[List[Tuple[int, int]]] = None,
split_image: Optional[bool] = False,
do_convert_rgb: Optional[bool] = True,
do_rescale: bool = True,
rescale_factor: Union[int, float] = 1 / 255,
do_normalize: Optional[bool] = True,
resample: PILImageResampling = PILImageResampling.BICUBIC,
**kwargs,
@@ -141,6 +153,8 @@ class AriaImageProcessor(BaseImageProcessor):
split_resolutions = [(el[0] * 490, el[1] * 490) for el in split_resolutions]
self.split_resolutions = split_resolutions
self.do_convert_rgb = do_convert_rgb
self.do_rescale = do_rescale
self.rescale_factor = rescale_factor
self.do_normalize = do_normalize
self.resample = resample
@@ -153,6 +167,8 @@ class AriaImageProcessor(BaseImageProcessor):
min_image_size: Optional[int] = None,
split_image: Optional[bool] = None,
do_convert_rgb: Optional[bool] = None,
do_rescale: Optional[bool] = None,
rescale_factor: Optional[float] = None,
do_normalize: Optional[bool] = None,
resample: PILImageResampling = None,
return_tensors: Optional[Union[str, TensorType]] = "pt",
@@ -177,6 +193,10 @@ class AriaImageProcessor(BaseImageProcessor):
Whether to split the image.
do_convert_rgb (`bool`, *optional*, defaults to `self.do_convert_rgb` (True)):
Whether to convert the image to RGB.
do_rescale (`bool`, *optional*, defaults to `self.do_rescale`):
Whether to rescale the image.
rescale_factor (`float`, *optional*, defaults to `self.rescale_factor`):
Rescale factor to rescale the image by if `do_rescale` is set to `True`.
do_normalize (`bool`, *optional*, defaults to `self.do_normalize` (True)):
Whether to normalize the image.
resample (PILImageResampling, *optional*, defaults to `self.resample` (BICUBIC)):
@@ -217,6 +237,8 @@ class AriaImageProcessor(BaseImageProcessor):
min_image_size = min_image_size if min_image_size is not None else self.min_image_size
split_image = split_image if split_image is not None else self.split_image
do_convert_rgb = do_convert_rgb if do_convert_rgb is not None else self.do_convert_rgb
do_rescale = do_rescale if do_rescale is not None else self.do_rescale
rescale_factor = rescale_factor if rescale_factor is not None else self.rescale_factor
do_normalize = do_normalize if do_normalize is not None else self.do_normalize
resample = resample if resample is not None else self.resample
@@ -236,6 +258,8 @@ class AriaImageProcessor(BaseImageProcessor):
image_mean=image_mean,
image_std=image_std,
resample=resample,
do_rescale=do_rescale,
rescale_factor=rescale_factor,
)
if do_convert_rgb:
@@ -244,6 +268,12 @@ class AriaImageProcessor(BaseImageProcessor):
# All transformations expect numpy arrays.
images = [to_numpy_array(image) for image in images]
if do_rescale and is_scaled_image(images[0]):
logger.warning_once(
"It looks like you are trying to rescale already rescaled images. If the input"
" images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again."
)
if input_data_format is None:
# We assume that all images have the same channel dimension format.
input_data_format = infer_channel_dimension_format(images[0])
@@ -297,9 +327,14 @@ class AriaImageProcessor(BaseImageProcessor):
pixel_mask[: new_size[0], : new_size[1]] = 1
pixel_masks.append(pixel_mask)
if do_rescale:
crop_image_padded = self.rescale(
image=crop_image_padded, scale=rescale_factor, input_data_format=input_data_format
)
if do_normalize:
crop_image_padded = self.normalize(
crop_image_padded / 255.0,
crop_image_padded,
self.image_mean,
self.image_std,
data_format=input_data_format,

View File

@@ -28,6 +28,7 @@ from ...image_utils import (
PILImageResampling,
get_image_size,
infer_channel_dimension_format,
is_scaled_image,
make_flat_list_of_images,
to_numpy_array,
valid_images,
@@ -495,6 +496,12 @@ class AriaImageProcessor(BaseImageProcessor):
Whether to split the image.
do_convert_rgb (`bool`, *optional*, defaults to `True`):
Whether to convert the image to RGB.
do_rescale (`bool`, *optional*, defaults to `True`):
Whether to rescale the image by the specified scale `rescale_factor`. Can be overridden by `do_rescale` in
the `preprocess` method.
rescale_factor (`int` or `float`, *optional*, defaults to `1/255`):
Scale factor to use if rescaling the image. Can be overridden by `rescale_factor` in the `preprocess`
method.
do_normalize (`bool`, *optional*, defaults to `True`):
Whether to normalize the image.
resample (PILImageResampling, *optional*, defaults to `BICUBIC`):
@@ -512,6 +519,8 @@ class AriaImageProcessor(BaseImageProcessor):
split_resolutions: Optional[List[Tuple[int, int]]] = None,
split_image: Optional[bool] = False,
do_convert_rgb: Optional[bool] = True,
do_rescale: bool = True,
rescale_factor: Union[int, float] = 1 / 255,
do_normalize: Optional[bool] = True,
resample: PILImageResampling = PILImageResampling.BICUBIC,
**kwargs,
@@ -532,6 +541,8 @@ class AriaImageProcessor(BaseImageProcessor):
split_resolutions = [(el[0] * 490, el[1] * 490) for el in split_resolutions]
self.split_resolutions = split_resolutions
self.do_convert_rgb = do_convert_rgb
self.do_rescale = do_rescale
self.rescale_factor = rescale_factor
self.do_normalize = do_normalize
self.resample = resample
@@ -544,6 +555,8 @@ class AriaImageProcessor(BaseImageProcessor):
min_image_size: Optional[int] = None,
split_image: Optional[bool] = None,
do_convert_rgb: Optional[bool] = None,
do_rescale: Optional[bool] = None,
rescale_factor: Optional[float] = None,
do_normalize: Optional[bool] = None,
resample: PILImageResampling = None,
return_tensors: Optional[Union[str, TensorType]] = "pt",
@@ -568,6 +581,10 @@ class AriaImageProcessor(BaseImageProcessor):
Whether to split the image.
do_convert_rgb (`bool`, *optional*, defaults to `self.do_convert_rgb` (True)):
Whether to convert the image to RGB.
do_rescale (`bool`, *optional*, defaults to `self.do_rescale`):
Whether to rescale the image.
rescale_factor (`float`, *optional*, defaults to `self.rescale_factor`):
Rescale factor to rescale the image by if `do_rescale` is set to `True`.
do_normalize (`bool`, *optional*, defaults to `self.do_normalize` (True)):
Whether to normalize the image.
resample (PILImageResampling, *optional*, defaults to `self.resample` (BICUBIC)):
@@ -608,6 +625,8 @@ class AriaImageProcessor(BaseImageProcessor):
min_image_size = min_image_size if min_image_size is not None else self.min_image_size
split_image = split_image if split_image is not None else self.split_image
do_convert_rgb = do_convert_rgb if do_convert_rgb is not None else self.do_convert_rgb
do_rescale = do_rescale if do_rescale is not None else self.do_rescale
rescale_factor = rescale_factor if rescale_factor is not None else self.rescale_factor
do_normalize = do_normalize if do_normalize is not None else self.do_normalize
resample = resample if resample is not None else self.resample
@@ -627,6 +646,8 @@ class AriaImageProcessor(BaseImageProcessor):
image_mean=image_mean,
image_std=image_std,
resample=resample,
do_rescale=do_rescale,
rescale_factor=rescale_factor,
)
if do_convert_rgb:
@@ -635,6 +656,12 @@ class AriaImageProcessor(BaseImageProcessor):
# All transformations expect numpy arrays.
images = [to_numpy_array(image) for image in images]
if do_rescale and is_scaled_image(images[0]):
logger.warning_once(
"It looks like you are trying to rescale already rescaled images. If the input"
" images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again."
)
if input_data_format is None:
# We assume that all images have the same channel dimension format.
input_data_format = infer_channel_dimension_format(images[0])
@@ -688,9 +715,14 @@ class AriaImageProcessor(BaseImageProcessor):
pixel_mask[: new_size[0], : new_size[1]] = 1
pixel_masks.append(pixel_mask)
if do_rescale:
crop_image_padded = self.rescale(
image=crop_image_padded, scale=rescale_factor, input_data_format=input_data_format
)
if do_normalize:
crop_image_padded = self.normalize(
crop_image_padded / 255.0,
crop_image_padded,
self.image_mean,
self.image_std,
data_format=input_data_format,