[processor] clean up mulitmodal tests (#37362)
* clkea up mulitmodal processor tests * fixup * fix tests * fix one last test * forgot
This commit is contained in:
committed by
GitHub
parent
3c39c07939
commit
a563999a02
@@ -31,12 +31,16 @@ from ...image_utils import (
|
||||
PILImageResampling,
|
||||
get_image_size,
|
||||
infer_channel_dimension_format,
|
||||
is_scaled_image,
|
||||
make_flat_list_of_images,
|
||||
to_numpy_array,
|
||||
valid_images,
|
||||
validate_preprocess_arguments,
|
||||
)
|
||||
from ...utils import TensorType
|
||||
from ...utils import TensorType, logging
|
||||
|
||||
|
||||
logger = logging.get_logger(__name__)
|
||||
|
||||
|
||||
def divide_to_patches(image: np.array, patch_size: int, input_data_format) -> List[np.array]:
|
||||
@@ -104,6 +108,12 @@ class AriaImageProcessor(BaseImageProcessor):
|
||||
Whether to split the image.
|
||||
do_convert_rgb (`bool`, *optional*, defaults to `True`):
|
||||
Whether to convert the image to RGB.
|
||||
do_rescale (`bool`, *optional*, defaults to `True`):
|
||||
Whether to rescale the image by the specified scale `rescale_factor`. Can be overridden by `do_rescale` in
|
||||
the `preprocess` method.
|
||||
rescale_factor (`int` or `float`, *optional*, defaults to `1/255`):
|
||||
Scale factor to use if rescaling the image. Can be overridden by `rescale_factor` in the `preprocess`
|
||||
method.
|
||||
do_normalize (`bool`, *optional*, defaults to `True`):
|
||||
Whether to normalize the image.
|
||||
resample (PILImageResampling, *optional*, defaults to `BICUBIC`):
|
||||
@@ -121,6 +131,8 @@ class AriaImageProcessor(BaseImageProcessor):
|
||||
split_resolutions: Optional[List[Tuple[int, int]]] = None,
|
||||
split_image: Optional[bool] = False,
|
||||
do_convert_rgb: Optional[bool] = True,
|
||||
do_rescale: bool = True,
|
||||
rescale_factor: Union[int, float] = 1 / 255,
|
||||
do_normalize: Optional[bool] = True,
|
||||
resample: PILImageResampling = PILImageResampling.BICUBIC,
|
||||
**kwargs,
|
||||
@@ -141,6 +153,8 @@ class AriaImageProcessor(BaseImageProcessor):
|
||||
split_resolutions = [(el[0] * 490, el[1] * 490) for el in split_resolutions]
|
||||
self.split_resolutions = split_resolutions
|
||||
self.do_convert_rgb = do_convert_rgb
|
||||
self.do_rescale = do_rescale
|
||||
self.rescale_factor = rescale_factor
|
||||
self.do_normalize = do_normalize
|
||||
self.resample = resample
|
||||
|
||||
@@ -153,6 +167,8 @@ class AriaImageProcessor(BaseImageProcessor):
|
||||
min_image_size: Optional[int] = None,
|
||||
split_image: Optional[bool] = None,
|
||||
do_convert_rgb: Optional[bool] = None,
|
||||
do_rescale: Optional[bool] = None,
|
||||
rescale_factor: Optional[float] = None,
|
||||
do_normalize: Optional[bool] = None,
|
||||
resample: PILImageResampling = None,
|
||||
return_tensors: Optional[Union[str, TensorType]] = "pt",
|
||||
@@ -177,6 +193,10 @@ class AriaImageProcessor(BaseImageProcessor):
|
||||
Whether to split the image.
|
||||
do_convert_rgb (`bool`, *optional*, defaults to `self.do_convert_rgb` (True)):
|
||||
Whether to convert the image to RGB.
|
||||
do_rescale (`bool`, *optional*, defaults to `self.do_rescale`):
|
||||
Whether to rescale the image.
|
||||
rescale_factor (`float`, *optional*, defaults to `self.rescale_factor`):
|
||||
Rescale factor to rescale the image by if `do_rescale` is set to `True`.
|
||||
do_normalize (`bool`, *optional*, defaults to `self.do_normalize` (True)):
|
||||
Whether to normalize the image.
|
||||
resample (PILImageResampling, *optional*, defaults to `self.resample` (BICUBIC)):
|
||||
@@ -217,6 +237,8 @@ class AriaImageProcessor(BaseImageProcessor):
|
||||
min_image_size = min_image_size if min_image_size is not None else self.min_image_size
|
||||
split_image = split_image if split_image is not None else self.split_image
|
||||
do_convert_rgb = do_convert_rgb if do_convert_rgb is not None else self.do_convert_rgb
|
||||
do_rescale = do_rescale if do_rescale is not None else self.do_rescale
|
||||
rescale_factor = rescale_factor if rescale_factor is not None else self.rescale_factor
|
||||
do_normalize = do_normalize if do_normalize is not None else self.do_normalize
|
||||
resample = resample if resample is not None else self.resample
|
||||
|
||||
@@ -236,6 +258,8 @@ class AriaImageProcessor(BaseImageProcessor):
|
||||
image_mean=image_mean,
|
||||
image_std=image_std,
|
||||
resample=resample,
|
||||
do_rescale=do_rescale,
|
||||
rescale_factor=rescale_factor,
|
||||
)
|
||||
|
||||
if do_convert_rgb:
|
||||
@@ -244,6 +268,12 @@ class AriaImageProcessor(BaseImageProcessor):
|
||||
# All transformations expect numpy arrays.
|
||||
images = [to_numpy_array(image) for image in images]
|
||||
|
||||
if do_rescale and is_scaled_image(images[0]):
|
||||
logger.warning_once(
|
||||
"It looks like you are trying to rescale already rescaled images. If the input"
|
||||
" images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again."
|
||||
)
|
||||
|
||||
if input_data_format is None:
|
||||
# We assume that all images have the same channel dimension format.
|
||||
input_data_format = infer_channel_dimension_format(images[0])
|
||||
@@ -297,9 +327,14 @@ class AriaImageProcessor(BaseImageProcessor):
|
||||
pixel_mask[: new_size[0], : new_size[1]] = 1
|
||||
pixel_masks.append(pixel_mask)
|
||||
|
||||
if do_rescale:
|
||||
crop_image_padded = self.rescale(
|
||||
image=crop_image_padded, scale=rescale_factor, input_data_format=input_data_format
|
||||
)
|
||||
|
||||
if do_normalize:
|
||||
crop_image_padded = self.normalize(
|
||||
crop_image_padded / 255.0,
|
||||
crop_image_padded,
|
||||
self.image_mean,
|
||||
self.image_std,
|
||||
data_format=input_data_format,
|
||||
|
||||
@@ -28,6 +28,7 @@ from ...image_utils import (
|
||||
PILImageResampling,
|
||||
get_image_size,
|
||||
infer_channel_dimension_format,
|
||||
is_scaled_image,
|
||||
make_flat_list_of_images,
|
||||
to_numpy_array,
|
||||
valid_images,
|
||||
@@ -495,6 +496,12 @@ class AriaImageProcessor(BaseImageProcessor):
|
||||
Whether to split the image.
|
||||
do_convert_rgb (`bool`, *optional*, defaults to `True`):
|
||||
Whether to convert the image to RGB.
|
||||
do_rescale (`bool`, *optional*, defaults to `True`):
|
||||
Whether to rescale the image by the specified scale `rescale_factor`. Can be overridden by `do_rescale` in
|
||||
the `preprocess` method.
|
||||
rescale_factor (`int` or `float`, *optional*, defaults to `1/255`):
|
||||
Scale factor to use if rescaling the image. Can be overridden by `rescale_factor` in the `preprocess`
|
||||
method.
|
||||
do_normalize (`bool`, *optional*, defaults to `True`):
|
||||
Whether to normalize the image.
|
||||
resample (PILImageResampling, *optional*, defaults to `BICUBIC`):
|
||||
@@ -512,6 +519,8 @@ class AriaImageProcessor(BaseImageProcessor):
|
||||
split_resolutions: Optional[List[Tuple[int, int]]] = None,
|
||||
split_image: Optional[bool] = False,
|
||||
do_convert_rgb: Optional[bool] = True,
|
||||
do_rescale: bool = True,
|
||||
rescale_factor: Union[int, float] = 1 / 255,
|
||||
do_normalize: Optional[bool] = True,
|
||||
resample: PILImageResampling = PILImageResampling.BICUBIC,
|
||||
**kwargs,
|
||||
@@ -532,6 +541,8 @@ class AriaImageProcessor(BaseImageProcessor):
|
||||
split_resolutions = [(el[0] * 490, el[1] * 490) for el in split_resolutions]
|
||||
self.split_resolutions = split_resolutions
|
||||
self.do_convert_rgb = do_convert_rgb
|
||||
self.do_rescale = do_rescale
|
||||
self.rescale_factor = rescale_factor
|
||||
self.do_normalize = do_normalize
|
||||
self.resample = resample
|
||||
|
||||
@@ -544,6 +555,8 @@ class AriaImageProcessor(BaseImageProcessor):
|
||||
min_image_size: Optional[int] = None,
|
||||
split_image: Optional[bool] = None,
|
||||
do_convert_rgb: Optional[bool] = None,
|
||||
do_rescale: Optional[bool] = None,
|
||||
rescale_factor: Optional[float] = None,
|
||||
do_normalize: Optional[bool] = None,
|
||||
resample: PILImageResampling = None,
|
||||
return_tensors: Optional[Union[str, TensorType]] = "pt",
|
||||
@@ -568,6 +581,10 @@ class AriaImageProcessor(BaseImageProcessor):
|
||||
Whether to split the image.
|
||||
do_convert_rgb (`bool`, *optional*, defaults to `self.do_convert_rgb` (True)):
|
||||
Whether to convert the image to RGB.
|
||||
do_rescale (`bool`, *optional*, defaults to `self.do_rescale`):
|
||||
Whether to rescale the image.
|
||||
rescale_factor (`float`, *optional*, defaults to `self.rescale_factor`):
|
||||
Rescale factor to rescale the image by if `do_rescale` is set to `True`.
|
||||
do_normalize (`bool`, *optional*, defaults to `self.do_normalize` (True)):
|
||||
Whether to normalize the image.
|
||||
resample (PILImageResampling, *optional*, defaults to `self.resample` (BICUBIC)):
|
||||
@@ -608,6 +625,8 @@ class AriaImageProcessor(BaseImageProcessor):
|
||||
min_image_size = min_image_size if min_image_size is not None else self.min_image_size
|
||||
split_image = split_image if split_image is not None else self.split_image
|
||||
do_convert_rgb = do_convert_rgb if do_convert_rgb is not None else self.do_convert_rgb
|
||||
do_rescale = do_rescale if do_rescale is not None else self.do_rescale
|
||||
rescale_factor = rescale_factor if rescale_factor is not None else self.rescale_factor
|
||||
do_normalize = do_normalize if do_normalize is not None else self.do_normalize
|
||||
resample = resample if resample is not None else self.resample
|
||||
|
||||
@@ -627,6 +646,8 @@ class AriaImageProcessor(BaseImageProcessor):
|
||||
image_mean=image_mean,
|
||||
image_std=image_std,
|
||||
resample=resample,
|
||||
do_rescale=do_rescale,
|
||||
rescale_factor=rescale_factor,
|
||||
)
|
||||
|
||||
if do_convert_rgb:
|
||||
@@ -635,6 +656,12 @@ class AriaImageProcessor(BaseImageProcessor):
|
||||
# All transformations expect numpy arrays.
|
||||
images = [to_numpy_array(image) for image in images]
|
||||
|
||||
if do_rescale and is_scaled_image(images[0]):
|
||||
logger.warning_once(
|
||||
"It looks like you are trying to rescale already rescaled images. If the input"
|
||||
" images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again."
|
||||
)
|
||||
|
||||
if input_data_format is None:
|
||||
# We assume that all images have the same channel dimension format.
|
||||
input_data_format = infer_channel_dimension_format(images[0])
|
||||
@@ -688,9 +715,14 @@ class AriaImageProcessor(BaseImageProcessor):
|
||||
pixel_mask[: new_size[0], : new_size[1]] = 1
|
||||
pixel_masks.append(pixel_mask)
|
||||
|
||||
if do_rescale:
|
||||
crop_image_padded = self.rescale(
|
||||
image=crop_image_padded, scale=rescale_factor, input_data_format=input_data_format
|
||||
)
|
||||
|
||||
if do_normalize:
|
||||
crop_image_padded = self.normalize(
|
||||
crop_image_padded / 255.0,
|
||||
crop_image_padded,
|
||||
self.image_mean,
|
||||
self.image_std,
|
||||
data_format=input_data_format,
|
||||
|
||||
Reference in New Issue
Block a user