From 7e35f37071655744b94c583edbb2f7fc209e1b7d Mon Sep 17 00:00:00 2001 From: rjenc29 Date: Mon, 11 Dec 2023 15:51:51 +0000 Subject: [PATCH] Fix a couple of typos and add an illustrative test (#26941) * fix a typo and add an illustrative test * appease black * reduce code duplication and add Annotion type back with a pending deprecation warning * remove unused code * change warning type * black formatting fix * change enum deprecation approach to support 3.8 and earlier * add stacklevel * fix black issue * fix ruff issues * fix ruff issues * move tests to own mixin * include yolos * fix black formatting issue * fix black formatting issue * use logger instead of warnings and include target version for deprecation --- src/transformers/image_utils.py | 57 +++++++++++++ .../image_processing_conditional_detr.py | 49 ++++------- .../image_processing_deformable_detr.py | 50 ++++-------- .../models/deta/image_processing_deta.py | 52 ++++-------- .../models/detr/image_processing_detr.py | 50 ++++-------- .../models/yolos/image_processing_yolos.py | 54 ++++--------- .../test_image_processing_conditional_detr.py | 4 +- .../test_image_processing_deformable_detr.py | 4 +- .../models/deta/test_image_processing_deta.py | 4 +- .../models/detr/test_image_processing_detr.py | 61 +++++++++++++- .../yolos/test_image_processing_yolos.py | 4 +- tests/test_image_processing_common.py | 81 +++++++++++++++++++ 12 files changed, 279 insertions(+), 191 deletions(-) diff --git a/src/transformers/image_utils.py b/src/transformers/image_utils.py index bfb88d03d3..5d280bf5e2 100644 --- a/src/transformers/image_utils.py +++ b/src/transformers/image_utils.py @@ -15,6 +15,7 @@ import base64 import os +from enum import EnumMeta from io import BytesIO from typing import TYPE_CHECKING, Dict, Iterable, List, Optional, Tuple, Union @@ -29,6 +30,7 @@ from .utils import ( is_torch_available, is_torch_tensor, is_vision_available, + logging, requires_backends, to_numpy, ) @@ -56,6 +58,9 @@ if TYPE_CHECKING: import torch +logger = logging.get_logger(__name__) + + ImageInput = Union[ "PIL.Image.Image", np.ndarray, "torch.Tensor", List["PIL.Image.Image"], List[np.ndarray], List["torch.Tensor"] ] # noqa @@ -66,6 +71,28 @@ class ChannelDimension(ExplicitEnum): LAST = "channels_last" +class AnnotationFormat(ExplicitEnum): + COCO_DETECTION = "coco_detection" + COCO_PANOPTIC = "coco_panoptic" + + +class DeprecatedEnumMeta(EnumMeta): + def __init__(cls, *args, **kwargs): + super().__init__(*args, **kwargs) + logger.warning_once( + f"`{cls.__name__}` is deprecated and will be removed in v4.38. " + f"Please use `transformers.image_utils.AnnotationFormat` instead." + ) + + +class AnnotionFormat(ExplicitEnum, metaclass=DeprecatedEnumMeta): + COCO_DETECTION = AnnotationFormat.COCO_DETECTION.value + COCO_PANOPTIC = AnnotationFormat.COCO_PANOPTIC.value + + +AnnotationType = Dict[str, Union[int, str, List[Dict]]] + + def is_pil_image(img): return is_vision_available() and isinstance(img, PIL.Image.Image) @@ -664,3 +691,33 @@ class ImageFeatureExtractionMixin: return image.rotate( angle, resample=resample, expand=expand, center=center, translate=translate, fillcolor=fillcolor ) + + +def promote_annotation_format(annotation_format: Union[AnnotionFormat, AnnotationFormat]) -> AnnotationFormat: + # can be removed when `AnnotionFormat` is fully deprecated + return AnnotationFormat(annotation_format.value) + + +def validate_annotations( + annotation_format: AnnotationFormat, + supported_annotation_formats: Tuple[AnnotationFormat, ...], + annotations: List[Dict], +) -> None: + if promote_annotation_format(annotation_format) not in supported_annotation_formats: + raise ValueError(f"Unsupported annotation format: {format} must be one of {supported_annotation_formats}") + + if promote_annotation_format(annotation_format) is AnnotationFormat.COCO_DETECTION: + if not valid_coco_detection_annotations(annotations): + raise ValueError( + "Invalid COCO detection annotations. Annotations must a dict (single image) or list of dicts " + "(batch of images) with the following keys: `image_id` and `annotations`, with the latter " + "being a list of annotations in the COCO format." + ) + + if promote_annotation_format(annotation_format) is AnnotationFormat.COCO_PANOPTIC: + if not valid_coco_panoptic_annotations(annotations): + raise ValueError( + "Invalid COCO panoptic annotations. Annotations must a dict (single image) or list of dicts " + "(batch of images) with the following keys: `image_id`, `file_name` and `segments_info`, with " + "the latter being a list of annotations in the COCO format." + ) diff --git a/src/transformers/models/conditional_detr/image_processing_conditional_detr.py b/src/transformers/models/conditional_detr/image_processing_conditional_detr.py index 919e8b9da8..3ec0696430 100644 --- a/src/transformers/models/conditional_detr/image_processing_conditional_detr.py +++ b/src/transformers/models/conditional_detr/image_processing_conditional_detr.py @@ -37,6 +37,9 @@ from ...image_transforms import ( from ...image_utils import ( IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD, + AnnotationFormat, + AnnotationType, + AnnotionFormat, # noqa: F401 ChannelDimension, ImageInput, PILImageResampling, @@ -45,12 +48,10 @@ from ...image_utils import ( is_scaled_image, make_list_of_images, to_numpy_array, - valid_coco_detection_annotations, - valid_coco_panoptic_annotations, valid_images, + validate_annotations, ) from ...utils import ( - ExplicitEnum, TensorType, is_flax_available, is_jax_tensor, @@ -80,15 +81,8 @@ if is_scipy_available(): logger = logging.get_logger(__name__) # pylint: disable=invalid-name -AnnotationType = Dict[str, Union[int, str, List[Dict]]] - -class AnnotionFormat(ExplicitEnum): - COCO_DETECTION = "coco_detection" - COCO_PANOPTIC = "coco_panoptic" - - -SUPPORTED_ANNOTATION_FORMATS = (AnnotionFormat.COCO_DETECTION, AnnotionFormat.COCO_PANOPTIC) +SUPPORTED_ANNOTATION_FORMATS = (AnnotationFormat.COCO_DETECTION, AnnotationFormat.COCO_PANOPTIC) # Copied from transformers.models.detr.image_processing_detr.get_size_with_aspect_ratio @@ -802,7 +796,7 @@ class ConditionalDetrImageProcessor(BaseImageProcessor): # Copied from transformers.models.detr.image_processing_detr.DetrImageProcessor.__init__ def __init__( self, - format: Union[str, AnnotionFormat] = AnnotionFormat.COCO_DETECTION, + format: Union[str, AnnotationFormat] = AnnotationFormat.COCO_DETECTION, do_resize: bool = True, size: Dict[str, int] = None, resample: PILImageResampling = PILImageResampling.BILINEAR, @@ -861,7 +855,7 @@ class ConditionalDetrImageProcessor(BaseImageProcessor): self, image: np.ndarray, target: Dict, - format: Optional[AnnotionFormat] = None, + format: Optional[AnnotationFormat] = None, return_segmentation_masks: bool = None, masks_path: Optional[Union[str, pathlib.Path]] = None, input_data_format: Optional[Union[str, ChannelDimension]] = None, @@ -871,12 +865,12 @@ class ConditionalDetrImageProcessor(BaseImageProcessor): """ format = format if format is not None else self.format - if format == AnnotionFormat.COCO_DETECTION: + if format == AnnotationFormat.COCO_DETECTION: return_segmentation_masks = False if return_segmentation_masks is None else return_segmentation_masks target = prepare_coco_detection_annotation( image, target, return_segmentation_masks, input_data_format=input_data_format ) - elif format == AnnotionFormat.COCO_PANOPTIC: + elif format == AnnotationFormat.COCO_PANOPTIC: return_segmentation_masks = True if return_segmentation_masks is None else return_segmentation_masks target = prepare_coco_panoptic_annotation( image, @@ -1118,7 +1112,7 @@ class ConditionalDetrImageProcessor(BaseImageProcessor): image_mean: Optional[Union[float, List[float]]] = None, image_std: Optional[Union[float, List[float]]] = None, do_pad: Optional[bool] = None, - format: Optional[Union[str, AnnotionFormat]] = None, + format: Optional[Union[str, AnnotationFormat]] = None, return_tensors: Optional[Union[TensorType, str]] = None, data_format: Union[str, ChannelDimension] = ChannelDimension.FIRST, input_data_format: Optional[Union[str, ChannelDimension]] = None, @@ -1164,7 +1158,7 @@ class ConditionalDetrImageProcessor(BaseImageProcessor): Standard deviation to use when normalizing the image. do_pad (`bool`, *optional*, defaults to self.do_pad): Whether to pad the image. - format (`str` or `AnnotionFormat`, *optional*, defaults to self.format): + format (`str` or `AnnotationFormat`, *optional*, defaults to self.format): Format of the annotations. return_tensors (`str` or `TensorType`, *optional*, defaults to self.return_tensors): Type of tensors to return. If `None`, will return the list of images. @@ -1231,28 +1225,13 @@ class ConditionalDetrImageProcessor(BaseImageProcessor): "torch.Tensor, tf.Tensor or jax.ndarray." ) - format = AnnotionFormat(format) + format = AnnotationFormat(format) if annotations is not None: - if format == AnnotionFormat.COCO_DETECTION and not valid_coco_detection_annotations(annotations): - raise ValueError( - "Invalid COCO detection annotations. Annotations must a dict (single image) of list of dicts " - "(batch of images) with the following keys: `image_id` and `annotations`, with the latter " - "being a list of annotations in the COCO format." - ) - elif format == AnnotionFormat.COCO_PANOPTIC and not valid_coco_panoptic_annotations(annotations): - raise ValueError( - "Invalid COCO panoptic annotations. Annotations must a dict (single image) of list of dicts " - "(batch of images) with the following keys: `image_id`, `file_name` and `segments_info`, with " - "the latter being a list of annotations in the COCO format." - ) - elif format not in SUPPORTED_ANNOTATION_FORMATS: - raise ValueError( - f"Unsupported annotation format: {format} must be one of {SUPPORTED_ANNOTATION_FORMATS}" - ) + validate_annotations(format, SUPPORTED_ANNOTATION_FORMATS, annotations) if ( masks_path is not None - and format == AnnotionFormat.COCO_PANOPTIC + and format == AnnotationFormat.COCO_PANOPTIC and not isinstance(masks_path, (pathlib.Path, str)) ): raise ValueError( diff --git a/src/transformers/models/deformable_detr/image_processing_deformable_detr.py b/src/transformers/models/deformable_detr/image_processing_deformable_detr.py index 69a4cb0c67..00cf8eaecf 100644 --- a/src/transformers/models/deformable_detr/image_processing_deformable_detr.py +++ b/src/transformers/models/deformable_detr/image_processing_deformable_detr.py @@ -37,6 +37,9 @@ from ...image_transforms import ( from ...image_utils import ( IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD, + AnnotationFormat, + AnnotationType, + AnnotionFormat, # noqa: F401 ChannelDimension, ImageInput, PILImageResampling, @@ -45,12 +48,10 @@ from ...image_utils import ( is_scaled_image, make_list_of_images, to_numpy_array, - valid_coco_detection_annotations, - valid_coco_panoptic_annotations, valid_images, + validate_annotations, ) from ...utils import ( - ExplicitEnum, TensorType, is_flax_available, is_jax_tensor, @@ -79,15 +80,7 @@ if is_scipy_available(): logger = logging.get_logger(__name__) # pylint: disable=invalid-name -AnnotationType = Dict[str, Union[int, str, List[Dict]]] - - -class AnnotionFormat(ExplicitEnum): - COCO_DETECTION = "coco_detection" - COCO_PANOPTIC = "coco_panoptic" - - -SUPPORTED_ANNOTATION_FORMATS = (AnnotionFormat.COCO_DETECTION, AnnotionFormat.COCO_PANOPTIC) +SUPPORTED_ANNOTATION_FORMATS = (AnnotationFormat.COCO_DETECTION, AnnotationFormat.COCO_PANOPTIC) # Copied from transformers.models.detr.image_processing_detr.get_size_with_aspect_ratio @@ -801,7 +794,7 @@ class DeformableDetrImageProcessor(BaseImageProcessor): # Copied from transformers.models.detr.image_processing_detr.DetrImageProcessor.__init__ def __init__( self, - format: Union[str, AnnotionFormat] = AnnotionFormat.COCO_DETECTION, + format: Union[str, AnnotationFormat] = AnnotationFormat.COCO_DETECTION, do_resize: bool = True, size: Dict[str, int] = None, resample: PILImageResampling = PILImageResampling.BILINEAR, @@ -860,7 +853,7 @@ class DeformableDetrImageProcessor(BaseImageProcessor): self, image: np.ndarray, target: Dict, - format: Optional[AnnotionFormat] = None, + format: Optional[AnnotationFormat] = None, return_segmentation_masks: bool = None, masks_path: Optional[Union[str, pathlib.Path]] = None, input_data_format: Optional[Union[str, ChannelDimension]] = None, @@ -870,12 +863,12 @@ class DeformableDetrImageProcessor(BaseImageProcessor): """ format = format if format is not None else self.format - if format == AnnotionFormat.COCO_DETECTION: + if format == AnnotationFormat.COCO_DETECTION: return_segmentation_masks = False if return_segmentation_masks is None else return_segmentation_masks target = prepare_coco_detection_annotation( image, target, return_segmentation_masks, input_data_format=input_data_format ) - elif format == AnnotionFormat.COCO_PANOPTIC: + elif format == AnnotationFormat.COCO_PANOPTIC: return_segmentation_masks = True if return_segmentation_masks is None else return_segmentation_masks target = prepare_coco_panoptic_annotation( image, @@ -1117,7 +1110,7 @@ class DeformableDetrImageProcessor(BaseImageProcessor): image_mean: Optional[Union[float, List[float]]] = None, image_std: Optional[Union[float, List[float]]] = None, do_pad: Optional[bool] = None, - format: Optional[Union[str, AnnotionFormat]] = None, + format: Optional[Union[str, AnnotationFormat]] = None, return_tensors: Optional[Union[TensorType, str]] = None, data_format: Union[str, ChannelDimension] = ChannelDimension.FIRST, input_data_format: Optional[Union[str, ChannelDimension]] = None, @@ -1163,7 +1156,7 @@ class DeformableDetrImageProcessor(BaseImageProcessor): Standard deviation to use when normalizing the image. do_pad (`bool`, *optional*, defaults to self.do_pad): Whether to pad the image. - format (`str` or `AnnotionFormat`, *optional*, defaults to self.format): + format (`str` or `AnnotationFormat`, *optional*, defaults to self.format): Format of the annotations. return_tensors (`str` or `TensorType`, *optional*, defaults to self.return_tensors): Type of tensors to return. If `None`, will return the list of images. @@ -1230,28 +1223,13 @@ class DeformableDetrImageProcessor(BaseImageProcessor): "torch.Tensor, tf.Tensor or jax.ndarray." ) - format = AnnotionFormat(format) + format = AnnotationFormat(format) if annotations is not None: - if format == AnnotionFormat.COCO_DETECTION and not valid_coco_detection_annotations(annotations): - raise ValueError( - "Invalid COCO detection annotations. Annotations must a dict (single image) of list of dicts " - "(batch of images) with the following keys: `image_id` and `annotations`, with the latter " - "being a list of annotations in the COCO format." - ) - elif format == AnnotionFormat.COCO_PANOPTIC and not valid_coco_panoptic_annotations(annotations): - raise ValueError( - "Invalid COCO panoptic annotations. Annotations must a dict (single image) of list of dicts " - "(batch of images) with the following keys: `image_id`, `file_name` and `segments_info`, with " - "the latter being a list of annotations in the COCO format." - ) - elif format not in SUPPORTED_ANNOTATION_FORMATS: - raise ValueError( - f"Unsupported annotation format: {format} must be one of {SUPPORTED_ANNOTATION_FORMATS}" - ) + validate_annotations(format, SUPPORTED_ANNOTATION_FORMATS, annotations) if ( masks_path is not None - and format == AnnotionFormat.COCO_PANOPTIC + and format == AnnotationFormat.COCO_PANOPTIC and not isinstance(masks_path, (pathlib.Path, str)) ): raise ValueError( diff --git a/src/transformers/models/deta/image_processing_deta.py b/src/transformers/models/deta/image_processing_deta.py index 619139dae5..1e3ece8e32 100644 --- a/src/transformers/models/deta/image_processing_deta.py +++ b/src/transformers/models/deta/image_processing_deta.py @@ -34,6 +34,8 @@ from ...image_transforms import ( from ...image_utils import ( IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD, + AnnotationFormat, + AnnotionFormat, # noqa: F401 ChannelDimension, ImageInput, PILImageResampling, @@ -42,9 +44,8 @@ from ...image_utils import ( is_batched, is_scaled_image, to_numpy_array, - valid_coco_detection_annotations, - valid_coco_panoptic_annotations, valid_images, + validate_annotations, ) from ...utils import ( is_flax_available, @@ -57,7 +58,7 @@ from ...utils import ( is_vision_available, logging, ) -from ...utils.generic import ExplicitEnum, TensorType +from ...utils.generic import TensorType if is_torch_available(): @@ -73,13 +74,7 @@ if is_vision_available(): logger = logging.get_logger(__name__) # pylint: disable=invalid-name - -class AnnotionFormat(ExplicitEnum): - COCO_DETECTION = "coco_detection" - COCO_PANOPTIC = "coco_panoptic" - - -SUPPORTED_ANNOTATION_FORMATS = (AnnotionFormat.COCO_DETECTION, AnnotionFormat.COCO_PANOPTIC) +SUPPORTED_ANNOTATION_FORMATS = (AnnotationFormat.COCO_DETECTION, AnnotationFormat.COCO_PANOPTIC) # Copied from transformers.models.detr.image_processing_detr.get_size_with_aspect_ratio @@ -507,7 +502,7 @@ class DetaImageProcessor(BaseImageProcessor): def __init__( self, - format: Union[str, AnnotionFormat] = AnnotionFormat.COCO_DETECTION, + format: Union[str, AnnotationFormat] = AnnotationFormat.COCO_DETECTION, do_resize: bool = True, size: Dict[str, int] = None, resample: PILImageResampling = PILImageResampling.BILINEAR, @@ -542,7 +537,7 @@ class DetaImageProcessor(BaseImageProcessor): self, image: np.ndarray, target: Dict, - format: Optional[AnnotionFormat] = None, + format: Optional[AnnotationFormat] = None, return_segmentation_masks: bool = None, masks_path: Optional[Union[str, pathlib.Path]] = None, input_data_format: Optional[Union[str, ChannelDimension]] = None, @@ -552,12 +547,12 @@ class DetaImageProcessor(BaseImageProcessor): """ format = format if format is not None else self.format - if format == AnnotionFormat.COCO_DETECTION: + if format == AnnotationFormat.COCO_DETECTION: return_segmentation_masks = False if return_segmentation_masks is None else return_segmentation_masks target = prepare_coco_detection_annotation( image, target, return_segmentation_masks, input_data_format=input_data_format ) - elif format == AnnotionFormat.COCO_PANOPTIC: + elif format == AnnotationFormat.COCO_PANOPTIC: return_segmentation_masks = True if return_segmentation_masks is None else return_segmentation_masks target = prepare_coco_panoptic_annotation( image, @@ -789,7 +784,7 @@ class DetaImageProcessor(BaseImageProcessor): image_mean: Optional[Union[float, List[float]]] = None, image_std: Optional[Union[float, List[float]]] = None, do_pad: Optional[bool] = None, - format: Optional[Union[str, AnnotionFormat]] = None, + format: Optional[Union[str, AnnotationFormat]] = None, return_tensors: Optional[Union[TensorType, str]] = None, data_format: Union[str, ChannelDimension] = ChannelDimension.FIRST, input_data_format: Optional[Union[str, ChannelDimension]] = None, @@ -803,12 +798,12 @@ class DetaImageProcessor(BaseImageProcessor): Image or batch of images to preprocess. Expects a single or batch of images with pixel values ranging from 0 to 255. If passing in images with pixel values between 0 and 1, set `do_rescale=False`. annotations (`List[Dict]` or `List[List[Dict]]`, *optional*): - List of annotations associated with the image or batch of images. If annotionation is for object + List of annotations associated with the image or batch of images. If annotation is for object detection, the annotations should be a dictionary with the following keys: - "image_id" (`int`): The image id. - "annotations" (`List[Dict]`): List of annotations for an image. Each annotation should be a dictionary. An image can have no annotations, in which case the list should be empty. - If annotionation is for segmentation, the annotations should be a dictionary with the following keys: + If annotation is for segmentation, the annotations should be a dictionary with the following keys: - "image_id" (`int`): The image id. - "segments_info" (`List[Dict]`): List of segments for an image. Each segment should be a dictionary. An image can have no segments, in which case the list should be empty. @@ -835,7 +830,7 @@ class DetaImageProcessor(BaseImageProcessor): Standard deviation to use when normalizing the image. do_pad (`bool`, *optional*, defaults to self.do_pad): Whether to pad the image. - format (`str` or `AnnotionFormat`, *optional*, defaults to self.format): + format (`str` or `AnnotationFormat`, *optional*, defaults to self.format): Format of the annotations. return_tensors (`str` or `TensorType`, *optional*, defaults to self.return_tensors): Type of tensors to return. If `None`, will return the list of images. @@ -894,28 +889,13 @@ class DetaImageProcessor(BaseImageProcessor): "torch.Tensor, tf.Tensor or jax.ndarray." ) - format = AnnotionFormat(format) + format = AnnotationFormat(format) if annotations is not None: - if format == AnnotionFormat.COCO_DETECTION and not valid_coco_detection_annotations(annotations): - raise ValueError( - "Invalid COCO detection annotations. Annotations must a dict (single image) of list of dicts " - "(batch of images) with the following keys: `image_id` and `annotations`, with the latter " - "being a list of annotations in the COCO format." - ) - elif format == AnnotionFormat.COCO_PANOPTIC and not valid_coco_panoptic_annotations(annotations): - raise ValueError( - "Invalid COCO panoptic annotations. Annotations must a dict (single image) of list of dicts " - "(batch of images) with the following keys: `image_id`, `file_name` and `segments_info`, with " - "the latter being a list of annotations in the COCO format." - ) - elif format not in SUPPORTED_ANNOTATION_FORMATS: - raise ValueError( - f"Unsupported annotation format: {format} must be one of {SUPPORTED_ANNOTATION_FORMATS}" - ) + validate_annotations(format, SUPPORTED_ANNOTATION_FORMATS, annotations) if ( masks_path is not None - and format == AnnotionFormat.COCO_PANOPTIC + and format == AnnotationFormat.COCO_PANOPTIC and not isinstance(masks_path, (pathlib.Path, str)) ): raise ValueError( diff --git a/src/transformers/models/detr/image_processing_detr.py b/src/transformers/models/detr/image_processing_detr.py index c4e4b97c63..8b64b9c4d9 100644 --- a/src/transformers/models/detr/image_processing_detr.py +++ b/src/transformers/models/detr/image_processing_detr.py @@ -36,6 +36,9 @@ from ...image_transforms import ( from ...image_utils import ( IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD, + AnnotationFormat, + AnnotationType, + AnnotionFormat, # noqa: F401 ChannelDimension, ImageInput, PILImageResampling, @@ -44,12 +47,10 @@ from ...image_utils import ( is_scaled_image, make_list_of_images, to_numpy_array, - valid_coco_detection_annotations, - valid_coco_panoptic_annotations, valid_images, + validate_annotations, ) from ...utils import ( - ExplicitEnum, TensorType, is_flax_available, is_jax_tensor, @@ -79,15 +80,7 @@ if is_scipy_available(): logger = logging.get_logger(__name__) # pylint: disable=invalid-name -AnnotationType = Dict[str, Union[int, str, List[Dict]]] - - -class AnnotionFormat(ExplicitEnum): - COCO_DETECTION = "coco_detection" - COCO_PANOPTIC = "coco_panoptic" - - -SUPPORTED_ANNOTATION_FORMATS = (AnnotionFormat.COCO_DETECTION, AnnotionFormat.COCO_PANOPTIC) +SUPPORTED_ANNOTATION_FORMATS = (AnnotationFormat.COCO_DETECTION, AnnotationFormat.COCO_PANOPTIC) def get_size_with_aspect_ratio(image_size, size, max_size=None) -> Tuple[int, int]: @@ -785,7 +778,7 @@ class DetrImageProcessor(BaseImageProcessor): def __init__( self, - format: Union[str, AnnotionFormat] = AnnotionFormat.COCO_DETECTION, + format: Union[str, AnnotationFormat] = AnnotationFormat.COCO_DETECTION, do_resize: bool = True, size: Dict[str, int] = None, resample: PILImageResampling = PILImageResampling.BILINEAR, @@ -842,7 +835,7 @@ class DetrImageProcessor(BaseImageProcessor): self, image: np.ndarray, target: Dict, - format: Optional[AnnotionFormat] = None, + format: Optional[AnnotationFormat] = None, return_segmentation_masks: bool = None, masks_path: Optional[Union[str, pathlib.Path]] = None, input_data_format: Optional[Union[str, ChannelDimension]] = None, @@ -852,12 +845,12 @@ class DetrImageProcessor(BaseImageProcessor): """ format = format if format is not None else self.format - if format == AnnotionFormat.COCO_DETECTION: + if format == AnnotationFormat.COCO_DETECTION: return_segmentation_masks = False if return_segmentation_masks is None else return_segmentation_masks target = prepare_coco_detection_annotation( image, target, return_segmentation_masks, input_data_format=input_data_format ) - elif format == AnnotionFormat.COCO_PANOPTIC: + elif format == AnnotationFormat.COCO_PANOPTIC: return_segmentation_masks = True if return_segmentation_masks is None else return_segmentation_masks target = prepare_coco_panoptic_annotation( image, @@ -1089,7 +1082,7 @@ class DetrImageProcessor(BaseImageProcessor): image_mean: Optional[Union[float, List[float]]] = None, image_std: Optional[Union[float, List[float]]] = None, do_pad: Optional[bool] = None, - format: Optional[Union[str, AnnotionFormat]] = None, + format: Optional[Union[str, AnnotationFormat]] = None, return_tensors: Optional[Union[TensorType, str]] = None, data_format: Union[str, ChannelDimension] = ChannelDimension.FIRST, input_data_format: Optional[Union[str, ChannelDimension]] = None, @@ -1135,7 +1128,7 @@ class DetrImageProcessor(BaseImageProcessor): Standard deviation to use when normalizing the image. do_pad (`bool`, *optional*, defaults to self.do_pad): Whether to pad the image. - format (`str` or `AnnotionFormat`, *optional*, defaults to self.format): + format (`str` or `AnnotationFormat`, *optional*, defaults to self.format): Format of the annotations. return_tensors (`str` or `TensorType`, *optional*, defaults to self.return_tensors): Type of tensors to return. If `None`, will return the list of images. @@ -1202,28 +1195,13 @@ class DetrImageProcessor(BaseImageProcessor): "torch.Tensor, tf.Tensor or jax.ndarray." ) - format = AnnotionFormat(format) + format = AnnotationFormat(format) if annotations is not None: - if format == AnnotionFormat.COCO_DETECTION and not valid_coco_detection_annotations(annotations): - raise ValueError( - "Invalid COCO detection annotations. Annotations must a dict (single image) of list of dicts " - "(batch of images) with the following keys: `image_id` and `annotations`, with the latter " - "being a list of annotations in the COCO format." - ) - elif format == AnnotionFormat.COCO_PANOPTIC and not valid_coco_panoptic_annotations(annotations): - raise ValueError( - "Invalid COCO panoptic annotations. Annotations must a dict (single image) of list of dicts " - "(batch of images) with the following keys: `image_id`, `file_name` and `segments_info`, with " - "the latter being a list of annotations in the COCO format." - ) - elif format not in SUPPORTED_ANNOTATION_FORMATS: - raise ValueError( - f"Unsupported annotation format: {format} must be one of {SUPPORTED_ANNOTATION_FORMATS}" - ) + validate_annotations(format, SUPPORTED_ANNOTATION_FORMATS, annotations) if ( masks_path is not None - and format == AnnotionFormat.COCO_PANOPTIC + and format == AnnotationFormat.COCO_PANOPTIC and not isinstance(masks_path, (pathlib.Path, str)) ): raise ValueError( diff --git a/src/transformers/models/yolos/image_processing_yolos.py b/src/transformers/models/yolos/image_processing_yolos.py index 074b1df4d7..4b59fd5ef0 100644 --- a/src/transformers/models/yolos/image_processing_yolos.py +++ b/src/transformers/models/yolos/image_processing_yolos.py @@ -35,6 +35,9 @@ from ...image_transforms import ( from ...image_utils import ( IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD, + AnnotationFormat, + AnnotationType, + AnnotionFormat, # noqa: F401 ChannelDimension, ImageInput, PILImageResampling, @@ -43,12 +46,10 @@ from ...image_utils import ( is_scaled_image, make_list_of_images, to_numpy_array, - valid_coco_detection_annotations, - valid_coco_panoptic_annotations, valid_images, + validate_annotations, ) from ...utils import ( - ExplicitEnum, TensorType, is_flax_available, is_jax_tensor, @@ -77,15 +78,7 @@ if is_scipy_available(): logger = logging.get_logger(__name__) -AnnotationType = Dict[str, Union[int, str, List[Dict]]] - - -class AnnotionFormat(ExplicitEnum): - COCO_DETECTION = "coco_detection" - COCO_PANOPTIC = "coco_panoptic" - - -SUPPORTED_ANNOTATION_FORMATS = (AnnotionFormat.COCO_DETECTION, AnnotionFormat.COCO_PANOPTIC) +SUPPORTED_ANNOTATION_FORMATS = (AnnotationFormat.COCO_DETECTION, AnnotationFormat.COCO_PANOPTIC) # Copied from transformers.models.detr.image_processing_detr.get_max_height_width @@ -712,7 +705,7 @@ class YolosImageProcessor(BaseImageProcessor): def __init__( self, - format: Union[str, AnnotionFormat] = AnnotionFormat.COCO_DETECTION, + format: Union[str, AnnotationFormat] = AnnotationFormat.COCO_DETECTION, do_resize: bool = True, size: Dict[str, int] = None, resample: PILImageResampling = PILImageResampling.BILINEAR, @@ -771,7 +764,7 @@ class YolosImageProcessor(BaseImageProcessor): self, image: np.ndarray, target: Dict, - format: Optional[AnnotionFormat] = None, + format: Optional[AnnotationFormat] = None, return_segmentation_masks: bool = None, masks_path: Optional[Union[str, pathlib.Path]] = None, input_data_format: Optional[Union[str, ChannelDimension]] = None, @@ -781,12 +774,12 @@ class YolosImageProcessor(BaseImageProcessor): """ format = format if format is not None else self.format - if format == AnnotionFormat.COCO_DETECTION: + if format == AnnotationFormat.COCO_DETECTION: return_segmentation_masks = False if return_segmentation_masks is None else return_segmentation_masks target = prepare_coco_detection_annotation( image, target, return_segmentation_masks, input_data_format=input_data_format ) - elif format == AnnotionFormat.COCO_PANOPTIC: + elif format == AnnotationFormat.COCO_PANOPTIC: return_segmentation_masks = True if return_segmentation_masks is None else return_segmentation_masks target = prepare_coco_panoptic_annotation( image, @@ -1026,7 +1019,7 @@ class YolosImageProcessor(BaseImageProcessor): image_mean: Optional[Union[float, List[float]]] = None, image_std: Optional[Union[float, List[float]]] = None, do_pad: Optional[bool] = None, - format: Optional[Union[str, AnnotionFormat]] = None, + format: Optional[Union[str, AnnotationFormat]] = None, return_tensors: Optional[Union[TensorType, str]] = None, data_format: Union[str, ChannelDimension] = ChannelDimension.FIRST, input_data_format: Optional[Union[str, ChannelDimension]] = None, @@ -1040,12 +1033,12 @@ class YolosImageProcessor(BaseImageProcessor): Image or batch of images to preprocess. Expects a single or batch of images with pixel values ranging from 0 to 255. If passing in images with pixel values between 0 and 1, set `do_rescale=False`. annotations (`AnnotationType` or `List[AnnotationType]`, *optional*): - List of annotations associated with the image or batch of images. If annotionation is for object + List of annotations associated with the image or batch of images. If annotation is for object detection, the annotations should be a dictionary with the following keys: - "image_id" (`int`): The image id. - "annotations" (`List[Dict]`): List of annotations for an image. Each annotation should be a dictionary. An image can have no annotations, in which case the list should be empty. - If annotionation is for segmentation, the annotations should be a dictionary with the following keys: + If annotation is for segmentation, the annotations should be a dictionary with the following keys: - "image_id" (`int`): The image id. - "segments_info" (`List[Dict]`): List of segments for an image. Each segment should be a dictionary. An image can have no segments, in which case the list should be empty. @@ -1072,7 +1065,7 @@ class YolosImageProcessor(BaseImageProcessor): Standard deviation to use when normalizing the image. do_pad (`bool`, *optional*, defaults to self.do_pad): Whether to pad the image. - format (`str` or `AnnotionFormat`, *optional*, defaults to self.format): + format (`str` or `AnnotationFormat`, *optional*, defaults to self.format): Format of the annotations. return_tensors (`str` or `TensorType`, *optional*, defaults to self.return_tensors): Type of tensors to return. If `None`, will return the list of images. @@ -1136,28 +1129,13 @@ class YolosImageProcessor(BaseImageProcessor): "torch.Tensor, tf.Tensor or jax.ndarray." ) - format = AnnotionFormat(format) + format = AnnotationFormat(format) if annotations is not None: - if format == AnnotionFormat.COCO_DETECTION and not valid_coco_detection_annotations(annotations): - raise ValueError( - "Invalid COCO detection annotations. Annotations must a dict (single image) of list of dicts " - "(batch of images) with the following keys: `image_id` and `annotations`, with the latter " - "being a list of annotations in the COCO format." - ) - elif format == AnnotionFormat.COCO_PANOPTIC and not valid_coco_panoptic_annotations(annotations): - raise ValueError( - "Invalid COCO panoptic annotations. Annotations must a dict (single image) of list of dicts " - "(batch of images) with the following keys: `image_id`, `file_name` and `segments_info`, with " - "the latter being a list of annotations in the COCO format." - ) - elif format not in SUPPORTED_ANNOTATION_FORMATS: - raise ValueError( - f"Unsupported annotation format: {format} must be one of {SUPPORTED_ANNOTATION_FORMATS}" - ) + validate_annotations(format, SUPPORTED_ANNOTATION_FORMATS, annotations) if ( masks_path is not None - and format == AnnotionFormat.COCO_PANOPTIC + and format == AnnotationFormat.COCO_PANOPTIC and not isinstance(masks_path, (pathlib.Path, str)) ): raise ValueError( diff --git a/tests/models/conditional_detr/test_image_processing_conditional_detr.py b/tests/models/conditional_detr/test_image_processing_conditional_detr.py index cc3fd501a6..4b18a6ecd7 100644 --- a/tests/models/conditional_detr/test_image_processing_conditional_detr.py +++ b/tests/models/conditional_detr/test_image_processing_conditional_detr.py @@ -21,7 +21,7 @@ import unittest from transformers.testing_utils import require_torch, require_vision, slow from transformers.utils import is_torch_available, is_vision_available -from ...test_image_processing_common import ImageProcessingTestMixin, prepare_image_inputs +from ...test_image_processing_common import AnnotationFormatTestMixin, ImageProcessingTestMixin, prepare_image_inputs if is_torch_available(): @@ -127,7 +127,7 @@ class ConditionalDetrImageProcessingTester(unittest.TestCase): @require_torch @require_vision -class ConditionalDetrImageProcessingTest(ImageProcessingTestMixin, unittest.TestCase): +class ConditionalDetrImageProcessingTest(AnnotationFormatTestMixin, ImageProcessingTestMixin, unittest.TestCase): image_processing_class = ConditionalDetrImageProcessor if is_vision_available() else None def setUp(self): diff --git a/tests/models/deformable_detr/test_image_processing_deformable_detr.py b/tests/models/deformable_detr/test_image_processing_deformable_detr.py index 4fd2de49f7..ec65f7b9a5 100644 --- a/tests/models/deformable_detr/test_image_processing_deformable_detr.py +++ b/tests/models/deformable_detr/test_image_processing_deformable_detr.py @@ -21,7 +21,7 @@ import unittest from transformers.testing_utils import require_torch, require_vision, slow from transformers.utils import is_torch_available, is_vision_available -from ...test_image_processing_common import ImageProcessingTestMixin, prepare_image_inputs +from ...test_image_processing_common import AnnotationFormatTestMixin, ImageProcessingTestMixin, prepare_image_inputs if is_torch_available(): @@ -127,7 +127,7 @@ class DeformableDetrImageProcessingTester(unittest.TestCase): @require_torch @require_vision -class DeformableDetrImageProcessingTest(ImageProcessingTestMixin, unittest.TestCase): +class DeformableDetrImageProcessingTest(AnnotationFormatTestMixin, ImageProcessingTestMixin, unittest.TestCase): image_processing_class = DeformableDetrImageProcessor if is_vision_available() else None def setUp(self): diff --git a/tests/models/deta/test_image_processing_deta.py b/tests/models/deta/test_image_processing_deta.py index 7cde8474bf..1e48147607 100644 --- a/tests/models/deta/test_image_processing_deta.py +++ b/tests/models/deta/test_image_processing_deta.py @@ -21,7 +21,7 @@ import unittest from transformers.testing_utils import require_torch, require_vision, slow from transformers.utils import is_torch_available, is_vision_available -from ...test_image_processing_common import ImageProcessingTestMixin, prepare_image_inputs +from ...test_image_processing_common import AnnotationFormatTestMixin, ImageProcessingTestMixin, prepare_image_inputs if is_torch_available(): @@ -127,7 +127,7 @@ class DetaImageProcessingTester(unittest.TestCase): @require_torch @require_vision -class DetaImageProcessingTest(ImageProcessingTestMixin, unittest.TestCase): +class DetaImageProcessingTest(AnnotationFormatTestMixin, ImageProcessingTestMixin, unittest.TestCase): image_processing_class = DetaImageProcessor if is_vision_available() else None def setUp(self): diff --git a/tests/models/detr/test_image_processing_detr.py b/tests/models/detr/test_image_processing_detr.py index 2a095b259e..7a5cb9efed 100644 --- a/tests/models/detr/test_image_processing_detr.py +++ b/tests/models/detr/test_image_processing_detr.py @@ -21,7 +21,7 @@ import unittest from transformers.testing_utils import require_torch, require_vision, slow from transformers.utils import is_torch_available, is_vision_available -from ...test_image_processing_common import ImageProcessingTestMixin, prepare_image_inputs +from ...test_image_processing_common import AnnotationFormatTestMixin, ImageProcessingTestMixin, prepare_image_inputs if is_torch_available(): @@ -127,7 +127,7 @@ class DetrImageProcessingTester(unittest.TestCase): @require_torch @require_vision -class DetrImageProcessingTest(ImageProcessingTestMixin, unittest.TestCase): +class DetrImageProcessingTest(AnnotationFormatTestMixin, ImageProcessingTestMixin, unittest.TestCase): image_processing_class = DetrImageProcessor if is_vision_available() else None def setUp(self): @@ -159,6 +159,63 @@ class DetrImageProcessingTest(ImageProcessingTestMixin, unittest.TestCase): self.assertEqual(image_processor.size, {"shortest_edge": 42, "longest_edge": 84}) self.assertEqual(image_processor.do_pad, False) + def test_should_raise_if_annotation_format_invalid(self): + image_processor_dict = self.image_processor_tester.prepare_image_processor_dict() + + with open("./tests/fixtures/tests_samples/COCO/coco_annotations.txt", "r") as f: + detection_target = json.loads(f.read()) + + annotations = {"image_id": 39769, "annotations": detection_target} + + params = { + "images": Image.open("./tests/fixtures/tests_samples/COCO/000000039769.png"), + "annotations": annotations, + "return_tensors": "pt", + } + + image_processor_params = {**image_processor_dict, **{"format": "_INVALID_FORMAT_"}} + image_processor = self.image_processing_class(**image_processor_params) + + with self.assertRaises(ValueError) as e: + image_processor(**params) + + self.assertTrue(str(e.exception).startswith("_INVALID_FORMAT_ is not a valid AnnotationFormat")) + + def test_valid_coco_detection_annotations(self): + # prepare image and target + image = Image.open("./tests/fixtures/tests_samples/COCO/000000039769.png") + with open("./tests/fixtures/tests_samples/COCO/coco_annotations.txt", "r") as f: + target = json.loads(f.read()) + + params = {"image_id": 39769, "annotations": target} + + # encode them + image_processing = DetrImageProcessor.from_pretrained("facebook/detr-resnet-50") + + # legal encodings (single image) + _ = image_processing(images=image, annotations=params, return_tensors="pt") + _ = image_processing(images=image, annotations=[params], return_tensors="pt") + + # legal encodings (batch of one image) + _ = image_processing(images=[image], annotations=params, return_tensors="pt") + _ = image_processing(images=[image], annotations=[params], return_tensors="pt") + + # legal encoding (batch of more than one image) + n = 5 + _ = image_processing(images=[image] * n, annotations=[params] * n, return_tensors="pt") + + # example of an illegal encoding (missing the 'image_id' key) + with self.assertRaises(ValueError) as e: + image_processing(images=image, annotations={"annotations": target}, return_tensors="pt") + + self.assertTrue(str(e.exception).startswith("Invalid COCO detection annotations")) + + # example of an illegal encoding (unequal lengths of images and annotations) + with self.assertRaises(ValueError) as e: + image_processing(images=[image] * n, annotations=[params] * (n - 1), return_tensors="pt") + + self.assertTrue(str(e.exception) == "The number of images (5) and annotations (4) do not match.") + @slow def test_call_pytorch_with_coco_detection_annotations(self): # prepare image and target diff --git a/tests/models/yolos/test_image_processing_yolos.py b/tests/models/yolos/test_image_processing_yolos.py index 003a006110..1039e4c91b 100644 --- a/tests/models/yolos/test_image_processing_yolos.py +++ b/tests/models/yolos/test_image_processing_yolos.py @@ -21,7 +21,7 @@ import unittest from transformers.testing_utils import require_torch, require_vision, slow from transformers.utils import is_torch_available, is_vision_available -from ...test_image_processing_common import ImageProcessingTestMixin, prepare_image_inputs +from ...test_image_processing_common import AnnotationFormatTestMixin, ImageProcessingTestMixin, prepare_image_inputs if is_torch_available(): @@ -127,7 +127,7 @@ class YolosImageProcessingTester(unittest.TestCase): @require_torch @require_vision -class YolosImageProcessingTest(ImageProcessingTestMixin, unittest.TestCase): +class YolosImageProcessingTest(AnnotationFormatTestMixin, ImageProcessingTestMixin, unittest.TestCase): image_processing_class = YolosImageProcessor if is_vision_available() else None def setUp(self): diff --git a/tests/test_image_processing_common.py b/tests/test_image_processing_common.py index cb78b33375..dcbee270f9 100644 --- a/tests/test_image_processing_common.py +++ b/tests/test_image_processing_common.py @@ -15,8 +15,11 @@ import json import os +import pathlib import tempfile +from transformers import BatchFeature +from transformers.image_utils import AnnotationFormat, AnnotionFormat from transformers.testing_utils import check_json_file_has_correct_format, require_torch, require_vision from transformers.utils import is_torch_available, is_vision_available @@ -285,3 +288,81 @@ class ImageProcessingTestMixin: self.assertEqual( tuple(encoded_images.shape), (self.image_processor_tester.batch_size, *expected_output_image_shape) ) + + +class AnnotationFormatTestMixin: + # this mixin adds a test to assert that usages of the + # to-be-deprecated `AnnotionFormat` continue to be + # supported for the time being + + def test_processor_can_use_legacy_annotation_format(self): + image_processor_dict = self.image_processor_tester.prepare_image_processor_dict() + fixtures_path = pathlib.Path(__file__).parent / "fixtures" / "tests_samples" / "COCO" + + with open(fixtures_path / "coco_annotations.txt", "r") as f: + detection_target = json.loads(f.read()) + + detection_annotations = {"image_id": 39769, "annotations": detection_target} + + detection_params = { + "images": Image.open(fixtures_path / "000000039769.png"), + "annotations": detection_annotations, + "return_tensors": "pt", + } + + with open(fixtures_path / "coco_panoptic_annotations.txt", "r") as f: + panoptic_target = json.loads(f.read()) + + panoptic_annotations = {"file_name": "000000039769.png", "image_id": 39769, "segments_info": panoptic_target} + + masks_path = pathlib.Path(fixtures_path / "coco_panoptic") + + panoptic_params = { + "images": Image.open(fixtures_path / "000000039769.png"), + "annotations": panoptic_annotations, + "return_tensors": "pt", + "masks_path": masks_path, + } + + test_cases = [ + ("coco_detection", detection_params), + ("coco_panoptic", panoptic_params), + (AnnotionFormat.COCO_DETECTION, detection_params), + (AnnotionFormat.COCO_PANOPTIC, panoptic_params), + (AnnotationFormat.COCO_DETECTION, detection_params), + (AnnotationFormat.COCO_PANOPTIC, panoptic_params), + ] + + def _compare(a, b) -> None: + if isinstance(a, (dict, BatchFeature)): + self.assertEqual(a.keys(), b.keys()) + for k, v in a.items(): + _compare(v, b[k]) + elif isinstance(a, list): + self.assertEqual(len(a), len(b)) + for idx in range(len(a)): + _compare(a[idx], b[idx]) + elif isinstance(a, torch.Tensor): + self.assertTrue(torch.allclose(a, b, atol=1e-3)) + elif isinstance(a, str): + self.assertEqual(a, b) + + for annotation_format, params in test_cases: + with self.subTest(annotation_format): + image_processor_params = {**image_processor_dict, **{"format": annotation_format}} + image_processor_first = self.image_processing_class(**image_processor_params) + + with tempfile.TemporaryDirectory() as tmpdirname: + image_processor_first.save_pretrained(tmpdirname) + image_processor_second = self.image_processing_class.from_pretrained(tmpdirname) + + # check the 'format' key exists and that the dicts of the + # first and second processors are equal + self.assertIn("format", image_processor_first.to_dict().keys()) + self.assertEqual(image_processor_second.to_dict(), image_processor_first.to_dict()) + + # perform encoding using both processors and compare + # the resulting BatchFeatures + first_encoding = image_processor_first(**params) + second_encoding = image_processor_second(**params) + _compare(first_encoding, second_encoding)