Fix a couple of typos and add an illustrative test (#26941)
* fix a typo and add an illustrative test * appease black * reduce code duplication and add Annotion type back with a pending deprecation warning * remove unused code * change warning type * black formatting fix * change enum deprecation approach to support 3.8 and earlier * add stacklevel * fix black issue * fix ruff issues * fix ruff issues * move tests to own mixin * include yolos * fix black formatting issue * fix black formatting issue * use logger instead of warnings and include target version for deprecation
This commit is contained in:
@@ -15,6 +15,7 @@
|
||||
|
||||
import base64
|
||||
import os
|
||||
from enum import EnumMeta
|
||||
from io import BytesIO
|
||||
from typing import TYPE_CHECKING, Dict, Iterable, List, Optional, Tuple, Union
|
||||
|
||||
@@ -29,6 +30,7 @@ from .utils import (
|
||||
is_torch_available,
|
||||
is_torch_tensor,
|
||||
is_vision_available,
|
||||
logging,
|
||||
requires_backends,
|
||||
to_numpy,
|
||||
)
|
||||
@@ -56,6 +58,9 @@ if TYPE_CHECKING:
|
||||
import torch
|
||||
|
||||
|
||||
logger = logging.get_logger(__name__)
|
||||
|
||||
|
||||
ImageInput = Union[
|
||||
"PIL.Image.Image", np.ndarray, "torch.Tensor", List["PIL.Image.Image"], List[np.ndarray], List["torch.Tensor"]
|
||||
] # noqa
|
||||
@@ -66,6 +71,28 @@ class ChannelDimension(ExplicitEnum):
|
||||
LAST = "channels_last"
|
||||
|
||||
|
||||
class AnnotationFormat(ExplicitEnum):
    """Names of the annotation formats, each mapped to its string identifier."""

    COCO_DETECTION = "coco_detection"
    COCO_PANOPTIC = "coco_panoptic"
|
||||
|
||||
|
||||
class DeprecatedEnumMeta(EnumMeta):
    """
    Enum metaclass that reports, through `logger.warning_once`, that the class it builds is deprecated.

    The message is emitted from the metaclass `__init__`, i.e. when the enum class itself is created,
    not when its members are accessed.
    """

    def __init__(cls, *args, **kwargs):
        # Let `EnumMeta` finish building the class before referring to `cls.__name__`.
        super().__init__(*args, **kwargs)
        deprecation_notice = (
            f"`{cls.__name__}` is deprecated and will be removed in v4.38. "
            "Please use `transformers.image_utils.AnnotationFormat` instead."
        )
        logger.warning_once(deprecation_notice)
|
||||
|
||||
|
||||
class AnnotionFormat(ExplicitEnum, metaclass=DeprecatedEnumMeta):
    # Legacy (misspelled) name. Member values are taken from `AnnotationFormat` so the two
    # enums cannot drift apart; `DeprecatedEnumMeta` logs the deprecation notice.
    COCO_DETECTION = AnnotationFormat.COCO_DETECTION.value
    COCO_PANOPTIC = AnnotationFormat.COCO_PANOPTIC.value
|
||||
|
||||
|
||||
# Type alias for one annotation mapping: string keys whose values are an int, a str, or a list of dicts.
AnnotationType = Dict[str, Union[int, str, List[Dict]]]
|
||||
|
||||
|
||||
def is_pil_image(img):
    """Tell whether `img` is a `PIL.Image.Image`; falsy whenever `is_vision_available()` is falsy."""
    # The availability check runs first so that `PIL` is only dereferenced when it is safe to do so.
    vision_ready = is_vision_available()
    return vision_ready and isinstance(img, PIL.Image.Image)
|
||||
|
||||
@@ -664,3 +691,33 @@ class ImageFeatureExtractionMixin:
|
||||
return image.rotate(
|
||||
angle, resample=resample, expand=expand, center=center, translate=translate, fillcolor=fillcolor
|
||||
)
|
||||
|
||||
|
||||
def promote_annotation_format(annotation_format: Union[AnnotionFormat, AnnotationFormat]) -> AnnotationFormat:
    """
    Convert a member of either annotation-format enum into the matching `AnnotationFormat` member.

    Args:
        annotation_format: A member of `AnnotionFormat` (deprecated) or `AnnotationFormat`.

    Returns:
        The `AnnotationFormat` member that has the same `.value`.
    """
    # can be removed when `AnnotionFormat` is fully deprecated
    raw_value = annotation_format.value
    return AnnotationFormat(raw_value)
|
||||
|
||||
|
||||
def validate_annotations(
    annotation_format: AnnotationFormat,
    supported_annotation_formats: Tuple[AnnotationFormat, ...],
    annotations: List[Dict],
) -> None:
    """
    Check that `annotation_format` is supported and that `annotations` are valid for that format.

    Args:
        annotation_format: Format the annotations are expected to follow. Members of the deprecated
            `AnnotionFormat` enum are accepted and promoted to `AnnotationFormat` first.
        supported_annotation_formats: Formats the caller is able to handle.
        annotations: Annotations to check with the format-specific validator.

    Raises:
        ValueError: If the format is not in `supported_annotation_formats`, or if the annotations
            are rejected by `valid_coco_detection_annotations` / `valid_coco_panoptic_annotations`.
    """
    # Promote once so the deprecated enum compares correctly against `AnnotationFormat` members.
    promoted_format = promote_annotation_format(annotation_format)

    if promoted_format not in supported_annotation_formats:
        # Interpolate the argument: a bare `format` here would resolve to the `format` builtin.
        raise ValueError(
            f"Unsupported annotation format: {annotation_format} must be one of {supported_annotation_formats}"
        )

    if promoted_format is AnnotationFormat.COCO_DETECTION:
        if not valid_coco_detection_annotations(annotations):
            raise ValueError(
                "Invalid COCO detection annotations. Annotations must be a dict (single image) or list of dicts "
                "(batch of images) with the following keys: `image_id` and `annotations`, with the latter "
                "being a list of annotations in the COCO format."
            )

    if promoted_format is AnnotationFormat.COCO_PANOPTIC:
        if not valid_coco_panoptic_annotations(annotations):
            raise ValueError(
                "Invalid COCO panoptic annotations. Annotations must be a dict (single image) or list of dicts "
                "(batch of images) with the following keys: `image_id`, `file_name` and `segments_info`, with "
                "the latter being a list of annotations in the COCO format."
            )
|
||||
|
||||
@@ -37,6 +37,9 @@ from ...image_transforms import (
|
||||
from ...image_utils import (
|
||||
IMAGENET_DEFAULT_MEAN,
|
||||
IMAGENET_DEFAULT_STD,
|
||||
AnnotationFormat,
|
||||
AnnotationType,
|
||||
AnnotionFormat, # noqa: F401
|
||||
ChannelDimension,
|
||||
ImageInput,
|
||||
PILImageResampling,
|
||||
@@ -45,12 +48,10 @@ from ...image_utils import (
|
||||
is_scaled_image,
|
||||
make_list_of_images,
|
||||
to_numpy_array,
|
||||
valid_coco_detection_annotations,
|
||||
valid_coco_panoptic_annotations,
|
||||
valid_images,
|
||||
validate_annotations,
|
||||
)
|
||||
from ...utils import (
|
||||
ExplicitEnum,
|
||||
TensorType,
|
||||
is_flax_available,
|
||||
is_jax_tensor,
|
||||
@@ -80,15 +81,8 @@ if is_scipy_available():
|
||||
|
||||
logger = logging.get_logger(__name__) # pylint: disable=invalid-name
|
||||
|
||||
AnnotationType = Dict[str, Union[int, str, List[Dict]]]
|
||||
|
||||
|
||||
class AnnotionFormat(ExplicitEnum):
|
||||
COCO_DETECTION = "coco_detection"
|
||||
COCO_PANOPTIC = "coco_panoptic"
|
||||
|
||||
|
||||
SUPPORTED_ANNOTATION_FORMATS = (AnnotionFormat.COCO_DETECTION, AnnotionFormat.COCO_PANOPTIC)
|
||||
SUPPORTED_ANNOTATION_FORMATS = (AnnotationFormat.COCO_DETECTION, AnnotationFormat.COCO_PANOPTIC)
|
||||
|
||||
|
||||
# Copied from transformers.models.detr.image_processing_detr.get_size_with_aspect_ratio
|
||||
@@ -802,7 +796,7 @@ class ConditionalDetrImageProcessor(BaseImageProcessor):
|
||||
# Copied from transformers.models.detr.image_processing_detr.DetrImageProcessor.__init__
|
||||
def __init__(
|
||||
self,
|
||||
format: Union[str, AnnotionFormat] = AnnotionFormat.COCO_DETECTION,
|
||||
format: Union[str, AnnotationFormat] = AnnotationFormat.COCO_DETECTION,
|
||||
do_resize: bool = True,
|
||||
size: Dict[str, int] = None,
|
||||
resample: PILImageResampling = PILImageResampling.BILINEAR,
|
||||
@@ -861,7 +855,7 @@ class ConditionalDetrImageProcessor(BaseImageProcessor):
|
||||
self,
|
||||
image: np.ndarray,
|
||||
target: Dict,
|
||||
format: Optional[AnnotionFormat] = None,
|
||||
format: Optional[AnnotationFormat] = None,
|
||||
return_segmentation_masks: bool = None,
|
||||
masks_path: Optional[Union[str, pathlib.Path]] = None,
|
||||
input_data_format: Optional[Union[str, ChannelDimension]] = None,
|
||||
@@ -871,12 +865,12 @@ class ConditionalDetrImageProcessor(BaseImageProcessor):
|
||||
"""
|
||||
format = format if format is not None else self.format
|
||||
|
||||
if format == AnnotionFormat.COCO_DETECTION:
|
||||
if format == AnnotationFormat.COCO_DETECTION:
|
||||
return_segmentation_masks = False if return_segmentation_masks is None else return_segmentation_masks
|
||||
target = prepare_coco_detection_annotation(
|
||||
image, target, return_segmentation_masks, input_data_format=input_data_format
|
||||
)
|
||||
elif format == AnnotionFormat.COCO_PANOPTIC:
|
||||
elif format == AnnotationFormat.COCO_PANOPTIC:
|
||||
return_segmentation_masks = True if return_segmentation_masks is None else return_segmentation_masks
|
||||
target = prepare_coco_panoptic_annotation(
|
||||
image,
|
||||
@@ -1118,7 +1112,7 @@ class ConditionalDetrImageProcessor(BaseImageProcessor):
|
||||
image_mean: Optional[Union[float, List[float]]] = None,
|
||||
image_std: Optional[Union[float, List[float]]] = None,
|
||||
do_pad: Optional[bool] = None,
|
||||
format: Optional[Union[str, AnnotionFormat]] = None,
|
||||
format: Optional[Union[str, AnnotationFormat]] = None,
|
||||
return_tensors: Optional[Union[TensorType, str]] = None,
|
||||
data_format: Union[str, ChannelDimension] = ChannelDimension.FIRST,
|
||||
input_data_format: Optional[Union[str, ChannelDimension]] = None,
|
||||
@@ -1164,7 +1158,7 @@ class ConditionalDetrImageProcessor(BaseImageProcessor):
|
||||
Standard deviation to use when normalizing the image.
|
||||
do_pad (`bool`, *optional*, defaults to self.do_pad):
|
||||
Whether to pad the image.
|
||||
format (`str` or `AnnotionFormat`, *optional*, defaults to self.format):
|
||||
format (`str` or `AnnotationFormat`, *optional*, defaults to self.format):
|
||||
Format of the annotations.
|
||||
return_tensors (`str` or `TensorType`, *optional*, defaults to self.return_tensors):
|
||||
Type of tensors to return. If `None`, will return the list of images.
|
||||
@@ -1231,28 +1225,13 @@ class ConditionalDetrImageProcessor(BaseImageProcessor):
|
||||
"torch.Tensor, tf.Tensor or jax.ndarray."
|
||||
)
|
||||
|
||||
format = AnnotionFormat(format)
|
||||
format = AnnotationFormat(format)
|
||||
if annotations is not None:
|
||||
if format == AnnotionFormat.COCO_DETECTION and not valid_coco_detection_annotations(annotations):
|
||||
raise ValueError(
|
||||
"Invalid COCO detection annotations. Annotations must a dict (single image) of list of dicts "
|
||||
"(batch of images) with the following keys: `image_id` and `annotations`, with the latter "
|
||||
"being a list of annotations in the COCO format."
|
||||
)
|
||||
elif format == AnnotionFormat.COCO_PANOPTIC and not valid_coco_panoptic_annotations(annotations):
|
||||
raise ValueError(
|
||||
"Invalid COCO panoptic annotations. Annotations must a dict (single image) of list of dicts "
|
||||
"(batch of images) with the following keys: `image_id`, `file_name` and `segments_info`, with "
|
||||
"the latter being a list of annotations in the COCO format."
|
||||
)
|
||||
elif format not in SUPPORTED_ANNOTATION_FORMATS:
|
||||
raise ValueError(
|
||||
f"Unsupported annotation format: {format} must be one of {SUPPORTED_ANNOTATION_FORMATS}"
|
||||
)
|
||||
validate_annotations(format, SUPPORTED_ANNOTATION_FORMATS, annotations)
|
||||
|
||||
if (
|
||||
masks_path is not None
|
||||
and format == AnnotionFormat.COCO_PANOPTIC
|
||||
and format == AnnotationFormat.COCO_PANOPTIC
|
||||
and not isinstance(masks_path, (pathlib.Path, str))
|
||||
):
|
||||
raise ValueError(
|
||||
|
||||
@@ -37,6 +37,9 @@ from ...image_transforms import (
|
||||
from ...image_utils import (
|
||||
IMAGENET_DEFAULT_MEAN,
|
||||
IMAGENET_DEFAULT_STD,
|
||||
AnnotationFormat,
|
||||
AnnotationType,
|
||||
AnnotionFormat, # noqa: F401
|
||||
ChannelDimension,
|
||||
ImageInput,
|
||||
PILImageResampling,
|
||||
@@ -45,12 +48,10 @@ from ...image_utils import (
|
||||
is_scaled_image,
|
||||
make_list_of_images,
|
||||
to_numpy_array,
|
||||
valid_coco_detection_annotations,
|
||||
valid_coco_panoptic_annotations,
|
||||
valid_images,
|
||||
validate_annotations,
|
||||
)
|
||||
from ...utils import (
|
||||
ExplicitEnum,
|
||||
TensorType,
|
||||
is_flax_available,
|
||||
is_jax_tensor,
|
||||
@@ -79,15 +80,7 @@ if is_scipy_available():
|
||||
|
||||
logger = logging.get_logger(__name__) # pylint: disable=invalid-name
|
||||
|
||||
AnnotationType = Dict[str, Union[int, str, List[Dict]]]
|
||||
|
||||
|
||||
class AnnotionFormat(ExplicitEnum):
|
||||
COCO_DETECTION = "coco_detection"
|
||||
COCO_PANOPTIC = "coco_panoptic"
|
||||
|
||||
|
||||
SUPPORTED_ANNOTATION_FORMATS = (AnnotionFormat.COCO_DETECTION, AnnotionFormat.COCO_PANOPTIC)
|
||||
SUPPORTED_ANNOTATION_FORMATS = (AnnotationFormat.COCO_DETECTION, AnnotationFormat.COCO_PANOPTIC)
|
||||
|
||||
|
||||
# Copied from transformers.models.detr.image_processing_detr.get_size_with_aspect_ratio
|
||||
@@ -801,7 +794,7 @@ class DeformableDetrImageProcessor(BaseImageProcessor):
|
||||
# Copied from transformers.models.detr.image_processing_detr.DetrImageProcessor.__init__
|
||||
def __init__(
|
||||
self,
|
||||
format: Union[str, AnnotionFormat] = AnnotionFormat.COCO_DETECTION,
|
||||
format: Union[str, AnnotationFormat] = AnnotationFormat.COCO_DETECTION,
|
||||
do_resize: bool = True,
|
||||
size: Dict[str, int] = None,
|
||||
resample: PILImageResampling = PILImageResampling.BILINEAR,
|
||||
@@ -860,7 +853,7 @@ class DeformableDetrImageProcessor(BaseImageProcessor):
|
||||
self,
|
||||
image: np.ndarray,
|
||||
target: Dict,
|
||||
format: Optional[AnnotionFormat] = None,
|
||||
format: Optional[AnnotationFormat] = None,
|
||||
return_segmentation_masks: bool = None,
|
||||
masks_path: Optional[Union[str, pathlib.Path]] = None,
|
||||
input_data_format: Optional[Union[str, ChannelDimension]] = None,
|
||||
@@ -870,12 +863,12 @@ class DeformableDetrImageProcessor(BaseImageProcessor):
|
||||
"""
|
||||
format = format if format is not None else self.format
|
||||
|
||||
if format == AnnotionFormat.COCO_DETECTION:
|
||||
if format == AnnotationFormat.COCO_DETECTION:
|
||||
return_segmentation_masks = False if return_segmentation_masks is None else return_segmentation_masks
|
||||
target = prepare_coco_detection_annotation(
|
||||
image, target, return_segmentation_masks, input_data_format=input_data_format
|
||||
)
|
||||
elif format == AnnotionFormat.COCO_PANOPTIC:
|
||||
elif format == AnnotationFormat.COCO_PANOPTIC:
|
||||
return_segmentation_masks = True if return_segmentation_masks is None else return_segmentation_masks
|
||||
target = prepare_coco_panoptic_annotation(
|
||||
image,
|
||||
@@ -1117,7 +1110,7 @@ class DeformableDetrImageProcessor(BaseImageProcessor):
|
||||
image_mean: Optional[Union[float, List[float]]] = None,
|
||||
image_std: Optional[Union[float, List[float]]] = None,
|
||||
do_pad: Optional[bool] = None,
|
||||
format: Optional[Union[str, AnnotionFormat]] = None,
|
||||
format: Optional[Union[str, AnnotationFormat]] = None,
|
||||
return_tensors: Optional[Union[TensorType, str]] = None,
|
||||
data_format: Union[str, ChannelDimension] = ChannelDimension.FIRST,
|
||||
input_data_format: Optional[Union[str, ChannelDimension]] = None,
|
||||
@@ -1163,7 +1156,7 @@ class DeformableDetrImageProcessor(BaseImageProcessor):
|
||||
Standard deviation to use when normalizing the image.
|
||||
do_pad (`bool`, *optional*, defaults to self.do_pad):
|
||||
Whether to pad the image.
|
||||
format (`str` or `AnnotionFormat`, *optional*, defaults to self.format):
|
||||
format (`str` or `AnnotationFormat`, *optional*, defaults to self.format):
|
||||
Format of the annotations.
|
||||
return_tensors (`str` or `TensorType`, *optional*, defaults to self.return_tensors):
|
||||
Type of tensors to return. If `None`, will return the list of images.
|
||||
@@ -1230,28 +1223,13 @@ class DeformableDetrImageProcessor(BaseImageProcessor):
|
||||
"torch.Tensor, tf.Tensor or jax.ndarray."
|
||||
)
|
||||
|
||||
format = AnnotionFormat(format)
|
||||
format = AnnotationFormat(format)
|
||||
if annotations is not None:
|
||||
if format == AnnotionFormat.COCO_DETECTION and not valid_coco_detection_annotations(annotations):
|
||||
raise ValueError(
|
||||
"Invalid COCO detection annotations. Annotations must a dict (single image) of list of dicts "
|
||||
"(batch of images) with the following keys: `image_id` and `annotations`, with the latter "
|
||||
"being a list of annotations in the COCO format."
|
||||
)
|
||||
elif format == AnnotionFormat.COCO_PANOPTIC and not valid_coco_panoptic_annotations(annotations):
|
||||
raise ValueError(
|
||||
"Invalid COCO panoptic annotations. Annotations must a dict (single image) of list of dicts "
|
||||
"(batch of images) with the following keys: `image_id`, `file_name` and `segments_info`, with "
|
||||
"the latter being a list of annotations in the COCO format."
|
||||
)
|
||||
elif format not in SUPPORTED_ANNOTATION_FORMATS:
|
||||
raise ValueError(
|
||||
f"Unsupported annotation format: {format} must be one of {SUPPORTED_ANNOTATION_FORMATS}"
|
||||
)
|
||||
validate_annotations(format, SUPPORTED_ANNOTATION_FORMATS, annotations)
|
||||
|
||||
if (
|
||||
masks_path is not None
|
||||
and format == AnnotionFormat.COCO_PANOPTIC
|
||||
and format == AnnotationFormat.COCO_PANOPTIC
|
||||
and not isinstance(masks_path, (pathlib.Path, str))
|
||||
):
|
||||
raise ValueError(
|
||||
|
||||
@@ -34,6 +34,8 @@ from ...image_transforms import (
|
||||
from ...image_utils import (
|
||||
IMAGENET_DEFAULT_MEAN,
|
||||
IMAGENET_DEFAULT_STD,
|
||||
AnnotationFormat,
|
||||
AnnotionFormat, # noqa: F401
|
||||
ChannelDimension,
|
||||
ImageInput,
|
||||
PILImageResampling,
|
||||
@@ -42,9 +44,8 @@ from ...image_utils import (
|
||||
is_batched,
|
||||
is_scaled_image,
|
||||
to_numpy_array,
|
||||
valid_coco_detection_annotations,
|
||||
valid_coco_panoptic_annotations,
|
||||
valid_images,
|
||||
validate_annotations,
|
||||
)
|
||||
from ...utils import (
|
||||
is_flax_available,
|
||||
@@ -57,7 +58,7 @@ from ...utils import (
|
||||
is_vision_available,
|
||||
logging,
|
||||
)
|
||||
from ...utils.generic import ExplicitEnum, TensorType
|
||||
from ...utils.generic import TensorType
|
||||
|
||||
|
||||
if is_torch_available():
|
||||
@@ -73,13 +74,7 @@ if is_vision_available():
|
||||
|
||||
logger = logging.get_logger(__name__) # pylint: disable=invalid-name
|
||||
|
||||
|
||||
class AnnotionFormat(ExplicitEnum):
|
||||
COCO_DETECTION = "coco_detection"
|
||||
COCO_PANOPTIC = "coco_panoptic"
|
||||
|
||||
|
||||
SUPPORTED_ANNOTATION_FORMATS = (AnnotionFormat.COCO_DETECTION, AnnotionFormat.COCO_PANOPTIC)
|
||||
SUPPORTED_ANNOTATION_FORMATS = (AnnotationFormat.COCO_DETECTION, AnnotationFormat.COCO_PANOPTIC)
|
||||
|
||||
|
||||
# Copied from transformers.models.detr.image_processing_detr.get_size_with_aspect_ratio
|
||||
@@ -507,7 +502,7 @@ class DetaImageProcessor(BaseImageProcessor):
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
format: Union[str, AnnotionFormat] = AnnotionFormat.COCO_DETECTION,
|
||||
format: Union[str, AnnotationFormat] = AnnotationFormat.COCO_DETECTION,
|
||||
do_resize: bool = True,
|
||||
size: Dict[str, int] = None,
|
||||
resample: PILImageResampling = PILImageResampling.BILINEAR,
|
||||
@@ -542,7 +537,7 @@ class DetaImageProcessor(BaseImageProcessor):
|
||||
self,
|
||||
image: np.ndarray,
|
||||
target: Dict,
|
||||
format: Optional[AnnotionFormat] = None,
|
||||
format: Optional[AnnotationFormat] = None,
|
||||
return_segmentation_masks: bool = None,
|
||||
masks_path: Optional[Union[str, pathlib.Path]] = None,
|
||||
input_data_format: Optional[Union[str, ChannelDimension]] = None,
|
||||
@@ -552,12 +547,12 @@ class DetaImageProcessor(BaseImageProcessor):
|
||||
"""
|
||||
format = format if format is not None else self.format
|
||||
|
||||
if format == AnnotionFormat.COCO_DETECTION:
|
||||
if format == AnnotationFormat.COCO_DETECTION:
|
||||
return_segmentation_masks = False if return_segmentation_masks is None else return_segmentation_masks
|
||||
target = prepare_coco_detection_annotation(
|
||||
image, target, return_segmentation_masks, input_data_format=input_data_format
|
||||
)
|
||||
elif format == AnnotionFormat.COCO_PANOPTIC:
|
||||
elif format == AnnotationFormat.COCO_PANOPTIC:
|
||||
return_segmentation_masks = True if return_segmentation_masks is None else return_segmentation_masks
|
||||
target = prepare_coco_panoptic_annotation(
|
||||
image,
|
||||
@@ -789,7 +784,7 @@ class DetaImageProcessor(BaseImageProcessor):
|
||||
image_mean: Optional[Union[float, List[float]]] = None,
|
||||
image_std: Optional[Union[float, List[float]]] = None,
|
||||
do_pad: Optional[bool] = None,
|
||||
format: Optional[Union[str, AnnotionFormat]] = None,
|
||||
format: Optional[Union[str, AnnotationFormat]] = None,
|
||||
return_tensors: Optional[Union[TensorType, str]] = None,
|
||||
data_format: Union[str, ChannelDimension] = ChannelDimension.FIRST,
|
||||
input_data_format: Optional[Union[str, ChannelDimension]] = None,
|
||||
@@ -803,12 +798,12 @@ class DetaImageProcessor(BaseImageProcessor):
|
||||
Image or batch of images to preprocess. Expects a single or batch of images with pixel values ranging
|
||||
from 0 to 255. If passing in images with pixel values between 0 and 1, set `do_rescale=False`.
|
||||
annotations (`List[Dict]` or `List[List[Dict]]`, *optional*):
|
||||
List of annotations associated with the image or batch of images. If annotionation is for object
|
||||
List of annotations associated with the image or batch of images. If annotation is for object
|
||||
detection, the annotations should be a dictionary with the following keys:
|
||||
- "image_id" (`int`): The image id.
|
||||
- "annotations" (`List[Dict]`): List of annotations for an image. Each annotation should be a
|
||||
dictionary. An image can have no annotations, in which case the list should be empty.
|
||||
If annotionation is for segmentation, the annotations should be a dictionary with the following keys:
|
||||
If annotation is for segmentation, the annotations should be a dictionary with the following keys:
|
||||
- "image_id" (`int`): The image id.
|
||||
- "segments_info" (`List[Dict]`): List of segments for an image. Each segment should be a dictionary.
|
||||
An image can have no segments, in which case the list should be empty.
|
||||
@@ -835,7 +830,7 @@ class DetaImageProcessor(BaseImageProcessor):
|
||||
Standard deviation to use when normalizing the image.
|
||||
do_pad (`bool`, *optional*, defaults to self.do_pad):
|
||||
Whether to pad the image.
|
||||
format (`str` or `AnnotionFormat`, *optional*, defaults to self.format):
|
||||
format (`str` or `AnnotationFormat`, *optional*, defaults to self.format):
|
||||
Format of the annotations.
|
||||
return_tensors (`str` or `TensorType`, *optional*, defaults to self.return_tensors):
|
||||
Type of tensors to return. If `None`, will return the list of images.
|
||||
@@ -894,28 +889,13 @@ class DetaImageProcessor(BaseImageProcessor):
|
||||
"torch.Tensor, tf.Tensor or jax.ndarray."
|
||||
)
|
||||
|
||||
format = AnnotionFormat(format)
|
||||
format = AnnotationFormat(format)
|
||||
if annotations is not None:
|
||||
if format == AnnotionFormat.COCO_DETECTION and not valid_coco_detection_annotations(annotations):
|
||||
raise ValueError(
|
||||
"Invalid COCO detection annotations. Annotations must a dict (single image) of list of dicts "
|
||||
"(batch of images) with the following keys: `image_id` and `annotations`, with the latter "
|
||||
"being a list of annotations in the COCO format."
|
||||
)
|
||||
elif format == AnnotionFormat.COCO_PANOPTIC and not valid_coco_panoptic_annotations(annotations):
|
||||
raise ValueError(
|
||||
"Invalid COCO panoptic annotations. Annotations must a dict (single image) of list of dicts "
|
||||
"(batch of images) with the following keys: `image_id`, `file_name` and `segments_info`, with "
|
||||
"the latter being a list of annotations in the COCO format."
|
||||
)
|
||||
elif format not in SUPPORTED_ANNOTATION_FORMATS:
|
||||
raise ValueError(
|
||||
f"Unsupported annotation format: {format} must be one of {SUPPORTED_ANNOTATION_FORMATS}"
|
||||
)
|
||||
validate_annotations(format, SUPPORTED_ANNOTATION_FORMATS, annotations)
|
||||
|
||||
if (
|
||||
masks_path is not None
|
||||
and format == AnnotionFormat.COCO_PANOPTIC
|
||||
and format == AnnotationFormat.COCO_PANOPTIC
|
||||
and not isinstance(masks_path, (pathlib.Path, str))
|
||||
):
|
||||
raise ValueError(
|
||||
|
||||
@@ -36,6 +36,9 @@ from ...image_transforms import (
|
||||
from ...image_utils import (
|
||||
IMAGENET_DEFAULT_MEAN,
|
||||
IMAGENET_DEFAULT_STD,
|
||||
AnnotationFormat,
|
||||
AnnotationType,
|
||||
AnnotionFormat, # noqa: F401
|
||||
ChannelDimension,
|
||||
ImageInput,
|
||||
PILImageResampling,
|
||||
@@ -44,12 +47,10 @@ from ...image_utils import (
|
||||
is_scaled_image,
|
||||
make_list_of_images,
|
||||
to_numpy_array,
|
||||
valid_coco_detection_annotations,
|
||||
valid_coco_panoptic_annotations,
|
||||
valid_images,
|
||||
validate_annotations,
|
||||
)
|
||||
from ...utils import (
|
||||
ExplicitEnum,
|
||||
TensorType,
|
||||
is_flax_available,
|
||||
is_jax_tensor,
|
||||
@@ -79,15 +80,7 @@ if is_scipy_available():
|
||||
|
||||
logger = logging.get_logger(__name__) # pylint: disable=invalid-name
|
||||
|
||||
AnnotationType = Dict[str, Union[int, str, List[Dict]]]
|
||||
|
||||
|
||||
class AnnotionFormat(ExplicitEnum):
|
||||
COCO_DETECTION = "coco_detection"
|
||||
COCO_PANOPTIC = "coco_panoptic"
|
||||
|
||||
|
||||
SUPPORTED_ANNOTATION_FORMATS = (AnnotionFormat.COCO_DETECTION, AnnotionFormat.COCO_PANOPTIC)
|
||||
SUPPORTED_ANNOTATION_FORMATS = (AnnotationFormat.COCO_DETECTION, AnnotationFormat.COCO_PANOPTIC)
|
||||
|
||||
|
||||
def get_size_with_aspect_ratio(image_size, size, max_size=None) -> Tuple[int, int]:
|
||||
@@ -785,7 +778,7 @@ class DetrImageProcessor(BaseImageProcessor):
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
format: Union[str, AnnotionFormat] = AnnotionFormat.COCO_DETECTION,
|
||||
format: Union[str, AnnotationFormat] = AnnotationFormat.COCO_DETECTION,
|
||||
do_resize: bool = True,
|
||||
size: Dict[str, int] = None,
|
||||
resample: PILImageResampling = PILImageResampling.BILINEAR,
|
||||
@@ -842,7 +835,7 @@ class DetrImageProcessor(BaseImageProcessor):
|
||||
self,
|
||||
image: np.ndarray,
|
||||
target: Dict,
|
||||
format: Optional[AnnotionFormat] = None,
|
||||
format: Optional[AnnotationFormat] = None,
|
||||
return_segmentation_masks: bool = None,
|
||||
masks_path: Optional[Union[str, pathlib.Path]] = None,
|
||||
input_data_format: Optional[Union[str, ChannelDimension]] = None,
|
||||
@@ -852,12 +845,12 @@ class DetrImageProcessor(BaseImageProcessor):
|
||||
"""
|
||||
format = format if format is not None else self.format
|
||||
|
||||
if format == AnnotionFormat.COCO_DETECTION:
|
||||
if format == AnnotationFormat.COCO_DETECTION:
|
||||
return_segmentation_masks = False if return_segmentation_masks is None else return_segmentation_masks
|
||||
target = prepare_coco_detection_annotation(
|
||||
image, target, return_segmentation_masks, input_data_format=input_data_format
|
||||
)
|
||||
elif format == AnnotionFormat.COCO_PANOPTIC:
|
||||
elif format == AnnotationFormat.COCO_PANOPTIC:
|
||||
return_segmentation_masks = True if return_segmentation_masks is None else return_segmentation_masks
|
||||
target = prepare_coco_panoptic_annotation(
|
||||
image,
|
||||
@@ -1089,7 +1082,7 @@ class DetrImageProcessor(BaseImageProcessor):
|
||||
image_mean: Optional[Union[float, List[float]]] = None,
|
||||
image_std: Optional[Union[float, List[float]]] = None,
|
||||
do_pad: Optional[bool] = None,
|
||||
format: Optional[Union[str, AnnotionFormat]] = None,
|
||||
format: Optional[Union[str, AnnotationFormat]] = None,
|
||||
return_tensors: Optional[Union[TensorType, str]] = None,
|
||||
data_format: Union[str, ChannelDimension] = ChannelDimension.FIRST,
|
||||
input_data_format: Optional[Union[str, ChannelDimension]] = None,
|
||||
@@ -1135,7 +1128,7 @@ class DetrImageProcessor(BaseImageProcessor):
|
||||
Standard deviation to use when normalizing the image.
|
||||
do_pad (`bool`, *optional*, defaults to self.do_pad):
|
||||
Whether to pad the image.
|
||||
format (`str` or `AnnotionFormat`, *optional*, defaults to self.format):
|
||||
format (`str` or `AnnotationFormat`, *optional*, defaults to self.format):
|
||||
Format of the annotations.
|
||||
return_tensors (`str` or `TensorType`, *optional*, defaults to self.return_tensors):
|
||||
Type of tensors to return. If `None`, will return the list of images.
|
||||
@@ -1202,28 +1195,13 @@ class DetrImageProcessor(BaseImageProcessor):
|
||||
"torch.Tensor, tf.Tensor or jax.ndarray."
|
||||
)
|
||||
|
||||
format = AnnotionFormat(format)
|
||||
format = AnnotationFormat(format)
|
||||
if annotations is not None:
|
||||
if format == AnnotionFormat.COCO_DETECTION and not valid_coco_detection_annotations(annotations):
|
||||
raise ValueError(
|
||||
"Invalid COCO detection annotations. Annotations must a dict (single image) of list of dicts "
|
||||
"(batch of images) with the following keys: `image_id` and `annotations`, with the latter "
|
||||
"being a list of annotations in the COCO format."
|
||||
)
|
||||
elif format == AnnotionFormat.COCO_PANOPTIC and not valid_coco_panoptic_annotations(annotations):
|
||||
raise ValueError(
|
||||
"Invalid COCO panoptic annotations. Annotations must a dict (single image) of list of dicts "
|
||||
"(batch of images) with the following keys: `image_id`, `file_name` and `segments_info`, with "
|
||||
"the latter being a list of annotations in the COCO format."
|
||||
)
|
||||
elif format not in SUPPORTED_ANNOTATION_FORMATS:
|
||||
raise ValueError(
|
||||
f"Unsupported annotation format: {format} must be one of {SUPPORTED_ANNOTATION_FORMATS}"
|
||||
)
|
||||
validate_annotations(format, SUPPORTED_ANNOTATION_FORMATS, annotations)
|
||||
|
||||
if (
|
||||
masks_path is not None
|
||||
and format == AnnotionFormat.COCO_PANOPTIC
|
||||
and format == AnnotationFormat.COCO_PANOPTIC
|
||||
and not isinstance(masks_path, (pathlib.Path, str))
|
||||
):
|
||||
raise ValueError(
|
||||
|
||||
@@ -35,6 +35,9 @@ from ...image_transforms import (
|
||||
from ...image_utils import (
|
||||
IMAGENET_DEFAULT_MEAN,
|
||||
IMAGENET_DEFAULT_STD,
|
||||
AnnotationFormat,
|
||||
AnnotationType,
|
||||
AnnotionFormat, # noqa: F401
|
||||
ChannelDimension,
|
||||
ImageInput,
|
||||
PILImageResampling,
|
||||
@@ -43,12 +46,10 @@ from ...image_utils import (
|
||||
is_scaled_image,
|
||||
make_list_of_images,
|
||||
to_numpy_array,
|
||||
valid_coco_detection_annotations,
|
||||
valid_coco_panoptic_annotations,
|
||||
valid_images,
|
||||
validate_annotations,
|
||||
)
|
||||
from ...utils import (
|
||||
ExplicitEnum,
|
||||
TensorType,
|
||||
is_flax_available,
|
||||
is_jax_tensor,
|
||||
@@ -77,15 +78,7 @@ if is_scipy_available():
|
||||
|
||||
logger = logging.get_logger(__name__)
|
||||
|
||||
AnnotationType = Dict[str, Union[int, str, List[Dict]]]
|
||||
|
||||
|
||||
class AnnotionFormat(ExplicitEnum):
|
||||
COCO_DETECTION = "coco_detection"
|
||||
COCO_PANOPTIC = "coco_panoptic"
|
||||
|
||||
|
||||
SUPPORTED_ANNOTATION_FORMATS = (AnnotionFormat.COCO_DETECTION, AnnotionFormat.COCO_PANOPTIC)
|
||||
SUPPORTED_ANNOTATION_FORMATS = (AnnotationFormat.COCO_DETECTION, AnnotationFormat.COCO_PANOPTIC)
|
||||
|
||||
|
||||
# Copied from transformers.models.detr.image_processing_detr.get_max_height_width
|
||||
@@ -712,7 +705,7 @@ class YolosImageProcessor(BaseImageProcessor):
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
format: Union[str, AnnotionFormat] = AnnotionFormat.COCO_DETECTION,
|
||||
format: Union[str, AnnotationFormat] = AnnotationFormat.COCO_DETECTION,
|
||||
do_resize: bool = True,
|
||||
size: Dict[str, int] = None,
|
||||
resample: PILImageResampling = PILImageResampling.BILINEAR,
|
||||
@@ -771,7 +764,7 @@ class YolosImageProcessor(BaseImageProcessor):
|
||||
self,
|
||||
image: np.ndarray,
|
||||
target: Dict,
|
||||
format: Optional[AnnotionFormat] = None,
|
||||
format: Optional[AnnotationFormat] = None,
|
||||
return_segmentation_masks: bool = None,
|
||||
masks_path: Optional[Union[str, pathlib.Path]] = None,
|
||||
input_data_format: Optional[Union[str, ChannelDimension]] = None,
|
||||
@@ -781,12 +774,12 @@ class YolosImageProcessor(BaseImageProcessor):
|
||||
"""
|
||||
format = format if format is not None else self.format
|
||||
|
||||
if format == AnnotionFormat.COCO_DETECTION:
|
||||
if format == AnnotationFormat.COCO_DETECTION:
|
||||
return_segmentation_masks = False if return_segmentation_masks is None else return_segmentation_masks
|
||||
target = prepare_coco_detection_annotation(
|
||||
image, target, return_segmentation_masks, input_data_format=input_data_format
|
||||
)
|
||||
elif format == AnnotionFormat.COCO_PANOPTIC:
|
||||
elif format == AnnotationFormat.COCO_PANOPTIC:
|
||||
return_segmentation_masks = True if return_segmentation_masks is None else return_segmentation_masks
|
||||
target = prepare_coco_panoptic_annotation(
|
||||
image,
|
||||
@@ -1026,7 +1019,7 @@ class YolosImageProcessor(BaseImageProcessor):
|
||||
image_mean: Optional[Union[float, List[float]]] = None,
|
||||
image_std: Optional[Union[float, List[float]]] = None,
|
||||
do_pad: Optional[bool] = None,
|
||||
format: Optional[Union[str, AnnotionFormat]] = None,
|
||||
format: Optional[Union[str, AnnotationFormat]] = None,
|
||||
return_tensors: Optional[Union[TensorType, str]] = None,
|
||||
data_format: Union[str, ChannelDimension] = ChannelDimension.FIRST,
|
||||
input_data_format: Optional[Union[str, ChannelDimension]] = None,
|
||||
@@ -1040,12 +1033,12 @@ class YolosImageProcessor(BaseImageProcessor):
|
||||
Image or batch of images to preprocess. Expects a single or batch of images with pixel values ranging
|
||||
from 0 to 255. If passing in images with pixel values between 0 and 1, set `do_rescale=False`.
|
||||
annotations (`AnnotationType` or `List[AnnotationType]`, *optional*):
|
||||
List of annotations associated with the image or batch of images. If annotionation is for object
|
||||
List of annotations associated with the image or batch of images. If annotation is for object
|
||||
detection, the annotations should be a dictionary with the following keys:
|
||||
- "image_id" (`int`): The image id.
|
||||
- "annotations" (`List[Dict]`): List of annotations for an image. Each annotation should be a
|
||||
dictionary. An image can have no annotations, in which case the list should be empty.
|
||||
If annotionation is for segmentation, the annotations should be a dictionary with the following keys:
|
||||
If annotation is for segmentation, the annotations should be a dictionary with the following keys:
|
||||
- "image_id" (`int`): The image id.
|
||||
- "segments_info" (`List[Dict]`): List of segments for an image. Each segment should be a dictionary.
|
||||
An image can have no segments, in which case the list should be empty.
|
||||
@@ -1072,7 +1065,7 @@ class YolosImageProcessor(BaseImageProcessor):
|
||||
Standard deviation to use when normalizing the image.
|
||||
do_pad (`bool`, *optional*, defaults to self.do_pad):
|
||||
Whether to pad the image.
|
||||
format (`str` or `AnnotionFormat`, *optional*, defaults to self.format):
|
||||
format (`str` or `AnnotationFormat`, *optional*, defaults to self.format):
|
||||
Format of the annotations.
|
||||
return_tensors (`str` or `TensorType`, *optional*, defaults to self.return_tensors):
|
||||
Type of tensors to return. If `None`, will return the list of images.
|
||||
@@ -1136,28 +1129,13 @@ class YolosImageProcessor(BaseImageProcessor):
|
||||
"torch.Tensor, tf.Tensor or jax.ndarray."
|
||||
)
|
||||
|
||||
format = AnnotionFormat(format)
|
||||
format = AnnotationFormat(format)
|
||||
if annotations is not None:
|
||||
if format == AnnotionFormat.COCO_DETECTION and not valid_coco_detection_annotations(annotations):
|
||||
raise ValueError(
|
||||
"Invalid COCO detection annotations. Annotations must a dict (single image) of list of dicts "
|
||||
"(batch of images) with the following keys: `image_id` and `annotations`, with the latter "
|
||||
"being a list of annotations in the COCO format."
|
||||
)
|
||||
elif format == AnnotionFormat.COCO_PANOPTIC and not valid_coco_panoptic_annotations(annotations):
|
||||
raise ValueError(
|
||||
"Invalid COCO panoptic annotations. Annotations must a dict (single image) of list of dicts "
|
||||
"(batch of images) with the following keys: `image_id`, `file_name` and `segments_info`, with "
|
||||
"the latter being a list of annotations in the COCO format."
|
||||
)
|
||||
elif format not in SUPPORTED_ANNOTATION_FORMATS:
|
||||
raise ValueError(
|
||||
f"Unsupported annotation format: {format} must be one of {SUPPORTED_ANNOTATION_FORMATS}"
|
||||
)
|
||||
validate_annotations(format, SUPPORTED_ANNOTATION_FORMATS, annotations)
|
||||
|
||||
if (
|
||||
masks_path is not None
|
||||
and format == AnnotionFormat.COCO_PANOPTIC
|
||||
and format == AnnotationFormat.COCO_PANOPTIC
|
||||
and not isinstance(masks_path, (pathlib.Path, str))
|
||||
):
|
||||
raise ValueError(
|
||||
|
||||
@@ -21,7 +21,7 @@ import unittest
|
||||
from transformers.testing_utils import require_torch, require_vision, slow
|
||||
from transformers.utils import is_torch_available, is_vision_available
|
||||
|
||||
from ...test_image_processing_common import ImageProcessingTestMixin, prepare_image_inputs
|
||||
from ...test_image_processing_common import AnnotationFormatTestMixin, ImageProcessingTestMixin, prepare_image_inputs
|
||||
|
||||
|
||||
if is_torch_available():
|
||||
@@ -127,7 +127,7 @@ class ConditionalDetrImageProcessingTester(unittest.TestCase):
|
||||
|
||||
@require_torch
|
||||
@require_vision
|
||||
class ConditionalDetrImageProcessingTest(ImageProcessingTestMixin, unittest.TestCase):
|
||||
class ConditionalDetrImageProcessingTest(AnnotationFormatTestMixin, ImageProcessingTestMixin, unittest.TestCase):
|
||||
image_processing_class = ConditionalDetrImageProcessor if is_vision_available() else None
|
||||
|
||||
def setUp(self):
|
||||
|
||||
@@ -21,7 +21,7 @@ import unittest
|
||||
from transformers.testing_utils import require_torch, require_vision, slow
|
||||
from transformers.utils import is_torch_available, is_vision_available
|
||||
|
||||
from ...test_image_processing_common import ImageProcessingTestMixin, prepare_image_inputs
|
||||
from ...test_image_processing_common import AnnotationFormatTestMixin, ImageProcessingTestMixin, prepare_image_inputs
|
||||
|
||||
|
||||
if is_torch_available():
|
||||
@@ -127,7 +127,7 @@ class DeformableDetrImageProcessingTester(unittest.TestCase):
|
||||
|
||||
@require_torch
|
||||
@require_vision
|
||||
class DeformableDetrImageProcessingTest(ImageProcessingTestMixin, unittest.TestCase):
|
||||
class DeformableDetrImageProcessingTest(AnnotationFormatTestMixin, ImageProcessingTestMixin, unittest.TestCase):
|
||||
image_processing_class = DeformableDetrImageProcessor if is_vision_available() else None
|
||||
|
||||
def setUp(self):
|
||||
|
||||
@@ -21,7 +21,7 @@ import unittest
|
||||
from transformers.testing_utils import require_torch, require_vision, slow
|
||||
from transformers.utils import is_torch_available, is_vision_available
|
||||
|
||||
from ...test_image_processing_common import ImageProcessingTestMixin, prepare_image_inputs
|
||||
from ...test_image_processing_common import AnnotationFormatTestMixin, ImageProcessingTestMixin, prepare_image_inputs
|
||||
|
||||
|
||||
if is_torch_available():
|
||||
@@ -127,7 +127,7 @@ class DetaImageProcessingTester(unittest.TestCase):
|
||||
|
||||
@require_torch
|
||||
@require_vision
|
||||
class DetaImageProcessingTest(ImageProcessingTestMixin, unittest.TestCase):
|
||||
class DetaImageProcessingTest(AnnotationFormatTestMixin, ImageProcessingTestMixin, unittest.TestCase):
|
||||
image_processing_class = DetaImageProcessor if is_vision_available() else None
|
||||
|
||||
def setUp(self):
|
||||
|
||||
@@ -21,7 +21,7 @@ import unittest
|
||||
from transformers.testing_utils import require_torch, require_vision, slow
|
||||
from transformers.utils import is_torch_available, is_vision_available
|
||||
|
||||
from ...test_image_processing_common import ImageProcessingTestMixin, prepare_image_inputs
|
||||
from ...test_image_processing_common import AnnotationFormatTestMixin, ImageProcessingTestMixin, prepare_image_inputs
|
||||
|
||||
|
||||
if is_torch_available():
|
||||
@@ -127,7 +127,7 @@ class DetrImageProcessingTester(unittest.TestCase):
|
||||
|
||||
@require_torch
|
||||
@require_vision
|
||||
class DetrImageProcessingTest(ImageProcessingTestMixin, unittest.TestCase):
|
||||
class DetrImageProcessingTest(AnnotationFormatTestMixin, ImageProcessingTestMixin, unittest.TestCase):
|
||||
image_processing_class = DetrImageProcessor if is_vision_available() else None
|
||||
|
||||
def setUp(self):
|
||||
@@ -159,6 +159,63 @@ class DetrImageProcessingTest(ImageProcessingTestMixin, unittest.TestCase):
|
||||
self.assertEqual(image_processor.size, {"shortest_edge": 42, "longest_edge": 84})
|
||||
self.assertEqual(image_processor.do_pad, False)
|
||||
|
||||
def test_should_raise_if_annotation_format_invalid(self):
    # A processor configured with an unknown `format` value must raise a `ValueError`
    # when it is called with images and annotations.
    image_processor_dict = self.image_processor_tester.prepare_image_processor_dict()

    with open("./tests/fixtures/tests_samples/COCO/coco_annotations.txt", "r") as f:
        detection_target = json.load(f)

    annotations = {"image_id": 39769, "annotations": detection_target}

    image_processor_params = {**image_processor_dict, "format": "_INVALID_FORMAT_"}
    image_processor = self.image_processing_class(**image_processor_params)

    # The call below is expected to raise, so open the image in a context manager to
    # guarantee that its file handle is released instead of waiting for garbage collection.
    with Image.open("./tests/fixtures/tests_samples/COCO/000000039769.png") as image:
        with self.assertRaises(ValueError) as e:
            image_processor(images=image, annotations=annotations, return_tensors="pt")

    self.assertTrue(str(e.exception).startswith("_INVALID_FORMAT_ is not a valid AnnotationFormat"))
|
||||
|
||||
def test_valid_coco_detection_annotations(self):
    # Illustrates which image/annotation combinations the processor accepts for COCO
    # detection annotations, and which ones it rejects with a `ValueError`.

    # prepare target
    with open("./tests/fixtures/tests_samples/COCO/coco_annotations.txt", "r") as f:
        target = json.load(f)

    params = {"image_id": 39769, "annotations": target}

    # encode them
    image_processing = DetrImageProcessor.from_pretrained("facebook/detr-resnet-50")

    # keep the image inside a context manager so its file handle is always released
    with Image.open("./tests/fixtures/tests_samples/COCO/000000039769.png") as image:
        # legal encodings (single image)
        image_processing(images=image, annotations=params, return_tensors="pt")
        image_processing(images=image, annotations=[params], return_tensors="pt")

        # legal encodings (batch of one image)
        image_processing(images=[image], annotations=params, return_tensors="pt")
        image_processing(images=[image], annotations=[params], return_tensors="pt")

        # legal encoding (batch of more than one image)
        n = 5
        image_processing(images=[image] * n, annotations=[params] * n, return_tensors="pt")

        # example of an illegal encoding (missing the 'image_id' key)
        with self.assertRaises(ValueError) as e:
            image_processing(images=image, annotations={"annotations": target}, return_tensors="pt")

        self.assertTrue(str(e.exception).startswith("Invalid COCO detection annotations"))

        # example of an illegal encoding (unequal lengths of images and annotations)
        with self.assertRaises(ValueError) as e:
            image_processing(images=[image] * n, annotations=[params] * (n - 1), return_tensors="pt")

        # `assertEqual` reports both strings on failure, unlike `assertTrue(a == b)`
        self.assertEqual(str(e.exception), "The number of images (5) and annotations (4) do not match.")
|
||||
|
||||
@slow
|
||||
def test_call_pytorch_with_coco_detection_annotations(self):
|
||||
# prepare image and target
|
||||
|
||||
@@ -21,7 +21,7 @@ import unittest
|
||||
from transformers.testing_utils import require_torch, require_vision, slow
|
||||
from transformers.utils import is_torch_available, is_vision_available
|
||||
|
||||
from ...test_image_processing_common import ImageProcessingTestMixin, prepare_image_inputs
|
||||
from ...test_image_processing_common import AnnotationFormatTestMixin, ImageProcessingTestMixin, prepare_image_inputs
|
||||
|
||||
|
||||
if is_torch_available():
|
||||
@@ -127,7 +127,7 @@ class YolosImageProcessingTester(unittest.TestCase):
|
||||
|
||||
@require_torch
|
||||
@require_vision
|
||||
class YolosImageProcessingTest(ImageProcessingTestMixin, unittest.TestCase):
|
||||
class YolosImageProcessingTest(AnnotationFormatTestMixin, ImageProcessingTestMixin, unittest.TestCase):
|
||||
image_processing_class = YolosImageProcessor if is_vision_available() else None
|
||||
|
||||
def setUp(self):
|
||||
|
||||
@@ -15,8 +15,11 @@
|
||||
|
||||
import json
|
||||
import os
|
||||
import pathlib
|
||||
import tempfile
|
||||
|
||||
from transformers import BatchFeature
|
||||
from transformers.image_utils import AnnotationFormat, AnnotionFormat
|
||||
from transformers.testing_utils import check_json_file_has_correct_format, require_torch, require_vision
|
||||
from transformers.utils import is_torch_available, is_vision_available
|
||||
|
||||
@@ -285,3 +288,81 @@ class ImageProcessingTestMixin:
|
||||
self.assertEqual(
|
||||
tuple(encoded_images.shape), (self.image_processor_tester.batch_size, *expected_output_image_shape)
|
||||
)
|
||||
|
||||
|
||||
class AnnotationFormatTestMixin:
    """
    Mixin contributing a single test which asserts that the to-be-deprecated `AnnotionFormat` enum is, for the
    time being, still accepted by image processors alongside `AnnotationFormat` and the plain string values.
    """

    def test_processor_can_use_legacy_annotation_format(self):
        base_kwargs = self.image_processor_tester.prepare_image_processor_dict()
        coco_dir = pathlib.Path(__file__).parent / "fixtures" / "tests_samples" / "COCO"

        # inputs for the object detection flavour of the annotations
        with open(coco_dir / "coco_annotations.txt", "r") as f:
            detection_target = json.load(f)

        detection_params = {
            "images": Image.open(coco_dir / "000000039769.png"),
            "annotations": {"image_id": 39769, "annotations": detection_target},
            "return_tensors": "pt",
        }

        # inputs for the panoptic flavour of the annotations
        with open(coco_dir / "coco_panoptic_annotations.txt", "r") as f:
            panoptic_target = json.load(f)

        panoptic_params = {
            "images": Image.open(coco_dir / "000000039769.png"),
            "annotations": {
                "file_name": "000000039769.png",
                "image_id": 39769,
                "segments_info": panoptic_target,
            },
            "return_tensors": "pt",
            "masks_path": coco_dir / "coco_panoptic",
        }

        # every way of spelling a format: raw string, legacy enum, current enum
        format_pairs = (
            ("coco_detection", "coco_panoptic"),
            (AnnotionFormat.COCO_DETECTION, AnnotionFormat.COCO_PANOPTIC),
            (AnnotationFormat.COCO_DETECTION, AnnotationFormat.COCO_PANOPTIC),
        )
        test_cases = []
        for detection_format, panoptic_format in format_pairs:
            test_cases.append((detection_format, detection_params))
            test_cases.append((panoptic_format, panoptic_params))

        def _compare(left, right) -> None:
            # recursively walk mappings and lists; tensors are compared with a tolerance,
            # strings exactly, and values of any other type are not checked
            if isinstance(left, (dict, BatchFeature)):
                self.assertEqual(left.keys(), right.keys())
                for key, value in left.items():
                    _compare(value, right[key])
                return
            if isinstance(left, list):
                self.assertEqual(len(left), len(right))
                for position, item in enumerate(left):
                    _compare(item, right[position])
                return
            if isinstance(left, torch.Tensor):
                self.assertTrue(torch.allclose(left, right, atol=1e-3))
                return
            if isinstance(left, str):
                self.assertEqual(left, right)

        for annotation_format, params in test_cases:
            with self.subTest(annotation_format):
                image_processor_first = self.image_processing_class(**{**base_kwargs, "format": annotation_format})

                # round-trip the processor through `save_pretrained` / `from_pretrained`
                with tempfile.TemporaryDirectory() as tmpdirname:
                    image_processor_first.save_pretrained(tmpdirname)
                    image_processor_second = self.image_processing_class.from_pretrained(tmpdirname)

                # the serialized config must contain 'format' and be identical
                # for the original and the reloaded processor
                first_dict = image_processor_first.to_dict()
                self.assertIn("format", first_dict)
                self.assertEqual(image_processor_second.to_dict(), first_dict)

                # both processors must produce matching encodings for the same inputs
                first_encoding = image_processor_first(**params)
                second_encoding = image_processor_second(**params)
                _compare(first_encoding, second_encoding)
|
||||
|
||||
Reference in New Issue
Block a user