Fix image segmentation pipeline errors, resolve backward compatibility issues (#19768)
* Fix panoptic segmentation and pipeline * Update ImageSegmentationPipeline tests and reenable test_small_model_pt * Resolve backward compatibility issues
This commit is contained in:
@@ -190,13 +190,13 @@ def remove_low_and_no_objects(masks, scores, labels, object_mask_threshold, num_
|
|||||||
return masks[to_keep], scores[to_keep], labels[to_keep]
|
return masks[to_keep], scores[to_keep], labels[to_keep]
|
||||||
|
|
||||||
|
|
||||||
def check_segment_validity(mask_labels, mask_probs, k, overlap_mask_area_threshold=0.8):
|
def check_segment_validity(mask_labels, mask_probs, k, mask_threshold=0.5, overlap_mask_area_threshold=0.8):
|
||||||
# Get the mask associated with the k class
|
# Get the mask associated with the k class
|
||||||
mask_k = mask_labels == k
|
mask_k = mask_labels == k
|
||||||
mask_k_area = mask_k.sum()
|
mask_k_area = mask_k.sum()
|
||||||
|
|
||||||
# Compute the area of all the stuff in query k
|
# Compute the area of all the stuff in query k
|
||||||
original_area = (mask_probs[k] >= 0.5).sum()
|
original_area = (mask_probs[k] >= mask_threshold).sum()
|
||||||
mask_exists = mask_k_area > 0 and original_area > 0
|
mask_exists = mask_k_area > 0 and original_area > 0
|
||||||
|
|
||||||
# Eliminate disconnected tiny segments
|
# Eliminate disconnected tiny segments
|
||||||
@@ -212,6 +212,7 @@ def compute_segments(
|
|||||||
mask_probs,
|
mask_probs,
|
||||||
pred_scores,
|
pred_scores,
|
||||||
pred_labels,
|
pred_labels,
|
||||||
|
mask_threshold: float = 0.5,
|
||||||
overlap_mask_area_threshold: float = 0.8,
|
overlap_mask_area_threshold: float = 0.8,
|
||||||
label_ids_to_fuse: Optional[Set[int]] = None,
|
label_ids_to_fuse: Optional[Set[int]] = None,
|
||||||
target_size: Tuple[int, int] = None,
|
target_size: Tuple[int, int] = None,
|
||||||
@@ -240,7 +241,9 @@ def compute_segments(
|
|||||||
should_fuse = pred_class in label_ids_to_fuse
|
should_fuse = pred_class in label_ids_to_fuse
|
||||||
|
|
||||||
# Check if mask exists and large enough to be a segment
|
# Check if mask exists and large enough to be a segment
|
||||||
mask_exists, mask_k = check_segment_validity(mask_labels, mask_probs, k, overlap_mask_area_threshold)
|
mask_exists, mask_k = check_segment_validity(
|
||||||
|
mask_labels, mask_probs, k, mask_threshold, overlap_mask_area_threshold
|
||||||
|
)
|
||||||
|
|
||||||
if mask_exists:
|
if mask_exists:
|
||||||
if pred_class in stuff_memory_list:
|
if pred_class in stuff_memory_list:
|
||||||
@@ -1210,6 +1213,7 @@ class DetrFeatureExtractor(FeatureExtractionMixin, ImageFeatureExtractionMixin):
|
|||||||
self,
|
self,
|
||||||
outputs,
|
outputs,
|
||||||
threshold: float = 0.5,
|
threshold: float = 0.5,
|
||||||
|
mask_threshold: float = 0.5,
|
||||||
overlap_mask_area_threshold: float = 0.8,
|
overlap_mask_area_threshold: float = 0.8,
|
||||||
target_sizes: Optional[List[Tuple[int, int]]] = None,
|
target_sizes: Optional[List[Tuple[int, int]]] = None,
|
||||||
return_coco_annotation: Optional[bool] = False,
|
return_coco_annotation: Optional[bool] = False,
|
||||||
@@ -1221,6 +1225,8 @@ class DetrFeatureExtractor(FeatureExtractionMixin, ImageFeatureExtractionMixin):
|
|||||||
Raw outputs of the model.
|
Raw outputs of the model.
|
||||||
threshold (`float`, *optional*, defaults to 0.5):
|
threshold (`float`, *optional*, defaults to 0.5):
|
||||||
The probability score threshold to keep predicted instance masks.
|
The probability score threshold to keep predicted instance masks.
|
||||||
|
mask_threshold (`float`, *optional*, defaults to 0.5):
|
||||||
|
Threshold to use when turning the predicted masks into binary values.
|
||||||
overlap_mask_area_threshold (`float`, *optional*, defaults to 0.8):
|
overlap_mask_area_threshold (`float`, *optional*, defaults to 0.8):
|
||||||
The overlap mask area threshold to merge or discard small disconnected parts within each binary
|
The overlap mask area threshold to merge or discard small disconnected parts within each binary
|
||||||
instance mask.
|
instance mask.
|
||||||
@@ -1272,6 +1278,7 @@ class DetrFeatureExtractor(FeatureExtractionMixin, ImageFeatureExtractionMixin):
|
|||||||
mask_probs_item,
|
mask_probs_item,
|
||||||
pred_scores_item,
|
pred_scores_item,
|
||||||
pred_labels_item,
|
pred_labels_item,
|
||||||
|
mask_threshold,
|
||||||
overlap_mask_area_threshold,
|
overlap_mask_area_threshold,
|
||||||
target_size,
|
target_size,
|
||||||
)
|
)
|
||||||
@@ -1287,6 +1294,7 @@ class DetrFeatureExtractor(FeatureExtractionMixin, ImageFeatureExtractionMixin):
|
|||||||
self,
|
self,
|
||||||
outputs,
|
outputs,
|
||||||
threshold: float = 0.5,
|
threshold: float = 0.5,
|
||||||
|
mask_threshold: float = 0.5,
|
||||||
overlap_mask_area_threshold: float = 0.8,
|
overlap_mask_area_threshold: float = 0.8,
|
||||||
label_ids_to_fuse: Optional[Set[int]] = None,
|
label_ids_to_fuse: Optional[Set[int]] = None,
|
||||||
target_sizes: Optional[List[Tuple[int, int]]] = None,
|
target_sizes: Optional[List[Tuple[int, int]]] = None,
|
||||||
@@ -1299,6 +1307,8 @@ class DetrFeatureExtractor(FeatureExtractionMixin, ImageFeatureExtractionMixin):
|
|||||||
The outputs from [`DetrForSegmentation`].
|
The outputs from [`DetrForSegmentation`].
|
||||||
threshold (`float`, *optional*, defaults to 0.5):
|
threshold (`float`, *optional*, defaults to 0.5):
|
||||||
The probability score threshold to keep predicted instance masks.
|
The probability score threshold to keep predicted instance masks.
|
||||||
|
mask_threshold (`float`, *optional*, defaults to 0.5):
|
||||||
|
Threshold to use when turning the predicted masks into binary values.
|
||||||
overlap_mask_area_threshold (`float`, *optional*, defaults to 0.8):
|
overlap_mask_area_threshold (`float`, *optional*, defaults to 0.8):
|
||||||
The overlap mask area threshold to merge or discard small disconnected parts within each binary
|
The overlap mask area threshold to merge or discard small disconnected parts within each binary
|
||||||
instance mask.
|
instance mask.
|
||||||
@@ -1359,6 +1369,7 @@ class DetrFeatureExtractor(FeatureExtractionMixin, ImageFeatureExtractionMixin):
|
|||||||
mask_probs_item,
|
mask_probs_item,
|
||||||
pred_scores_item,
|
pred_scores_item,
|
||||||
pred_labels_item,
|
pred_labels_item,
|
||||||
|
mask_threshold,
|
||||||
overlap_mask_area_threshold,
|
overlap_mask_area_threshold,
|
||||||
label_ids_to_fuse,
|
label_ids_to_fuse,
|
||||||
target_size,
|
target_size,
|
||||||
|
|||||||
@@ -37,15 +37,14 @@ if is_torch_available():
|
|||||||
logger = logging.get_logger(__name__)
|
logger = logging.get_logger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
# Copied from transformers.models.detr.feature_extraction_detr.binary_mask_to_rle
|
||||||
def binary_mask_to_rle(mask):
|
def binary_mask_to_rle(mask):
|
||||||
"""
|
"""
|
||||||
Converts given binary mask of shape (height, width) to the run-length encoding (RLE) format.
|
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
|
Converts given binary mask of shape (height, width) to the run-length encoding (RLE) format.
|
||||||
mask (`torch.Tensor` or `numpy.array`):
|
mask (`torch.Tensor` or `numpy.array`):
|
||||||
A binary mask tensor of shape `(height, width)` where 0 denotes background and 1 denotes the target
|
A binary mask tensor of shape `(height, width)` where 0 denotes background and 1 denotes the target
|
||||||
segment_id or class_id.
|
segment_id or class_id.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
`List`: Run-length encoded list of the binary mask. Refer to COCO API for more information about the RLE
|
`List`: Run-length encoded list of the binary mask. Refer to COCO API for more information about the RLE
|
||||||
format.
|
format.
|
||||||
@@ -60,6 +59,7 @@ def binary_mask_to_rle(mask):
|
|||||||
return [x for x in runs]
|
return [x for x in runs]
|
||||||
|
|
||||||
|
|
||||||
|
# Copied from transformers.models.detr.feature_extraction_detr.convert_segmentation_to_rle
|
||||||
def convert_segmentation_to_rle(segmentation):
|
def convert_segmentation_to_rle(segmentation):
|
||||||
"""
|
"""
|
||||||
Converts given segmentation map of shape (height, width) to the run-length encoding (RLE) format.
|
Converts given segmentation map of shape (height, width) to the run-length encoding (RLE) format.
|
||||||
@@ -67,7 +67,6 @@ def convert_segmentation_to_rle(segmentation):
|
|||||||
Args:
|
Args:
|
||||||
segmentation (`torch.Tensor` or `numpy.array`):
|
segmentation (`torch.Tensor` or `numpy.array`):
|
||||||
A segmentation map of shape `(height, width)` where each value denotes a segment or class id.
|
A segmentation map of shape `(height, width)` where each value denotes a segment or class id.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
`List[List]`: A list of lists, where each list is the run-length encoding of a segment / class id.
|
`List[List]`: A list of lists, where each list is the run-length encoding of a segment / class id.
|
||||||
"""
|
"""
|
||||||
@@ -82,6 +81,7 @@ def convert_segmentation_to_rle(segmentation):
|
|||||||
return run_length_encodings
|
return run_length_encodings
|
||||||
|
|
||||||
|
|
||||||
|
# Copied from transformers.models.detr.feature_extraction_detr.remove_low_and_no_objects
|
||||||
def remove_low_and_no_objects(masks, scores, labels, object_mask_threshold, num_labels):
|
def remove_low_and_no_objects(masks, scores, labels, object_mask_threshold, num_labels):
|
||||||
"""
|
"""
|
||||||
Binarize the given masks using `object_mask_threshold`, it returns the associated values of `masks`, `scores` and
|
Binarize the given masks using `object_mask_threshold`, it returns the associated values of `masks`, `scores` and
|
||||||
@@ -96,10 +96,8 @@ def remove_low_and_no_objects(masks, scores, labels, object_mask_threshold, num_
|
|||||||
A tensor of shape `(num_queries)`.
|
A tensor of shape `(num_queries)`.
|
||||||
object_mask_threshold (`float`):
|
object_mask_threshold (`float`):
|
||||||
A number between 0 and 1 used to binarize the masks.
|
A number between 0 and 1 used to binarize the masks.
|
||||||
|
|
||||||
Raises:
|
Raises:
|
||||||
`ValueError`: Raised when the first dimension doesn't match in all input tensors.
|
`ValueError`: Raised when the first dimension doesn't match in all input tensors.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
`Tuple[`torch.Tensor`, `torch.Tensor`, `torch.Tensor`]`: The `masks`, `scores` and `labels` without the region
|
`Tuple[`torch.Tensor`, `torch.Tensor`, `torch.Tensor`]`: The `masks`, `scores` and `labels` without the region
|
||||||
< `object_mask_threshold`.
|
< `object_mask_threshold`.
|
||||||
@@ -108,16 +106,18 @@ def remove_low_and_no_objects(masks, scores, labels, object_mask_threshold, num_
|
|||||||
raise ValueError("mask, scores and labels must have the same shape!")
|
raise ValueError("mask, scores and labels must have the same shape!")
|
||||||
|
|
||||||
to_keep = labels.ne(num_labels) & (scores > object_mask_threshold)
|
to_keep = labels.ne(num_labels) & (scores > object_mask_threshold)
|
||||||
|
|
||||||
return masks[to_keep], scores[to_keep], labels[to_keep]
|
return masks[to_keep], scores[to_keep], labels[to_keep]
|
||||||
|
|
||||||
|
|
||||||
def check_segment_validity(mask_labels, mask_probs, k, overlap_mask_area_threshold=0.8):
|
# Copied from transformers.models.detr.feature_extraction_detr.check_segment_validity
|
||||||
|
def check_segment_validity(mask_labels, mask_probs, k, mask_threshold=0.5, overlap_mask_area_threshold=0.8):
|
||||||
# Get the mask associated with the k class
|
# Get the mask associated with the k class
|
||||||
mask_k = mask_labels == k
|
mask_k = mask_labels == k
|
||||||
mask_k_area = mask_k.sum()
|
mask_k_area = mask_k.sum()
|
||||||
|
|
||||||
# Compute the area of all the stuff in query k
|
# Compute the area of all the stuff in query k
|
||||||
original_area = (mask_probs[k] >= 0.5).sum()
|
original_area = (mask_probs[k] >= mask_threshold).sum()
|
||||||
mask_exists = mask_k_area > 0 and original_area > 0
|
mask_exists = mask_k_area > 0 and original_area > 0
|
||||||
|
|
||||||
# Eliminate disconnected tiny segments
|
# Eliminate disconnected tiny segments
|
||||||
@@ -129,10 +129,12 @@ def check_segment_validity(mask_labels, mask_probs, k, overlap_mask_area_thresho
|
|||||||
return mask_exists, mask_k
|
return mask_exists, mask_k
|
||||||
|
|
||||||
|
|
||||||
|
# Copied from transformers.models.detr.feature_extraction_detr.compute_segments
|
||||||
def compute_segments(
|
def compute_segments(
|
||||||
mask_probs,
|
mask_probs,
|
||||||
pred_scores,
|
pred_scores,
|
||||||
pred_labels,
|
pred_labels,
|
||||||
|
mask_threshold: float = 0.5,
|
||||||
overlap_mask_area_threshold: float = 0.8,
|
overlap_mask_area_threshold: float = 0.8,
|
||||||
label_ids_to_fuse: Optional[Set[int]] = None,
|
label_ids_to_fuse: Optional[Set[int]] = None,
|
||||||
target_size: Tuple[int, int] = None,
|
target_size: Tuple[int, int] = None,
|
||||||
@@ -144,7 +146,9 @@ def compute_segments(
|
|||||||
segments: List[Dict] = []
|
segments: List[Dict] = []
|
||||||
|
|
||||||
if target_size is not None:
|
if target_size is not None:
|
||||||
mask_probs = interpolate(mask_probs.unsqueeze(0), size=target_size, mode="bilinear", align_corners=False)[0]
|
mask_probs = nn.functional.interpolate(
|
||||||
|
mask_probs.unsqueeze(0), size=target_size, mode="bilinear", align_corners=False
|
||||||
|
)[0]
|
||||||
|
|
||||||
current_segment_id = 0
|
current_segment_id = 0
|
||||||
|
|
||||||
@@ -159,7 +163,9 @@ def compute_segments(
|
|||||||
should_fuse = pred_class in label_ids_to_fuse
|
should_fuse = pred_class in label_ids_to_fuse
|
||||||
|
|
||||||
# Check if mask exists and large enough to be a segment
|
# Check if mask exists and large enough to be a segment
|
||||||
mask_exists, mask_k = check_segment_validity(mask_labels, mask_probs, k, overlap_mask_area_threshold)
|
mask_exists, mask_k = check_segment_validity(
|
||||||
|
mask_labels, mask_probs, k, mask_threshold, overlap_mask_area_threshold
|
||||||
|
)
|
||||||
|
|
||||||
if mask_exists:
|
if mask_exists:
|
||||||
if pred_class in stuff_memory_list:
|
if pred_class in stuff_memory_list:
|
||||||
@@ -722,6 +728,7 @@ class MaskFormerFeatureExtractor(FeatureExtractionMixin, ImageFeatureExtractionM
|
|||||||
self,
|
self,
|
||||||
outputs,
|
outputs,
|
||||||
threshold: float = 0.5,
|
threshold: float = 0.5,
|
||||||
|
mask_threshold: float = 0.5,
|
||||||
overlap_mask_area_threshold: float = 0.8,
|
overlap_mask_area_threshold: float = 0.8,
|
||||||
target_sizes: Optional[List[Tuple[int, int]]] = None,
|
target_sizes: Optional[List[Tuple[int, int]]] = None,
|
||||||
return_coco_annotation: Optional[bool] = False,
|
return_coco_annotation: Optional[bool] = False,
|
||||||
@@ -735,6 +742,8 @@ class MaskFormerFeatureExtractor(FeatureExtractionMixin, ImageFeatureExtractionM
|
|||||||
Raw outputs of the model.
|
Raw outputs of the model.
|
||||||
threshold (`float`, *optional*, defaults to 0.5):
|
threshold (`float`, *optional*, defaults to 0.5):
|
||||||
The probability score threshold to keep predicted instance masks.
|
The probability score threshold to keep predicted instance masks.
|
||||||
|
mask_threshold (`float`, *optional*, defaults to 0.5):
|
||||||
|
Threshold to use when turning the predicted masks into binary values.
|
||||||
overlap_mask_area_threshold (`float`, *optional*, defaults to 0.8):
|
overlap_mask_area_threshold (`float`, *optional*, defaults to 0.8):
|
||||||
The overlap mask area threshold to merge or discard small disconnected parts within each binary
|
The overlap mask area threshold to merge or discard small disconnected parts within each binary
|
||||||
instance mask.
|
instance mask.
|
||||||
@@ -786,6 +795,7 @@ class MaskFormerFeatureExtractor(FeatureExtractionMixin, ImageFeatureExtractionM
|
|||||||
mask_probs_item,
|
mask_probs_item,
|
||||||
pred_scores_item,
|
pred_scores_item,
|
||||||
pred_labels_item,
|
pred_labels_item,
|
||||||
|
mask_threshold,
|
||||||
overlap_mask_area_threshold,
|
overlap_mask_area_threshold,
|
||||||
target_size,
|
target_size,
|
||||||
)
|
)
|
||||||
@@ -801,6 +811,7 @@ class MaskFormerFeatureExtractor(FeatureExtractionMixin, ImageFeatureExtractionM
|
|||||||
self,
|
self,
|
||||||
outputs,
|
outputs,
|
||||||
threshold: float = 0.5,
|
threshold: float = 0.5,
|
||||||
|
mask_threshold: float = 0.5,
|
||||||
overlap_mask_area_threshold: float = 0.8,
|
overlap_mask_area_threshold: float = 0.8,
|
||||||
label_ids_to_fuse: Optional[Set[int]] = None,
|
label_ids_to_fuse: Optional[Set[int]] = None,
|
||||||
target_sizes: Optional[List[Tuple[int, int]]] = None,
|
target_sizes: Optional[List[Tuple[int, int]]] = None,
|
||||||
@@ -814,6 +825,8 @@ class MaskFormerFeatureExtractor(FeatureExtractionMixin, ImageFeatureExtractionM
|
|||||||
The outputs from [`MaskFormerForInstanceSegmentation`].
|
The outputs from [`MaskFormerForInstanceSegmentation`].
|
||||||
threshold (`float`, *optional*, defaults to 0.5):
|
threshold (`float`, *optional*, defaults to 0.5):
|
||||||
The probability score threshold to keep predicted instance masks.
|
The probability score threshold to keep predicted instance masks.
|
||||||
|
mask_threshold (`float`, *optional*, defaults to 0.5):
|
||||||
|
Threshold to use when turning the predicted masks into binary values.
|
||||||
overlap_mask_area_threshold (`float`, *optional*, defaults to 0.8):
|
overlap_mask_area_threshold (`float`, *optional*, defaults to 0.8):
|
||||||
The overlap mask area threshold to merge or discard small disconnected parts within each binary
|
The overlap mask area threshold to merge or discard small disconnected parts within each binary
|
||||||
instance mask.
|
instance mask.
|
||||||
@@ -875,6 +888,7 @@ class MaskFormerFeatureExtractor(FeatureExtractionMixin, ImageFeatureExtractionM
|
|||||||
mask_probs_item,
|
mask_probs_item,
|
||||||
pred_scores_item,
|
pred_scores_item,
|
||||||
pred_labels_item,
|
pred_labels_item,
|
||||||
|
mask_threshold,
|
||||||
overlap_mask_area_threshold,
|
overlap_mask_area_threshold,
|
||||||
label_ids_to_fuse,
|
label_ids_to_fuse,
|
||||||
target_size,
|
target_size,
|
||||||
|
|||||||
@@ -60,6 +60,8 @@ class ImageSegmentationPipeline(Pipeline):
|
|||||||
postprocess_kwargs["task"] = kwargs["task"]
|
postprocess_kwargs["task"] = kwargs["task"]
|
||||||
if "threshold" in kwargs:
|
if "threshold" in kwargs:
|
||||||
postprocess_kwargs["threshold"] = kwargs["threshold"]
|
postprocess_kwargs["threshold"] = kwargs["threshold"]
|
||||||
|
if "mask_threshold" in kwargs:
|
||||||
|
postprocess_kwargs["mask_threshold"] = kwargs["mask_threshold"]
|
||||||
if "overlap_mask_area_threshold" in kwargs:
|
if "overlap_mask_area_threshold" in kwargs:
|
||||||
postprocess_kwargs["overlap_mask_area_threshold"] = kwargs["overlap_mask_area_threshold"]
|
postprocess_kwargs["overlap_mask_area_threshold"] = kwargs["overlap_mask_area_threshold"]
|
||||||
return {}, {}, postprocess_kwargs
|
return {}, {}, postprocess_kwargs
|
||||||
@@ -78,11 +80,13 @@ class ImageSegmentationPipeline(Pipeline):
|
|||||||
|
|
||||||
The pipeline accepts either a single image or a batch of images. Images in a batch must all be in the
|
The pipeline accepts either a single image or a batch of images. Images in a batch must all be in the
|
||||||
same format: all as HTTP(S) links, all as local paths, or all as PIL images.
|
same format: all as HTTP(S) links, all as local paths, or all as PIL images.
|
||||||
task (`str`, defaults to `semantic`):
|
subtask (`str`, defaults to `panoptic`):
|
||||||
Segmentation task to be performed, choose [`semantic`, `instance` and `panoptic`] depending on model
|
Segmentation task to be performed, choose [`semantic`, `instance` and `panoptic`] depending on model
|
||||||
capabilities.
|
capabilities.
|
||||||
threshold (`float`, *optional*, defaults to 0.9):
|
threshold (`float`, *optional*, defaults to 0.9):
|
||||||
Probability threshold to filter out predicted masks.
|
Probability threshold to filter out predicted masks.
|
||||||
|
mask_threshold (`float`, *optional*, defaults to 0.5):
|
||||||
|
Threshold to use when turning the predicted masks into binary values.
|
||||||
overlap_mask_area_threshold (`float`, *optional*, defaults to 0.5):
|
overlap_mask_area_threshold (`float`, *optional*, defaults to 0.5):
|
||||||
Mask overlap threshold to eliminate small, disconnected segments.
|
Mask overlap threshold to eliminate small, disconnected segments.
|
||||||
|
|
||||||
@@ -116,11 +120,16 @@ class ImageSegmentationPipeline(Pipeline):
|
|||||||
model_outputs["target_size"] = target_size
|
model_outputs["target_size"] = target_size
|
||||||
return model_outputs
|
return model_outputs
|
||||||
|
|
||||||
def postprocess(self, model_outputs, task="semantic", threshold=0.9, overlap_mask_area_threshold=0.5):
|
def postprocess(
|
||||||
if task == "instance" and hasattr(self.feature_extractor, "post_process_instance_segmentation"):
|
self, model_outputs, subtask=None, threshold=0.9, mask_threshold=0.5, overlap_mask_area_threshold=0.5
|
||||||
|
):
|
||||||
|
if (subtask == "panoptic" or subtask is None) and hasattr(
|
||||||
|
self.feature_extractor, "post_process_panoptic_segmentation"
|
||||||
|
):
|
||||||
outputs = self.feature_extractor.post_process_panoptic_segmentation(
|
outputs = self.feature_extractor.post_process_panoptic_segmentation(
|
||||||
model_outputs,
|
model_outputs,
|
||||||
threshold=threshold,
|
threshold=threshold,
|
||||||
|
mask_threshold=mask_threshold,
|
||||||
overlap_mask_area_threshold=overlap_mask_area_threshold,
|
overlap_mask_area_threshold=overlap_mask_area_threshold,
|
||||||
target_sizes=model_outputs["target_size"],
|
target_sizes=model_outputs["target_size"],
|
||||||
)[0]
|
)[0]
|
||||||
@@ -130,29 +139,7 @@ class ImageSegmentationPipeline(Pipeline):
|
|||||||
|
|
||||||
if len(outputs["segments_info"]) == 0:
|
if len(outputs["segments_info"]) == 0:
|
||||||
mask = Image.fromarray(np.zeros(segmentation.shape).astype(np.uint8), mode="L")
|
mask = Image.fromarray(np.zeros(segmentation.shape).astype(np.uint8), mode="L")
|
||||||
annotation.append({"mask": mask, "label": None, "score": 0.0})
|
annotation.append({"mask": mask, "label": "NULL", "score": 0.0})
|
||||||
else:
|
|
||||||
for segment in outputs["segments_info"]:
|
|
||||||
mask = (segmentation == segment["id"]) * 255
|
|
||||||
mask = Image.fromarray(mask.numpy().astype(np.uint8), mode="L")
|
|
||||||
label = self.model.config.id2label[segment["label_id"]]
|
|
||||||
score = segment["score"]
|
|
||||||
annotation.append({"mask": mask, "label": label, "score": score})
|
|
||||||
|
|
||||||
elif task == "panoptic" and hasattr(self.feature_extractor, "post_process_panoptic_segmentation"):
|
|
||||||
outputs = self.feature_extractor.post_process_panoptic_segmentation(
|
|
||||||
model_outputs,
|
|
||||||
threshold=threshold,
|
|
||||||
overlap_mask_area_threshold=overlap_mask_area_threshold,
|
|
||||||
target_sizes=model_outputs["target_size"],
|
|
||||||
)[0]
|
|
||||||
|
|
||||||
annotation = []
|
|
||||||
segmentation = outputs["segmentation"]
|
|
||||||
|
|
||||||
if len(outputs["segments_info"]) == 0:
|
|
||||||
mask = Image.fromarray(np.zeros(segmentation.shape).astype(np.uint8), mode="L")
|
|
||||||
annotation.append({"mask": mask, "label": None, "score": 0.0})
|
|
||||||
else:
|
else:
|
||||||
for segment in outputs["segments_info"]:
|
for segment in outputs["segments_info"]:
|
||||||
mask = (segmentation == segment["id"]) * 255
|
mask = (segmentation == segment["id"]) * 255
|
||||||
@@ -161,7 +148,34 @@ class ImageSegmentationPipeline(Pipeline):
|
|||||||
score = segment["score"]
|
score = segment["score"]
|
||||||
annotation.append({"score": score, "label": label, "mask": mask})
|
annotation.append({"score": score, "label": label, "mask": mask})
|
||||||
|
|
||||||
elif task == "semantic" and hasattr(self.feature_extractor, "post_process_semantic_segmentation"):
|
elif (subtask == "instance" or subtask is None) and hasattr(
|
||||||
|
self.feature_extractor, "post_process_instance_segmentation"
|
||||||
|
):
|
||||||
|
outputs = self.feature_extractor.post_process_instance_segmentation(
|
||||||
|
model_outputs,
|
||||||
|
threshold=threshold,
|
||||||
|
mask_threshold=mask_threshold,
|
||||||
|
overlap_mask_area_threshold=overlap_mask_area_threshold,
|
||||||
|
target_sizes=model_outputs["target_size"],
|
||||||
|
)[0]
|
||||||
|
|
||||||
|
annotation = []
|
||||||
|
segmentation = outputs["segmentation"]
|
||||||
|
|
||||||
|
if len(outputs["segments_info"]) == 0:
|
||||||
|
mask = Image.fromarray(np.zeros(segmentation.shape).astype(np.uint8), mode="L")
|
||||||
|
annotation.append({"mask": mask, "label": "NULL", "score": 0.0})
|
||||||
|
else:
|
||||||
|
for segment in outputs["segments_info"]:
|
||||||
|
mask = (segmentation == segment["id"]) * 255
|
||||||
|
mask = Image.fromarray(mask.numpy().astype(np.uint8), mode="L")
|
||||||
|
label = self.model.config.id2label[segment["label_id"]]
|
||||||
|
score = segment["score"]
|
||||||
|
annotation.append({"mask": mask, "label": label, "score": score})
|
||||||
|
|
||||||
|
elif (subtask == "semantic" or subtask is None) and hasattr(
|
||||||
|
self.feature_extractor, "post_process_semantic_segmentation"
|
||||||
|
):
|
||||||
outputs = self.feature_extractor.post_process_semantic_segmentation(
|
outputs = self.feature_extractor.post_process_semantic_segmentation(
|
||||||
model_outputs, target_sizes=model_outputs["target_size"]
|
model_outputs, target_sizes=model_outputs["target_size"]
|
||||||
)[0]
|
)[0]
|
||||||
@@ -176,5 +190,5 @@ class ImageSegmentationPipeline(Pipeline):
|
|||||||
label = self.model.config.id2label[label]
|
label = self.model.config.id2label[label]
|
||||||
annotation.append({"score": None, "label": label, "mask": mask})
|
annotation.append({"score": None, "label": label, "mask": mask})
|
||||||
else:
|
else:
|
||||||
raise ValueError(f"task {task} is not supported for model {self.model}")
|
raise ValueError(f"Task {subtask} is not supported for model {self.model}.s")
|
||||||
return annotation
|
return annotation
|
||||||
|
|||||||
@@ -399,13 +399,11 @@ class MaskFormerFeatureExtractionTest(FeatureExtractionSavingTestMixin, unittest
|
|||||||
|
|
||||||
self.assertEqual(segmentation[0].shape, target_sizes[0])
|
self.assertEqual(segmentation[0].shape, target_sizes[0])
|
||||||
|
|
||||||
@unittest.skip("Fix me Alara!")
|
|
||||||
def test_post_process_panoptic_segmentation(self):
|
def test_post_process_panoptic_segmentation(self):
|
||||||
feature_extractor = self.feature_extraction_class(num_labels=self.feature_extract_tester.num_classes)
|
feature_extractor = self.feature_extraction_class(num_labels=self.feature_extract_tester.num_classes)
|
||||||
outputs = self.feature_extract_tester.get_fake_maskformer_outputs()
|
outputs = self.feature_extract_tester.get_fake_maskformer_outputs()
|
||||||
segmentation = feature_extractor.post_process_panoptic_segmentation(outputs, threshold=0)
|
segmentation = feature_extractor.post_process_panoptic_segmentation(outputs, threshold=0)
|
||||||
print(len(segmentation))
|
|
||||||
print(self.feature_extract_tester.batch_size)
|
|
||||||
self.assertTrue(len(segmentation) == self.feature_extract_tester.batch_size)
|
self.assertTrue(len(segmentation) == self.feature_extract_tester.batch_size)
|
||||||
for el in segmentation:
|
for el in segmentation:
|
||||||
self.assertTrue("segmentation" in el)
|
self.assertTrue("segmentation" in el)
|
||||||
|
|||||||
@@ -81,7 +81,12 @@ class ImageSegmentationPipelineTests(unittest.TestCase, metaclass=PipelineTestCa
|
|||||||
]
|
]
|
||||||
|
|
||||||
def run_pipeline_test(self, image_segmenter, examples):
|
def run_pipeline_test(self, image_segmenter, examples):
|
||||||
outputs = image_segmenter("./tests/fixtures/tests_samples/COCO/000000039769.png", threshold=0.0)
|
outputs = image_segmenter(
|
||||||
|
"./tests/fixtures/tests_samples/COCO/000000039769.png",
|
||||||
|
threshold=0.0,
|
||||||
|
mask_threshold=0,
|
||||||
|
overlap_mask_area_threshold=0,
|
||||||
|
)
|
||||||
self.assertIsInstance(outputs, list)
|
self.assertIsInstance(outputs, list)
|
||||||
n = len(outputs)
|
n = len(outputs)
|
||||||
if isinstance(image_segmenter.model, (MaskFormerForInstanceSegmentation)):
|
if isinstance(image_segmenter.model, (MaskFormerForInstanceSegmentation)):
|
||||||
@@ -97,15 +102,15 @@ class ImageSegmentationPipelineTests(unittest.TestCase, metaclass=PipelineTestCa
|
|||||||
dataset = datasets.load_dataset("hf-internal-testing/fixtures_image_utils", "image", split="test")
|
dataset = datasets.load_dataset("hf-internal-testing/fixtures_image_utils", "image", split="test")
|
||||||
|
|
||||||
# RGBA
|
# RGBA
|
||||||
outputs = image_segmenter(dataset[0]["file"])
|
outputs = image_segmenter(dataset[0]["file"], threshold=0.0, mask_threshold=0, overlap_mask_area_threshold=0)
|
||||||
m = len(outputs)
|
m = len(outputs)
|
||||||
self.assertEqual([{"score": ANY(float, type(None)), "label": ANY(str), "mask": ANY(Image.Image)}] * m, outputs)
|
self.assertEqual([{"score": ANY(float, type(None)), "label": ANY(str), "mask": ANY(Image.Image)}] * m, outputs)
|
||||||
# LA
|
# LA
|
||||||
outputs = image_segmenter(dataset[1]["file"])
|
outputs = image_segmenter(dataset[1]["file"], threshold=0.0, mask_threshold=0, overlap_mask_area_threshold=0)
|
||||||
m = len(outputs)
|
m = len(outputs)
|
||||||
self.assertEqual([{"score": ANY(float, type(None)), "label": ANY(str), "mask": ANY(Image.Image)}] * m, outputs)
|
self.assertEqual([{"score": ANY(float, type(None)), "label": ANY(str), "mask": ANY(Image.Image)}] * m, outputs)
|
||||||
# L
|
# L
|
||||||
outputs = image_segmenter(dataset[2]["file"])
|
outputs = image_segmenter(dataset[2]["file"], threshold=0.0, mask_threshold=0, overlap_mask_area_threshold=0)
|
||||||
m = len(outputs)
|
m = len(outputs)
|
||||||
self.assertEqual([{"score": ANY(float, type(None)), "label": ANY(str), "mask": ANY(Image.Image)}] * m, outputs)
|
self.assertEqual([{"score": ANY(float, type(None)), "label": ANY(str), "mask": ANY(Image.Image)}] * m, outputs)
|
||||||
|
|
||||||
@@ -126,7 +131,9 @@ class ImageSegmentationPipelineTests(unittest.TestCase, metaclass=PipelineTestCa
|
|||||||
"./tests/fixtures/tests_samples/COCO/000000039769.png",
|
"./tests/fixtures/tests_samples/COCO/000000039769.png",
|
||||||
"./tests/fixtures/tests_samples/COCO/000000039769.png",
|
"./tests/fixtures/tests_samples/COCO/000000039769.png",
|
||||||
]
|
]
|
||||||
outputs = image_segmenter(batch, threshold=0.0, batch_size=batch_size)
|
outputs = image_segmenter(
|
||||||
|
batch, threshold=0.0, mask_threshold=0, overlap_mask_area_threshold=0, batch_size=batch_size
|
||||||
|
)
|
||||||
self.assertEqual(len(batch), len(outputs))
|
self.assertEqual(len(batch), len(outputs))
|
||||||
self.assertEqual(len(outputs[0]), n)
|
self.assertEqual(len(outputs[0]), n)
|
||||||
self.assertEqual(
|
self.assertEqual(
|
||||||
@@ -152,55 +159,29 @@ class ImageSegmentationPipelineTests(unittest.TestCase, metaclass=PipelineTestCa
|
|||||||
|
|
||||||
model = AutoModelForImageSegmentation.from_pretrained(model_id)
|
model = AutoModelForImageSegmentation.from_pretrained(model_id)
|
||||||
feature_extractor = AutoFeatureExtractor.from_pretrained(model_id)
|
feature_extractor = AutoFeatureExtractor.from_pretrained(model_id)
|
||||||
image_segmenter = ImageSegmentationPipeline(
|
image_segmenter = ImageSegmentationPipeline(model=model, feature_extractor=feature_extractor)
|
||||||
model=model,
|
|
||||||
feature_extractor=feature_extractor,
|
|
||||||
task="semantic",
|
|
||||||
threshold=0.0,
|
|
||||||
overlap_mask_area_threshold=0.0,
|
|
||||||
)
|
|
||||||
|
|
||||||
outputs = image_segmenter(
|
outputs = image_segmenter(
|
||||||
"http://images.cocodataset.org/val2017/000000039769.jpg",
|
"http://images.cocodataset.org/val2017/000000039769.jpg",
|
||||||
|
subtask="panoptic",
|
||||||
|
threshold=0.0,
|
||||||
|
mask_threshold=0.0,
|
||||||
|
overlap_mask_area_threshold=0.0,
|
||||||
)
|
)
|
||||||
|
|
||||||
# Shortening by hashing
|
# Shortening by hashing
|
||||||
for o in outputs:
|
for o in outputs:
|
||||||
o["mask"] = mask_to_test_readable(o["mask"])
|
o["mask"] = mask_to_test_readable(o["mask"])
|
||||||
|
|
||||||
# This is extremely brittle, and those values are made specific for the CI.
|
|
||||||
self.assertEqual(
|
self.assertEqual(
|
||||||
nested_simplify(outputs, decimals=4),
|
nested_simplify(outputs, decimals=4),
|
||||||
[
|
[
|
||||||
{
|
{
|
||||||
"label": "LABEL_88",
|
"score": 0.004,
|
||||||
"mask": {"hash": "7f0bf661a4", "shape": (480, 640), "white_pixels": 3},
|
|
||||||
"score": None,
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"label": "LABEL_101",
|
|
||||||
"mask": {"hash": "10ab738dc9", "shape": (480, 640), "white_pixels": 8948},
|
|
||||||
"score": None,
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"label": "LABEL_215",
|
"label": "LABEL_215",
|
||||||
"mask": {"hash": "b431e0946c", "shape": (480, 640), "white_pixels": 298249},
|
"mask": {"hash": "a01498ca7c", "shape": (480, 640), "white_pixels": 307200},
|
||||||
"score": None,
|
|
||||||
},
|
},
|
||||||
]
|
],
|
||||||
# Temporary: Keeping around the old values as they might provide useful later
|
|
||||||
# [
|
|
||||||
# {
|
|
||||||
# "score": 0.004,
|
|
||||||
# "label": "LABEL_215",
|
|
||||||
# "mask": {"hash": "34eecd16bb", "shape": (480, 640), "white_pixels": 0},
|
|
||||||
# },
|
|
||||||
# {
|
|
||||||
# "score": 0.004,
|
|
||||||
# "label": "LABEL_215",
|
|
||||||
# "mask": {"hash": "34eecd16bb", "shape": (480, 640), "white_pixels": 0},
|
|
||||||
# },
|
|
||||||
# ],
|
|
||||||
)
|
)
|
||||||
|
|
||||||
outputs = image_segmenter(
|
outputs = image_segmenter(
|
||||||
@@ -209,6 +190,8 @@ class ImageSegmentationPipelineTests(unittest.TestCase, metaclass=PipelineTestCa
|
|||||||
"http://images.cocodataset.org/val2017/000000039769.jpg",
|
"http://images.cocodataset.org/val2017/000000039769.jpg",
|
||||||
],
|
],
|
||||||
threshold=0.0,
|
threshold=0.0,
|
||||||
|
mask_threshold=0.0,
|
||||||
|
overlap_mask_area_threshold=0.0,
|
||||||
)
|
)
|
||||||
for output in outputs:
|
for output in outputs:
|
||||||
for o in output:
|
for o in output:
|
||||||
@@ -219,62 +202,18 @@ class ImageSegmentationPipelineTests(unittest.TestCase, metaclass=PipelineTestCa
|
|||||||
[
|
[
|
||||||
[
|
[
|
||||||
{
|
{
|
||||||
"label": "LABEL_88",
|
"score": 0.004,
|
||||||
"mask": {"hash": "7f0bf661a4", "shape": (480, 640), "white_pixels": 3},
|
|
||||||
"score": None,
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"label": "LABEL_101",
|
|
||||||
"mask": {"hash": "10ab738dc9", "shape": (480, 640), "white_pixels": 8948},
|
|
||||||
"score": None,
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"label": "LABEL_215",
|
"label": "LABEL_215",
|
||||||
"mask": {"hash": "b431e0946c", "shape": (480, 640), "white_pixels": 298249},
|
"mask": {"hash": "a01498ca7c", "shape": (480, 640), "white_pixels": 307200},
|
||||||
"score": None,
|
|
||||||
},
|
},
|
||||||
],
|
],
|
||||||
[
|
[
|
||||||
{
|
{
|
||||||
"label": "LABEL_88",
|
"score": 0.004,
|
||||||
"mask": {"hash": "7f0bf661a4", "shape": (480, 640), "white_pixels": 3},
|
|
||||||
"score": None,
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"label": "LABEL_101",
|
|
||||||
"mask": {"hash": "10ab738dc9", "shape": (480, 640), "white_pixels": 8948},
|
|
||||||
"score": None,
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"label": "LABEL_215",
|
"label": "LABEL_215",
|
||||||
"mask": {"hash": "b431e0946c", "shape": (480, 640), "white_pixels": 298249},
|
"mask": {"hash": "a01498ca7c", "shape": (480, 640), "white_pixels": 307200},
|
||||||
"score": None,
|
|
||||||
},
|
},
|
||||||
]
|
],
|
||||||
# [
|
|
||||||
# {
|
|
||||||
# "score": 0.004,
|
|
||||||
# "label": "LABEL_215",
|
|
||||||
# "mask": {"hash": "34eecd16bb", "shape": (480, 640), "white_pixels": 0},
|
|
||||||
# },
|
|
||||||
# {
|
|
||||||
# "score": 0.004,
|
|
||||||
# "label": "LABEL_215",
|
|
||||||
# "mask": {"hash": "34eecd16bb", "shape": (480, 640), "white_pixels": 0},
|
|
||||||
# },
|
|
||||||
# ],
|
|
||||||
# [
|
|
||||||
# {
|
|
||||||
# "score": 0.004,
|
|
||||||
# "label": "LABEL_215",
|
|
||||||
# "mask": {"hash": "34eecd16bb", "shape": (480, 640), "white_pixels": 0},
|
|
||||||
# },
|
|
||||||
# {
|
|
||||||
# "score": 0.004,
|
|
||||||
# "label": "LABEL_215",
|
|
||||||
# "mask": {"hash": "34eecd16bb", "shape": (480, 640), "white_pixels": 0},
|
|
||||||
# },
|
|
||||||
# ],
|
|
||||||
],
|
],
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -311,7 +250,7 @@ class ImageSegmentationPipelineTests(unittest.TestCase, metaclass=PipelineTestCa
|
|||||||
|
|
||||||
outputs = image_segmenter(
|
outputs = image_segmenter(
|
||||||
"http://images.cocodataset.org/val2017/000000039769.jpg",
|
"http://images.cocodataset.org/val2017/000000039769.jpg",
|
||||||
task="panoptic",
|
subtask="panoptic",
|
||||||
threshold=0,
|
threshold=0,
|
||||||
overlap_mask_area_threshold=0.0,
|
overlap_mask_area_threshold=0.0,
|
||||||
)
|
)
|
||||||
@@ -361,7 +300,7 @@ class ImageSegmentationPipelineTests(unittest.TestCase, metaclass=PipelineTestCa
|
|||||||
"http://images.cocodataset.org/val2017/000000039769.jpg",
|
"http://images.cocodataset.org/val2017/000000039769.jpg",
|
||||||
"http://images.cocodataset.org/val2017/000000039769.jpg",
|
"http://images.cocodataset.org/val2017/000000039769.jpg",
|
||||||
],
|
],
|
||||||
task="panoptic",
|
subtask="panoptic",
|
||||||
threshold=0.0,
|
threshold=0.0,
|
||||||
overlap_mask_area_threshold=0.0,
|
overlap_mask_area_threshold=0.0,
|
||||||
)
|
)
|
||||||
@@ -448,7 +387,7 @@ class ImageSegmentationPipelineTests(unittest.TestCase, metaclass=PipelineTestCa
|
|||||||
image_segmenter = pipeline("image-segmentation", model=model_id)
|
image_segmenter = pipeline("image-segmentation", model=model_id)
|
||||||
|
|
||||||
outputs = image_segmenter(
|
outputs = image_segmenter(
|
||||||
"http://images.cocodataset.org/val2017/000000039769.jpg", task="panoptic", threshold=0.999
|
"http://images.cocodataset.org/val2017/000000039769.jpg", subtask="panoptic", threshold=0.999
|
||||||
)
|
)
|
||||||
# Shortening by hashing
|
# Shortening by hashing
|
||||||
for o in outputs:
|
for o in outputs:
|
||||||
@@ -471,7 +410,7 @@ class ImageSegmentationPipelineTests(unittest.TestCase, metaclass=PipelineTestCa
|
|||||||
)
|
)
|
||||||
|
|
||||||
outputs = image_segmenter(
|
outputs = image_segmenter(
|
||||||
"http://images.cocodataset.org/val2017/000000039769.jpg", task="panoptic", threshold=0.5
|
"http://images.cocodataset.org/val2017/000000039769.jpg", subtask="panoptic", threshold=0.5
|
||||||
)
|
)
|
||||||
|
|
||||||
for o in outputs:
|
for o in outputs:
|
||||||
@@ -521,7 +460,7 @@ class ImageSegmentationPipelineTests(unittest.TestCase, metaclass=PipelineTestCa
|
|||||||
|
|
||||||
image = load_dataset("hf-internal-testing/fixtures_ade20k", split="test")
|
image = load_dataset("hf-internal-testing/fixtures_ade20k", split="test")
|
||||||
file = image[0]["file"]
|
file = image[0]["file"]
|
||||||
outputs = image_segmenter(file, task="panoptic", threshold=threshold)
|
outputs = image_segmenter(file, subtask="panoptic", threshold=threshold)
|
||||||
|
|
||||||
# Shortening by hashing
|
# Shortening by hashing
|
||||||
for o in outputs:
|
for o in outputs:
|
||||||
|
|||||||
Reference in New Issue
Block a user