Fix post process function called in the instance segmentation example of mask2former (#34588)

* Fix post process function called in the instance segmentation example of mask2former

* fix description and additional notes for post_process_instance_segmentation of maskformers

* remove white space in maskformers post_process_instance_segmentation doc

* change image.size[::-1] to height and width for clarity in segmentation examples
This commit is contained in:
David Zhang
2024-11-20 02:49:25 +11:00
committed by GitHub
parent fdb9230485
commit 427b62ed1a
8 changed files with 22 additions and 18 deletions

View File

@@ -57,7 +57,7 @@ Initially, an image is processed using a pre-trained convolutional neural networ
>>> with torch.no_grad(): >>> with torch.no_grad():
... outputs = model(**inputs) ... outputs = model(**inputs)
>>> results = image_processor.post_process_object_detection(outputs, target_sizes=torch.tensor([image.size[::-1]]), threshold=0.3) >>> results = image_processor.post_process_object_detection(outputs, target_sizes=torch.tensor([(image.height, image.width)]), threshold=0.3)
>>> for result in results: >>> for result in results:
... for score, label_id, box in zip(result["scores"], result["labels"], result["boxes"]): ... for score, label_id, box in zip(result["scores"], result["labels"], result["boxes"]):

View File

@@ -148,7 +148,7 @@ with torch.no_grad():
outputs = model(**inputs) outputs = model(**inputs)
# Post-process outputs # Post-process outputs
outputs = image_processor.post_process_instance_segmentation(outputs, target_sizes=[image.size[::-1]]) outputs = image_processor.post_process_instance_segmentation(outputs, target_sizes=[(image.height, image.width)])
print("Mask shape: ", outputs[0]["segmentation"].shape) print("Mask shape: ", outputs[0]["segmentation"].shape)
print("Mask values: ", outputs[0]["segmentation"].unique()) print("Mask values: ", outputs[0]["segmentation"].unique())

View File

@@ -1034,7 +1034,8 @@ class Mask2FormerImageProcessor(BaseImageProcessor):
) -> List[Dict]: ) -> List[Dict]:
""" """
Converts the output of [`Mask2FormerForUniversalSegmentationOutput`] into instance segmentation predictions. Converts the output of [`Mask2FormerForUniversalSegmentationOutput`] into instance segmentation predictions.
Only supports PyTorch. Only supports PyTorch. If instances could overlap, set either return_coco_annotation or return_binary_maps
to `True` to get the correct segmentation result.
Args: Args:
outputs ([`Mask2FormerForUniversalSegmentation`]): outputs ([`Mask2FormerForUniversalSegmentation`]):
@@ -1056,9 +1057,10 @@ class Mask2FormerImageProcessor(BaseImageProcessor):
(one per detected instance). (one per detected instance).
Returns: Returns:
`List[Dict]`: A list of dictionaries, one per image, each dictionary containing two keys: `List[Dict]`: A list of dictionaries, one per image, each dictionary containing two keys:
- **segmentation** -- A tensor of shape `(height, width)` where each pixel represents a `segment_id` or - **segmentation** -- A tensor of shape `(height, width)` where each pixel represents a `segment_id`, or
`List[List]` run-length encoding (RLE) of the segmentation map if return_coco_annotation is set to `List[List]` run-length encoding (RLE) of the segmentation map if return_coco_annotation is set to
`True`. Set to `None` if no mask if found above `threshold`. `True`, or a tensor of shape `(num_instances, height, width)` if return_binary_maps is set to `True`.
Set to `None` if no mask is found above `threshold`.
- **segments_info** -- A dictionary that contains additional information on each segment. - **segments_info** -- A dictionary that contains additional information on each segment.
- **id** -- An integer representing the `segment_id`. - **id** -- An integer representing the `segment_id`.
- **label_id** -- An integer representing the label / semantic class id corresponding to `segment_id`. - **label_id** -- An integer representing the label / semantic class id corresponding to `segment_id`.

View File

@@ -2428,8 +2428,8 @@ class Mask2FormerForUniversalSegmentation(Mask2FormerPreTrainedModel):
>>> masks_queries_logits = outputs.masks_queries_logits >>> masks_queries_logits = outputs.masks_queries_logits
>>> # Perform post-processing to get instance segmentation map >>> # Perform post-processing to get instance segmentation map
>>> pred_instance_map = image_processor.post_process_semantic_segmentation( >>> pred_instance_map = image_processor.post_process_instance_segmentation(
... outputs, target_sizes=[image.size[::-1]] ... outputs, target_sizes=[(image.height, image.width)]
... )[0] ... )[0]
>>> print(pred_instance_map.shape) >>> print(pred_instance_map.shape)
torch.Size([480, 640]) torch.Size([480, 640])
@@ -2462,7 +2462,7 @@ class Mask2FormerForUniversalSegmentation(Mask2FormerPreTrainedModel):
>>> # Perform post-processing to get semantic segmentation map >>> # Perform post-processing to get semantic segmentation map
>>> pred_semantic_map = image_processor.post_process_semantic_segmentation( >>> pred_semantic_map = image_processor.post_process_semantic_segmentation(
... outputs, target_sizes=[image.size[::-1]] ... outputs, target_sizes=[(image.height, image.width)]
... )[0] ... )[0]
>>> print(pred_semantic_map.shape) >>> print(pred_semantic_map.shape)
torch.Size([512, 683]) torch.Size([512, 683])
@@ -2496,7 +2496,7 @@ class Mask2FormerForUniversalSegmentation(Mask2FormerPreTrainedModel):
>>> # Perform post-processing to get panoptic segmentation map >>> # Perform post-processing to get panoptic segmentation map
>>> pred_panoptic_map = image_processor.post_process_panoptic_segmentation( >>> pred_panoptic_map = image_processor.post_process_panoptic_segmentation(
... outputs, target_sizes=[image.size[::-1]] ... outputs, target_sizes=[(image.height, image.width)]
... )[0]["segmentation"] ... )[0]["segmentation"]
>>> print(pred_panoptic_map.shape) >>> print(pred_panoptic_map.shape)
torch.Size([338, 676]) torch.Size([338, 676])

View File

@@ -1080,7 +1080,8 @@ class MaskFormerImageProcessor(BaseImageProcessor):
) -> List[Dict]: ) -> List[Dict]:
""" """
Converts the output of [`MaskFormerForInstanceSegmentationOutput`] into instance segmentation predictions. Only Converts the output of [`MaskFormerForInstanceSegmentationOutput`] into instance segmentation predictions. Only
supports PyTorch. supports PyTorch. If instances could overlap, set either return_coco_annotation or return_binary_maps
to `True` to get the correct segmentation result.
Args: Args:
outputs ([`MaskFormerForInstanceSegmentation`]): outputs ([`MaskFormerForInstanceSegmentation`]):
@@ -1102,9 +1103,10 @@ class MaskFormerImageProcessor(BaseImageProcessor):
(one per detected instance). (one per detected instance).
Returns: Returns:
`List[Dict]`: A list of dictionaries, one per image, each dictionary containing two keys: `List[Dict]`: A list of dictionaries, one per image, each dictionary containing two keys:
- **segmentation** -- A tensor of shape `(height, width)` where each pixel represents a `segment_id` or - **segmentation** -- A tensor of shape `(height, width)` where each pixel represents a `segment_id`, or
`List[List]` run-length encoding (RLE) of the segmentation map if return_coco_annotation is set to `List[List]` run-length encoding (RLE) of the segmentation map if return_coco_annotation is set to
`True`. Set to `None` if no mask if found above `threshold`. `True`, or a tensor of shape `(num_instances, height, width)` if return_binary_maps is set to `True`.
Set to `None` if no mask is found above `threshold`.
- **segments_info** -- A dictionary that contains additional information on each segment. - **segments_info** -- A dictionary that contains additional information on each segment.
- **id** -- An integer representing the `segment_id`. - **id** -- An integer representing the `segment_id`.
- **label_id** -- An integer representing the label / semantic class id corresponding to `segment_id`. - **label_id** -- An integer representing the label / semantic class id corresponding to `segment_id`.

View File

@@ -1780,7 +1780,7 @@ class MaskFormerForInstanceSegmentation(MaskFormerPreTrainedModel):
>>> # you can pass them to image_processor for postprocessing >>> # you can pass them to image_processor for postprocessing
>>> predicted_semantic_map = image_processor.post_process_semantic_segmentation( >>> predicted_semantic_map = image_processor.post_process_semantic_segmentation(
... outputs, target_sizes=[image.size[::-1]] ... outputs, target_sizes=[(image.height, image.width)]
... )[0] ... )[0]
>>> # we refer to the demo notebooks for visualization (see "Resources" section in the MaskFormer docs) >>> # we refer to the demo notebooks for visualization (see "Resources" section in the MaskFormer docs)
@@ -1810,7 +1810,7 @@ class MaskFormerForInstanceSegmentation(MaskFormerPreTrainedModel):
>>> masks_queries_logits = outputs.masks_queries_logits >>> masks_queries_logits = outputs.masks_queries_logits
>>> # you can pass them to image_processor for postprocessing >>> # you can pass them to image_processor for postprocessing
>>> result = image_processor.post_process_panoptic_segmentation(outputs, target_sizes=[image.size[::-1]])[0] >>> result = image_processor.post_process_panoptic_segmentation(outputs, target_sizes=[(image.height, image.width)])[0]
>>> # we refer to the demo notebooks for visualization (see "Resources" section in the MaskFormer docs) >>> # we refer to the demo notebooks for visualization (see "Resources" section in the MaskFormer docs)
>>> predicted_panoptic_map = result["segmentation"] >>> predicted_panoptic_map = result["segmentation"]

View File

@@ -3161,7 +3161,7 @@ class OneFormerForUniversalSegmentation(OneFormerPreTrainedModel):
>>> # you can pass them to processor for semantic postprocessing >>> # you can pass them to processor for semantic postprocessing
>>> predicted_semantic_map = processor.post_process_semantic_segmentation( >>> predicted_semantic_map = processor.post_process_semantic_segmentation(
... outputs, target_sizes=[image.size[::-1]] ... outputs, target_sizes=[(image.height, image.width)]
... )[0] ... )[0]
>>> f"👉 Semantic Predictions Shape: {list(predicted_semantic_map.shape)}" >>> f"👉 Semantic Predictions Shape: {list(predicted_semantic_map.shape)}"
'👉 Semantic Predictions Shape: [512, 683]' '👉 Semantic Predictions Shape: [512, 683]'
@@ -3178,7 +3178,7 @@ class OneFormerForUniversalSegmentation(OneFormerPreTrainedModel):
>>> # you can pass them to processor for instance postprocessing >>> # you can pass them to processor for instance postprocessing
>>> predicted_instance_map = processor.post_process_instance_segmentation( >>> predicted_instance_map = processor.post_process_instance_segmentation(
... outputs, target_sizes=[image.size[::-1]] ... outputs, target_sizes=[(image.height, image.width)]
... )[0]["segmentation"] ... )[0]["segmentation"]
>>> f"👉 Instance Predictions Shape: {list(predicted_instance_map.shape)}" >>> f"👉 Instance Predictions Shape: {list(predicted_instance_map.shape)}"
'👉 Instance Predictions Shape: [512, 683]' '👉 Instance Predictions Shape: [512, 683]'
@@ -3195,7 +3195,7 @@ class OneFormerForUniversalSegmentation(OneFormerPreTrainedModel):
>>> # you can pass them to processor for panoptic postprocessing >>> # you can pass them to processor for panoptic postprocessing
>>> predicted_panoptic_map = processor.post_process_panoptic_segmentation( >>> predicted_panoptic_map = processor.post_process_panoptic_segmentation(
... outputs, target_sizes=[image.size[::-1]] ... outputs, target_sizes=[(image.height, image.width)]
... )[0]["segmentation"] ... )[0]["segmentation"]
>>> f"👉 Panoptic Predictions Shape: {list(predicted_panoptic_map.shape)}" >>> f"👉 Panoptic Predictions Shape: {list(predicted_panoptic_map.shape)}"
'👉 Panoptic Predictions Shape: [512, 683]' '👉 Panoptic Predictions Shape: [512, 683]'

View File

@@ -962,7 +962,7 @@ class SegGptForImageSegmentation(SegGptPreTrainedModel):
>>> inputs = image_processor(images=image_input, prompt_images=image_prompt, prompt_masks=mask_prompt, return_tensors="pt") >>> inputs = image_processor(images=image_input, prompt_images=image_prompt, prompt_masks=mask_prompt, return_tensors="pt")
>>> outputs = model(**inputs) >>> outputs = model(**inputs)
>>> result = image_processor.post_process_semantic_segmentation(outputs, target_sizes=[image_input.size[::-1]])[0] >>> result = image_processor.post_process_semantic_segmentation(outputs, target_sizes=[(image_input.height, image_input.width)])[0]
>>> print(list(result.shape)) >>> print(list(result.shape))
[170, 297] [170, 297]
``` ```