From 5fd5990dce2c6c147c7a5424f0767e3eb2279986 Mon Sep 17 00:00:00 2001 From: Nicolas Patry Date: Wed, 26 Oct 2022 10:44:36 +0200 Subject: [PATCH] Factored out some code in the `image-segmentation` pipeline. (#19727) * Factored out some code in the image-segmentation pipeline Re-enable `small_model_pt`. Re-enable `small_model_pt`. Enabling the current test with the current values. Debugging the values on the CI. More logs ? Printing doesn't work ? Using the CI values instead. Seems to be a Pillow sensitivity. Added a test showcasing that models not supporting some tasks get a clear error. Factored out code. Further factor out. Fixup. Bad rebase. Put `panoptic` before `instance` as it should be a superset. * Fixing tests. * Adding subtasks tests + Fixes `instance` segmentation which was broken due to default and non kwargs arguments. * Fix bad replace. --- .../models/detr/feature_extraction_detr.py | 27 ++--- .../pipelines/image_segmentation.py | 71 ++++-------- .../test_pipelines_image_segmentation.py | 107 ++++++++++++++---- 3 files changed, 121 insertions(+), 84 deletions(-) diff --git a/src/transformers/models/detr/feature_extraction_detr.py b/src/transformers/models/detr/feature_extraction_detr.py index 7d6b05a7dc..f13b3728f8 100644 --- a/src/transformers/models/detr/feature_extraction_detr.py +++ b/src/transformers/models/detr/feature_extraction_detr.py @@ -1275,12 +1275,13 @@ class DetrFeatureExtractor(FeatureExtractionMixin, ImageFeatureExtractionMixin): # Get segmentation map and segment information of batch item target_size = target_sizes[i] if target_sizes is not None else None segmentation, segments = compute_segments( - mask_probs_item, - pred_scores_item, - pred_labels_item, - mask_threshold, - overlap_mask_area_threshold, - target_size, + mask_probs=mask_probs_item, + pred_scores=pred_scores_item, + pred_labels=pred_labels_item, + mask_threshold=mask_threshold, + overlap_mask_area_threshold=overlap_mask_area_threshold, + label_ids_to_fuse=[], + 
target_size=target_size, ) # Return segmentation map in run-length encoding (RLE) format @@ -1366,13 +1367,13 @@ class DetrFeatureExtractor(FeatureExtractionMixin, ImageFeatureExtractionMixin): # Get segmentation map and segment information of batch item target_size = target_sizes[i] if target_sizes is not None else None segmentation, segments = compute_segments( - mask_probs_item, - pred_scores_item, - pred_labels_item, - mask_threshold, - overlap_mask_area_threshold, - label_ids_to_fuse, - target_size, + mask_probs=mask_probs_item, + pred_scores=pred_scores_item, + pred_labels=pred_labels_item, + mask_threshold=mask_threshold, + overlap_mask_area_threshold=overlap_mask_area_threshold, + label_ids_to_fuse=label_ids_to_fuse, + target_size=target_size, ) results.append({"segmentation": segmentation, "segments_info": segments}) diff --git a/src/transformers/pipelines/image_segmentation.py b/src/transformers/pipelines/image_segmentation.py index 877c42a883..babd27a540 100644 --- a/src/transformers/pipelines/image_segmentation.py +++ b/src/transformers/pipelines/image_segmentation.py @@ -56,14 +56,15 @@ class ImageSegmentationPipeline(Pipeline): def _sanitize_parameters(self, **kwargs): postprocess_kwargs = {} - if "task" in kwargs: - postprocess_kwargs["task"] = kwargs["task"] + if "subtask" in kwargs: + postprocess_kwargs["subtask"] = kwargs["subtask"] if "threshold" in kwargs: postprocess_kwargs["threshold"] = kwargs["threshold"] if "mask_threshold" in kwargs: postprocess_kwargs["mask_threshold"] = kwargs["mask_threshold"] if "overlap_mask_area_threshold" in kwargs: postprocess_kwargs["overlap_mask_area_threshold"] = kwargs["overlap_mask_area_threshold"] + return {}, {}, postprocess_kwargs def __call__(self, images, **kwargs) -> Union[Predictions, List[Prediction]]: @@ -80,9 +81,10 @@ class ImageSegmentationPipeline(Pipeline): The pipeline accepts either a single image or a batch of images. 
Images in a batch must all be in the same format: all as HTTP(S) links, all as local paths, or all as PIL images. - subtask (`str`, defaults to `panoptic`): + subtask (`str`, *optional*): Segmentation task to be performed, choose [`semantic`, `instance` and `panoptic`] depending on model - capabilities. + capabilities. If not set, the pipeline will attempt to resolve in the following order: + `panoptic`, `instance`, `semantic`. threshold (`float`, *optional*, defaults to 0.9): Probability threshold to filter out predicted masks. mask_threshold (`float`, *optional*, defaults to 0.5): @@ -104,7 +106,6 @@ class ImageSegmentationPipeline(Pipeline): - **score** (*optional* `float`) -- Optionally, when the model is capable of estimating a confidence of the "object" described by the label and the mask. """ - return super().__call__(images, **kwargs) def preprocess(self, image): @@ -123,10 +124,15 @@ class ImageSegmentationPipeline(Pipeline): def postprocess( self, model_outputs, subtask=None, threshold=0.9, mask_threshold=0.5, overlap_mask_area_threshold=0.5 ): - if (subtask == "panoptic" or subtask is None) and hasattr( - self.feature_extractor, "post_process_panoptic_segmentation" - ): - outputs = self.feature_extractor.post_process_panoptic_segmentation( + + fn = None + if subtask in {"panoptic", None} and hasattr(self.feature_extractor, "post_process_panoptic_segmentation"): + fn = self.feature_extractor.post_process_panoptic_segmentation + elif subtask in {"instance", None} and hasattr(self.feature_extractor, "post_process_instance_segmentation"): + fn = self.feature_extractor.post_process_instance_segmentation + + if fn is not None: + outputs = fn( model_outputs, threshold=threshold, mask_threshold=mask_threshold, @@ -137,45 +143,14 @@ class ImageSegmentationPipeline(Pipeline): annotation = [] segmentation = outputs["segmentation"] - if len(outputs["segments_info"]) == 0: - mask = Image.fromarray(np.zeros(segmentation.shape).astype(np.uint8), mode="L") - 
annotation.append({"mask": mask, "label": "NULL", "score": 0.0}) - else: - for segment in outputs["segments_info"]: - mask = (segmentation == segment["id"]) * 255 - mask = Image.fromarray(mask.numpy().astype(np.uint8), mode="L") - label = self.model.config.id2label[segment["label_id"]] - score = segment["score"] - annotation.append({"score": score, "label": label, "mask": mask}) + for segment in outputs["segments_info"]: + mask = (segmentation == segment["id"]) * 255 + mask = Image.fromarray(mask.numpy().astype(np.uint8), mode="L") + label = self.model.config.id2label[segment["label_id"]] + score = segment["score"] + annotation.append({"score": score, "label": label, "mask": mask}) - elif (subtask == "instance" or subtask is None) and hasattr( - self.feature_extractor, "post_process_instance_segmentation" - ): - outputs = self.feature_extractor.post_process_instance_segmentation( - model_outputs, - threshold=threshold, - mask_threshold=mask_threshold, - overlap_mask_area_threshold=overlap_mask_area_threshold, - target_sizes=model_outputs["target_size"], - )[0] - - annotation = [] - segmentation = outputs["segmentation"] - - if len(outputs["segments_info"]) == 0: - mask = Image.fromarray(np.zeros(segmentation.shape).astype(np.uint8), mode="L") - annotation.append({"mask": mask, "label": "NULL", "score": 0.0}) - else: - for segment in outputs["segments_info"]: - mask = (segmentation == segment["id"]) * 255 - mask = Image.fromarray(mask.numpy().astype(np.uint8), mode="L") - label = self.model.config.id2label[segment["label_id"]] - score = segment["score"] - annotation.append({"mask": mask, "label": label, "score": score}) - - elif (subtask == "semantic" or subtask is None) and hasattr( - self.feature_extractor, "post_process_semantic_segmentation" - ): + elif subtask in {"semantic", None} and hasattr(self.feature_extractor, "post_process_semantic_segmentation"): outputs = self.feature_extractor.post_process_semantic_segmentation( model_outputs, 
target_sizes=model_outputs["target_size"] )[0] @@ -190,5 +165,5 @@ class ImageSegmentationPipeline(Pipeline): label = self.model.config.id2label[label] annotation.append({"score": None, "label": label, "mask": mask}) else: - raise ValueError(f"Task {subtask} is not supported for model {self.model}.s") + raise ValueError(f"Subtask {subtask} is not supported for model {type(self.model)}") return annotation diff --git a/tests/pipelines/test_pipelines_image_segmentation.py b/tests/pipelines/test_pipelines_image_segmentation.py index 92ae2e942b..9c73d6c9b4 100644 --- a/tests/pipelines/test_pipelines_image_segmentation.py +++ b/tests/pipelines/test_pipelines_image_segmentation.py @@ -89,8 +89,8 @@ class ImageSegmentationPipelineTests(unittest.TestCase, metaclass=PipelineTestCa ) self.assertIsInstance(outputs, list) n = len(outputs) - if isinstance(image_segmenter.model, (MaskFormerForInstanceSegmentation)): - # Instance segmentation (maskformer) have a slot for null class + if isinstance(image_segmenter.model, (MaskFormerForInstanceSegmentation, DetrForSegmentation)): + # Instance segmentation (maskformer, and detr) have a slot for null class # and can output nothing even with a low threshold self.assertGreaterEqual(n, 0) else: @@ -153,26 +153,53 @@ class ImageSegmentationPipelineTests(unittest.TestCase, metaclass=PipelineTestCa def test_small_model_tf(self): pass + @require_torch + def test_small_model_pt_no_panoptic(self): + model_id = "hf-internal-testing/tiny-random-mobilevit" + # The default task is `image-classification`, we need to override it + pipe = pipeline(task="image-segmentation", model=model_id) + + # This model supports neither `instance` nor `panoptic` + # We should error out + with self.assertRaises(ValueError) as e: + pipe("http://images.cocodataset.org/val2017/000000039769.jpg", subtask="panoptic") + self.assertEqual( + str(e.exception), + "Subtask panoptic is not supported for model ", + ) + with self.assertRaises(ValueError) as e: + 
pipe("http://images.cocodataset.org/val2017/000000039769.jpg", subtask="instance") + self.assertEqual( + str(e.exception), + "Subtask instance is not supported for model ", + ) + @require_torch def test_small_model_pt(self): model_id = "hf-internal-testing/tiny-detr-mobilenetsv3-panoptic" model = AutoModelForImageSegmentation.from_pretrained(model_id) feature_extractor = AutoFeatureExtractor.from_pretrained(model_id) - image_segmenter = ImageSegmentationPipeline(model=model, feature_extractor=feature_extractor) - - outputs = image_segmenter( - "http://images.cocodataset.org/val2017/000000039769.jpg", + image_segmenter = ImageSegmentationPipeline( + model=model, + feature_extractor=feature_extractor, subtask="panoptic", threshold=0.0, mask_threshold=0.0, overlap_mask_area_threshold=0.0, ) + outputs = image_segmenter( + "http://images.cocodataset.org/val2017/000000039769.jpg", + ) + # Shortening by hashing for o in outputs: o["mask"] = mask_to_test_readable(o["mask"]) + # This is extremely brittle, and those values are made specific for the CI. self.assertEqual( nested_simplify(outputs, decimals=4), [ @@ -189,9 +216,6 @@ class ImageSegmentationPipelineTests(unittest.TestCase, metaclass=PipelineTestCa "http://images.cocodataset.org/val2017/000000039769.jpg", "http://images.cocodataset.org/val2017/000000039769.jpg", ], - threshold=0.0, - mask_threshold=0.0, - overlap_mask_area_threshold=0.0, ) for output in outputs: for o in output: @@ -217,6 +241,48 @@ class ImageSegmentationPipelineTests(unittest.TestCase, metaclass=PipelineTestCa ], ) + output = image_segmenter("http://images.cocodataset.org/val2017/000000039769.jpg", subtask="instance") + for o in output: + o["mask"] = mask_to_test_readable(o["mask"]) + self.assertEqual( + nested_simplify(output, decimals=4), + [ + { + "score": 0.004, + "label": "LABEL_215", + "mask": {"hash": "a01498ca7c", "shape": (480, 640), "white_pixels": 307200}, + }, + ], + ) + + # This must be surprising to the reader. 
+ # The `panoptic` returns only LABEL_215, and this returns 3 labels. + # + output = image_segmenter("http://images.cocodataset.org/val2017/000000039769.jpg", subtask="semantic") + for o in output: + o["mask"] = mask_to_test_readable(o["mask"]) + self.maxDiff = None + self.assertEqual( + nested_simplify(output, decimals=4), + [ + { + "label": "LABEL_88", + "mask": {"hash": "7f0bf661a4", "shape": (480, 640), "white_pixels": 3}, + "score": None, + }, + { + "label": "LABEL_101", + "mask": {"hash": "10ab738dc9", "shape": (480, 640), "white_pixels": 8948}, + "score": None, + }, + { + "label": "LABEL_215", + "mask": {"hash": "b431e0946c", "shape": (480, 640), "white_pixels": 298249}, + "score": None, + }, + ], + ) + @require_torch def test_small_model_pt_semantic(self): model_id = "hf-internal-testing/tiny-random-beit-pipeline" @@ -246,13 +312,15 @@ class ImageSegmentationPipelineTests(unittest.TestCase, metaclass=PipelineTestCa @slow def test_integration_torch_image_segmentation(self): model_id = "facebook/detr-resnet-50-panoptic" - image_segmenter = pipeline("image-segmentation", model=model_id) + image_segmenter = pipeline( + "image-segmentation", + model=model_id, + threshold=0.0, + overlap_mask_area_threshold=0.0, + ) outputs = image_segmenter( "http://images.cocodataset.org/val2017/000000039769.jpg", - subtask="panoptic", - threshold=0, - overlap_mask_area_threshold=0.0, ) # Shortening by hashing @@ -300,9 +368,6 @@ class ImageSegmentationPipelineTests(unittest.TestCase, metaclass=PipelineTestCa "http://images.cocodataset.org/val2017/000000039769.jpg", "http://images.cocodataset.org/val2017/000000039769.jpg", ], - subtask="panoptic", - threshold=0.0, - overlap_mask_area_threshold=0.0, ) # Shortening by hashing @@ -386,9 +451,7 @@ class ImageSegmentationPipelineTests(unittest.TestCase, metaclass=PipelineTestCa model_id = "facebook/detr-resnet-50-panoptic" image_segmenter = pipeline("image-segmentation", model=model_id) - outputs = image_segmenter( - 
"http://images.cocodataset.org/val2017/000000039769.jpg", subtask="panoptic", threshold=0.999 - ) + outputs = image_segmenter("http://images.cocodataset.org/val2017/000000039769.jpg", threshold=0.999) # Shortening by hashing for o in outputs: o["mask"] = mask_to_test_readable(o["mask"]) @@ -409,9 +472,7 @@ class ImageSegmentationPipelineTests(unittest.TestCase, metaclass=PipelineTestCa ], ) - outputs = image_segmenter( - "http://images.cocodataset.org/val2017/000000039769.jpg", subtask="panoptic", threshold=0.5 - ) + outputs = image_segmenter("http://images.cocodataset.org/val2017/000000039769.jpg", threshold=0.5) for o in outputs: o["mask"] = mask_to_test_readable(o["mask"]) @@ -460,7 +521,7 @@ class ImageSegmentationPipelineTests(unittest.TestCase, metaclass=PipelineTestCa image = load_dataset("hf-internal-testing/fixtures_ade20k", split="test") file = image[0]["file"] - outputs = image_segmenter(file, subtask="panoptic", threshold=threshold) + outputs = image_segmenter(file, threshold=threshold) # Shortening by hashing for o in outputs: