From cca51aa15135d660501130b4f683cdd6d1fd5640 Mon Sep 17 00:00:00 2001 From: Alara Dirik <8944735+alaradirik@users.noreply.github.com> Date: Fri, 21 Oct 2022 18:09:58 +0300 Subject: [PATCH] Fix image segmentation pipeline errors, resolve backward compatibility issues (#19768) * Fix panoptic segmentation and pipeline * Update ImageSegmentationPipeline tests and reenable test_small_model_pt * Resolve backward compatibility issues --- .../models/detr/feature_extraction_detr.py | 17 ++- .../feature_extraction_maskformer.py | 34 +++-- .../pipelines/image_segmentation.py | 70 ++++++---- .../test_feature_extraction_maskformer.py | 4 +- .../test_pipelines_image_segmentation.py | 125 +++++------------- 5 files changed, 113 insertions(+), 137 deletions(-) diff --git a/src/transformers/models/detr/feature_extraction_detr.py b/src/transformers/models/detr/feature_extraction_detr.py index dac13d1d1b..7d6b05a7dc 100644 --- a/src/transformers/models/detr/feature_extraction_detr.py +++ b/src/transformers/models/detr/feature_extraction_detr.py @@ -190,13 +190,13 @@ def remove_low_and_no_objects(masks, scores, labels, object_mask_threshold, num_ return masks[to_keep], scores[to_keep], labels[to_keep] -def check_segment_validity(mask_labels, mask_probs, k, overlap_mask_area_threshold=0.8): +def check_segment_validity(mask_labels, mask_probs, k, mask_threshold=0.5, overlap_mask_area_threshold=0.8): # Get the mask associated with the k class mask_k = mask_labels == k mask_k_area = mask_k.sum() # Compute the area of all the stuff in query k - original_area = (mask_probs[k] >= 0.5).sum() + original_area = (mask_probs[k] >= mask_threshold).sum() mask_exists = mask_k_area > 0 and original_area > 0 # Eliminate disconnected tiny segments @@ -212,6 +212,7 @@ def compute_segments( mask_probs, pred_scores, pred_labels, + mask_threshold: float = 0.5, overlap_mask_area_threshold: float = 0.8, label_ids_to_fuse: Optional[Set[int]] = None, target_size: Tuple[int, int] = None, @@ -240,7 +241,9 @@ def compute_segments( should_fuse = pred_class in label_ids_to_fuse # Check if mask exists and large enough to be a segment - mask_exists, mask_k = check_segment_validity(mask_labels, mask_probs, k, overlap_mask_area_threshold) + mask_exists, mask_k = check_segment_validity( + mask_labels, mask_probs, k, mask_threshold, overlap_mask_area_threshold + ) if mask_exists: if pred_class in stuff_memory_list: @@ -1210,6 +1213,7 @@ class DetrFeatureExtractor(FeatureExtractionMixin, ImageFeatureExtractionMixin): self, outputs, threshold: float = 0.5, + mask_threshold: float = 0.5, overlap_mask_area_threshold: float = 0.8, target_sizes: Optional[List[Tuple[int, int]]] = None, return_coco_annotation: Optional[bool] = False, @@ -1221,6 +1225,8 @@ class DetrFeatureExtractor(FeatureExtractionMixin, ImageFeatureExtractionMixin): Raw outputs of the model. threshold (`float`, *optional*, defaults to 0.5): The probability score threshold to keep predicted instance masks. + mask_threshold (`float`, *optional*, defaults to 0.5): + Threshold to use when turning the predicted masks into binary values. overlap_mask_area_threshold (`float`, *optional*, defaults to 0.8): The overlap mask area threshold to merge or discard small disconnected parts within each binary instance mask. @@ -1272,6 +1278,7 @@ class DetrFeatureExtractor(FeatureExtractionMixin, ImageFeatureExtractionMixin): mask_probs_item, pred_scores_item, pred_labels_item, + mask_threshold, overlap_mask_area_threshold, target_size, ) @@ -1287,6 +1294,7 @@ class DetrFeatureExtractor(FeatureExtractionMixin, ImageFeatureExtractionMixin): self, outputs, threshold: float = 0.5, + mask_threshold: float = 0.5, overlap_mask_area_threshold: float = 0.8, label_ids_to_fuse: Optional[Set[int]] = None, target_sizes: Optional[List[Tuple[int, int]]] = None, @@ -1299,6 +1307,8 @@ class DetrFeatureExtractor(FeatureExtractionMixin, ImageFeatureExtractionMixin): The outputs from [`DetrForSegmentation`]. threshold (`float`, *optional*, defaults to 0.5): The probability score threshold to keep predicted instance masks. + mask_threshold (`float`, *optional*, defaults to 0.5): + Threshold to use when turning the predicted masks into binary values. overlap_mask_area_threshold (`float`, *optional*, defaults to 0.8): The overlap mask area threshold to merge or discard small disconnected parts within each binary instance mask. @@ -1359,6 +1369,7 @@ class DetrFeatureExtractor(FeatureExtractionMixin, ImageFeatureExtractionMixin): mask_probs_item, pred_scores_item, pred_labels_item, + mask_threshold, overlap_mask_area_threshold, label_ids_to_fuse, target_size, diff --git a/src/transformers/models/maskformer/feature_extraction_maskformer.py b/src/transformers/models/maskformer/feature_extraction_maskformer.py index 0b0bc320e7..615f223df7 100644 --- a/src/transformers/models/maskformer/feature_extraction_maskformer.py +++ b/src/transformers/models/maskformer/feature_extraction_maskformer.py @@ -37,15 +37,14 @@ if is_torch_available(): logger = logging.get_logger(__name__) +# Copied from transformers.models.detr.feature_extraction_detr.binary_mask_to_rle def binary_mask_to_rle(mask): """ - Converts given binary mask of shape (height, width) to the run-length encoding (RLE) format. - Args: + Converts given binary mask of shape (height, width) to the run-length encoding (RLE) format. mask (`torch.Tensor` or `numpy.array`): A binary mask tensor of shape `(height, width)` where 0 denotes background and 1 denotes the target segment_id or class_id. - Returns: `List`: Run-length encoded list of the binary mask. Refer to COCO API for more information about the RLE format. @@ -60,6 +59,7 @@ def binary_mask_to_rle(mask): return [x for x in runs] +# Copied from transformers.models.detr.feature_extraction_detr.convert_segmentation_to_rle def convert_segmentation_to_rle(segmentation): """ Converts given segmentation map of shape (height, width) to the run-length encoding (RLE) format. @@ -67,7 +67,6 @@ def convert_segmentation_to_rle(segmentation): Args: segmentation (`torch.Tensor` or `numpy.array`): A segmentation map of shape `(height, width)` where each value denotes a segment or class id. - Returns: `List[List]`: A list of lists, where each list is the run-length encoding of a segment / class id. """ @@ -82,6 +81,7 @@ def convert_segmentation_to_rle(segmentation): return run_length_encodings +# Copied from transformers.models.detr.feature_extraction_detr.remove_low_and_no_objects def remove_low_and_no_objects(masks, scores, labels, object_mask_threshold, num_labels): """ Binarize the given masks using `object_mask_threshold`, it returns the associated values of `masks`, `scores` and @@ -96,10 +96,8 @@ def remove_low_and_no_objects(masks, scores, labels, object_mask_threshold, num_ A tensor of shape `(num_queries)`. object_mask_threshold (`float`): A number between 0 and 1 used to binarize the masks. - Raises: `ValueError`: Raised when the first dimension doesn't match in all input tensors. - Returns: `Tuple[`torch.Tensor`, `torch.Tensor`, `torch.Tensor`]`: The `masks`, `scores` and `labels` without the region < `object_mask_threshold`. @@ -108,16 +106,18 @@ def remove_low_and_no_objects(masks, scores, labels, object_mask_threshold, num_ raise ValueError("mask, scores and labels must have the same shape!") to_keep = labels.ne(num_labels) & (scores > object_mask_threshold) + return masks[to_keep], scores[to_keep], labels[to_keep] -def check_segment_validity(mask_labels, mask_probs, k, overlap_mask_area_threshold=0.8): +# Copied from transformers.models.detr.feature_extraction_detr.check_segment_validity +def check_segment_validity(mask_labels, mask_probs, k, mask_threshold=0.5, overlap_mask_area_threshold=0.8): # Get the mask associated with the k class mask_k = mask_labels == k mask_k_area = mask_k.sum() # Compute the area of all the stuff in query k - original_area = (mask_probs[k] >= 0.5).sum() + original_area = (mask_probs[k] >= mask_threshold).sum() mask_exists = mask_k_area > 0 and original_area > 0 # Eliminate disconnected tiny segments @@ -129,10 +129,12 @@ def check_segment_validity(mask_labels, mask_probs, k, overlap_mask_area_thresho return mask_exists, mask_k +# Copied from transformers.models.detr.feature_extraction_detr.compute_segments def compute_segments( mask_probs, pred_scores, pred_labels, + mask_threshold: float = 0.5, overlap_mask_area_threshold: float = 0.8, label_ids_to_fuse: Optional[Set[int]] = None, target_size: Tuple[int, int] = None, @@ -144,7 +146,9 @@ def compute_segments( segments: List[Dict] = [] if target_size is not None: - mask_probs = interpolate(mask_probs.unsqueeze(0), size=target_size, mode="bilinear", align_corners=False)[0] + mask_probs = nn.functional.interpolate( + mask_probs.unsqueeze(0), size=target_size, mode="bilinear", align_corners=False + )[0] current_segment_id = 0 @@ -159,7 +163,9 @@ def compute_segments( should_fuse = pred_class in label_ids_to_fuse # Check if mask exists and large enough to be a segment - mask_exists, mask_k = check_segment_validity(mask_labels, mask_probs, k, overlap_mask_area_threshold) + mask_exists, mask_k = check_segment_validity( + mask_labels, mask_probs, k, mask_threshold, overlap_mask_area_threshold + ) if mask_exists: if pred_class in stuff_memory_list: @@ -722,6 +728,7 @@ class MaskFormerFeatureExtractor(FeatureExtractionMixin, ImageFeatureExtractionM self, outputs, threshold: float = 0.5, + mask_threshold: float = 0.5, overlap_mask_area_threshold: float = 0.8, target_sizes: Optional[List[Tuple[int, int]]] = None, return_coco_annotation: Optional[bool] = False, @@ -735,6 +742,8 @@ class MaskFormerFeatureExtractor(FeatureExtractionMixin, ImageFeatureExtractionM Raw outputs of the model. threshold (`float`, *optional*, defaults to 0.5): The probability score threshold to keep predicted instance masks. + mask_threshold (`float`, *optional*, defaults to 0.5): + Threshold to use when turning the predicted masks into binary values. overlap_mask_area_threshold (`float`, *optional*, defaults to 0.8): The overlap mask area threshold to merge or discard small disconnected parts within each binary instance mask. @@ -786,6 +795,7 @@ class MaskFormerFeatureExtractor(FeatureExtractionMixin, ImageFeatureExtractionM mask_probs_item, pred_scores_item, pred_labels_item, + mask_threshold, overlap_mask_area_threshold, target_size, ) @@ -801,6 +811,7 @@ class MaskFormerFeatureExtractor(FeatureExtractionMixin, ImageFeatureExtractionM self, outputs, threshold: float = 0.5, + mask_threshold: float = 0.5, overlap_mask_area_threshold: float = 0.8, label_ids_to_fuse: Optional[Set[int]] = None, target_sizes: Optional[List[Tuple[int, int]]] = None, @@ -814,6 +825,8 @@ class MaskFormerFeatureExtractor(FeatureExtractionMixin, ImageFeatureExtractionM The outputs from [`MaskFormerForInstanceSegmentation`]. threshold (`float`, *optional*, defaults to 0.5): The probability score threshold to keep predicted instance masks. + mask_threshold (`float`, *optional*, defaults to 0.5): + Threshold to use when turning the predicted masks into binary values. overlap_mask_area_threshold (`float`, *optional*, defaults to 0.8): The overlap mask area threshold to merge or discard small disconnected parts within each binary instance mask. @@ -875,6 +888,7 @@ class MaskFormerFeatureExtractor(FeatureExtractionMixin, ImageFeatureExtractionM mask_probs_item, pred_scores_item, pred_labels_item, + mask_threshold, overlap_mask_area_threshold, label_ids_to_fuse, target_size, diff --git a/src/transformers/pipelines/image_segmentation.py b/src/transformers/pipelines/image_segmentation.py index a085360e34..877c42a883 100644 --- a/src/transformers/pipelines/image_segmentation.py +++ b/src/transformers/pipelines/image_segmentation.py @@ -60,6 +60,8 @@ class ImageSegmentationPipeline(Pipeline): postprocess_kwargs["task"] = kwargs["task"] if "threshold" in kwargs: postprocess_kwargs["threshold"] = kwargs["threshold"] + if "mask_threshold" in kwargs: + postprocess_kwargs["mask_threshold"] = kwargs["mask_threshold"] if "overlap_mask_area_threshold" in kwargs: postprocess_kwargs["overlap_mask_area_threshold"] = kwargs["overlap_mask_area_threshold"] return {}, {}, postprocess_kwargs @@ -78,11 +80,13 @@ class ImageSegmentationPipeline(Pipeline): The pipeline accepts either a single image or a batch of images. Images in a batch must all be in the same format: all as HTTP(S) links, all as local paths, or all as PIL images. - task (`str`, defaults to `semantic`): + subtask (`str`, defaults to `panoptic`): Segmentation task to be performed, choose [`semantic`, `instance` and `panoptic`] depending on model capabilities. threshold (`float`, *optional*, defaults to 0.9): Probability threshold to filter out predicted masks. + mask_threshold (`float`, *optional*, defaults to 0.5): + Threshold to use when turning the predicted masks into binary values. overlap_mask_area_threshold (`float`, *optional*, defaults to 0.5): Mask overlap threshold to eliminate small, disconnected segments. @@ -116,11 +120,16 @@ class ImageSegmentationPipeline(Pipeline): model_outputs["target_size"] = target_size return model_outputs - def postprocess(self, model_outputs, task="semantic", threshold=0.9, overlap_mask_area_threshold=0.5): - if task == "instance" and hasattr(self.feature_extractor, "post_process_instance_segmentation"): + def postprocess( + self, model_outputs, subtask=None, threshold=0.9, mask_threshold=0.5, overlap_mask_area_threshold=0.5 + ): + if (subtask == "panoptic" or subtask is None) and hasattr( + self.feature_extractor, "post_process_panoptic_segmentation" + ): outputs = self.feature_extractor.post_process_panoptic_segmentation( model_outputs, threshold=threshold, + mask_threshold=mask_threshold, overlap_mask_area_threshold=overlap_mask_area_threshold, target_sizes=model_outputs["target_size"], )[0] @@ -130,29 +139,7 @@ class ImageSegmentationPipeline(Pipeline): if len(outputs["segments_info"]) == 0: mask = Image.fromarray(np.zeros(segmentation.shape).astype(np.uint8), mode="L") - annotation.append({"mask": mask, "label": None, "score": 0.0}) - else: - for segment in outputs["segments_info"]: - mask = (segmentation == segment["id"]) * 255 - mask = Image.fromarray(mask.numpy().astype(np.uint8), mode="L") - label = self.model.config.id2label[segment["label_id"]] - score = segment["score"] - annotation.append({"mask": mask, "label": label, "score": score}) - - elif task == "panoptic" and hasattr(self.feature_extractor, "post_process_panoptic_segmentation"): - outputs = self.feature_extractor.post_process_panoptic_segmentation( - model_outputs, - threshold=threshold, - overlap_mask_area_threshold=overlap_mask_area_threshold, - target_sizes=model_outputs["target_size"], - )[0] - - annotation = [] - segmentation = outputs["segmentation"] - - if len(outputs["segments_info"]) == 0: - mask = Image.fromarray(np.zeros(segmentation.shape).astype(np.uint8), mode="L") - annotation.append({"mask": mask, "label": None, "score": 0.0}) + annotation.append({"mask": mask, "label": "NULL", "score": 0.0}) else: for segment in outputs["segments_info"]: mask = (segmentation == segment["id"]) * 255 @@ -161,7 +148,34 @@ class ImageSegmentationPipeline(Pipeline): score = segment["score"] annotation.append({"score": score, "label": label, "mask": mask}) - elif task == "semantic" and hasattr(self.feature_extractor, "post_process_semantic_segmentation"): + elif (subtask == "instance" or subtask is None) and hasattr( + self.feature_extractor, "post_process_instance_segmentation" + ): + outputs = self.feature_extractor.post_process_instance_segmentation( + model_outputs, + threshold=threshold, + mask_threshold=mask_threshold, + overlap_mask_area_threshold=overlap_mask_area_threshold, + target_sizes=model_outputs["target_size"], + )[0] + + annotation = [] + segmentation = outputs["segmentation"] + + if len(outputs["segments_info"]) == 0: + mask = Image.fromarray(np.zeros(segmentation.shape).astype(np.uint8), mode="L") + annotation.append({"mask": mask, "label": "NULL", "score": 0.0}) + else: + for segment in outputs["segments_info"]: + mask = (segmentation == segment["id"]) * 255 + mask = Image.fromarray(mask.numpy().astype(np.uint8), mode="L") + label = self.model.config.id2label[segment["label_id"]] + score = segment["score"] + annotation.append({"mask": mask, "label": label, "score": score}) + + elif (subtask == "semantic" or subtask is None) and hasattr( + self.feature_extractor, "post_process_semantic_segmentation" + ): outputs = self.feature_extractor.post_process_semantic_segmentation( model_outputs, target_sizes=model_outputs["target_size"] )[0] @@ -176,5 +190,5 @@ class ImageSegmentationPipeline(Pipeline): label = self.model.config.id2label[label] annotation.append({"score": None, "label": label, "mask": mask}) else: - raise ValueError(f"task {task} is not supported for model {self.model}") + raise ValueError(f"Task {subtask} is not supported for model {self.model}.s") return annotation diff --git a/tests/models/maskformer/test_feature_extraction_maskformer.py b/tests/models/maskformer/test_feature_extraction_maskformer.py index fbafa9af15..063c4c754c 100644 --- a/tests/models/maskformer/test_feature_extraction_maskformer.py +++ b/tests/models/maskformer/test_feature_extraction_maskformer.py @@ -399,13 +399,11 @@ class MaskFormerFeatureExtractionTest(FeatureExtractionSavingTestMixin, unittest self.assertEqual(segmentation[0].shape, target_sizes[0]) - @unittest.skip("Fix me Alara!") def test_post_process_panoptic_segmentation(self): feature_extractor = self.feature_extraction_class(num_labels=self.feature_extract_tester.num_classes) outputs = self.feature_extract_tester.get_fake_maskformer_outputs() segmentation = feature_extractor.post_process_panoptic_segmentation(outputs, threshold=0) - print(len(segmentation)) - print(self.feature_extract_tester.batch_size) + self.assertTrue(len(segmentation) == self.feature_extract_tester.batch_size) for el in segmentation: self.assertTrue("segmentation" in el) diff --git a/tests/pipelines/test_pipelines_image_segmentation.py b/tests/pipelines/test_pipelines_image_segmentation.py index d06c2926ff..92ae2e942b 100644 --- a/tests/pipelines/test_pipelines_image_segmentation.py +++ b/tests/pipelines/test_pipelines_image_segmentation.py @@ -81,7 +81,12 @@ class ImageSegmentationPipelineTests(unittest.TestCase, metaclass=PipelineTestCa ] def run_pipeline_test(self, image_segmenter, examples): - outputs = image_segmenter("./tests/fixtures/tests_samples/COCO/000000039769.png", threshold=0.0) + outputs = image_segmenter( + "./tests/fixtures/tests_samples/COCO/000000039769.png", + threshold=0.0, + mask_threshold=0, + overlap_mask_area_threshold=0, + ) self.assertIsInstance(outputs, list) n = len(outputs) if isinstance(image_segmenter.model, (MaskFormerForInstanceSegmentation)): @@ -97,15 +102,15 @@ class ImageSegmentationPipelineTests(unittest.TestCase, metaclass=PipelineTestCa dataset = datasets.load_dataset("hf-internal-testing/fixtures_image_utils", "image", split="test") # RGBA - outputs = image_segmenter(dataset[0]["file"]) + outputs = image_segmenter(dataset[0]["file"], threshold=0.0, mask_threshold=0, overlap_mask_area_threshold=0) m = len(outputs) self.assertEqual([{"score": ANY(float, type(None)), "label": ANY(str), "mask": ANY(Image.Image)}] * m, outputs) # LA - outputs = image_segmenter(dataset[1]["file"]) + outputs = image_segmenter(dataset[1]["file"], threshold=0.0, mask_threshold=0, overlap_mask_area_threshold=0) m = len(outputs) self.assertEqual([{"score": ANY(float, type(None)), "label": ANY(str), "mask": ANY(Image.Image)}] * m, outputs) # L - outputs = image_segmenter(dataset[2]["file"]) + outputs = image_segmenter(dataset[2]["file"], threshold=0.0, mask_threshold=0, overlap_mask_area_threshold=0) m = len(outputs) self.assertEqual([{"score": ANY(float, type(None)), "label": ANY(str), "mask": ANY(Image.Image)}] * m, outputs) @@ -126,7 +131,9 @@ class ImageSegmentationPipelineTests(unittest.TestCase, metaclass=PipelineTestCa "./tests/fixtures/tests_samples/COCO/000000039769.png", "./tests/fixtures/tests_samples/COCO/000000039769.png", ] - outputs = image_segmenter(batch, threshold=0.0, batch_size=batch_size) + outputs = image_segmenter( + batch, threshold=0.0, mask_threshold=0, overlap_mask_area_threshold=0, batch_size=batch_size + ) self.assertEqual(len(batch), len(outputs)) self.assertEqual(len(outputs[0]), n) self.assertEqual( @@ -152,55 +159,29 @@ class ImageSegmentationPipelineTests(unittest.TestCase, metaclass=PipelineTestCa model = AutoModelForImageSegmentation.from_pretrained(model_id) feature_extractor = AutoFeatureExtractor.from_pretrained(model_id) - image_segmenter = ImageSegmentationPipeline( - model=model, - feature_extractor=feature_extractor, - task="semantic", - threshold=0.0, - overlap_mask_area_threshold=0.0, - ) + image_segmenter = ImageSegmentationPipeline(model=model, feature_extractor=feature_extractor) outputs = image_segmenter( "http://images.cocodataset.org/val2017/000000039769.jpg", + subtask="panoptic", + threshold=0.0, + mask_threshold=0.0, + overlap_mask_area_threshold=0.0, ) # Shortening by hashing for o in outputs: o["mask"] = mask_to_test_readable(o["mask"]) - # This is extremely brittle, and those values are made specific for the CI. self.assertEqual( nested_simplify(outputs, decimals=4), [ { - "label": "LABEL_88", - "mask": {"hash": "7f0bf661a4", "shape": (480, 640), "white_pixels": 3}, - "score": None, - }, - { - "label": "LABEL_101", - "mask": {"hash": "10ab738dc9", "shape": (480, 640), "white_pixels": 8948}, - "score": None, - }, - { + "score": 0.004, "label": "LABEL_215", - "mask": {"hash": "b431e0946c", "shape": (480, 640), "white_pixels": 298249}, - "score": None, + "mask": {"hash": "a01498ca7c", "shape": (480, 640), "white_pixels": 307200}, }, - ] - # Temporary: Keeping around the old values as they might provide useful later - # [ - # { - # "score": 0.004, - # "label": "LABEL_215", - # "mask": {"hash": "34eecd16bb", "shape": (480, 640), "white_pixels": 0}, - # }, - # { - # "score": 0.004, - # "label": "LABEL_215", - # "mask": {"hash": "34eecd16bb", "shape": (480, 640), "white_pixels": 0}, - # }, - # ], + ], ) outputs = image_segmenter( @@ -209,6 +190,8 @@ class ImageSegmentationPipelineTests(unittest.TestCase, metaclass=PipelineTestCa "http://images.cocodataset.org/val2017/000000039769.jpg", ], threshold=0.0, + mask_threshold=0.0, + overlap_mask_area_threshold=0.0, ) for output in outputs: for o in output: @@ -219,62 +202,18 @@ class ImageSegmentationPipelineTests(unittest.TestCase, metaclass=PipelineTestCa [ [ { - "label": "LABEL_88", - "mask": {"hash": "7f0bf661a4", "shape": (480, 640), "white_pixels": 3}, - "score": None, - }, - { - "label": "LABEL_101", - "mask": {"hash": "10ab738dc9", "shape": (480, 640), "white_pixels": 8948}, - "score": None, - }, - { + "score": 0.004, "label": "LABEL_215", - "mask": {"hash": "b431e0946c", "shape": (480, 640), "white_pixels": 298249}, - "score": None, + "mask": {"hash": "a01498ca7c", "shape": (480, 640), "white_pixels": 307200}, }, ], [ { - "label": "LABEL_88", - "mask": {"hash": "7f0bf661a4", "shape": (480, 640), "white_pixels": 3}, - "score": None, - }, - { - "label": "LABEL_101", - "mask": {"hash": "10ab738dc9", "shape": (480, 640), "white_pixels": 8948}, - "score": None, - }, - { + "score": 0.004, "label": "LABEL_215", - "mask": {"hash": "b431e0946c", "shape": (480, 640), "white_pixels": 298249}, - "score": None, + "mask": {"hash": "a01498ca7c", "shape": (480, 640), "white_pixels": 307200}, }, - ] - # [ - # { - # "score": 0.004, - # "label": "LABEL_215", - # "mask": {"hash": "34eecd16bb", "shape": (480, 640), "white_pixels": 0}, - # }, - # { - # "score": 0.004, - # "label": "LABEL_215", - # "mask": {"hash": "34eecd16bb", "shape": (480, 640), "white_pixels": 0}, - # }, - # ], - # [ - # { - # "score": 0.004, - # "label": "LABEL_215", - # "mask": {"hash": "34eecd16bb", "shape": (480, 640), "white_pixels": 0}, - # }, - # { - # "score": 0.004, - # "label": "LABEL_215", - # "mask": {"hash": "34eecd16bb", "shape": (480, 640), "white_pixels": 0}, - # }, - # ], + ], ], ) @@ -311,7 +250,7 @@ class ImageSegmentationPipelineTests(unittest.TestCase, metaclass=PipelineTestCa outputs = image_segmenter( "http://images.cocodataset.org/val2017/000000039769.jpg", - task="panoptic", + subtask="panoptic", threshold=0, overlap_mask_area_threshold=0.0, ) @@ -361,7 +300,7 @@ class ImageSegmentationPipelineTests(unittest.TestCase, metaclass=PipelineTestCa "http://images.cocodataset.org/val2017/000000039769.jpg", "http://images.cocodataset.org/val2017/000000039769.jpg", ], - task="panoptic", + subtask="panoptic", threshold=0.0, overlap_mask_area_threshold=0.0, ) @@ -448,7 +387,7 @@ class ImageSegmentationPipelineTests(unittest.TestCase, metaclass=PipelineTestCa image_segmenter = pipeline("image-segmentation", model=model_id) outputs = image_segmenter( - "http://images.cocodataset.org/val2017/000000039769.jpg", task="panoptic", threshold=0.999 + "http://images.cocodataset.org/val2017/000000039769.jpg", subtask="panoptic", threshold=0.999 ) # Shortening by hashing for o in outputs: @@ -471,7 +410,7 @@ class ImageSegmentationPipelineTests(unittest.TestCase, metaclass=PipelineTestCa ) outputs = image_segmenter( - "http://images.cocodataset.org/val2017/000000039769.jpg", task="panoptic", threshold=0.5 + "http://images.cocodataset.org/val2017/000000039769.jpg", subtask="panoptic", threshold=0.5 ) for o in outputs: @@ -521,7 +460,7 @@ class ImageSegmentationPipelineTests(unittest.TestCase, metaclass=PipelineTestCa image = load_dataset("hf-internal-testing/fixtures_ade20k", split="test") file = image[0]["file"] - outputs = image_segmenter(file, task="panoptic", threshold=threshold) + outputs = image_segmenter(file, subtask="panoptic", threshold=threshold) # Shortening by hashing for o in outputs: