Fix MaskformerFeatureExtractor (#20100)
* Fix bug
* Add another fix
* Add print statement
* Apply fix
* Fix feature extractor
* Fix feature extractor
* Add print statements
* Add print statements
* Remove print statements
* Add instance segmentation integration test
* Add integration test for semantic segmentation
* Add draft for panoptic segmentation integration test
* Fix integration test for panoptic segmentation
* Remove slow annotator

Co-authored-by: Niels Rogge <nielsrogge@Nielss-MacBook-Pro.local>
This commit is contained in:
@@ -17,7 +17,9 @@
|
||||
import unittest
|
||||
|
||||
import numpy as np
|
||||
from datasets import load_dataset
|
||||
|
||||
from huggingface_hub import hf_hub_download
|
||||
from transformers.testing_utils import require_torch, require_vision
|
||||
from transformers.utils import is_torch_available, is_vision_available
|
||||
|
||||
@@ -345,6 +347,173 @@ class MaskFormerFeatureExtractionTest(FeatureExtractionSavingTestMixin, unittest
|
||||
common(is_instance_map=False, segmentation_type="pil")
|
||||
common(is_instance_map=True, segmentation_type="pil")
|
||||
|
||||
def test_integration_instance_segmentation(self):
    """Run the feature extractor end to end on two instance-segmentation samples.

    Two images and their annotation PNGs are downloaded from the
    ``nielsr/image-segmentation-toy-data`` dataset repo on the hub. Each
    annotation is split into an instance map and an instance-ID -> class-ID
    mapping, both are fed to ``MaskFormerFeatureExtractor``, and the shapes,
    class labels and mask sums of the result are compared against fixed
    expected values.
    """
    # load 2 images and corresponding annotations from the hub
    repo_id = "nielsr/image-segmentation-toy-data"

    def load_from_hub(filename):
        """Download `filename` from the dataset repo and open it as an image."""
        return Image.open(hf_hub_download(repo_id=repo_id, filename=filename, repo_type="dataset"))

    image1 = load_from_hub("instance_segmentation_image_1.png")
    image2 = load_from_hub("instance_segmentation_image_2.png")
    annotation1 = load_from_hub("instance_segmentation_annotation_1.png")
    annotation2 = load_from_hub("instance_segmentation_annotation_2.png")

    # get instance segmentations and instance-to-segmentation mappings
    def get_instance_segmentation_and_mapping(annotation):
        """Split an annotation image into (instance map, {instance ID: class ID}).

        Channel 0 of the annotation is read as the per-pixel class ID and
        channel 1 as the per-pixel instance ID.
        """
        # convert once and slice both channels from the same array
        annotation_array = np.array(annotation)
        instance_seg = annotation_array[:, :, 1]
        class_id_map = annotation_array[:, :, 0]
        class_labels = np.unique(class_id_map)

        # create mapping between instance IDs and semantic category IDs
        inst2class = {}
        for label in class_labels:
            instance_ids = np.unique(instance_seg[class_id_map == label])
            inst2class.update({i: label for i in instance_ids})

        return instance_seg, inst2class

    instance_seg1, inst2class1 = get_instance_segmentation_and_mapping(annotation1)
    instance_seg2, inst2class2 = get_instance_segmentation_and_mapping(annotation2)

    # create a feature extractor
    feature_extractor = MaskFormerFeatureExtractor(reduce_labels=True, ignore_index=255, size=(512, 512))

    # prepare the images and annotations
    inputs = feature_extractor(
        [image1, image2],
        [instance_seg1, instance_seg2],
        instance_id_to_semantic_id=[inst2class1, inst2class2],
        return_tensors="pt",
    )

    # verify the pixel values and pixel mask
    self.assertEqual(inputs["pixel_values"].shape, (2, 3, 512, 512))
    self.assertEqual(inputs["pixel_mask"].shape, (2, 512, 512))

    # verify the class labels
    self.assertEqual(len(inputs["class_labels"]), 2)
    self.assertTrue(torch.allclose(inputs["class_labels"][0], torch.tensor([30, 55])))
    self.assertTrue(torch.allclose(inputs["class_labels"][1], torch.tensor([4, 4, 23, 55])))

    # verify the mask labels
    self.assertEqual(len(inputs["mask_labels"]), 2)
    self.assertEqual(inputs["mask_labels"][0].shape, (2, 512, 512))
    self.assertEqual(inputs["mask_labels"][1].shape, (4, 512, 512))
    self.assertEqual(inputs["mask_labels"][0].sum().item(), 41527.0)
    self.assertEqual(inputs["mask_labels"][1].sum().item(), 26259.0)
|
||||
|
||||
def test_integration_semantic_segmentation(self):
    """Run the feature extractor end to end on two semantic-segmentation samples.

    Two images and their semantic annotation PNGs are downloaded from the
    ``nielsr/image-segmentation-toy-data`` dataset repo on the hub and passed
    straight to ``MaskFormerFeatureExtractor`` (no instance mapping). The
    shapes, class labels and mask sums of the result are compared against
    fixed expected values.
    """
    # load 2 images and corresponding semantic annotations from the hub
    repo_id = "nielsr/image-segmentation-toy-data"

    def load_from_hub(filename):
        """Download `filename` from the dataset repo and open it as an image."""
        return Image.open(hf_hub_download(repo_id=repo_id, filename=filename, repo_type="dataset"))

    image1 = load_from_hub("semantic_segmentation_image_1.png")
    image2 = load_from_hub("semantic_segmentation_image_2.png")
    annotation1 = load_from_hub("semantic_segmentation_annotation_1.png")
    annotation2 = load_from_hub("semantic_segmentation_annotation_2.png")

    # create a feature extractor
    feature_extractor = MaskFormerFeatureExtractor(reduce_labels=True, ignore_index=255, size=(512, 512))

    # prepare the images and annotations
    inputs = feature_extractor(
        [image1, image2],
        [annotation1, annotation2],
        return_tensors="pt",
    )

    # verify the pixel values and pixel mask
    self.assertEqual(inputs["pixel_values"].shape, (2, 3, 512, 512))
    self.assertEqual(inputs["pixel_mask"].shape, (2, 512, 512))

    # verify the class labels
    self.assertEqual(len(inputs["class_labels"]), 2)
    self.assertTrue(torch.allclose(inputs["class_labels"][0], torch.tensor([2, 4, 60])))
    self.assertTrue(torch.allclose(inputs["class_labels"][1], torch.tensor([0, 3, 7, 8, 15, 28, 30, 143])))

    # verify the mask labels
    self.assertEqual(len(inputs["mask_labels"]), 2)
    self.assertEqual(inputs["mask_labels"][0].shape, (3, 512, 512))
    self.assertEqual(inputs["mask_labels"][1].shape, (8, 512, 512))
    self.assertEqual(inputs["mask_labels"][0].sum().item(), 170200.0)
    self.assertEqual(inputs["mask_labels"][1].sum().item(), 257036.0)
|
||||
|
||||
def test_integration_panoptic_segmentation(self):
    """Run `encode_inputs` end to end on two panoptic-segmentation samples.

    The first two rows of the ``train`` split of the
    ``nielsr/ade20k-panoptic-demo`` dataset are loaded. Each colour-encoded
    label image is turned into a per-pixel segment-ID map, paired with a
    segment-ID -> category-ID mapping built from ``segments_info``, and
    passed to ``MaskFormerFeatureExtractor.encode_inputs`` without resizing.
    The shapes, class labels and mask sums of the result are compared
    against fixed expected values.
    """
    # load 2 images and corresponding panoptic annotations from the hub
    dataset = load_dataset("nielsr/ade20k-panoptic-demo")
    # fetch each row once instead of re-indexing the split for every field
    sample1 = dataset["train"][0]
    sample2 = dataset["train"][1]
    image1 = sample1["image"]
    image2 = sample2["image"]
    segments_info1 = sample1["segments_info"]
    segments_info2 = sample2["segments_info"]
    annotation1 = sample1["label"]
    annotation2 = sample2["label"]

    def rgb_to_id(color):
        """Fold three colour channels into one ID: c0 + 256 * c1 + 256**2 * c2.

        A 3-dimensional array yields a 2-dimensional array of IDs; any other
        input is treated as a single colour triple and yields an int.
        """
        if isinstance(color, np.ndarray) and len(color.shape) == 3:
            if color.dtype == np.uint8:
                # widen first so the weighted sum below is not computed in 8 bits
                color = color.astype(np.int32)
            return color[:, :, 0] + 256 * color[:, :, 1] + 256 * 256 * color[:, :, 2]
        return int(color[0] + 256 * color[1] + 256 * 256 * color[2])

    def create_panoptic_map(annotation, segments_info):
        """Return (per-pixel segment-ID map, {segment ID: category ID})."""
        annotation = np.array(annotation)
        # convert RGB to segment IDs per pixel
        # 0 is the "ignore" label, for which we don't need to make binary masks
        panoptic_map = rgb_to_id(annotation)

        # create mapping between segment IDs and semantic classes
        inst2class = {segment["id"]: segment["category_id"] for segment in segments_info}

        return panoptic_map, inst2class

    panoptic_map1, inst2class1 = create_panoptic_map(annotation1, segments_info1)
    panoptic_map2, inst2class2 = create_panoptic_map(annotation2, segments_info2)

    # create a feature extractor
    feature_extractor = MaskFormerFeatureExtractor(ignore_index=0, do_resize=False)

    # prepare the images and annotations
    # move the last axis of each image array to the front before encoding
    pixel_values_list = [np.moveaxis(np.array(image1), -1, 0), np.moveaxis(np.array(image2), -1, 0)]
    inputs = feature_extractor.encode_inputs(
        pixel_values_list,
        [panoptic_map1, panoptic_map2],
        instance_id_to_semantic_id=[inst2class1, inst2class2],
        return_tensors="pt",
    )

    # verify the pixel values and pixel mask
    self.assertEqual(inputs["pixel_values"].shape, (2, 3, 512, 711))
    self.assertEqual(inputs["pixel_mask"].shape, (2, 512, 711))

    # verify the class labels
    self.assertEqual(len(inputs["class_labels"]), 2)
    # fmt: off
    expected_class_labels1 = torch.tensor([4, 17, 32, 42, 42, 42, 42, 42, 42, 42, 32, 12, 12, 12, 12, 12, 42, 42, 12, 12, 12, 42, 12, 12, 12, 12, 12, 3, 12, 12, 12, 12, 42, 42, 42, 12, 42, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 5, 12, 12, 12, 12, 12, 12, 12, 0, 43, 43, 43, 96, 43, 104, 43, 31, 125, 31, 125, 138, 87, 125, 149, 138, 125, 87, 87])  # noqa: E231
    # fmt: on
    # compare against the tensor directly; re-wrapping it in torch.tensor() only copies it and warns
    self.assertTrue(torch.allclose(inputs["class_labels"][0], expected_class_labels1))
    # fmt: off
    expected_class_labels2 = torch.tensor([19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 67, 82, 19, 19, 17, 19, 19, 19, 19, 19, 19, 19, 19, 19, 12, 12, 42, 12, 12, 12, 12, 3, 14, 12, 12, 12, 12, 12, 12, 12, 12, 14, 5, 12, 12, 0, 115, 43, 43, 115, 43, 43, 43, 8, 8, 8, 138, 138, 125, 143])  # noqa: E231
    # fmt: on
    self.assertTrue(torch.allclose(inputs["class_labels"][1], expected_class_labels2))

    # verify the mask labels
    self.assertEqual(len(inputs["mask_labels"]), 2)
    self.assertEqual(inputs["mask_labels"][0].shape, (79, 512, 711))
    self.assertEqual(inputs["mask_labels"][1].shape, (61, 512, 711))
    self.assertEqual(inputs["mask_labels"][0].sum().item(), 315193.0)
    self.assertEqual(inputs["mask_labels"][1].sum().item(), 350747.0)
|
||||
|
||||
def test_binary_mask_to_rle(self):
|
||||
fake_binary_mask = np.zeros((20, 50))
|
||||
fake_binary_mask[0, 20:] = 1
|
||||
|
||||
Reference in New Issue
Block a user