Enable image-segmentation on AutoModelForSemanticSegmentation (#15647)

* Enabling Beit SegFormer to `image-segmentation`.

* Fixing the score.

* Fix import ?

* Missing in type hint.

* Multiple test fixes:

- Add `raw_image` support. It should be the default IMHO since in Python
  world it doesn't make any sense to base64 encode the image (Sorry
  @mishig, didn't catch that in my review). I really think we should
  consider breaking BC here.
- Add support for Segformer tiny test (needed
  `SegformerModelTester.get_config` to enable TinyConfig
  @NielsRogge)
- Add the check that `batch_size` works correctly on that pipeline.
  Uncovered that it doesn't for Detr, which IMO is OK since images
  after `feature_extractor` don't have the same size. Comment should
  explain.

* Type hint as a string.

* Make fixup + update black.

* torch+vision protections.

* Don't use torchvision, use F.interpolate instead (no new dep).

* Last fixes for Segformer.

* Update test to reflect new image (which was broken)

* Update tests.

* Major BC modification:

- Removed the string compressed PNG string, that's a job for users
`transformers` stays in python land.
- Removed the `score` for semantic segmentation. It has hardly a meaning
  on its own in this context.
- Don't include the grayscale with logits for now (which could enable
  users to get a sense of confidence). Might be done later.
- Don't include the surface of the mask (could be used for sorting by
  users, to filter out small masks). It's already calculable, and
  it's easier to add later, than to add now and break later if we need.

* `make fixup`.

* Small changes.

* Rebase + doc fixup.
This commit is contained in:
Nicolas Patry
2022-02-23 17:20:26 +01:00
committed by GitHub
parent 1b23979736
commit 9e71d46455
7 changed files with 176 additions and 83 deletions

View File

@@ -101,7 +101,11 @@ class SegformerModelTester:
if self.use_labels:
labels = ids_tensor([self.batch_size, self.image_size, self.image_size], self.num_labels)
config = SegformerConfig(
config = self.get_config()
return config, pixel_values, labels
def get_config(self):
return SegformerConfig(
image_size=self.image_size,
num_channels=self.num_channels,
num_encoder_blocks=self.num_encoder_blocks,
@@ -114,8 +118,6 @@ class SegformerModelTester:
initializer_range=self.initializer_range,
)
return config, pixel_values, labels
def create_and_check_model(self, config, pixel_values, labels):
model = SegformerModel(config=config)
model.to(torch_device)

View File

@@ -126,14 +126,14 @@ def get_tiny_feature_extractor_from_checkpoint(checkpoint, tiny_config, feature_
class ANY:
def __init__(self, _type):
self._type = _type
def __init__(self, *_types):
self._types = _types
def __eq__(self, other):
return isinstance(other, self._type)
return isinstance(other, self._types)
def __repr__(self):
return f"ANY({self._type.__name__})"
return f"ANY({', '.join(_type.__name__ for _type in self._types)})"
class PipelineTestCaseMeta(type):

View File

@@ -15,10 +15,14 @@
import hashlib
import unittest
import datasets
from transformers import (
MODEL_FOR_IMAGE_SEGMENTATION_MAPPING,
MODEL_FOR_SEMANTIC_SEGMENTATION_MAPPING,
AutoFeatureExtractor,
AutoModelForImageSegmentation,
DetrForSegmentation,
ImageSegmentationPipeline,
is_vision_available,
pipeline,
@@ -46,12 +50,23 @@ else:
pass
def hashimage(image: Image) -> str:
m = hashlib.md5(image.tobytes())
return m.hexdigest()
@require_vision
@require_timm
@require_torch
@is_pipeline_test
class ImageSegmentationPipelineTests(unittest.TestCase, metaclass=PipelineTestCaseMeta):
model_mapping = MODEL_FOR_IMAGE_SEGMENTATION_MAPPING
model_mapping = {
k: v
for k, v in (
list(MODEL_FOR_IMAGE_SEGMENTATION_MAPPING.items()) if MODEL_FOR_IMAGE_SEGMENTATION_MAPPING else []
)
+ (MODEL_FOR_SEMANTIC_SEGMENTATION_MAPPING.items() if MODEL_FOR_SEMANTIC_SEGMENTATION_MAPPING else [])
}
def get_test_pipeline(self, model, tokenizer, feature_extractor):
image_segmenter = ImageSegmentationPipeline(model=model, feature_extractor=feature_extractor)
@@ -62,34 +77,59 @@ class ImageSegmentationPipelineTests(unittest.TestCase, metaclass=PipelineTestCa
def run_pipeline_test(self, image_segmenter, examples):
outputs = image_segmenter("./tests/fixtures/tests_samples/COCO/000000039769.png", threshold=0.0)
self.assertEqual(outputs, [{"score": ANY(float), "label": ANY(str), "mask": ANY(str)}] * 12)
import datasets
self.assertIsInstance(outputs, list)
n = len(outputs)
self.assertGreater(n, 1)
# XXX: PIL.Image implements __eq__ which bypasses ANY, so we inverse the comparison
# to make it work
self.assertEqual([{"score": ANY(float, type(None)), "label": ANY(str), "mask": ANY(Image.Image)}] * n, outputs)
dataset = datasets.load_dataset("hf-internal-testing/fixtures_image_utils", "image", split="test")
batch = [
Image.open("./tests/fixtures/tests_samples/COCO/000000039769.png"),
"http://images.cocodataset.org/val2017/000000039769.jpg",
# RGBA
dataset[0]["file"],
# LA
dataset[1]["file"],
# L
dataset[2]["file"],
]
outputs = image_segmenter(batch, threshold=0.0)
# RGBA
outputs = image_segmenter(dataset[0]["file"])
m = len(outputs)
self.assertEqual([{"score": ANY(float, type(None)), "label": ANY(str), "mask": ANY(Image.Image)}] * m, outputs)
# LA
outputs = image_segmenter(dataset[1]["file"])
m = len(outputs)
self.assertEqual([{"score": ANY(float, type(None)), "label": ANY(str), "mask": ANY(Image.Image)}] * m, outputs)
# L
outputs = image_segmenter(dataset[2]["file"])
m = len(outputs)
self.assertEqual([{"score": ANY(float, type(None)), "label": ANY(str), "mask": ANY(Image.Image)}] * m, outputs)
if isinstance(image_segmenter.model, DetrForSegmentation):
# We need to test batch_size with images with the same size.
# Detr doesn't normalize the size of the images, meaning we can have
# 800x800 or 800x1200, meaning we cannot batch simply.
# We simply bail on this
batch_size = 1
else:
batch_size = 2
# 5 times the same image so the output shape is predictable
batch = [
"./tests/fixtures/tests_samples/COCO/000000039769.png",
"./tests/fixtures/tests_samples/COCO/000000039769.png",
"./tests/fixtures/tests_samples/COCO/000000039769.png",
"./tests/fixtures/tests_samples/COCO/000000039769.png",
"./tests/fixtures/tests_samples/COCO/000000039769.png",
]
outputs = image_segmenter(batch, threshold=0.0, batch_size=batch_size)
self.assertEqual(len(batch), len(outputs))
self.assertEqual({"score": ANY(float, type(None)), "label": ANY(str), "mask": ANY(Image.Image)}, outputs[0][0])
self.assertEqual(len(outputs[0]), n)
self.assertEqual(
outputs,
[
[{"score": ANY(float), "label": ANY(str), "mask": ANY(str)}] * 12,
[{"score": ANY(float), "label": ANY(str), "mask": ANY(str)}] * 12,
[{"score": ANY(float), "label": ANY(str), "mask": ANY(str)}] * 12,
[{"score": ANY(float), "label": ANY(str), "mask": ANY(str)}] * 12,
[{"score": ANY(float), "label": ANY(str), "mask": ANY(str)}] * 12,
[{"score": ANY(float, type(None)), "label": ANY(str), "mask": ANY(Image.Image)}] * n,
[{"score": ANY(float, type(None)), "label": ANY(str), "mask": ANY(Image.Image)}] * n,
[{"score": ANY(float, type(None)), "label": ANY(str), "mask": ANY(Image.Image)}] * n,
[{"score": ANY(float, type(None)), "label": ANY(str), "mask": ANY(Image.Image)}] * n,
[{"score": ANY(float, type(None)), "label": ANY(str), "mask": ANY(Image.Image)}] * n,
],
outputs,
f"Expected [{n}, {n}, {n}, {n}, {n}], got {[len(item) for item in outputs]}",
)
@require_tf
@@ -108,7 +148,7 @@ class ImageSegmentationPipelineTests(unittest.TestCase, metaclass=PipelineTestCa
outputs = image_segmenter("http://images.cocodataset.org/val2017/000000039769.jpg", threshold=0.0)
for o in outputs:
# shortening by hashing
o["mask"] = hashlib.sha1(o["mask"].encode("UTF-8")).hexdigest()
o["mask"] = hashimage(o["mask"])
self.assertEqual(
nested_simplify(outputs, decimals=4),
@@ -116,12 +156,12 @@ class ImageSegmentationPipelineTests(unittest.TestCase, metaclass=PipelineTestCa
{
"score": 0.004,
"label": "LABEL_0",
"mask": "4276f7db4ca2983b2666f7e0c102d8186aed20be",
"mask": "34eecd16bbfb0f476083ef947d81bf66",
},
{
"score": 0.004,
"label": "LABEL_0",
"mask": "4276f7db4ca2983b2666f7e0c102d8186aed20be",
"mask": "34eecd16bbfb0f476083ef947d81bf66",
},
],
)
@@ -135,7 +175,7 @@ class ImageSegmentationPipelineTests(unittest.TestCase, metaclass=PipelineTestCa
)
for output in outputs:
for o in output:
o["mask"] = hashlib.sha1(o["mask"].encode("UTF-8")).hexdigest()
o["mask"] = hashimage(o["mask"])
self.assertEqual(
nested_simplify(outputs, decimals=4),
@@ -144,29 +184,54 @@ class ImageSegmentationPipelineTests(unittest.TestCase, metaclass=PipelineTestCa
{
"score": 0.004,
"label": "LABEL_0",
"mask": "4276f7db4ca2983b2666f7e0c102d8186aed20be",
"mask": "34eecd16bbfb0f476083ef947d81bf66",
},
{
"score": 0.004,
"label": "LABEL_0",
"mask": "4276f7db4ca2983b2666f7e0c102d8186aed20be",
"mask": "34eecd16bbfb0f476083ef947d81bf66",
},
],
[
{
"score": 0.004,
"label": "LABEL_0",
"mask": "4276f7db4ca2983b2666f7e0c102d8186aed20be",
"mask": "34eecd16bbfb0f476083ef947d81bf66",
},
{
"score": 0.004,
"label": "LABEL_0",
"mask": "4276f7db4ca2983b2666f7e0c102d8186aed20be",
"mask": "34eecd16bbfb0f476083ef947d81bf66",
},
],
],
)
@require_torch
def test_small_model_pt_semantic(self):
model_id = "hf-internal-testing/tiny-random-beit-pipeline"
image_segmenter = pipeline(model=model_id)
outputs = image_segmenter("http://images.cocodataset.org/val2017/000000039769.jpg")
for o in outputs:
# shortening by hashing
o["mask"] = hashimage(o["mask"])
self.assertEqual(
nested_simplify(outputs, decimals=4),
[
{
"score": None,
"label": "LABEL_0",
"mask": "01245d8ad25d03f09493ca97965788ae",
},
{
"score": None,
"label": "LABEL_1",
"mask": "f741516de8d5196a2c830739b9ac1c8c",
},
],
)
@require_torch
@slow
def test_integration_torch_image_segmentation(self):
@@ -176,7 +241,7 @@ class ImageSegmentationPipelineTests(unittest.TestCase, metaclass=PipelineTestCa
outputs = image_segmenter("http://images.cocodataset.org/val2017/000000039769.jpg")
for o in outputs:
o["mask"] = hashlib.sha1(o["mask"].encode("UTF-8")).hexdigest()
o["mask"] = hashimage(o["mask"])
self.assertEqual(
nested_simplify(outputs, decimals=4),
@@ -234,7 +299,7 @@ class ImageSegmentationPipelineTests(unittest.TestCase, metaclass=PipelineTestCa
outputs = image_segmenter("http://images.cocodataset.org/val2017/000000039769.jpg", threshold=threshold)
for o in outputs:
o["mask"] = hashlib.sha1(o["mask"].encode("UTF-8")).hexdigest()
o["mask"] = hashimage(o["mask"])
self.assertEqual(
nested_simplify(outputs, decimals=4),