From f83135eb769aa4eeb53c00179f912b1f46970768 Mon Sep 17 00:00:00 2001 From: NielsRogge <48327001+NielsRogge@users.noreply.github.com> Date: Wed, 25 Jan 2023 12:34:43 +0100 Subject: [PATCH] [Mask2Former] Add doc tests (#21232) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Add doc tests * Add OneFormer resourcesé * Fix merge * Fix style Co-authored-by: Niels Rogge --- docs/source/en/model_doc/mask2former.mdx | 4 + docs/source/en/model_doc/oneformer.mdx | 9 ++ .../mask2former/modeling_mask2former.py | 97 ++++++++++++++++--- utils/documentation_tests.txt | 2 + 4 files changed, 97 insertions(+), 15 deletions(-) diff --git a/docs/source/en/model_doc/mask2former.mdx b/docs/source/en/model_doc/mask2former.mdx index dd4a5d5233..f0d43ba78f 100644 --- a/docs/source/en/model_doc/mask2former.mdx +++ b/docs/source/en/model_doc/mask2former.mdx @@ -25,6 +25,10 @@ Tips: - Mask2Former uses the same preprocessing and postprocessing steps as [MaskFormer](maskformer). Use [`Mask2FormerImageProcessor`] or [`AutoImageProcessor`] to prepare images and optional targets for the model. - To get the final segmentation, depending on the task, you can call [`~Mask2FormerImageProcessor.post_process_semantic_segmentation`] or [`~Mask2FormerImageProcessor.post_process_instance_segmentation`] or [`~Mask2FormerImageProcessor.post_process_panoptic_segmentation`]. All three tasks can be solved using [`Mask2FormerForUniversalSegmentation`] output, panoptic segmentation accepts an optional `label_ids_to_fuse` argument to fuse instances of the target object/s (e.g. sky) together. +drawing + + Mask2Former architecture. Taken from the original paper. + This model was contributed by [Shivalika Singh](https://huggingface.co/shivi) and [Alara Dirik](https://huggingface.co/adirik). The original code can be found [here](https://github.com/facebookresearch/Mask2Former). ## Resources diff --git a/docs/source/en/model_doc/oneformer.mdx b/docs/source/en/model_doc/oneformer.mdx index 85b40ea80d..3560d84bc7 100644 --- a/docs/source/en/model_doc/oneformer.mdx +++ b/docs/source/en/model_doc/oneformer.mdx @@ -37,6 +37,15 @@ The figure below illustrates the architecture of OneFormer. Taken from the [orig This model was contributed by [Jitesh Jain](https://huggingface.co/praeclarumjj3). The original code can be found [here](https://github.com/SHI-Labs/OneFormer). +## Resources + +A list of official Hugging Face and community (indicated by 🌎) resources to help you get started with OneFormer. + +- Demo notebooks regarding inference + fine-tuning on custom data can be found [here](https://github.com/NielsRogge/Transformers-Tutorials/tree/master/OneFormer). + +If you're interested in submitting a resource to be included here, please feel free to open a Pull Request and we will review it. +The resource should ideally demonstrate something new instead of duplicating an existing resource. + ## OneFormer specific outputs [[autodoc]] models.oneformer.modeling_oneformer.OneFormerModelOutput diff --git a/src/transformers/models/mask2former/modeling_mask2former.py b/src/transformers/models/mask2former/modeling_mask2former.py index 9f6feaa87b..b17fa28746 100644 --- a/src/transformers/models/mask2former/modeling_mask2former.py +++ b/src/transformers/models/mask2former/modeling_mask2former.py @@ -2238,23 +2238,24 @@ class Mask2FormerModel(Mask2FormerPreTrainedModel): >>> import requests >>> from transformers import AutoImageProcessor, Mask2FormerModel - >>> # download texting image + >>> # load image >>> url = "http://images.cocodataset.org/val2017/000000039769.jpg" >>> image = Image.open(requests.get(url, stream=True).raw) - >>> # Load image preprocessor and Mask2FormerModel trained on ADE20K instance segmentation dataset - >>> image_processor = AutoImageProcessor.from_pretrained("facebook/mask2former-swin-small-ade-instance") - >>> model = Mask2FormerForUniversalSegmentation.from_pretrained("facebook/mask2former-swin-small-ade-instance") + >>> # load image preprocessor and Mask2FormerModel trained on COCO instance segmentation dataset + >>> image_processor = AutoImageProcessor.from_pretrained("facebook/mask2former-swin-small-coco-instance") + >>> model = Mask2FormerModel.from_pretrained("facebook/mask2former-swin-small-coco-instance") >>> inputs = image_processor(image, return_tensors="pt") + >>> # forward pass >>> with torch.no_grad(): ... outputs = model(**inputs) + + >>> # model outputs last hidden states of shape (batch_size, num_queries, hidden_size) + >>> print(outputs.transformer_decoder_last_hidden_state.shape) + torch.Size([1, 100, 256]) ``` """ - - if pixel_values is None: - raise ValueError("You have to specify pixel_values") - output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions output_hidden_states = ( output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states @@ -2387,15 +2388,51 @@ class Mask2FormerForUniversalSegmentation(Mask2FormerPreTrainedModel): `Mask2FormerUniversalSegmentationOutput` Examples: + + Instance segmentation example: + ```python >>> from transformers import AutoImageProcessor, Mask2FormerForUniversalSegmentation >>> from PIL import Image >>> import requests >>> import torch - >>> # Load Mask2Former trained on ADE20K panoptic segmentation dataset - >>> image_processor = AutoImageProcessor.from_pretrained("facebook/mask2former-swin-small-ade-panoptic") - >>> model = Mask2FormerForUniversalSegmentation.from_pretrained("facebook/mask2former-swin-small-ade-panoptic") + >>> # Load Mask2Former trained on COCO instance segmentation dataset + >>> image_processor = AutoImageProcessor.from_pretrained("facebook/mask2former-swin-small-coco-instance") + >>> model = Mask2FormerForUniversalSegmentation.from_pretrained( + ... "facebook/mask2former-swin-small-coco-instance" + ... ) + + >>> url = "http://images.cocodataset.org/val2017/000000039769.jpg" + >>> image = Image.open(requests.get(url, stream=True).raw) + >>> inputs = image_processor(image, return_tensors="pt") + + >>> with torch.no_grad(): + ... outputs = model(**inputs) + + >>> # Model predicts class_queries_logits of shape `(batch_size, num_queries)` + >>> # and masks_queries_logits of shape `(batch_size, num_queries, height, width)` + >>> class_queries_logits = outputs.class_queries_logits + >>> masks_queries_logits = outputs.masks_queries_logits + + >>> # Perform post-processing to get instance segmentation map + >>> pred_instance_map = image_processor.post_process_semantic_segmentation( + ... outputs, target_sizes=[image.size[::-1]] + ... )[0] + >>> print(pred_instance_map.shape) + torch.Size([480, 640]) + ``` + + Semantic segmentation example: + ```python + >>> from transformers import AutoImageProcessor, Mask2FormerForUniversalSegmentation + >>> from PIL import Image + >>> import requests + >>> import torch + + >>> # Load Mask2Former trained on ADE20k semantic segmentation dataset + >>> image_processor = AutoImageProcessor.from_pretrained("facebook/mask2former-swin-small-ade-semantic") + >>> model = Mask2FormerForUniversalSegmentation.from_pretrained("facebook/mask2former-swin-small-ade-semantic") >>> url = ( ... "https://huggingface.co/datasets/hf-internal-testing/fixtures_ade20k/resolve/main/ADE_val_00000001.jpg" @@ -2411,16 +2448,46 @@ class Mask2FormerForUniversalSegmentation(Mask2FormerPreTrainedModel): >>> class_queries_logits = outputs.class_queries_logits >>> masks_queries_logits = outputs.masks_queries_logits - >>> # Perform post-processing to get semantic, instance or panoptic segmentation maps + >>> # Perform post-processing to get semantic segmentation map >>> pred_semantic_map = image_processor.post_process_semantic_segmentation( ... outputs, target_sizes=[image.size[::-1]] ... )[0] - >>> pred_instance_map = image_processor.post_process_instance_segmentation( - ... outputs, target_sizes=[image.size[::-1]] - ... )[0]["segmentation"] + >>> print(pred_semantic_map.shape) + torch.Size([512, 683]) + ``` + + Panoptic segmentation example: + + ```python + >>> from transformers import AutoImageProcessor, Mask2FormerForUniversalSegmentation + >>> from PIL import Image + >>> import requests + >>> import torch + + >>> # Load Mask2Former trained on CityScapes panoptic segmentation dataset + >>> image_processor = AutoImageProcessor.from_pretrained("facebook/mask2former-swin-small-cityscapes-panoptic") + >>> model = Mask2FormerForUniversalSegmentation.from_pretrained( + ... "facebook/mask2former-swin-small-cityscapes-panoptic" + ... ) + + >>> url = "https://cdn-media.huggingface.co/Inference-API/Sample-results-on-the-Cityscapes-dataset-The-above-images-show-how-our-method-can-handle.png" + >>> image = Image.open(requests.get(url, stream=True).raw) + >>> inputs = image_processor(image, return_tensors="pt") + + >>> with torch.no_grad(): + ... outputs = model(**inputs) + + >>> # Model predicts class_queries_logits of shape `(batch_size, num_queries)` + >>> # and masks_queries_logits of shape `(batch_size, num_queries, height, width)` + >>> class_queries_logits = outputs.class_queries_logits + >>> masks_queries_logits = outputs.masks_queries_logits + + >>> # Perform post-processing to get panoptic segmentation map >>> pred_panoptic_map = image_processor.post_process_panoptic_segmentation( ... outputs, target_sizes=[image.size[::-1]] ... )[0]["segmentation"] + >>> print(pred_panoptic_map.shape) + torch.Size([338, 676]) ``` """ output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions diff --git a/utils/documentation_tests.txt b/utils/documentation_tests.txt index 8f009ab4dc..0679e79862 100644 --- a/utils/documentation_tests.txt +++ b/utils/documentation_tests.txt @@ -108,6 +108,8 @@ src/transformers/models/longformer/modeling_tf_longformer.py src/transformers/models/longt5/modeling_longt5.py src/transformers/models/marian/modeling_marian.py src/transformers/models/markuplm/modeling_markuplm.py +src/transformers/models/maskformer/configuration_mask2former.py +src/transformers/models/maskformer/modeling_mask2former.py src/transformers/models/maskformer/configuration_maskformer.py src/transformers/models/maskformer/modeling_maskformer.py src/transformers/models/mbart/configuration_mbart.py