🚨🚨🚨 [eomt] make EoMT compatible with pipeline (#39122)

* Make EoMT compatible with pipeline
* Implicit patch offsets
* remove patch offsets from arg
* Modify tests
* Update example
* fix proc testcase
* Add few more args
* add pipeline test suite
* fix
* docstring fixes
* add pipeline test
* changes w.r.t review
* 🙈 MB
* should fix device mismatch
* debug
* Fixes device mismatch
* use decorator
* we can split mlp
* expected values update

---------

Co-authored-by: NielsRogge <48327001+NielsRogge@users.noreply.github.com>
2025-07-02 16:55:26 +05:30
parent 4d5822e65d
commit b61023a1b7
7 changed files with 113 additions and 92 deletions
--- a/docs/source/en/model_doc/eomt.md
+++ b/docs/source/en/model_doc/eomt.md
@@ -74,20 +74,16 @@ inputs = processor(
    return_tensors="pt",
 )

-# Remove Patch Offsets from inputs — only used later for post-processing.
-patch_offsets = inputs.pop("patch_offsets")
-
 with torch.inference_mode():
    outputs = model(**inputs)

 # Prepare the original image size in the format (height, width)
-original_image_sizes = [(image.height, image.width)]
+target_sizes = [(image.height, image.width)]

 # Post-process the model outputs to get final segmentation prediction
 preds = processor.post_process_semantic_segmentation(
    outputs,
-    patch_offsets=patch_offsets,
-    original_image_sizes=original_image_sizes,
+    target_sizes=target_sizes,
 )

 # Visualize the segmentation mask
@@ -130,12 +126,12 @@ with torch.inference_mode():
    outputs = model(**inputs)

 # Prepare the original image size in the format (height, width)
-original_image_sizes = [(image.height, image.width)]
+target_sizes = [(image.height, image.width)]

 # Post-process the model outputs to get final segmentation prediction
 preds = processor.post_process_instance_segmentation(
    outputs,
-    original_image_sizes=original_image_sizes,
+    target_sizes=target_sizes,
 )

 # Visualize the segmentation mask
@@ -173,12 +169,12 @@ with torch.inference_mode():
    outputs = model(**inputs)

 # Prepare the original image size in the format (height, width)
-original_image_sizes = [(image.height, image.width)]
+target_sizes = [(image.height, image.width)]

 # Post-process the model outputs to get final segmentation prediction
 preds = processor.post_process_panoptic_segmentation(
    outputs,
-    original_image_sizes=original_image_sizes,
+    target_sizes=target_sizes,
 )

 # Visualize the panoptic segmentation mask