Add Fast Grounding-Dino Processor (#37108)

* Add Fast Grounding-Dino Processor

* Added modular file

---------

Co-authored-by: Yoni Gozlan <74535834+yonigozlan@users.noreply.github.com>
This commit is contained in:
Parteek
2025-04-16 15:56:08 +05:30
committed by GitHub
parent ed53809ac5
commit 6fd87d1172
6 changed files with 976 additions and 13 deletions

View File

@@ -20,7 +20,7 @@ import unittest
import numpy as np
from transformers.testing_utils import require_torch, require_vision, slow
from transformers.utils import is_torch_available, is_vision_available
from transformers.utils import is_torch_available, is_torchvision_available, is_vision_available
from ...test_image_processing_common import AnnotationFormatTestMixin, ImageProcessingTestMixin, prepare_image_inputs
@@ -35,6 +35,9 @@ if is_vision_available():
from transformers import GroundingDinoImageProcessor
if is_torchvision_available():
from transformers import GroundingDinoImageProcessorFast
class GroundingDinoImageProcessingTester:
def __init__(
@@ -147,6 +150,7 @@ class GroundingDinoImageProcessingTester:
@require_vision
class GroundingDinoImageProcessingTest(AnnotationFormatTestMixin, ImageProcessingTestMixin, unittest.TestCase):
image_processing_class = GroundingDinoImageProcessor if is_vision_available() else None
fast_image_processing_class = GroundingDinoImageProcessorFast if is_torchvision_available() else None
def setUp(self):
super().setUp()
@@ -182,20 +186,21 @@ class GroundingDinoImageProcessingTest(AnnotationFormatTestMixin, ImageProcessin
self.assertEqual(image_processor.do_pad, False)
def test_post_process_object_detection(self):
image_processor = self.image_processing_class(**self.image_processor_dict)
outputs = self.image_processor_tester.get_fake_grounding_dino_output()
results = image_processor.post_process_object_detection(outputs, threshold=0.0)
for image_processing_class in self.image_processor_list:
image_processor = image_processing_class(**self.image_processor_dict)
outputs = self.image_processor_tester.get_fake_grounding_dino_output()
results = image_processor.post_process_object_detection(outputs, threshold=0.0)
self.assertEqual(len(results), self.image_processor_tester.batch_size)
self.assertEqual(list(results[0].keys()), ["scores", "labels", "boxes"])
self.assertEqual(results[0]["boxes"].shape, (self.image_processor_tester.num_queries, 4))
self.assertEqual(results[0]["scores"].shape, (self.image_processor_tester.num_queries,))
self.assertEqual(len(results), self.image_processor_tester.batch_size)
self.assertEqual(list(results[0].keys()), ["scores", "labels", "boxes"])
self.assertEqual(results[0]["boxes"].shape, (self.image_processor_tester.num_queries, 4))
self.assertEqual(results[0]["scores"].shape, (self.image_processor_tester.num_queries,))
expected_scores = torch.tensor([0.7050, 0.7222, 0.7222, 0.6829, 0.7220])
torch.testing.assert_close(results[0]["scores"], expected_scores, rtol=1e-4, atol=1e-4)
expected_scores = torch.tensor([0.7050, 0.7222, 0.7222, 0.6829, 0.7220])
torch.testing.assert_close(results[0]["scores"], expected_scores, rtol=1e-4, atol=1e-4)
expected_box_slice = torch.tensor([0.6908, 0.4354, 1.0737, 1.3947])
torch.testing.assert_close(results[0]["boxes"][0], expected_box_slice, rtol=1e-4, atol=1e-4)
expected_box_slice = torch.tensor([0.6908, 0.4354, 1.0737, 1.3947])
torch.testing.assert_close(results[0]["boxes"][0], expected_box_slice, rtol=1e-4, atol=1e-4)
@slow
# Copied from tests.models.deformable_detr.test_image_processing_deformable_detr.DeformableDetrImageProcessingTest.test_call_pytorch_with_coco_detection_annotations with DeformableDetr->GroundingDino