Add DINOv2 depth estimation (#26092)

* First draft
* Fix style
* More improvements
* Fix tests
* Fix tests
* Convert checkpoint
* Improve DPTImageProcessor
* Remove scripts, improve conversion script
* Remove print statements
* Fix test
* Improve docstring
* More improvements
* Fix style
* Fix image processor
* Add tests
* Address comments
* Address comments
* Make bias backwards compatible
* Address comment
* Address comment
* Address comment
* Apply suggestions from code review

Co-authored-by: amyeroberts <22614925+amyeroberts@users.noreply.github.com>

* Address comments
* Add flag
* Add tests
* Make tests smaller
* Use regular BackboneOutput
* Fix all tests
* Update test
* Convert more checkpoints
* Convert giant checkpoints, add integration test
* Rename size_divisibility to size_divisor

---------

Co-authored-by: amyeroberts <22614925+amyeroberts@users.noreply.github.com>
2023-11-13 17:20:42 +01:00
parent 3b59621310
commit 2422c38de6
8 changed files with 928 additions and 75 deletions
--- a/tests/models/dpt/test_image_processing_dpt.py
+++ b/tests/models/dpt/test_image_processing_dpt.py
@@ -16,6 +16,8 @@

 import unittest

+import numpy as np
+
 from transformers.file_utils import is_vision_available
 from transformers.testing_utils import require_torch, require_vision

@@ -97,6 +99,10 @@ class DPTImageProcessingTest(ImageProcessingTestMixin, unittest.TestCase):
        self.assertTrue(hasattr(image_processing, "do_normalize"))
        self.assertTrue(hasattr(image_processing, "do_resize"))
        self.assertTrue(hasattr(image_processing, "size"))
+        self.assertTrue(hasattr(image_processing, "do_rescale"))
+        self.assertTrue(hasattr(image_processing, "rescale_factor"))
+        self.assertTrue(hasattr(image_processing, "do_pad"))
+        self.assertTrue(hasattr(image_processing, "size_divisor"))

    def test_image_processor_from_dict_with_kwargs(self):
        image_processor = self.image_processing_class.from_dict(self.image_processor_dict)
@@ -104,3 +110,19 @@ class DPTImageProcessingTest(ImageProcessingTestMixin, unittest.TestCase):

        image_processor = self.image_processing_class.from_dict(self.image_processor_dict, size=42)
        self.assertEqual(image_processor.size, {"height": 42, "width": 42})
+
+    def test_padding(self):
+        """Check that `pad_image` and `preprocess(do_pad=True)` yield shapes divisible by `size_divisor`."""
+        image_processing = self.image_processing_class(**self.image_processor_dict)
+        image = np.random.randn(3, 249, 491)  # neither 249 nor 491 is a multiple of 4
+
+        # test individual method; keep `image` unpadded so the next check still exercises padding
+        padded_image = image_processing.pad_image(image, size_divisor=4)
+        self.assertEqual(padded_image.shape[1] % 4, 0)
+        self.assertEqual(padded_image.shape[2] % 4, 0)
+        # test by calling, on the original (unpadded) image
+        pixel_values = image_processing.preprocess(
+            image, do_rescale=False, do_resize=False, do_pad=True, size_divisor=4, return_tensors="pt"
+        ).pixel_values
+        self.assertEqual(pixel_values.shape[2] % 4, 0)
+        self.assertEqual(pixel_values.shape[3] % 4, 0)