Add DINOv2 depth estimation (#26092)

* First draft

* Fix style

* More improvements

* Fix tests

* Fix tests

* Convert checkpoint

* Improve DPTImageProcessor

* Remove scripts, improve conversion script

* Remove print statements

* Fix test

* Improve docstring

* More improvements

* Fix style

* Fix image processor

* Add tests

* Address comments

* Address comments

* Make bias backwards compatible

* Address comment

* Address comment

* Address comment

* Apply suggestions from code review

Co-authored-by: amyeroberts <22614925+amyeroberts@users.noreply.github.com>

* Address comments

* Add flag

* Add tests

* Make tests smaller

* Use regular BackboneOutput

* Fix all tests

* Update test

* Convert more checkpoints

* Convert giant checkpoints, add integration test

* Rename size_divisibility to size_divisor

---------

Co-authored-by: amyeroberts <22614925+amyeroberts@users.noreply.github.com>
This commit is contained in:
NielsRogge
2023-11-13 17:20:42 +01:00
committed by GitHub
parent 3b59621310
commit 2422c38de6
8 changed files with 928 additions and 75 deletions

View File

@@ -16,6 +16,8 @@
import unittest
import numpy as np
from transformers.file_utils import is_vision_available
from transformers.testing_utils import require_torch, require_vision
@@ -97,6 +99,10 @@ class DPTImageProcessingTest(ImageProcessingTestMixin, unittest.TestCase):
self.assertTrue(hasattr(image_processing, "do_normalize"))
self.assertTrue(hasattr(image_processing, "do_resize"))
self.assertTrue(hasattr(image_processing, "size"))
self.assertTrue(hasattr(image_processing, "do_rescale"))
self.assertTrue(hasattr(image_processing, "rescale_factor"))
self.assertTrue(hasattr(image_processing, "do_pad"))
self.assertTrue(hasattr(image_processing, "size_divisor"))
def test_image_processor_from_dict_with_kwargs(self):
image_processor = self.image_processing_class.from_dict(self.image_processor_dict)
@@ -104,3 +110,19 @@ class DPTImageProcessingTest(ImageProcessingTestMixin, unittest.TestCase):
image_processor = self.image_processing_class.from_dict(self.image_processor_dict, size=42)
self.assertEqual(image_processor.size, {"height": 42, "width": 42})
def test_padding(self):
image_processing = self.image_processing_class(**self.image_processor_dict)
image = np.random.randn(3, 249, 491)
# test individual method
image = image_processing.pad_image(image, size_divisor=4)
self.assertTrue(image.shape[1] % 4 == 0)
self.assertTrue(image.shape[2] % 4 == 0)
# test by calling
pixel_values = image_processing.preprocess(
image, do_rescale=False, do_resize=False, do_pad=True, size_divisor=4, return_tensors="pt"
).pixel_values
self.assertTrue(pixel_values.shape[2] % 4 == 0)
self.assertTrue(pixel_values.shape[3] % 4 == 0)