Add Depth Anything V2 Metric models (#32126)

* add checkpoint and repo names

* adapt head to support metric depth estimation

* add max_depth output scaling

* add expected logits

* improve docs

* fix docstring

* add checkpoint and repo names

* adapt head to support metric depth estimation

* add max_depth output scaling

* add expected logits

* improve docs

* fix docstring

* rename depth_estimation to depth_estimation_type

* add integration test

* Refactored tests to include metric depth model inference test
* Integration test pass when the timm backbone lines are commented (L220-L227)

* address feedback

* replace model path to use organization path

* formatting

* delete deprecated TODO

* address feedback

* [run_slow] depth_anything
This commit is contained in:
Bertrand Thia
2024-08-13 10:16:30 -04:00
committed by GitHub
parent 481e15604a
commit cc25757a44
4 changed files with 92 additions and 6 deletions

View File

@@ -246,6 +246,7 @@ def prepare_img():
@slow
class DepthAnythingModelIntegrationTest(unittest.TestCase):
def test_inference(self):
# -- `relative` depth model --
image_processor = DPTImageProcessor.from_pretrained("LiheYoung/depth-anything-small-hf")
model = DepthAnythingForDepthEstimation.from_pretrained("LiheYoung/depth-anything-small-hf").to(torch_device)
@@ -265,4 +266,27 @@ class DepthAnythingModelIntegrationTest(unittest.TestCase):
[[8.8204, 8.6468, 8.6195], [8.3313, 8.6027, 8.7526], [8.6526, 8.6866, 8.7453]],
).to(torch_device)
self.assertTrue(torch.allclose(outputs.predicted_depth[0, :3, :3], expected_slice, atol=1e-6))
self.assertTrue(torch.allclose(predicted_depth[0, :3, :3], expected_slice, atol=1e-6))
# -- `metric` depth model --
image_processor = DPTImageProcessor.from_pretrained("depth-anything/depth-anything-V2-metric-indoor-small-hf")
model = DepthAnythingForDepthEstimation.from_pretrained(
"depth-anything/depth-anything-V2-metric-indoor-small-hf"
).to(torch_device)
inputs = image_processor(images=image, return_tensors="pt").to(torch_device)
# forward pass
with torch.no_grad():
outputs = model(**inputs)
predicted_depth = outputs.predicted_depth
# verify the predicted depth
expected_shape = torch.Size([1, 518, 686])
self.assertEqual(predicted_depth.shape, expected_shape)
expected_slice = torch.tensor(
[[1.3349, 1.2946, 1.2801], [1.2793, 1.2337, 1.2899], [1.2629, 1.2218, 1.2476]],
).to(torch_device)
self.assertTrue(torch.allclose(predicted_depth[0, :3, :3], expected_slice, atol=1e-4))