Add Swinv2 backbone (#27742)

* First draft

* More improvements

* More improvements

* Make all tests pass

* Remove script

* Update image processor

* Address comments

* Use new gradient checkpointing method

* Convert checkpoints, add integration test

* Do not keep aspect ratio for now

* Set keep_aspect_ratio=False for beit, add integration test

* Remove print statement
This commit is contained in:
NielsRogge
2023-12-22 12:12:56 +01:00
committed by GitHub
parent 1ef86c4f56
commit c9fb250a25
14 changed files with 667 additions and 130 deletions

View File

@@ -126,3 +126,13 @@ class DPTImageProcessingTest(ImageProcessingTestMixin, unittest.TestCase):
).pixel_values
self.assertTrue(pixel_values.shape[2] % 4 == 0)
self.assertTrue(pixel_values.shape[3] % 4 == 0)
def test_keep_aspect_ratio(self):
size = {"height": 512, "width": 512}
image_processor = DPTImageProcessor(size=size, keep_aspect_ratio=True, ensure_multiple_of=32)
image = np.zeros((489, 640, 3))
pixel_values = image_processor(image, return_tensors="pt").pixel_values
self.assertEqual(list(pixel_values.shape), [1, 3, 512, 672])

View File

@@ -258,7 +258,7 @@ def prepare_img():
@require_vision
@slow
class DPTModelIntegrationTest(unittest.TestCase):
def test_inference_depth_estimation(self):
def test_inference_depth_estimation_dinov2(self):
image_processor = DPTImageProcessor.from_pretrained("facebook/dpt-dinov2-small-kitti")
model = DPTForDepthEstimation.from_pretrained("facebook/dpt-dinov2-small-kitti").to(torch_device)
@@ -279,3 +279,47 @@ class DPTModelIntegrationTest(unittest.TestCase):
).to(torch_device)
self.assertTrue(torch.allclose(outputs.predicted_depth[0, :3, :3], expected_slice, atol=1e-4))
def test_inference_depth_estimation_beit(self):
image_processor = DPTImageProcessor.from_pretrained("Intel/dpt-beit-base-384")
model = DPTForDepthEstimation.from_pretrained("Intel/dpt-beit-base-384").to(torch_device)
image = prepare_img()
inputs = image_processor(images=image, return_tensors="pt").to(torch_device)
# forward pass
with torch.no_grad():
outputs = model(**inputs)
predicted_depth = outputs.predicted_depth
# verify the predicted depth
expected_shape = torch.Size((1, 384, 384))
self.assertEqual(predicted_depth.shape, expected_shape)
expected_slice = torch.tensor(
[[2669.7061, 2663.7144, 2674.9399], [2633.9326, 2650.9092, 2665.4270], [2621.8271, 2632.0129, 2637.2290]]
).to(torch_device)
self.assertTrue(torch.allclose(outputs.predicted_depth[0, :3, :3], expected_slice, atol=1e-4))
def test_inference_depth_estimation_swinv2(self):
image_processor = DPTImageProcessor.from_pretrained("Intel/dpt-swinv2-tiny-256")
model = DPTForDepthEstimation.from_pretrained("Intel/dpt-swinv2-tiny-256").to(torch_device)
image = prepare_img()
inputs = image_processor(images=image, return_tensors="pt").to(torch_device)
# forward pass
with torch.no_grad():
outputs = model(**inputs)
predicted_depth = outputs.predicted_depth
# verify the predicted depth
expected_shape = torch.Size((1, 256, 256))
self.assertEqual(predicted_depth.shape, expected_shape)
expected_slice = torch.tensor(
[[1032.7719, 1025.1886, 1030.2661], [1023.7619, 1021.0075, 1024.9121], [1022.5667, 1018.8522, 1021.4145]]
).to(torch_device)
self.assertTrue(torch.allclose(outputs.predicted_depth[0, :3, :3], expected_slice, atol=1e-4))