Add Swinv2 backbone (#27742)
* First draft * More improvements * More improvements * Make all tests pass * Remove script * Update image processor * Address comments * Use new gradient checkpointing method * Convert checkpoints, add integration test * Do not keep aspect ratio for now * Set keep_aspect_ratio=False for beit, add integration test * Remove print statement
This commit is contained in:
@@ -126,3 +126,13 @@ class DPTImageProcessingTest(ImageProcessingTestMixin, unittest.TestCase):
|
||||
).pixel_values
|
||||
self.assertTrue(pixel_values.shape[2] % 4 == 0)
|
||||
self.assertTrue(pixel_values.shape[3] % 4 == 0)
|
||||
|
||||
def test_keep_aspect_ratio(self):
    """Pin the output shape produced with `keep_aspect_ratio=True` and `ensure_multiple_of=32`.

    A zero-filled array of shape (489, 640, 3) is passed through a processor configured with a
    512x512 target size; the resulting `pixel_values` tensor must have shape [1, 3, 512, 672].
    """
    processor = DPTImageProcessor(
        size={"height": 512, "width": 512},
        keep_aspect_ratio=True,
        ensure_multiple_of=32,
    )
    blank_image = np.zeros((489, 640, 3))

    encoding = processor(blank_image, return_tensors="pt")
    output_shape = list(encoding.pixel_values.shape)

    self.assertEqual(output_shape, [1, 3, 512, 672])
|
||||
|
||||
@@ -258,7 +258,7 @@ def prepare_img():
|
||||
@require_vision
|
||||
@slow
|
||||
class DPTModelIntegrationTest(unittest.TestCase):
|
||||
def test_inference_depth_estimation(self):
|
||||
def test_inference_depth_estimation_dinov2(self):
|
||||
image_processor = DPTImageProcessor.from_pretrained("facebook/dpt-dinov2-small-kitti")
|
||||
model = DPTForDepthEstimation.from_pretrained("facebook/dpt-dinov2-small-kitti").to(torch_device)
|
||||
|
||||
@@ -279,3 +279,47 @@ class DPTModelIntegrationTest(unittest.TestCase):
|
||||
).to(torch_device)
|
||||
|
||||
self.assertTrue(torch.allclose(outputs.predicted_depth[0, :3, :3], expected_slice, atol=1e-4))
|
||||
|
||||
def test_inference_depth_estimation_beit(self):
    """Run the `Intel/dpt-beit-base-384` checkpoint on the fixture image and compare with recorded values."""
    checkpoint = "Intel/dpt-beit-base-384"
    image_processor = DPTImageProcessor.from_pretrained(checkpoint)
    model = DPTForDepthEstimation.from_pretrained(checkpoint).to(torch_device)

    inputs = image_processor(images=prepare_img(), return_tensors="pt").to(torch_device)

    # forward pass without gradient tracking
    with torch.no_grad():
        outputs = model(**inputs)
    predicted_depth = outputs.predicted_depth

    # the predicted depth must have shape (1, 384, 384)
    self.assertEqual(predicted_depth.shape, torch.Size((1, 384, 384)))

    # the [0, :3, :3] slice must match the recorded reference values
    expected_slice = torch.tensor(
        [[2669.7061, 2663.7144, 2674.9399], [2633.9326, 2650.9092, 2665.4270], [2621.8271, 2632.0129, 2637.2290]]
    ).to(torch_device)
    actual_slice = predicted_depth[0, :3, :3]

    self.assertTrue(torch.allclose(actual_slice, expected_slice, atol=1e-4))
|
||||
|
||||
def test_inference_depth_estimation_swinv2(self):
    """Run the `Intel/dpt-swinv2-tiny-256` checkpoint on the fixture image and compare with recorded values."""
    checkpoint = "Intel/dpt-swinv2-tiny-256"
    image_processor = DPTImageProcessor.from_pretrained(checkpoint)
    model = DPTForDepthEstimation.from_pretrained(checkpoint).to(torch_device)

    inputs = image_processor(images=prepare_img(), return_tensors="pt").to(torch_device)

    # forward pass without gradient tracking
    with torch.no_grad():
        outputs = model(**inputs)
    predicted_depth = outputs.predicted_depth

    # the predicted depth must have shape (1, 256, 256)
    self.assertEqual(predicted_depth.shape, torch.Size((1, 256, 256)))

    # the [0, :3, :3] slice must match the recorded reference values
    expected_slice = torch.tensor(
        [[1032.7719, 1025.1886, 1030.2661], [1023.7619, 1021.0075, 1024.9121], [1022.5667, 1018.8522, 1021.4145]]
    ).to(torch_device)
    actual_slice = predicted_depth[0, :3, :3]

    self.assertTrue(torch.allclose(actual_slice, expected_slice, atol=1e-4))
|
||||
|
||||
Reference in New Issue
Block a user