Fix tests for vision models (#35654)

* Trigger tests
* [run-slow] beit, detr, dinov2, vit, textnet
* Fix BEiT interpolate_pos_encoding
* Fix DETR test
* Update DINOv2 test
* Fix textnet
* Fix vit
* Fix DPT
* Fix data2vec test
* Fix textnet test
* Update interpolation check
* Fix ZoeDepth tests
* Update interpolate embeddings for BEiT
* Apply suggestions from code review
2025-02-13 10:28:37 +00:00
parent e60ae0d078
commit d419862889
9 changed files with 55 additions and 79 deletions
--- a/tests/models/textnet/test_modeling_textnet.py
+++ b/tests/models/textnet/test_modeling_textnet.py
@@ -328,14 +328,18 @@ class TextNetModelIntegrationTest(unittest.TestCase):
        with torch.no_grad():
            output = model(**inputs)

-        # verify logits
-        self.assertEqual(output.logits.shape, torch.Size([1, 2]))
+        # verify output
+        self.assertEqual(output.last_hidden_state.shape, torch.Size([1, 512, 20, 27]))
        expected_slice_backbone = torch.tensor(
-            [0.9210, 0.6099, 0.0000, 0.0000, 0.0000, 0.0000, 3.2207, 2.6602, 1.8925, 0.0000],
+            [
+                [0.0000, 1.7415, 1.2660],
+                [0.0000, 1.0084, 1.9692],
+                [0.0000, 1.7464, 1.7892],
+            ],
            device=torch_device,
        )
        torch.testing.assert_close(
-            output.feature_maps[-1][0][10][12][:10], expected_slice_backbone, rtol=1e-3, atol=1e-3
+            output.last_hidden_state[0, 12, :3, :3], expected_slice_backbone, rtol=1e-2, atol=1e-2
        )