Fix tests for vision models (#35654)

* Trigger tests
* [run-slow] beit, detr, dinov2, vit, textnet
* Fix BEiT interpolate_pos_encoding
* Fix DETR test
* Update DINOv2 test
* Fix textnet
* Fix vit
* Fix DPT
* fix data2vec test
* Fix textnet test
* Update interpolation check
* Fix ZoeDepth tests
* Update interpolate embeddings for BEiT
* Apply suggestions from code review
2025-02-13 10:28:37 +00:00
parent e60ae0d078
commit d419862889
9 changed files with 55 additions and 79 deletions
--- a/tests/models/beit/test_modeling_beit.py
+++ b/tests/models/beit/test_modeling_beit.py
@@ -774,7 +774,9 @@ class BeitModelIntegrationTest(unittest.TestCase):
        with torch.no_grad():
            outputs = model(pixel_values, interpolate_pos_encoding=True)

-        expected_shape = torch.Size((1, 1801, 768))
+        # num_cls_tokens + (height / patch_size) * (width / patch_size)
+        # 1 + (480 / 16) * (480 / 16) = 1 + 30 * 30 = 901
+        expected_shape = torch.Size((1, 901, 768))
        self.assertEqual(outputs.last_hidden_state.shape, expected_shape)