Fix tests for vision models (#35654)

* Trigger tests

* [run-slow] beit, detr, dinov2, vit, textnet

* Fix BEiT interpolate_pos_encoding

* Fix DETR test

* Update DINOv2 test

* Fix textnet

* Fix vit

* Fix DPT

* Fix data2vec test

* Fix textnet test

* Update interpolation check

* Fix ZoeDepth tests

* Update interpolate embeddings for BEiT

* Apply suggestions from code review
This commit is contained in:
Pavel Iakubovskii
2025-02-13 10:28:37 +00:00
committed by GitHub
parent e60ae0d078
commit d419862889
9 changed files with 55 additions and 79 deletions

View File

@@ -328,14 +328,18 @@ class TextNetModelIntegrationTest(unittest.TestCase):
with torch.no_grad():
output = model(**inputs)
# verify logits
self.assertEqual(output.logits.shape, torch.Size([1, 2]))
# verify output
self.assertEqual(output.last_hidden_state.shape, torch.Size([1, 512, 20, 27]))
expected_slice_backbone = torch.tensor(
[0.9210, 0.6099, 0.0000, 0.0000, 0.0000, 0.0000, 3.2207, 2.6602, 1.8925, 0.0000],
[
[0.0000, 1.7415, 1.2660],
[0.0000, 1.0084, 1.9692],
[0.0000, 1.7464, 1.7892],
],
device=torch_device,
)
torch.testing.assert_close(
output.feature_maps[-1][0][10][12][:10], expected_slice_backbone, rtol=1e-3, atol=1e-3
output.last_hidden_state[0, 12, :3, :3], expected_slice_backbone, rtol=1e-2, atol=1e-2
)