Fix tests for vision models (#35654)
* Trigger tests
* [run-slow] beit, detr, dinov2, vit, textnet
* Fix BEiT interpolate_pos_encoding
* Fix DETR test
* Update DINOv2 test
* Fix textnet
* Fix vit
* Fix DPT
* fix data2vec test
* Fix textnet test
* Update interpolation check
* Fix ZoeDepth tests
* Update interpolate embeddings for BEiT
* Apply suggestions from code review
This commit is contained in:
committed by
GitHub
parent
e60ae0d078
commit
d419862889
@@ -565,17 +565,12 @@ class Data2VecVisionModelIntegrationTest(unittest.TestCase):
|
||||
inputs = processor(images=image, return_tensors="pt", size={"height": 480, "width": 480})
|
||||
pixel_values = inputs.pixel_values.to(torch_device)
|
||||
|
||||
# with interpolate_pos_encoding being False an exception should be raised with higher resolution
|
||||
# images than what the model supports.
|
||||
self.assertFalse(processor.do_center_crop)
|
||||
with torch.no_grad():
|
||||
with self.assertRaises(ValueError, msg="doesn't match model"):
|
||||
model(pixel_values, interpolate_pos_encoding=False)
|
||||
|
||||
# with interpolate_pos_encoding being True the model should process the higher resolution image
|
||||
# successfully and produce the expected output.
|
||||
with torch.no_grad():
|
||||
outputs = model(pixel_values, interpolate_pos_encoding=True)
|
||||
|
||||
expected_shape = torch.Size((1, 1801, 768))
|
||||
# num_cls_tokens + (height / patch_size) * (width / patch_size)
|
||||
# 1 + (480 / 16) * (480 / 16) = 901
|
||||
expected_shape = torch.Size((1, 901, 768))
|
||||
self.assertEqual(outputs.last_hidden_state.shape, expected_shape)
|
||||
|
||||
Reference in New Issue
Block a user