[ViTHybrid] Fix accelerate slow tests (#20679)
* fix failing `accelerate` tests * make fixup * smaller values * even lower
This commit is contained in:
@@ -19,7 +19,7 @@ import inspect
|
||||
import unittest
|
||||
|
||||
from transformers import ViTHybridConfig
|
||||
from transformers.testing_utils import require_torch, require_vision, slow, torch_device
|
||||
from transformers.testing_utils import require_accelerate, require_torch, require_vision, slow, torch_device
|
||||
from transformers.utils import cached_property, is_torch_available, is_vision_available
|
||||
|
||||
from ...test_configuration_common import ConfigTester
|
||||
@@ -57,6 +57,7 @@ class ViTHybridModelTester:
|
||||
attention_probs_dropout_prob=0.1,
|
||||
type_sequence_label_size=10,
|
||||
initializer_range=0.02,
|
||||
backbone_featmap_shape=[1, 16, 4, 4],
|
||||
scope=None,
|
||||
):
|
||||
self.parent = parent
|
||||
@@ -76,6 +77,7 @@ class ViTHybridModelTester:
|
||||
self.type_sequence_label_size = type_sequence_label_size
|
||||
self.initializer_range = initializer_range
|
||||
self.scope = scope
|
||||
self.backbone_featmap_shape = backbone_featmap_shape
|
||||
|
||||
# in ViT hybrid, the seq length equals the number of patches + 1 (we add 1 for the [CLS] token)
|
||||
# the number of patches is based on the feature map of the backbone, which by default uses an output stride
|
||||
@@ -95,6 +97,16 @@ class ViTHybridModelTester:
|
||||
return config, pixel_values, labels
|
||||
|
||||
def get_config(self):
|
||||
backbone_config = {
|
||||
"global_padding": "same",
|
||||
"layer_type": "bottleneck",
|
||||
"depths": [3, 4, 9],
|
||||
"out_features": ["stage1", "stage2", "stage3"],
|
||||
"embedding_dynamic_padding": True,
|
||||
"hidden_sizes": [4, 8, 16, 32],
|
||||
"num_groups": 2,
|
||||
}
|
||||
|
||||
return ViTHybridConfig(
|
||||
image_size=self.image_size,
|
||||
patch_size=self.patch_size,
|
||||
@@ -108,6 +120,8 @@ class ViTHybridModelTester:
|
||||
attention_probs_dropout_prob=self.attention_probs_dropout_prob,
|
||||
is_decoder=False,
|
||||
initializer_range=self.initializer_range,
|
||||
backbone_featmap_shape=self.backbone_featmap_shape,
|
||||
backbone_config=backbone_config,
|
||||
)
|
||||
|
||||
def create_and_check_model(self, config, pixel_values, labels):
|
||||
@@ -229,3 +243,19 @@ class ViTModelIntegrationTest(unittest.TestCase):
|
||||
expected_slice = torch.tensor([-1.9090, -0.4993, -0.2389]).to(torch_device)
|
||||
|
||||
self.assertTrue(torch.allclose(outputs.logits[0, :3], expected_slice, atol=1e-4))
|
||||
|
||||
@slow
|
||||
@require_accelerate
|
||||
def test_accelerate_inference(self):
|
||||
feature_extractor = ViTHybridImageProcessor.from_pretrained("google/vit-hybrid-base-bit-384")
|
||||
model = ViTHybridForImageClassification.from_pretrained("google/vit-hybrid-base-bit-384", device_map="auto")
|
||||
|
||||
image = prepare_img()
|
||||
|
||||
inputs = feature_extractor(images=image, return_tensors="pt")
|
||||
outputs = model(**inputs)
|
||||
logits = outputs.logits
|
||||
# model predicts one of the 1000 ImageNet classes
|
||||
predicted_class_idx = logits.argmax(-1).item()
|
||||
|
||||
self.assertTrue(model.config.id2label[predicted_class_idx], "tabby, tabby cat")
|
||||
|
||||
Reference in New Issue
Block a user