Improve vision models (#17731)

* Improve vision models * Add a lot of improvements * Remove to_2tuple from swin tests * Fix TF Swin * Fix more tests * Fix copies * Improve more models * Fix ViTMAE test * Add channel check for TF models * Add proper channel check for TF models * Apply suggestion from code review * Apply suggestions from code review * Add channel check for Flax models, apply suggestion * Fix bug * Add tests for greyscale images * Add test for interpolation of pos encodigns Co-authored-by: Niels Rogge <nielsrogge@Nielss-MacBook-Pro.local>
2022-06-24 11:34:51 +02:00
parent 893ab12452
commit 0917870510
39 changed files with 801 additions and 916 deletions
--- a/tests/models/yolos/test_modeling_yolos.py
+++ b/tests/models/yolos/test_modeling_yolos.py
@@ -31,7 +31,7 @@ if is_torch_available():
    from torch import nn

    from transformers import YolosForObjectDetection, YolosModel
-    from transformers.models.yolos.modeling_yolos import YOLOS_PRETRAINED_MODEL_ARCHIVE_LIST, to_2tuple
+    from transformers.models.yolos.modeling_yolos import YOLOS_PRETRAINED_MODEL_ARCHIVE_LIST


 if is_vision_available():
@@ -86,9 +86,7 @@ class YolosModelTester:
        self.num_detection_tokens = num_detection_tokens
        # we set the expected sequence length (which is used in several tests)
        # expected sequence length = num_patches + 1 (we add 1 for the [CLS] token) + num_detection_tokens
-        image_size = to_2tuple(self.image_size)
-        patch_size = to_2tuple(self.patch_size)
-        num_patches = (image_size[1] // patch_size[1]) * (image_size[0] // patch_size[0])
+        num_patches = (image_size[1] // patch_size) * (image_size[0] // patch_size)
        self.expected_seq_len = num_patches + 1 + self.num_detection_tokens

    def prepare_config_and_inputs(self):