Add common test for torch.export and fix some vision models (#35124)

* Add is_torch_greater_or_equal test decorator * Add common test for torch.export * Fix bit * Fix focalnet * Fix imagegpt * Fix seggpt * Fix swin2sr * Enable torch.export test for vision models * Enable test for video models * Remove json * Enable for hiera * Enable for ijepa * Fix detr * Fix conditional_detr * Fix maskformer * Enable test maskformer * Fix test for deformable detr * Fix custom kernels for export in rt-detr and deformable-detr * Enable test for all DPT * Remove custom test for deformable detr * Simplify test to use only kwargs for export * Add comment * Move compile_compatible_method_lru_cache to utils * Fix beit export * Fix deformable detr * Fix copies data2vec<->beit * Fix typos, update test to work with dict * Add seed to the test * Enable test for vit_mae * Fix beit tests * [run-slow] beit, bit, conditional_detr, data2vec, deformable_detr, detr, focalnet, imagegpt, maskformer, rt_detr, seggpt, swin2sr * Add vitpose test * Add textnet test * Add dinov2 with registers * Update tests/test_modeling_common.py * Switch to torch.testing.assert_close * Fix maskformer * Remove save-load from test * Add dab_detr * Add depth_pro * Fix and test RT-DETRv2 * Fix dab_detr
2025-02-11 11:37:31 +00:00
parent 1779f5180e
commit f42d46ccb4
77 changed files with 305 additions and 151 deletions
--- a/tests/models/beit/test_modeling_beit.py
+++ b/tests/models/beit/test_modeling_beit.py
@@ -271,6 +271,7 @@ class BeitModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase):
    test_pruning = False
    test_resize_embeddings = False
    test_head_masking = False
+    test_torch_exportable = True

    def setUp(self):
        self.model_tester = BeitModelTester(self)
@@ -292,6 +293,10 @@ class BeitModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase):
    def test_feed_forward_chunking(self):
        pass

+    @unittest.skip(reason="BEiT can't compile dynamic")
+    def test_sdpa_can_compile_dynamic(self):
+        pass  # never executed: the decorator above skips this test unconditionally
+
    def test_model_get_set_embeddings(self):
        config, _ = self.model_tester.prepare_config_and_inputs_for_common()

@@ -764,13 +769,6 @@ class BeitModelIntegrationTest(unittest.TestCase):
        inputs = processor(images=image, return_tensors="pt", size={"height": 480, "width": 480})
        pixel_values = inputs.pixel_values.to(torch_device)

-        # with interpolate_pos_encoding being False an exception should be raised with higher resolution
-        # images than what the model supports.
-        self.assertFalse(processor.do_center_crop)
-        with torch.no_grad():
-            with self.assertRaises(ValueError, msg="doesn't match model"):
-                model(pixel_values, interpolate_pos_encoding=False)
-
        # with interpolate_pos_encoding being True the model should process the higher resolution image
        # successfully and produce the expected output.
        with torch.no_grad():
--- a/tests/models/bit/test_modeling_bit.py
+++ b/tests/models/bit/test_modeling_bit.py
@@ -170,6 +170,7 @@ class BitModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase):
    test_resize_embeddings = False
    test_head_masking = False
    has_attentions = False
+    test_torch_exportable = True

    def setUp(self):
        self.model_tester = BitModelTester(self)
--- a/tests/models/conditional_detr/test_modeling_conditional_detr.py
+++ b/tests/models/conditional_detr/test_modeling_conditional_detr.py
@@ -194,6 +194,7 @@ class ConditionalDetrModelTest(ModelTesterMixin, GenerationTesterMixin, Pipeline
    test_head_masking = False
    test_missing_keys = False
    zero_init_hidden_state = True
+    test_torch_exportable = True

    # special case for head models
    def _prepare_for_class(self, inputs_dict, model_class, return_labels=False):
--- a/tests/models/convnext/test_modeling_convnext.py
+++ b/tests/models/convnext/test_modeling_convnext.py
@@ -180,6 +180,7 @@ class ConvNextModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase
    test_resize_embeddings = False
    test_head_masking = False
    has_attentions = False
+    test_torch_exportable = True

    def setUp(self):
        self.model_tester = ConvNextModelTester(self)
--- a/tests/models/convnextv2/test_modeling_convnextv2.py
+++ b/tests/models/convnextv2/test_modeling_convnextv2.py
@@ -188,6 +188,7 @@ class ConvNextV2ModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCa
    test_resize_embeddings = False
    test_head_masking = False
    has_attentions = False
+    test_torch_exportable = True

    def setUp(self):
        self.model_tester = ConvNextV2ModelTester(self)
--- a/tests/models/cvt/test_modeling_cvt.py
+++ b/tests/models/cvt/test_modeling_cvt.py
@@ -159,6 +159,7 @@ class CvtModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase):
    test_resize_embeddings = False
    test_head_masking = False
    has_attentions = False
+    test_torch_exportable = True

    def setUp(self):
        self.model_tester = CvtModelTester(self)
--- a/tests/models/dab_detr/test_modeling_dab_detr.py
+++ b/tests/models/dab_detr/test_modeling_dab_detr.py
@@ -197,6 +197,7 @@ class DabDetrModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMi
    test_head_masking = False
    test_missing_keys = False
    zero_init_hidden_state = True
+    test_torch_exportable = True

    # special case for head models
    def _prepare_for_class(self, inputs_dict, model_class, return_labels=False):
--- a/tests/models/deformable_detr/test_modeling_deformable_detr.py
+++ b/tests/models/deformable_detr/test_modeling_deformable_detr.py
@@ -200,6 +200,7 @@ class DeformableDetrModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineT
    test_pruning = False
    test_head_masking = False
    test_missing_keys = False
+    test_torch_exportable = True

    # special case for head models
    def _prepare_for_class(self, inputs_dict, model_class, return_labels=False):
--- a/tests/models/deit/test_modeling_deit.py
+++ b/tests/models/deit/test_modeling_deit.py
@@ -222,6 +222,7 @@ class DeiTModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase):
    test_pruning = False
    test_resize_embeddings = False
    test_head_masking = False
+    test_torch_exportable = True

    def setUp(self):
        self.model_tester = DeiTModelTester(self)
--- a/tests/models/depth_anything/test_modeling_depth_anything.py
+++ b/tests/models/depth_anything/test_modeling_depth_anything.py
@@ -146,6 +146,7 @@ class DepthAnythingModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.Tes
    test_pruning = False
    test_resize_embeddings = False
    test_head_masking = False
+    test_torch_exportable = True

    def setUp(self):
        self.model_tester = DepthAnythingModelTester(self)
--- a/tests/models/depth_pro/test_modeling_depth_pro.py
+++ b/tests/models/depth_pro/test_modeling_depth_pro.py
@@ -212,6 +212,7 @@ class DepthProModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase
    test_pruning = False
    test_resize_embeddings = False
    test_head_masking = False
+    test_torch_exportable = True

    def setUp(self):
        self.model_tester = DepthProModelTester(self)
--- a/tests/models/detr/test_modeling_detr.py
+++ b/tests/models/detr/test_modeling_detr.py
@@ -194,6 +194,7 @@ class DetrModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMixin
    test_head_masking = False
    test_missing_keys = False
    zero_init_hidden_state = True
+    test_torch_exportable = True

    # special case for head models
    def _prepare_for_class(self, inputs_dict, model_class, return_labels=False):
--- a/tests/models/dinat/test_modeling_dinat.py
+++ b/tests/models/dinat/test_modeling_dinat.py
@@ -216,6 +216,7 @@ class DinatModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase):
    test_pruning = False
    test_resize_embeddings = False
    test_head_masking = False
+    test_torch_exportable = True

    def setUp(self):
        self.model_tester = DinatModelTester(self)
--- a/tests/models/dinov2/test_modeling_dinov2.py
+++ b/tests/models/dinov2/test_modeling_dinov2.py
@@ -212,6 +212,8 @@ class Dinov2ModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase):
    attention_mask and seq_length.
    """

+    test_torch_exportable = True
+
    all_model_classes = (
        (
            Dinov2Model,
--- a/tests/models/dinov2_with_registers/test_modeling_dinov2_with_registers.py
+++ b/tests/models/dinov2_with_registers/test_modeling_dinov2_with_registers.py
@@ -237,6 +237,7 @@ class Dinov2WithRegistersModelTest(ModelTesterMixin, PipelineTesterMixin, unitte
    test_pruning = False
    test_resize_embeddings = False
    test_head_masking = False
+    test_torch_exportable = True

    def setUp(self):
        self.model_tester = Dinov2WithRegistersModelTester(self)
--- a/tests/models/dpt/test_modeling_dpt.py
+++ b/tests/models/dpt/test_modeling_dpt.py
@@ -172,6 +172,7 @@ class DPTModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase):
    test_pruning = False
    test_resize_embeddings = False
    test_head_masking = False
+    test_torch_exportable = True

    def setUp(self):
        self.model_tester = DPTModelTester(self)
--- a/tests/models/dpt/test_modeling_dpt_auto_backbone.py
+++ b/tests/models/dpt/test_modeling_dpt_auto_backbone.py
@@ -140,6 +140,7 @@ class DPTModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase):
    test_pruning = False
    test_resize_embeddings = False
    test_head_masking = False
+    test_torch_exportable = True

    def setUp(self):
        self.model_tester = DPTModelTester(self)
--- a/tests/models/dpt/test_modeling_dpt_hybrid.py
+++ b/tests/models/dpt/test_modeling_dpt_hybrid.py
@@ -186,6 +186,7 @@ class DPTModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase):
    test_pruning = False
    test_resize_embeddings = False
    test_head_masking = False
+    test_torch_exportable = True

    def setUp(self):
        self.model_tester = DPTModelTester(self)
--- a/tests/models/efficientnet/test_modeling_efficientnet.py
+++ b/tests/models/efficientnet/test_modeling_efficientnet.py
@@ -139,6 +139,7 @@ class EfficientNetModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.Test
    test_resize_embeddings = False
    test_head_masking = False
    has_attentions = False
+    test_torch_exportable = True

    def setUp(self):
        self.model_tester = EfficientNetModelTester(self)
--- a/tests/models/focalnet/test_modeling_focalnet.py
+++ b/tests/models/focalnet/test_modeling_focalnet.py
@@ -247,6 +247,7 @@ class FocalNetModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase
    test_resize_embeddings = False
    test_head_masking = False
    has_attentions = False
+    test_torch_exportable = True

    def setUp(self):
        self.model_tester = FocalNetModelTester(self)
--- a/tests/models/glpn/test_modeling_glpn.py
+++ b/tests/models/glpn/test_modeling_glpn.py
@@ -152,6 +152,7 @@ class GLPNModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase):
    test_head_masking = False
    test_pruning = False
    test_resize_embeddings = False
+    test_torch_exportable = True

    def setUp(self):
        self.model_tester = GLPNModelTester(self)
--- a/tests/models/hiera/test_modeling_hiera.py
+++ b/tests/models/hiera/test_modeling_hiera.py
@@ -250,6 +250,7 @@ class HieraModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase):
    test_pruning = False
    test_resize_embeddings = False
    test_head_masking = False
+    test_torch_exportable = True

    def setUp(self):
        self.model_tester = HieraModelTester(self)
--- a/tests/models/ijepa/test_modeling_ijepa.py
+++ b/tests/models/ijepa/test_modeling_ijepa.py
@@ -207,6 +207,7 @@ class IJepaModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase):
    test_pruning = False
    test_resize_embeddings = False
    test_head_masking = False
+    test_torch_exportable = True

    def setUp(self):
        self.model_tester = IJepaModelTester(self)
--- a/tests/models/imagegpt/test_modeling_imagegpt.py
+++ b/tests/models/imagegpt/test_modeling_imagegpt.py
@@ -237,6 +237,7 @@ class ImageGPTModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterM
        else {}
    )
    test_missing_keys = False
+    test_torch_exportable = True

    # as ImageGPTForImageClassification isn't included in any auto mapping, we add labels here
    def _prepare_for_class(self, inputs_dict, model_class, return_labels=False):
--- a/tests/models/mask2former/test_modeling_mask2former.py
+++ b/tests/models/mask2former/test_modeling_mask2former.py
@@ -205,6 +205,7 @@ class Mask2FormerModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestC
    test_pruning = False
    test_head_masking = False
    test_missing_keys = False
+    test_torch_exportable = True

    def setUp(self):
        self.model_tester = Mask2FormerModelTester(self)
--- a/tests/models/maskformer/test_modeling_maskformer.py
+++ b/tests/models/maskformer/test_modeling_maskformer.py
@@ -209,6 +209,7 @@ class MaskFormerModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCa
    test_head_masking = False
    test_missing_keys = False
    zero_init_hidden_state = True
+    test_torch_exportable = True

    def setUp(self):
        self.model_tester = MaskFormerModelTester(self)
--- a/tests/models/maskformer/test_modeling_maskformer_swin.py
+++ b/tests/models/maskformer/test_modeling_maskformer_swin.py
@@ -181,6 +181,7 @@ class MaskFormerSwinModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.Te
    test_pruning = False
    test_resize_embeddings = False
    test_head_masking = False
+    test_torch_exportable = True

    def setUp(self):
        self.model_tester = MaskFormerSwinModelTester(self)
--- a/tests/models/mobilenet_v1/test_modeling_mobilenet_v1.py
+++ b/tests/models/mobilenet_v1/test_modeling_mobilenet_v1.py
@@ -154,6 +154,7 @@ class MobileNetV1ModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestC
    test_resize_embeddings = False
    test_head_masking = False
    has_attentions = False
+    test_torch_exportable = True

    def setUp(self):
        self.model_tester = MobileNetV1ModelTester(self)
--- a/tests/models/mobilenet_v2/test_modeling_mobilenet_v2.py
+++ b/tests/models/mobilenet_v2/test_modeling_mobilenet_v2.py
@@ -205,6 +205,7 @@ class MobileNetV2ModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestC
    test_resize_embeddings = False
    test_head_masking = False
    has_attentions = False
+    test_torch_exportable = True

    def setUp(self):
        self.model_tester = MobileNetV2ModelTester(self)
--- a/tests/models/mobilevit/test_modeling_mobilevit.py
+++ b/tests/models/mobilevit/test_modeling_mobilevit.py
@@ -198,6 +198,7 @@ class MobileViTModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCas
    test_resize_embeddings = False
    test_head_masking = False
    has_attentions = False
+    test_torch_exportable = True

    def setUp(self):
        self.model_tester = MobileViTModelTester(self)
--- a/tests/models/mobilevitv2/test_modeling_mobilevitv2.py
+++ b/tests/models/mobilevitv2/test_modeling_mobilevitv2.py
@@ -200,6 +200,7 @@ class MobileViTV2ModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestC
    test_resize_embeddings = False
    test_head_masking = False
    has_attentions = False
+    test_torch_exportable = True

    def setUp(self):
        self.model_tester = MobileViTV2ModelTester(self)
--- a/tests/models/poolformer/test_modeling_poolformer.py
+++ b/tests/models/poolformer/test_modeling_poolformer.py
@@ -132,6 +132,7 @@ class PoolFormerModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCa
    test_resize_embeddings = False
    test_torchscript = False
    has_attentions = False
+    test_torch_exportable = True

    def setUp(self):
        self.model_tester = PoolFormerModelTester(self)
--- a/tests/models/pvt/test_modeling_pvt.py
+++ b/tests/models/pvt/test_modeling_pvt.py
@@ -166,6 +166,7 @@ class PvtModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase):
    test_resize_embeddings = False
    test_torchscript = False
    has_attentions = False
+    test_torch_exportable = True

    def setUp(self):
        self.model_tester = PvtModelTester(self)
--- a/tests/models/pvt_v2/test_modeling_pvt_v2.py
+++ b/tests/models/pvt_v2/test_modeling_pvt_v2.py
@@ -202,6 +202,7 @@ class PvtV2ModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase):
    test_resize_embeddings = False
    test_torchscript = False
    has_attentions = False
+    test_torch_exportable = True

    def setUp(self):
        self.model_tester = PvtV2ModelTester(self)
--- a/tests/models/regnet/test_modeling_regnet.py
+++ b/tests/models/regnet/test_modeling_regnet.py
@@ -133,6 +133,7 @@ class RegNetModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase):
    test_resize_embeddings = False
    test_head_masking = False
    has_attentions = False
+    test_torch_exportable = True

    def setUp(self):
        self.model_tester = RegNetModelTester(self)
--- a/tests/models/resnet/test_modeling_resnet.py
+++ b/tests/models/resnet/test_modeling_resnet.py
@@ -178,6 +178,7 @@ class ResNetModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase):
    test_resize_embeddings = False
    test_head_masking = False
    has_attentions = False
+    test_torch_exportable = True

    def setUp(self):
        self.model_tester = ResNetModelTester(self)
--- a/tests/models/rt_detr/test_modeling_rt_detr.py
+++ b/tests/models/rt_detr/test_modeling_rt_detr.py
@@ -261,6 +261,7 @@ class RTDetrModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase):
    test_pruning = False
    test_head_masking = False
    test_missing_keys = False
+    test_torch_exportable = True

    # special case for head models
    def _prepare_for_class(self, inputs_dict, model_class, return_labels=False):
--- a/tests/models/rt_detr_v2/test_modeling_rt_detr_v2.py
+++ b/tests/models/rt_detr_v2/test_modeling_rt_detr_v2.py
@@ -259,6 +259,7 @@ class RTDetrV2ModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase
    test_pruning = False
    test_head_masking = False
    test_missing_keys = False
+    test_torch_exportable = True

    # special case for head models
    def _prepare_for_class(self, inputs_dict, model_class, return_labels=False):
--- a/tests/models/segformer/test_modeling_segformer.py
+++ b/tests/models/segformer/test_modeling_segformer.py
@@ -180,6 +180,7 @@ class SegformerModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCas
    test_head_masking = False
    test_pruning = False
    test_resize_embeddings = False
+    test_torch_exportable = True

    def setUp(self):
        self.model_tester = SegformerModelTester(self)
--- a/tests/models/seggpt/test_modeling_seggpt.py
+++ b/tests/models/seggpt/test_modeling_seggpt.py
@@ -172,6 +172,8 @@ class SegGptModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase):
    test_resize_embeddings = False
    test_head_masking = False
    test_torchscript = False
+    test_torch_exportable = True
+
    pipeline_model_mapping = (
        {"feature-extraction": SegGptModel, "mask-generation": SegGptModel} if is_torch_available() else {}
    )
--- a/tests/models/swiftformer/test_modeling_swiftformer.py
+++ b/tests/models/swiftformer/test_modeling_swiftformer.py
@@ -147,6 +147,7 @@ class SwiftFormerModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestC
    test_resize_embeddings = False
    test_head_masking = False
    has_attentions = False
+    test_torch_exportable = True

    def setUp(self):
        self.model_tester = SwiftFormerModelTester(self)
--- a/tests/models/swin/test_modeling_swin.py
+++ b/tests/models/swin/test_modeling_swin.py
@@ -240,6 +240,7 @@ class SwinModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase):
    test_pruning = False
    test_resize_embeddings = False
    test_head_masking = False
+    test_torch_exportable = True

    def setUp(self):
        self.model_tester = SwinModelTester(self)
--- a/tests/models/swin2sr/test_modeling_swin2sr.py
+++ b/tests/models/swin2sr/test_modeling_swin2sr.py
@@ -172,6 +172,7 @@ class Swin2SRModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase)
    test_resize_embeddings = False
    test_head_masking = False
    test_torchscript = False
+    test_torch_exportable = True

    def setUp(self):
        self.model_tester = Swin2SRModelTester(self)
--- a/tests/models/swinv2/test_modeling_swinv2.py
+++ b/tests/models/swinv2/test_modeling_swinv2.py
@@ -226,6 +226,7 @@ class Swinv2ModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase):
    test_pruning = False
    test_resize_embeddings = False
    test_head_masking = False
+    test_torch_exportable = True

    def setUp(self):
        self.model_tester = Swinv2ModelTester(self)
--- a/tests/models/table_transformer/test_modeling_table_transformer.py
+++ b/tests/models/table_transformer/test_modeling_table_transformer.py
@@ -209,6 +209,7 @@ class TableTransformerModelTest(ModelTesterMixin, GenerationTesterMixin, Pipelin
    test_head_masking = False
    test_missing_keys = False
    zero_init_hidden_state = True
+    test_torch_exportable = True

    # special case for head models
    def _prepare_for_class(self, inputs_dict, model_class, return_labels=False):
--- a/tests/models/textnet/test_modeling_textnet.py
+++ b/tests/models/textnet/test_modeling_textnet.py
@@ -217,6 +217,7 @@ class TextNetModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase)
    test_pruning = False
    test_resize_embeddings = False
    test_head_masking = False
+    test_torch_exportable = True
    has_attentions = False

    def setUp(self):
--- a/tests/models/timesformer/test_modeling_timesformer.py
+++ b/tests/models/timesformer/test_modeling_timesformer.py
@@ -167,6 +167,7 @@ class TimesformerModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestC
    test_torchscript = False
    test_resize_embeddings = False
    test_head_masking = False
+    test_torch_exportable = True

    def setUp(self):
        self.model_tester = TimesformerModelTester(self)
--- a/tests/models/upernet/test_modeling_upernet.py
+++ b/tests/models/upernet/test_modeling_upernet.py
@@ -157,6 +157,7 @@ class UperNetModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase)
    test_head_masking = False
    test_torchscript = False
    has_attentions = False
+    test_torch_exportable = True

    def setUp(self):
        self.model_tester = UperNetModelTester(self)
--- a/tests/models/videomae/test_modeling_videomae.py
+++ b/tests/models/videomae/test_modeling_videomae.py
@@ -186,6 +186,7 @@ class VideoMAEModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase
    test_torchscript = False
    test_resize_embeddings = False
    test_head_masking = False
+    test_torch_exportable = True

    def setUp(self):
        self.model_tester = VideoMAEModelTester(self)
--- a/tests/models/vit/test_modeling_vit.py
+++ b/tests/models/vit/test_modeling_vit.py
@@ -207,6 +207,7 @@ class ViTModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase):
    test_pruning = False
    test_resize_embeddings = False
    test_head_masking = False
+    test_torch_exportable = True

    def setUp(self):
        self.model_tester = ViTModelTester(self)
--- a/tests/models/vit_mae/test_modeling_vit_mae.py
+++ b/tests/models/vit_mae/test_modeling_vit_mae.py
@@ -174,6 +174,7 @@ class ViTMAEModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase):
    test_torchscript = False
    test_resize_embeddings = False
    test_head_masking = False
+    test_torch_exportable = True

    def setUp(self):
        self.model_tester = ViTMAEModelTester(self)
--- a/tests/models/vit_msn/test_modeling_vit_msn.py
+++ b/tests/models/vit_msn/test_modeling_vit_msn.py
@@ -162,6 +162,7 @@ class ViTMSNModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase):
    test_torchscript = False
    test_resize_embeddings = False
    test_head_masking = False
+    test_torch_exportable = True

    def setUp(self):
        self.model_tester = ViTMSNModelTester(self)
--- a/tests/models/vitdet/test_modeling_vitdet.py
+++ b/tests/models/vitdet/test_modeling_vitdet.py
@@ -169,6 +169,7 @@ class VitDetModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase):
    test_pruning = False
    test_resize_embeddings = False
    test_head_masking = False
+    test_torch_exportable = True

    def setUp(self):
        self.model_tester = VitDetModelTester(self)
--- a/tests/models/vitmatte/test_modeling_vitmatte.py
+++ b/tests/models/vitmatte/test_modeling_vitmatte.py
@@ -143,6 +143,7 @@ class VitMatteModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase
    test_pruning = False
    test_resize_embeddings = False
    test_head_masking = False
+    test_torch_exportable = True

    def setUp(self):
        self.model_tester = VitMatteModelTester(self)
--- a/tests/models/vitpose/test_modeling_vitpose.py
+++ b/tests/models/vitpose/test_modeling_vitpose.py
@@ -154,6 +154,7 @@ class VitPoseModelTest(ModelTesterMixin, unittest.TestCase):
    test_pruning = False
    test_resize_embeddings = False
    test_head_masking = False
+    test_torch_exportable = True

    def setUp(self):
        self.model_tester = VitPoseModelTester(self)
--- a/tests/models/vitpose_backbone/test_modeling_vitpose_backbone.py
+++ b/tests/models/vitpose_backbone/test_modeling_vitpose_backbone.py
@@ -18,7 +18,7 @@ import inspect
 import unittest

 from transformers import VitPoseBackboneConfig
-from transformers.testing_utils import require_torch
+from transformers.testing_utils import require_torch, torch_device
 from transformers.utils import is_torch_available, is_vision_available

 from ...test_backbone_common import BackboneTesterMixin
@@ -27,6 +27,8 @@ from ...test_modeling_common import ModelTesterMixin, floats_tensor, ids_tensor


 if is_torch_available():
+    import torch
+
    from transformers import VitPoseBackbone


@@ -129,6 +131,7 @@ class VitPoseBackboneModelTest(ModelTesterMixin, unittest.TestCase):
    test_pruning = False
    test_resize_embeddings = False
    test_head_masking = False
+    test_torch_exportable = True

    def setUp(self):
        self.model_tester = VitPoseBackboneModelTester(self)
@@ -187,6 +190,17 @@ class VitPoseBackboneModelTest(ModelTesterMixin, unittest.TestCase):
            expected_arg_names = ["pixel_values"]
            self.assertListEqual(arg_names[:1], expected_arg_names)

+    def test_torch_export(self):
+        # Dense architecture: run the inherited export test without overriding config or inputs
+        super().test_torch_export()
+
+        # MOE architecture: set `num_experts` on the config and add one `dataset_index` entry per batch item
+        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
+        config.num_experts = 2
+        config.part_features = config.hidden_size // config.num_experts
+        inputs_dict["dataset_index"] = torch.tensor([0] * self.model_tester.batch_size, device=torch_device)
+        super().test_torch_export(config=config, inputs_dict=inputs_dict)
+

@require_torch
 class VitPoseBackboneTest(unittest.TestCase, BackboneTesterMixin):
--- a/tests/models/vivit/test_modeling_vivit.py
+++ b/tests/models/vivit/test_modeling_vivit.py
@@ -175,6 +175,7 @@ class VivitModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase):
    test_torchscript = False
    test_resize_embeddings = False
    test_head_masking = False
+    test_torch_exportable = True

    def setUp(self):
        self.model_tester = VivitModelTester(self)
--- a/tests/models/yolos/test_modeling_yolos.py
+++ b/tests/models/yolos/test_modeling_yolos.py
@@ -178,6 +178,7 @@ class YolosModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase):
    test_resize_embeddings = False
    test_head_masking = False
    test_torchscript = False
+    test_torch_exportable = True

    # special case for head model
    def _prepare_for_class(self, inputs_dict, model_class, return_labels=False):
--- a/tests/models/zoedepth/test_modeling_zoedepth.py
+++ b/tests/models/zoedepth/test_modeling_zoedepth.py
@@ -147,6 +147,7 @@ class ZoeDepthModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase
    test_pruning = False
    test_resize_embeddings = False
    test_head_masking = False
+    test_torch_exportable = True

    def setUp(self):
        self.model_tester = ZoeDepthModelTester(self)
--- a/tests/test_modeling_common.py
+++ b/tests/test_modeling_common.py
@@ -86,6 +86,7 @@ from transformers.testing_utils import (
    require_torch,
    require_torch_accelerator,
    require_torch_gpu,
+    require_torch_greater_or_equal,
    require_torch_multi_accelerator,
    require_torch_multi_gpu,
    require_torch_sdpa,
@@ -221,6 +222,7 @@ class ModelTesterMixin:
    test_mismatched_shapes = True
    test_missing_keys = True
    test_model_parallel = False
+    test_torch_exportable = False
    # Used in `check_training_gradient_checkpointing` to NOT check all params having gradient (e.g. for some MOE models)
    test_all_params_have_gradient = True
    is_encoder_decoder = False
@@ -4865,6 +4867,72 @@ class ModelTesterMixin:
            # Assert the last tokens are actually the same (except for the natural fluctuation due to order of FP ops)
            torch.testing.assert_close(all_logits[:, -1:, :], last_token_logits, rtol=1e-5, atol=1e-5)

+    @slow
+    @require_torch_greater_or_equal("2.5")
+    def test_torch_export(self, config=None, inputs_dict=None, tolerance=1e-4):
+        """
+        Test if model can be exported with torch.export.export()
+
+        Args:
+            config (PretrainedConfig):
+                Config to use for the model, if None, use default config from model_tester
+            inputs_dict (dict):
+                Inputs to use for the model, if None, use default inputs from model_tester
+            tolerance (float):
+                `atol` and `rtol` for torch.testing.assert_close(), defined in signature for test overriding
+        """
+        if not self.test_torch_exportable:
+            self.skipTest(reason="test_torch_exportable=False for this model.")
+
+        def recursively_check(eager_outputs, exported_outputs):
+            is_tested = False
+            if isinstance(eager_outputs, torch.Tensor):
+                torch.testing.assert_close(eager_outputs, exported_outputs, atol=tolerance, rtol=tolerance)
+                return True
+            elif isinstance(eager_outputs, (tuple, list)):
+                # Recurse before OR-ing: `is_tested or check(...)` short-circuits after the first tensor
+                for eager_output, exported_output in zip(eager_outputs, exported_outputs):
+                    is_tested = recursively_check(eager_output, exported_output) or is_tested
+            elif isinstance(eager_outputs, dict):
+                # Same operand order here, so every value is compared rather than only the first one
+                for key in eager_outputs:
+                    is_tested = recursively_check(eager_outputs[key], exported_outputs[key]) or is_tested
+            return is_tested
+
+        default_config, default_inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
+        config = config or default_config
+        inputs_dict = inputs_dict or default_inputs_dict
+
+        for model_class in self.all_model_classes:
+            if model_class.__name__.endswith("ForPreTraining"):
+                continue
+
+            with self.subTest(model_class.__name__):
+                model = model_class(config).eval().to(torch_device)
+
+                # Export model
+                exported_model = torch.export.export(
+                    model,
+                    args=(),
+                    kwargs=inputs_dict,
+                    strict=True,
+                )
+
+                # Run exported model and eager model
+                with torch.no_grad():
+                    # set seed in case anything is not deterministic in model (e.g. vit_mae noise)
+                    torch.manual_seed(1234)
+                    eager_outputs = model(**inputs_dict)
+                    torch.manual_seed(1234)
+                    exported_outputs = exported_model.module().forward(**inputs_dict)
+
+                # Check if outputs are close:
+                # is_tested is a boolean flag indicating if we compare any outputs,
+                # e.g. there might be a situation when outputs are empty list, then is_tested will be False.
+                # In case outputs are different, the error will be raised in the `recursively_check` function.
+                is_tested = recursively_check(eager_outputs, exported_outputs)
+                self.assertTrue(is_tested, msg=f"No outputs were compared for {model_class.__name__}")
+
    @require_torch_gpu
    def test_flex_attention_with_grads(self):
        for model_class in self.all_model_classes: