🚨 Don't use cache in non-generative models (#38751)

* deprecate for 1 version * style * fix some tests * fix esm * skip for now, GC requires positional args but we have keyword args * remove transpose for scores in modified models only * skip fx trace tests
2025-07-01 11:08:21 +02:00
parent dbc98328da
commit e435574721
37 changed files with 969 additions and 2328 deletions
--- a/tests/models/altclip/test_modeling_altclip.py
+++ b/tests/models/altclip/test_modeling_altclip.py
@@ -297,7 +297,7 @@ class AltCLIPTextModelTester:
@require_torch
 class AltCLIPTextModelTest(ModelTesterMixin, unittest.TestCase):
    all_model_classes = (AltCLIPTextModel,) if is_torch_available() else ()
-    fx_compatible = True
+    fx_compatible = False  # Cannot support if `can_return_tuple`
    test_pruning = False
    test_head_masking = False

@@ -411,7 +411,7 @@ def prepare_img():
 class AltCLIPModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase):
    all_model_classes = (AltCLIPModel,) if is_torch_available() else ()
    pipeline_model_mapping = {"feature-extraction": AltCLIPModel} if is_torch_available() else {}
-    fx_compatible = True
+    fx_compatible = False  # Cannot support if `can_return_tuple`
    test_head_masking = False
    test_pruning = False
    test_resize_embeddings = False
--- a/tests/models/layoutlm/test_modeling_layoutlm.py
+++ b/tests/models/layoutlm/test_modeling_layoutlm.py
@@ -243,7 +243,7 @@ class LayoutLMModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase
        if is_torch_available()
        else {}
    )
-    fx_compatible = True
+    fx_compatible = False  # Cannot support if `can_return_tuple`

    def setUp(self):
        self.model_tester = LayoutLMModelTester(self)
--- a/tests/models/splinter/test_modeling_splinter.py
+++ b/tests/models/splinter/test_modeling_splinter.py
@@ -372,6 +372,18 @@ class SplinterModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase
            with torch.no_grad():
                _ = model(**self._prepare_for_class(inputs_dict, model_class))

+    @unittest.skip(
+        "Splinter GC with `use_reentrant` fails after #38751, FIXME raushan after deprecated args are removed"
+    )
+    def test_training_gradient_checkpointing(self):
+        pass
+
+    @unittest.skip(
+        "Splinter GC with `use_reentrant` fails after #38751, FIXME raushan after deprecated args are removed"
+    )
+    def test_training_gradient_checkpointing_use_reentrant(self):
+        pass
+

@require_torch
 class SplinterModelIntegrationTest(unittest.TestCase):