Make gradient_checkpointing a training argument (#13657)

* Make gradient_checkpointing a training argument
* Update src/transformers/modeling_utils.py
  Co-authored-by: Stas Bekman <stas00@users.noreply.github.com>
* Update src/transformers/configuration_utils.py
  Co-authored-by: Stas Bekman <stas00@users.noreply.github.com>
* Fix tests
* Style
* document Gradient Checkpointing as a performance feature
* Small rename
* PoC for not using the config
* Adapt BC to new PoC
* Forgot to save
* Rollout changes to all other models
* Fix typo

Co-authored-by: Stas Bekman <stas00@users.noreply.github.com>
Co-authored-by: Stas Bekman <stas@stason.org>
2021-09-22 07:51:38 -04:00
parent 75f6641eaf
commit 27d4639779
96 changed files with 531 additions and 309 deletions
--- a/tests/test_modeling_common.py
+++ b/tests/test_modeling_common.py
@@ -370,15 +370,14 @@ class ModelTesterMixin:

    def test_training_gradient_checkpointing(self):
        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
-        if not self.model_tester.is_training or not hasattr(config, "gradient_checkpointing"):
+        if not self.model_tester.is_training:
            return

-        config.gradient_checkpointing = True
        config.use_cache = False
        config.return_dict = True

        for model_class in self.all_model_classes:
-            if model_class in get_values(MODEL_MAPPING):
+            if model_class in get_values(MODEL_MAPPING) or not model_class.supports_gradient_checkpointing:
                continue
            model = model_class(config)
            model.to(torch_device)