Remove low_cpu_mem_usage and _fast_init (#36963)

* Remove low_cpu_mem_usage and _fast_init * Update deepspeed.py * Update modeling_utils.py * remove the first 2 tests everywhere * Update test_modeling_common.py * remove what was remaining about fast_init * fix logic and simplify * mismatched keys logic update * Update modeling_utils.py * Update modeling_utils.py * Update modeling_utils.py * Update modeling_utils.py * fix 2 models init_weights * extend to others * remove grad * Update modeling_fsmt.py * init weights in tests * style * Update test_modeling_fsmt.py * more old models * fix more init_weights * copies * fix * style * Update modeling_lxmert.py * fix inits * more and more * more * should finalize * style * Update modeling_dinov2_with_registers.py * fix * Update modeling_encoder_decoder.py * fix * style * Update modeling_lxmert.py * post rebase cleanup * Update modeling_informer.py * back to start for device * fix * add test to detect all failing cases correctly * Update test_modeling_common.py * fix * fix * sam * style * Update modeling_maskformer_swin.py * CIs * CIs * remove test - will add it on separate PR * fix * fix * Update modeling_sam.py * CIs * CIs * CIs * convnext * suggestions * CIs * fix copies after merge --------- Co-authored-by: Yih-Dar <2521628+ydshieh@users.noreply.github.com>
2025-03-31 17:18:43 +02:00
parent 8805600406
commit f304318f5f
128 changed files with 464 additions and 1165 deletions
--- a/tests/test_modeling_common.py
+++ b/tests/test_modeling_common.py
@@ -505,60 +505,6 @@ class ModelTesterMixin:
                        m.gradient_checkpointing, f"Module {n} does not have gradient_checkpointing set to False"
                    )

-    @is_flaky(description="low likelihood of failure, reason not yet discovered")
-    def test_save_load_fast_init_from_base(self):
-        for model_class in self.all_model_classes:
-            config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
-            if config.__class__ not in MODEL_MAPPING:
-                self.skipTest(reason=f"{config.__class__.__name__} not in MODEL_MAPPING")
-
-            base_class = MODEL_MAPPING[config.__class__]
-
-            if isinstance(base_class, tuple):
-                base_class = base_class[0]
-
-            if model_class == base_class:
-                continue
-
-            # make a copy of model class to not break future tests
-            # from https://stackoverflow.com/questions/9541025/how-to-copy-a-python-class
-            class CopyClass(model_class):
-                pass
-
-            model_class_copy = CopyClass
-
-            # make sure that all keys are expected for test
-            model_class_copy._keys_to_ignore_on_load_missing = []
-
-            # make init deterministic, but make sure that
-            # non-initialized weights throw errors nevertheless
-            model_class_copy._init_weights = _mock_init_weights
-            model_class_copy.init_weights = _mock_all_init_weights
-
-            model = base_class(config)
-            state_dict = model.state_dict()
-
-            # this will often delete a single weight of a multi-weight module
-            # to test an edge case
-            random_key_to_del = random.choice(list(state_dict.keys()))
-            del state_dict[random_key_to_del]
-
-            # check that certain keys didn't get saved with the model
-            with tempfile.TemporaryDirectory() as tmpdirname:
-                model.save_pretrained(tmpdirname)
-                torch.save(state_dict, os.path.join(tmpdirname, "pytorch_model.bin"))
-
-                model_fast_init = model_class_copy.from_pretrained(tmpdirname)
-                model_slow_init = model_class_copy.from_pretrained(tmpdirname, _fast_init=False)
-                # Before we test anything
-
-                for key in model_fast_init.state_dict().keys():
-                    if isinstance(model_slow_init.state_dict()[key], torch.BoolTensor):
-                        max_diff = (model_slow_init.state_dict()[key] ^ model_fast_init.state_dict()[key]).sum().item()
-                    else:
-                        max_diff = (model_slow_init.state_dict()[key] - model_fast_init.state_dict()[key]).sum().item()
-                    self.assertLessEqual(max_diff, 1e-3, msg=f"{key} not identical")
-
    @slow
    @require_accelerate
    @mark.accelerate_tests
@@ -640,62 +586,6 @@ class ModelTesterMixin:

        self.assertEqual(tied_params1, tied_params2)

-    def test_save_load_fast_init_to_base(self):
-        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
-        if config.__class__ not in MODEL_MAPPING:
-            self.skipTest(reason=f"{config.__class__.__name__} not in MODEL_MAPPING")
-
-        base_class = MODEL_MAPPING[config.__class__]
-
-        if isinstance(base_class, tuple):
-            base_class = base_class[0]
-
-        for model_class in self.all_model_classes:
-            if model_class == base_class:
-                continue
-
-            # make a copy of model class to not break future tests
-            # from https://stackoverflow.com/questions/9541025/how-to-copy-a-python-class
-            class CopyClass(base_class):
-                pass
-
-            base_class_copy = CopyClass
-
-            # make sure that all keys are expected for test
-            base_class_copy._keys_to_ignore_on_load_missing = []
-
-            # make init deterministic, but make sure that
-            # non-initialized weights throw errors nevertheless
-            base_class_copy._init_weights = _mock_init_weights
-            base_class_copy.init_weights = _mock_all_init_weights
-
-            model = model_class(config)
-            state_dict = model.state_dict()
-
-            # this will often delete a single weight of a multi-weight module
-            # to test an edge case
-            random_key_to_del = random.choice(list(state_dict.keys()))
-            del state_dict[random_key_to_del]
-
-            # check that certain keys didn't get saved with the model
-            with tempfile.TemporaryDirectory() as tmpdirname:
-                model.config.save_pretrained(tmpdirname)
-                torch.save(state_dict, os.path.join(tmpdirname, "pytorch_model.bin"))
-
-                model_fast_init = base_class_copy.from_pretrained(tmpdirname)
-                model_slow_init = base_class_copy.from_pretrained(tmpdirname, _fast_init=False)
-
-                for key in model_fast_init.state_dict().keys():
-                    if isinstance(model_slow_init.state_dict()[key], torch.BoolTensor):
-                        max_diff = torch.max(
-                            model_slow_init.state_dict()[key] ^ model_fast_init.state_dict()[key]
-                        ).item()
-                    else:
-                        max_diff = torch.max(
-                            torch.abs(model_slow_init.state_dict()[key] - model_fast_init.state_dict()[key])
-                        ).item()
-                    self.assertLessEqual(max_diff, 1e-3, msg=f"{key} not identical")
-
    def test_torch_save_load(self):
        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
        if config.__class__ not in MODEL_MAPPING:
@@ -3189,7 +3079,7 @@ class ModelTesterMixin:
        # not to init. the weights during the creation: to match the logic in `from_pretrained`, so we can keep the
        # same sequence of random ops in the execution path to allow us to compare `target_model` and `new_model` below
        # for `linear` part.
-        with ContextManagers([no_init_weights(True)]):
+        with ContextManagers([no_init_weights()]):
            target_model = MyClass(config=config)
        target_model.apply(target_model._initialize_weights)