Remove low_cpu_mem_usage and _fast_init (#36963)

* Remove low_cpu_mem_usage and _fast_init
* Update deepspeed.py
* Update modeling_utils.py
* remove the first 2 tests everywhere
* Update test_modeling_common.py
* remove what was remaining about fast_init
* fix logic and simplify
* mismatched keys logic update
* Update modeling_utils.py
* Update modeling_utils.py
* Update modeling_utils.py
* Update modeling_utils.py
* fix 2 models init_weights
* extend to others
* remove grad
* Update modeling_fsmt.py
* init weights in tests
* style
* Update test_modeling_fsmt.py
* more old models
* fix more init_weights
* copies
* fix
* style
* Update modeling_lxmert.py
* fix inits
* more and more
* more
* should finalize
* style
* Update modeling_dinov2_with_registers.py
* fix
* Update modeling_encoder_decoder.py
* fix
* style
* Update modeling_lxmert.py
* post rebase cleanup
* Update modeling_informer.py
* back to start for device
* fix
* add test to detect all failing cases correctly
* Update test_modeling_common.py
* fix
* fix
* sam
* style
* Update modeling_maskformer_swin.py
* CIs
* CIs
* remove test - will add it on separate PR
* fix
* fix
* Update modeling_sam.py
* CIs
* CIs
* CIs
* convnext
* suggestions
* CIs
* fix copies after merge

---------

Co-authored-by: Yih-Dar <2521628+ydshieh@users.noreply.github.com>
2025-03-31 17:18:43 +02:00
parent 8805600406
commit f304318f5f
128 changed files with 464 additions and 1165 deletions
--- a/tests/models/roformer/test_modeling_roformer.py
+++ b/tests/models/roformer/test_modeling_roformer.py
@@ -534,7 +534,7 @@ class RoFormerSinusoidalPositionalEmbeddingTest(unittest.TestCase):
    def test_basic(self):
        input_ids = torch.tensor([[4, 10]], dtype=torch.long, device=torch_device)
        emb1 = RoFormerSinusoidalPositionalEmbedding(num_positions=6, embedding_dim=6).to(torch_device)
-        emb1.weight = emb1._init_weight(emb1.weight)
+        emb1._init_weight()
        emb = emb1(input_ids.shape)
        desired_weights = torch.tensor(
            [[0.0000, 0.0000, 0.0000, 1.0000, 1.0000, 1.0000], [0.8415, 0.0464, 0.0022, 0.5403, 0.9989, 1.0000]]
@@ -553,7 +553,7 @@ class RoFormerSinusoidalPositionalEmbeddingTest(unittest.TestCase):
            ]
        ).to(torch_device)
        emb1 = RoFormerSinusoidalPositionalEmbedding(num_positions=512, embedding_dim=512).to(torch_device)
-        emb1.weight = emb1._init_weight(emb1.weight)
+        emb1._init_weight()
        weights = emb1.weight.data[:3, :5].to(torch_device)

        self.assertTrue(
@@ -575,7 +575,7 @@ class RoFormerSelfAttentionRotaryPositionEmbeddingTest(unittest.TestCase):
            -torch.arange(2 * 12 * 16 * 64, dtype=torch.float, device=torch_device).reshape(2, 12, 16, 64) / 100
        ).to(torch_device)
        embed_positions = RoFormerSinusoidalPositionalEmbedding(num_positions=32, embedding_dim=64).to(torch_device)
-        embed_positions.weight = embed_positions._init_weight(embed_positions.weight)
+        embed_positions._init_weight()
        sinusoidal_pos = embed_positions([2, 16, 768])[None, None, :, :]

        query_layer, key_layer = RoFormerSelfAttention.apply_rotary_position_embeddings(