Remove low_cpu_mem_usage and _fast_init (#36963)

* Remove low_cpu_mem_usage and _fast_init

* Update deepspeed.py

* Update modeling_utils.py

* remove the first 2 tests everywhere

* Update test_modeling_common.py

* remove what was remaining about fast_init

* fix logic and simplify

* mismatched keys logic update

* Update modeling_utils.py

* Update modeling_utils.py

* Update modeling_utils.py

* Update modeling_utils.py

* fix 2 models init_weights

* extend to others

* remove grad

* Update modeling_fsmt.py

* init weights in tests

* style

* Update test_modeling_fsmt.py

* more old models

* fix more init_weights

* copies

* fix

* style

* Update modeling_lxmert.py

* fix inits

* more and more

* more

* should finalize

* style

* Update modeling_dinov2_with_registers.py

* fix

* Update modeling_encoder_decoder.py

* fix

* style

* Update modeling_lxmert.py

* post rebase cleanup

* Update modeling_informer.py

* back to start for device

* fix

* add test to detect all failing cases correctly

* Update test_modeling_common.py

* fix

* fix

* sam

* style

* Update modeling_maskformer_swin.py

* CIs

* CIs

* remove test - will add it on separate PR

* fix

* fix

* Update modeling_sam.py

* CIs

* CIs

* CIs

* convnext

* suggestions

* CIs

* fix copies after merge

---------

Co-authored-by: Yih-Dar <2521628+ydshieh@users.noreply.github.com>
This commit is contained in:
Cyril Vallez
2025-03-31 17:18:43 +02:00
committed by GitHub
parent 8805600406
commit f304318f5f
128 changed files with 464 additions and 1165 deletions

View File

@@ -534,7 +534,7 @@ class RoFormerSinusoidalPositionalEmbeddingTest(unittest.TestCase):
def test_basic(self):
input_ids = torch.tensor([[4, 10]], dtype=torch.long, device=torch_device)
emb1 = RoFormerSinusoidalPositionalEmbedding(num_positions=6, embedding_dim=6).to(torch_device)
emb1.weight = emb1._init_weight(emb1.weight)
emb1._init_weight()
emb = emb1(input_ids.shape)
desired_weights = torch.tensor(
[[0.0000, 0.0000, 0.0000, 1.0000, 1.0000, 1.0000], [0.8415, 0.0464, 0.0022, 0.5403, 0.9989, 1.0000]]
@@ -553,7 +553,7 @@ class RoFormerSinusoidalPositionalEmbeddingTest(unittest.TestCase):
]
).to(torch_device)
emb1 = RoFormerSinusoidalPositionalEmbedding(num_positions=512, embedding_dim=512).to(torch_device)
emb1.weight = emb1._init_weight(emb1.weight)
emb1._init_weight()
weights = emb1.weight.data[:3, :5].to(torch_device)
self.assertTrue(
@@ -575,7 +575,7 @@ class RoFormerSelfAttentionRotaryPositionEmbeddingTest(unittest.TestCase):
-torch.arange(2 * 12 * 16 * 64, dtype=torch.float, device=torch_device).reshape(2, 12, 16, 64) / 100
).to(torch_device)
embed_positions = RoFormerSinusoidalPositionalEmbedding(num_positions=32, embedding_dim=64).to(torch_device)
embed_positions.weight = embed_positions._init_weight(embed_positions.weight)
embed_positions._init_weight()
sinusoidal_pos = embed_positions([2, 16, 768])[None, None, :, :]
query_layer, key_layer = RoFormerSelfAttention.apply_rotary_position_embeddings(