Remove low_cpu_mem_usage and _fast_init (#36963)
* Remove low_cpu_mem_usage and _fast_init
* Update deepspeed.py
* Update modeling_utils.py
* remove the first 2 tests everywhere
* Update test_modeling_common.py
* remove what was remaining about fast_init
* fix logic and simplify
* mismatched keys logic update
* Update modeling_utils.py
* Update modeling_utils.py
* Update modeling_utils.py
* Update modeling_utils.py
* fix 2 models init_weights
* extend to others
* remove grad
* Update modeling_fsmt.py
* init weights in tests
* style
* Update test_modeling_fsmt.py
* more old models
* fix more init_weights
* copies
* fix
* style
* Update modeling_lxmert.py
* fix inits
* more and more
* more
* should finalize
* style
* Update modeling_dinov2_with_registers.py
* fix
* Update modeling_encoder_decoder.py
* fix
* style
* Update modeling_lxmert.py
* post rebase cleanup
* Update modeling_informer.py
* back to start for device
* fix
* add test to detect all failing cases correctly
* Update test_modeling_common.py
* fix
* fix
* sam
* style
* Update modeling_maskformer_swin.py
* CIs
* CIs
* remove test - will add it on separate PR
* fix
* fix
* Update modeling_sam.py
* CIs
* CIs
* CIs
* convnext
* suggestions
* CIs
* fix copies after merge

---------

Co-authored-by: Yih-Dar <2521628+ydshieh@users.noreply.github.com>
This commit is contained in:
@@ -534,7 +534,7 @@ class RoFormerSinusoidalPositionalEmbeddingTest(unittest.TestCase):
|
||||
def test_basic(self):
|
||||
input_ids = torch.tensor([[4, 10]], dtype=torch.long, device=torch_device)
|
||||
emb1 = RoFormerSinusoidalPositionalEmbedding(num_positions=6, embedding_dim=6).to(torch_device)
|
||||
emb1.weight = emb1._init_weight(emb1.weight)
|
||||
emb1._init_weight()
|
||||
emb = emb1(input_ids.shape)
|
||||
desired_weights = torch.tensor(
|
||||
[[0.0000, 0.0000, 0.0000, 1.0000, 1.0000, 1.0000], [0.8415, 0.0464, 0.0022, 0.5403, 0.9989, 1.0000]]
|
||||
@@ -553,7 +553,7 @@ class RoFormerSinusoidalPositionalEmbeddingTest(unittest.TestCase):
|
||||
]
|
||||
).to(torch_device)
|
||||
emb1 = RoFormerSinusoidalPositionalEmbedding(num_positions=512, embedding_dim=512).to(torch_device)
|
||||
emb1.weight = emb1._init_weight(emb1.weight)
|
||||
emb1._init_weight()
|
||||
weights = emb1.weight.data[:3, :5].to(torch_device)
|
||||
|
||||
self.assertTrue(
|
||||
@@ -575,7 +575,7 @@ class RoFormerSelfAttentionRotaryPositionEmbeddingTest(unittest.TestCase):
|
||||
-torch.arange(2 * 12 * 16 * 64, dtype=torch.float, device=torch_device).reshape(2, 12, 16, 64) / 100
|
||||
).to(torch_device)
|
||||
embed_positions = RoFormerSinusoidalPositionalEmbedding(num_positions=32, embedding_dim=64).to(torch_device)
|
||||
embed_positions.weight = embed_positions._init_weight(embed_positions.weight)
|
||||
embed_positions._init_weight()
|
||||
sinusoidal_pos = embed_positions([2, 16, 768])[None, None, :, :]
|
||||
|
||||
query_layer, key_layer = RoFormerSelfAttention.apply_rotary_position_embeddings(
|
||||
|
||||
Reference in New Issue
Block a user