fix pegasus init weights and other copied models (#36844)

* fix pegasus init weights

Signed-off-by: jiqing-feng <jiqing.feng@intel.com>

* fix the rest of models

Signed-off-by: jiqing-feng <jiqing.feng@intel.com>

* fix test

Signed-off-by: jiqing-feng <jiqing.feng@intel.com>

* fix informer init

Signed-off-by: jiqing-feng <jiqing.feng@intel.com>

* init weight before checking

Signed-off-by: jiqing-feng <jiqing.feng@intel.com>

* fix roformer tests

Signed-off-by: jiqing-feng <jiqing.feng@intel.com>

* fix roformer tests

Signed-off-by: jiqing-feng <jiqing.feng@intel.com>

---------

Signed-off-by: jiqing-feng <jiqing.feng@intel.com>
This commit is contained in:
jiqing-feng
2025-03-27 21:14:30 +08:00
committed by GitHub
parent 7e813f9cf0
commit 0e56fb69a2
9 changed files with 27 additions and 12 deletions

View File

@@ -534,6 +534,7 @@ class RoFormerSinusoidalPositionalEmbeddingTest(unittest.TestCase):
def test_basic(self):
input_ids = torch.tensor([[4, 10]], dtype=torch.long, device=torch_device)
emb1 = RoFormerSinusoidalPositionalEmbedding(num_positions=6, embedding_dim=6).to(torch_device)
emb1.weight = emb1._init_weight(emb1.weight)
emb = emb1(input_ids.shape)
desired_weights = torch.tensor(
[[0.0000, 0.0000, 0.0000, 1.0000, 1.0000, 1.0000], [0.8415, 0.0464, 0.0022, 0.5403, 0.9989, 1.0000]]
@@ -552,6 +553,7 @@ class RoFormerSinusoidalPositionalEmbeddingTest(unittest.TestCase):
]
).to(torch_device)
emb1 = RoFormerSinusoidalPositionalEmbedding(num_positions=512, embedding_dim=512).to(torch_device)
emb1.weight = emb1._init_weight(emb1.weight)
weights = emb1.weight.data[:3, :5].to(torch_device)
self.assertTrue(
@@ -573,6 +575,7 @@ class RoFormerSelfAttentionRotaryPositionEmbeddingTest(unittest.TestCase):
-torch.arange(2 * 12 * 16 * 64, dtype=torch.float, device=torch_device).reshape(2, 12, 16, 64) / 100
).to(torch_device)
embed_positions = RoFormerSinusoidalPositionalEmbedding(num_positions=32, embedding_dim=64).to(torch_device)
embed_positions.weight = embed_positions._init_weight(embed_positions.weight)
sinusoidal_pos = embed_positions([2, 16, 768])[None, None, :, :]
query_layer, key_layer = RoFormerSelfAttention.apply_rotary_position_embeddings(