fix pegasus init weights and other copied models (#36844)
* fix pegasus init weights
  Signed-off-by: jiqing-feng <jiqing.feng@intel.com>
* fix the rest of models
  Signed-off-by: jiqing-feng <jiqing.feng@intel.com>
* fix test
  Signed-off-by: jiqing-feng <jiqing.feng@intel.com>
* fix informer init
  Signed-off-by: jiqing-feng <jiqing.feng@intel.com>
* init weight before checking
  Signed-off-by: jiqing-feng <jiqing.feng@intel.com>
* fix roformer tests
  Signed-off-by: jiqing-feng <jiqing.feng@intel.com>
* fix roformer tests
  Signed-off-by: jiqing-feng <jiqing.feng@intel.com>
---------
Signed-off-by: jiqing-feng <jiqing.feng@intel.com>
This commit is contained in:
@@ -171,6 +171,7 @@ class InformerModelTester:
|
||||
embed_positions = InformerSinusoidalPositionalEmbedding(
|
||||
config.context_length + config.prediction_length, config.d_model
|
||||
).to(torch_device)
|
||||
embed_positions.weight = embed_positions._init_weight(embed_positions.weight)
|
||||
self.parent.assertTrue(torch.equal(model.encoder.embed_positions.weight, embed_positions.weight))
|
||||
self.parent.assertTrue(torch.equal(model.decoder.embed_positions.weight, embed_positions.weight))
|
||||
|
||||
|
||||
@@ -348,6 +348,19 @@ class PegasusXSUMIntegrationTest(AbstractSeq2SeqIntegrationTest):
|
||||
def model(self):
|
||||
return AutoModelForSeq2SeqLM.from_pretrained(self.checkpoint_name).to(torch_device)
|
||||
|
||||
@slow
def test_device_map(self):
    """Check that loading with ``device_map="auto"`` keeps the positional embeddings intact.

    The checkpoint is loaded twice: once normally and then moved to
    ``torch_device``, and once with ``device_map="auto"``. The decoder and
    encoder ``embed_positions.weight`` tensors of both models must be equal.
    """
    model_no_device_map = AutoModelForSeq2SeqLM.from_pretrained(self.checkpoint_name).to(torch_device)
    model_with_device_map = AutoModelForSeq2SeqLM.from_pretrained(self.checkpoint_name, device_map="auto")

    # Decoder is checked first, then encoder, matching the original order.
    for stack_name in ("decoder", "encoder"):
        expected = getattr(model_no_device_map.model, stack_name).embed_positions.weight
        actual = getattr(model_with_device_map.model, stack_name).embed_positions.weight
        # device_map="auto" may place a module on a different device than
        # torch_device; torch.equal requires both operands on the same device,
        # so the comparison is done on CPU copies.
        assert torch.equal(expected.cpu(), actual.cpu()), (
            f"{stack_name}.embed_positions.weight differs between the default load "
            'and the device_map="auto" load'
        )
|
||||
|
||||
@slow
|
||||
@require_torch_fp16
|
||||
def test_pegasus_xsum_summary(self):
|
||||
|
||||
@@ -534,6 +534,7 @@ class RoFormerSinusoidalPositionalEmbeddingTest(unittest.TestCase):
|
||||
def test_basic(self):
|
||||
input_ids = torch.tensor([[4, 10]], dtype=torch.long, device=torch_device)
|
||||
emb1 = RoFormerSinusoidalPositionalEmbedding(num_positions=6, embedding_dim=6).to(torch_device)
|
||||
emb1.weight = emb1._init_weight(emb1.weight)
|
||||
emb = emb1(input_ids.shape)
|
||||
desired_weights = torch.tensor(
|
||||
[[0.0000, 0.0000, 0.0000, 1.0000, 1.0000, 1.0000], [0.8415, 0.0464, 0.0022, 0.5403, 0.9989, 1.0000]]
|
||||
@@ -552,6 +553,7 @@ class RoFormerSinusoidalPositionalEmbeddingTest(unittest.TestCase):
|
||||
]
|
||||
).to(torch_device)
|
||||
emb1 = RoFormerSinusoidalPositionalEmbedding(num_positions=512, embedding_dim=512).to(torch_device)
|
||||
emb1.weight = emb1._init_weight(emb1.weight)
|
||||
weights = emb1.weight.data[:3, :5].to(torch_device)
|
||||
|
||||
self.assertTrue(
|
||||
@@ -573,6 +575,7 @@ class RoFormerSelfAttentionRotaryPositionEmbeddingTest(unittest.TestCase):
|
||||
-torch.arange(2 * 12 * 16 * 64, dtype=torch.float, device=torch_device).reshape(2, 12, 16, 64) / 100
|
||||
).to(torch_device)
|
||||
embed_positions = RoFormerSinusoidalPositionalEmbedding(num_positions=32, embedding_dim=64).to(torch_device)
|
||||
embed_positions.weight = embed_positions._init_weight(embed_positions.weight)
|
||||
sinusoidal_pos = embed_positions([2, 16, 768])[None, None, :, :]
|
||||
|
||||
query_layer, key_layer = RoFormerSelfAttention.apply_rotary_position_embeddings(
|
||||
|
||||
Reference in New Issue
Block a user