🚨🚨🚨 Enforce single model initialization (#21431)
* Enforce single model initialization * Add OneFormer example for problem 3 * Do it the Stas way * Actually rename the uses... * Rewrite test * Try to change the test this way * Fix all init slow/fast tests * Break connection * Fix more tests * Fix test for initialization * Remove custom test * Quality * Fix last failing tests * The end?
This commit is contained in:
@@ -1145,10 +1145,11 @@ class ReformerIntegrationTests(unittest.TestCase):
|
||||
hidden_states = model(input_ids=input_ids, attention_mask=attn_mask)[0]
|
||||
output_slice = hidden_states[1, -1, :5]
|
||||
expected_output_slice = torch.tensor(
|
||||
[0.0256, -0.0121, 0.0636, 0.0024, -0.0393],
|
||||
[0.1018, -0.2026, 0.2116, 0.0270, -0.1233],
|
||||
dtype=torch.float,
|
||||
device=torch_device,
|
||||
)
|
||||
|
||||
self.assertTrue(torch.allclose(output_slice, expected_output_slice, atol=1e-3))
|
||||
|
||||
def test_local_lm_model_grad(self):
|
||||
@@ -1163,25 +1164,25 @@ class ReformerIntegrationTests(unittest.TestCase):
|
||||
input_ids, _ = self._get_input_ids_and_mask()
|
||||
loss = model(input_ids=input_ids, labels=input_ids)[0]
|
||||
|
||||
self.assertTrue(torch.allclose(loss, torch.tensor(5.7786, dtype=torch.float, device=torch_device), atol=1e-3))
|
||||
self.assertTrue(torch.allclose(loss, torch.tensor(5.8019, dtype=torch.float, device=torch_device), atol=1e-3))
|
||||
loss.backward()
|
||||
|
||||
# check last grads to cover all probable errors
|
||||
grad_slice_word = model.reformer.embeddings.word_embeddings.weight.grad[0, :5]
|
||||
expected_grad_slice_word = torch.tensor(
|
||||
[-0.0005, 0.0001, 0.0002, 0.0003, 0.0006],
|
||||
[-0.0005, -0.0001, -0.0002, -0.0006, -0.0006],
|
||||
dtype=torch.float,
|
||||
device=torch_device,
|
||||
)
|
||||
grad_slice_position_factor_1 = model.reformer.embeddings.position_embeddings.weights[0][1, 0, -5:]
|
||||
expected_grad_slice_pos_fac_1 = torch.tensor(
|
||||
[0.0037, -1.3793, -1.0231, -1.5230, -2.5306],
|
||||
[-0.5235, 0.5704, 0.0922, -0.3140, 0.9928],
|
||||
dtype=torch.float,
|
||||
device=torch_device,
|
||||
)
|
||||
grad_slice_position_factor_2 = model.reformer.embeddings.position_embeddings.weights[1][0, 1, :5]
|
||||
expected_grad_slice_pos_fac_2 = torch.tensor(
|
||||
[-1.3165, 0.5168, 0.7785, 1.0811, -0.9830],
|
||||
[1.7960, 1.7668, 0.5593, 0.0907, 1.8342],
|
||||
dtype=torch.float,
|
||||
device=torch_device,
|
||||
)
|
||||
@@ -1203,24 +1204,24 @@ class ReformerIntegrationTests(unittest.TestCase):
|
||||
input_ids, _ = self._get_input_ids_and_mask()
|
||||
loss = model(input_ids=input_ids, labels=input_ids)[0]
|
||||
|
||||
self.assertTrue(torch.allclose(loss, torch.tensor(5.7819, dtype=torch.float, device=torch_device), atol=1e-3))
|
||||
self.assertTrue(torch.allclose(loss, torch.tensor(5.7854, dtype=torch.float, device=torch_device), atol=1e-3))
|
||||
loss.backward()
|
||||
# check last grads to cover all probable errors
|
||||
grad_slice_word = model.reformer.embeddings.word_embeddings.weight.grad[0, :5]
|
||||
expected_grad_slice_word = torch.tensor(
|
||||
[2.6357e-05, 4.3358e-04, -8.4985e-04, 1.0094e-04, 3.8954e-04],
|
||||
[0.0004, 0.0003, 0.0006, -0.0004, 0.0002],
|
||||
dtype=torch.float,
|
||||
device=torch_device,
|
||||
)
|
||||
grad_slice_position_factor_1 = model.reformer.embeddings.position_embeddings.weights[0][1, 0, -5:]
|
||||
expected_grad_slice_pos_fac_1 = torch.tensor(
|
||||
[-0.0984, 0.6283, 0.4282, 1.2960, 0.6897],
|
||||
[-0.3792, 0.5593, -1.6993, 0.2033, 0.4131],
|
||||
dtype=torch.float,
|
||||
device=torch_device,
|
||||
)
|
||||
grad_slice_position_factor_2 = model.reformer.embeddings.position_embeddings.weights[1][0, 1, :5]
|
||||
expected_grad_slice_pos_fac_2 = torch.tensor(
|
||||
[0.4626, -0.0231, -0.0172, 0.1081, 0.3805],
|
||||
[-1.4212, -0.3201, -1.1944, 0.1258, 0.2856],
|
||||
dtype=torch.float,
|
||||
device=torch_device,
|
||||
)
|
||||
|
||||
Reference in New Issue
Block a user