🚨🚨🚨 Enforce single model initialization (#21431)

* Enforce single model initialization * Add OneFormer example for problem 3 * Do it the Stas way * Actually rename the uses... * Rewrite test * Try to change the test this way * Fix all init slow/fast tests * Break connection * Fix more tests * Fix test for initialization * Remove custom test * Quality * Fix last failing tests * The end?
2023-02-09 15:46:26 -05:00
parent 2020ac4bd6
commit 04b2f13c37
25 changed files with 277 additions and 123 deletions
--- a/tests/models/reformer/test_modeling_reformer.py
+++ b/tests/models/reformer/test_modeling_reformer.py
@@ -1145,10 +1145,11 @@ class ReformerIntegrationTests(unittest.TestCase):
        hidden_states = model(input_ids=input_ids, attention_mask=attn_mask)[0]
        output_slice = hidden_states[1, -1, :5]
        expected_output_slice = torch.tensor(
-            [0.0256, -0.0121, 0.0636, 0.0024, -0.0393],
+            [0.1018, -0.2026, 0.2116, 0.0270, -0.1233],
            dtype=torch.float,
            device=torch_device,
        )
+
        self.assertTrue(torch.allclose(output_slice, expected_output_slice, atol=1e-3))

    def test_local_lm_model_grad(self):
@@ -1163,25 +1164,25 @@ class ReformerIntegrationTests(unittest.TestCase):
        input_ids, _ = self._get_input_ids_and_mask()
        loss = model(input_ids=input_ids, labels=input_ids)[0]

-        self.assertTrue(torch.allclose(loss, torch.tensor(5.7786, dtype=torch.float, device=torch_device), atol=1e-3))
+        self.assertTrue(torch.allclose(loss, torch.tensor(5.8019, dtype=torch.float, device=torch_device), atol=1e-3))
        loss.backward()

        # check last grads to cover all proable errors
        grad_slice_word = model.reformer.embeddings.word_embeddings.weight.grad[0, :5]
        expected_grad_slice_word = torch.tensor(
-            [-0.0005, 0.0001, 0.0002, 0.0003, 0.0006],
+            [-0.0005, -0.0001, -0.0002, -0.0006, -0.0006],
            dtype=torch.float,
            device=torch_device,
        )
        grad_slice_position_factor_1 = model.reformer.embeddings.position_embeddings.weights[0][1, 0, -5:]
        expected_grad_slice_pos_fac_1 = torch.tensor(
-            [0.0037, -1.3793, -1.0231, -1.5230, -2.5306],
+            [-0.5235, 0.5704, 0.0922, -0.3140, 0.9928],
            dtype=torch.float,
            device=torch_device,
        )
        grad_slice_position_factor_2 = model.reformer.embeddings.position_embeddings.weights[1][0, 1, :5]
        expected_grad_slice_pos_fac_2 = torch.tensor(
-            [-1.3165, 0.5168, 0.7785, 1.0811, -0.9830],
+            [1.7960, 1.7668, 0.5593, 0.0907, 1.8342],
            dtype=torch.float,
            device=torch_device,
        )
@@ -1203,24 +1204,24 @@ class ReformerIntegrationTests(unittest.TestCase):
        input_ids, _ = self._get_input_ids_and_mask()
        loss = model(input_ids=input_ids, labels=input_ids)[0]

-        self.assertTrue(torch.allclose(loss, torch.tensor(5.7819, dtype=torch.float, device=torch_device), atol=1e-3))
+        self.assertTrue(torch.allclose(loss, torch.tensor(5.7854, dtype=torch.float, device=torch_device), atol=1e-3))
        loss.backward()
        # check last grads to cover all proable errors
        grad_slice_word = model.reformer.embeddings.word_embeddings.weight.grad[0, :5]
        expected_grad_slice_word = torch.tensor(
-            [2.6357e-05, 4.3358e-04, -8.4985e-04, 1.0094e-04, 3.8954e-04],
+            [0.0004, 0.0003, 0.0006, -0.0004, 0.0002],
            dtype=torch.float,
            device=torch_device,
        )
        grad_slice_position_factor_1 = model.reformer.embeddings.position_embeddings.weights[0][1, 0, -5:]
        expected_grad_slice_pos_fac_1 = torch.tensor(
-            [-0.0984, 0.6283, 0.4282, 1.2960, 0.6897],
+            [-0.3792, 0.5593, -1.6993, 0.2033, 0.4131],
            dtype=torch.float,
            device=torch_device,
        )
        grad_slice_position_factor_2 = model.reformer.embeddings.position_embeddings.weights[1][0, 1, :5]
        expected_grad_slice_pos_fac_2 = torch.tensor(
-            [0.4626, -0.0231, -0.0172, 0.1081, 0.3805],
+            [-1.4212, -0.3201, -1.1944, 0.1258, 0.2856],
            dtype=torch.float,
            device=torch_device,
        )