Fix GPT-NeoX-20B past handling, attention computation (#17811)

* Fix GPT-NeoX-20B past handling, swap attention computation to hopefully avoid NaN, update docs
* 20B tests
2022-06-30 05:47:40 -07:00
parent 692e61e91a
commit 205bc4152c
3 changed files with 27 additions and 18 deletions
--- a/tests/models/gpt_neox/test_modeling_gpt_neox.py
+++ b/tests/models/gpt_neox/test_modeling_gpt_neox.py
@@ -226,6 +226,10 @@ class GPTNeoXModelTest(ModelTesterMixin, unittest.TestCase):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_for_causal_lm(*config_and_inputs)

+    @unittest.skip(reason="Feed forward chunking is not implemented")
+    def test_feed_forward_chunking(self):
+        pass
+
    @slow
    def test_model_from_pretrained(self):
        for model_name in GPT_NEOX_PRETRAINED_MODEL_ARCHIVE_LIST[:1]:
@@ -247,7 +251,7 @@ class GPTNeoXModelIntegrationTest(unittest.TestCase):
        self.assertEqual(output.shape, expected_shape)

        expected_slice = torch.tensor(
-            [[[33.8045, 2.3958, 34.2816], [63.7805, 4.8332, 63.5882], [66.9116, 5.2198, 63.1185]]]
+            [[[33.5938, 2.3789, 34.0312], [63.4688, 4.8164, 63.3438], [66.8750, 5.2422, 63.0625]]]
        )

        self.assertTrue(torch.allclose(output[:, :3, :3], expected_slice, atol=1e-4))