[StableLm] Add QK normalization and Parallel Residual Support (#29745)

* init: add StableLm 2 support * add integration test for parallel residual and qk layernorm * update(modeling): match qk norm naming for consistency with phi/persimmon * fix(tests): run fwd/bwd on random init test model to jitter norm weights off identity * `use_parallel_residual`: add copy pointer to `GPTNeoXLayer.forward` * refactor: rename head states var in `StableLmLayerNormPerHead` * tests: update test model and add generate check
2024-04-08 17:51:58 -04:00
parent 8c00b53eb0
commit 2f12e40822
3 changed files with 97 additions and 10 deletions
--- a/tests/models/stablelm/test_modeling_stablelm.py
+++ b/tests/models/stablelm/test_modeling_stablelm.py
@@ -483,6 +483,40 @@ class StableLmModelIntegrationTest(unittest.TestCase):
        EXPECTED_TEXT_COMPLETION = """My favorite food has always been pizza, but lately I’ve been craving something different. I’ve been trying to eat healthier and I’ve"""
        self.assertEqual(text, EXPECTED_TEXT_COMPLETION)

+    @slow
+    def test_model_tiny_random_stablelm_2_logits(self):
+        # Check parallel residual and qk layernorm forward pass
+        input_ids = {"input_ids": torch.tensor([[510, 8588, 310, 1900, 9386]], dtype=torch.long, device=torch_device)}
+
+        model = StableLmForCausalLM.from_pretrained("stabilityai/tiny-random-stablelm-2").to(torch_device)
+        model.eval()
+
+        output = model(**input_ids).logits
+
+        # Expected mean on dim = -1
+        EXPECTED_MEAN = torch.tensor([[-2.7196, -3.6099, -2.6877, -3.1973, -3.9344]]).to(torch_device)
+        self.assertTrue(torch.allclose(output.mean(dim=-1), EXPECTED_MEAN, atol=1e-4, rtol=1e-4))
+
+        # Expected logits sliced from [0, 0, 0:30]
+        EXPECTED_SLICE = torch.tensor([2.8364, 5.3811, 5.1659, 7.5485, 4.3219, 6.3315, 1.3967, 6.9147, 3.9679, 6.4786, 5.9176, 3.3067, 5.2917, 0.1485, 3.9630, 7.9947,10.6727, 9.6757, 8.8772, 8.3527, 7.8445, 6.6025, 5.5786, 7.0985,6.1369, 3.4259, 1.9397, 4.6157, 4.8105, 3.1768]).to(torch_device)  # fmt: skip
+        self.assertTrue(torch.allclose(output[0, 0, :30], EXPECTED_SLICE, atol=1e-4, rtol=1e-4))
+
+    @slow
+    def test_model_tiny_random_stablelm_2_generation(self):
+        # Check parallel residual and qk layernorm generation
+        tokenizer = AutoTokenizer.from_pretrained("stabilityai/tiny-random-stablelm-2")
+        model = StableLmForCausalLM.from_pretrained("stabilityai/tiny-random-stablelm-2")
+        input_ids = tokenizer.encode(
+            "My favorite ride at the amusement park",
+            return_tensors="pt",
+        )
+
+        outputs = model.generate(input_ids, max_new_tokens=20, temperature=0)
+        text = tokenizer.decode(outputs[0], skip_special_tokens=True)
+
+        EXPECTED_TEXT_COMPLETION = """My favorite ride at the amusement park is the 2000-mile roller coaster. It's a thrilling ride filled with roller coast"""
+        self.assertEqual(text, EXPECTED_TEXT_COMPLETION)
+
    @require_bitsandbytes
    @slow
    @require_flash_attn