[StableLm] Add QK normalization and Parallel Residual Support (#29745)
* init: add StableLm 2 support * add integration test for parallel residual and qk layernorm * update(modeling): match qk norm naming for consistency with phi/persimmon * fix(tests): run fwd/bwd on random init test model to jitter norm weights off identity * `use_parallel_residual`: add copy pointer to `GPTNeoXLayer.forward` * refactor: rename head states var in `StableLmLayerNormPerHead` * tests: update test model and add generate check
This commit is contained in:
@@ -483,6 +483,40 @@ class StableLmModelIntegrationTest(unittest.TestCase):
|
||||
EXPECTED_TEXT_COMPLETION = """My favorite food has always been pizza, but lately I’ve been craving something different. I’ve been trying to eat healthier and I’ve"""
|
||||
self.assertEqual(text, EXPECTED_TEXT_COMPLETION)
|
||||
|
||||
@slow
def test_model_tiny_random_stablelm_2_logits(self):
    """Check the forward pass with parallel residual and qk layernorm.

    Runs the `stabilityai/tiny-random-stablelm-2` checkpoint on a fixed
    prompt and compares the logits against hard-coded reference values:
    the per-position mean over the last dimension, and the first 30
    logits of the first position.
    """
    input_ids = {"input_ids": torch.tensor([[510, 8588, 310, 1900, 9386]], dtype=torch.long, device=torch_device)}

    model = StableLmForCausalLM.from_pretrained("stabilityai/tiny-random-stablelm-2").to(torch_device)
    model.eval()

    output = model(**input_ids).logits

    # Expected mean on dim = -1
    EXPECTED_MEAN = torch.tensor([[-2.7196, -3.6099, -2.6877, -3.1973, -3.9344]]).to(torch_device)
    # assert_close reports the mismatching elements on failure, whereas
    # assertTrue(torch.allclose(...)) only reports "False is not true".
    torch.testing.assert_close(output.mean(dim=-1), EXPECTED_MEAN, atol=1e-4, rtol=1e-4)

    # Expected logits sliced from [0, 0, 0:30]
    EXPECTED_SLICE = torch.tensor([2.8364, 5.3811, 5.1659, 7.5485, 4.3219, 6.3315, 1.3967, 6.9147, 3.9679, 6.4786, 5.9176, 3.3067, 5.2917, 0.1485, 3.9630, 7.9947,10.6727, 9.6757, 8.8772, 8.3527, 7.8445, 6.6025, 5.5786, 7.0985,6.1369, 3.4259, 1.9397, 4.6157, 4.8105, 3.1768]).to(torch_device)  # fmt: skip
    torch.testing.assert_close(output[0, 0, :30], EXPECTED_SLICE, atol=1e-4, rtol=1e-4)
|
||||
|
||||
@slow
def test_model_tiny_random_stablelm_2_generation(self):
    """Check generation with parallel residual and qk layernorm.

    Generates 20 new tokens from a fixed prompt with the
    `stabilityai/tiny-random-stablelm-2` checkpoint and compares the
    decoded text against a hard-coded expected completion.
    """
    tokenizer = AutoTokenizer.from_pretrained("stabilityai/tiny-random-stablelm-2")
    model = StableLmForCausalLM.from_pretrained("stabilityai/tiny-random-stablelm-2")
    input_ids = tokenizer.encode(
        "My favorite ride at the amusement park",
        return_tensors="pt",
    )

    # Request greedy decoding explicitly: `temperature` only applies when
    # sampling, and 0 is not a valid sampling temperature, so
    # `do_sample=False` is the supported way to get deterministic output.
    outputs = model.generate(input_ids, max_new_tokens=20, do_sample=False)
    text = tokenizer.decode(outputs[0], skip_special_tokens=True)

    EXPECTED_TEXT_COMPLETION = """My favorite ride at the amusement park is the 2000-mile roller coaster. It's a thrilling ride filled with roller coast"""
    self.assertEqual(text, EXPECTED_TEXT_COMPLETION)
|
||||
|
||||
@require_bitsandbytes
|
||||
@slow
|
||||
@require_flash_attn
|
||||
|
||||
Reference in New Issue
Block a user