Remove unnecessary unsqueeze - squeeze in rotary positional embedding (#26162)
* remove unnecessary unsqueeze-squeeze in llama * correct other models * fix * revert gpt_neox_japanese * fix copie * fix test
This commit is contained in:
@@ -430,7 +430,8 @@ class MistralIntegrationTest(unittest.TestCase):
|
||||
def test_model_7b_logits(self):
|
||||
input_ids = [1, 306, 4658, 278, 6593, 310, 2834, 338]
|
||||
model = MistralForCausalLM.from_pretrained("mistralai/Mistral-7B-v0.1", device_map="auto")
|
||||
out = model(torch.tensor([input_ids])).logits
|
||||
input_ids = torch.tensor([input_ids]).to(model.model.embed_tokens.weight.device)
|
||||
out = model(input_ids).logits.cpu()
|
||||
# Expected mean on dim = -1
|
||||
EXPECTED_MEAN = torch.tensor([[-2.5548, -2.5737, -3.0600, -2.5906, -2.8478, -2.8118, -2.9325, -2.7694]])
|
||||
torch.testing.assert_close(out.mean(-1), EXPECTED_MEAN, atol=1e-2, rtol=1e-2)
|
||||
|
||||
Reference in New Issue
Block a user