Remove unnecessary unsqueeze - squeeze in rotary positional embedding (#26162)

* remove unnecessary unsqueeze-squeeze in llama

* correct other models

* fix

* revert gpt_neox_japanese

* fix copies

* fix test
This commit is contained in:
fxmarty
2023-10-06 11:25:15 +02:00
committed by GitHub
parent 65aabafe2f
commit 64845307b3
9 changed files with 67 additions and 83 deletions

View File

@@ -430,7 +430,8 @@ class MistralIntegrationTest(unittest.TestCase):
def test_model_7b_logits(self):
input_ids = [1, 306, 4658, 278, 6593, 310, 2834, 338]
model = MistralForCausalLM.from_pretrained("mistralai/Mistral-7B-v0.1", device_map="auto")
-out = model(torch.tensor([input_ids])).logits
+input_ids = torch.tensor([input_ids]).to(model.model.embed_tokens.weight.device)
+out = model(input_ids).logits.cpu()
# Expected mean on dim = -1
EXPECTED_MEAN = torch.tensor([[-2.5548, -2.5737, -3.0600, -2.5906, -2.8478, -2.8118, -2.9325, -2.7694]])
torch.testing.assert_close(out.mean(-1), EXPECTED_MEAN, atol=1e-2, rtol=1e-2)