Remove unnecessary unsqueeze-squeeze in rotary positional embedding (#26162)

* remove unnecessary unsqueeze-squeeze in llama
* correct other models
* fix
* revert gpt_neox_japanese
* fix copies
* fix test
2023-10-06 11:25:15 +02:00
parent 65aabafe2f
commit 64845307b3
9 changed files with 67 additions and 83 deletions
--- a/tests/models/mistral/test_modeling_mistral.py
+++ b/tests/models/mistral/test_modeling_mistral.py
@@ -430,7 +430,8 @@ class MistralIntegrationTest(unittest.TestCase):
    def test_model_7b_logits(self):
        input_ids = [1, 306, 4658, 278, 6593, 310, 2834, 338]
        model = MistralForCausalLM.from_pretrained("mistralai/Mistral-7B-v0.1", device_map="auto")
-        out = model(torch.tensor([input_ids])).logits
+        input_ids = torch.tensor([input_ids]).to(model.model.embed_tokens.weight.device)
+        out = model(input_ids).logits.cpu()
        # Expected mean on dim = -1
        EXPECTED_MEAN = torch.tensor([[-2.5548, -2.5737, -3.0600, -2.5906, -2.8478, -2.8118, -2.9325, -2.7694]])
        torch.testing.assert_close(out.mean(-1), EXPECTED_MEAN, atol=1e-2, rtol=1e-2)