Fix the incorrect permutation of gguf (#31788)

* Fix the incorrect permutation of gguf

* rename num_kv_heads

Co-authored-by: Marc Sun <57196510+SunMarc@users.noreply.github.com>

* add typing to num_kv_heads

Co-authored-by: Marc Sun <57196510+SunMarc@users.noreply.github.com>

* rename variables

* refactor permute function name

* update the expected text of the llama3 q4 test

---------

Co-authored-by: Marc Sun <57196510+SunMarc@users.noreply.github.com>
2024-07-16 14:20:34 +08:00
parent 6fbea6d237
commit ac946aac25
2 changed files with 19 additions and 6 deletions
--- a/tests/quantization/ggml/test_ggml.py
+++ b/tests/quantization/ggml/test_ggml.py
@@ -188,8 +188,7 @@ class GgufIntegrationTests(unittest.TestCase):
        text = tokenizer(self.example_text, return_tensors="pt").to(torch_device)
        out = model.generate(**text, max_new_tokens=10)

-        EXPECTED_TEXT = "Hello, I am new to this forum. I am"
-
+        EXPECTED_TEXT = "Hello, I am interested in [The Park]\nThe"
        self.assertEqual(tokenizer.decode(out[0], skip_special_tokens=True), EXPECTED_TEXT)

    def test_tokenization_xnli(self):