From a1844a3209eb7e75582684809203bc189931a90c Mon Sep 17 00:00:00 2001 From: Ita Zaporozhets <31893021+itazap@users.noreply.github.com> Date: Tue, 23 Jul 2024 11:45:54 +0200 Subject: [PATCH] gguf conversion add_prefix_space=None for llama3 (#31937) * gguf conversion forces add_prefix_space=False for llama3, this is not required and forces from_slow, which fails. changing to None + test * typo * clean test --- src/transformers/integrations/ggml.py | 2 +- tests/quantization/ggml/test_ggml.py | 11 +++++++---- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/src/transformers/integrations/ggml.py b/src/transformers/integrations/ggml.py index 5c2d72c345..71aa87afa9 100644 --- a/src/transformers/integrations/ggml.py +++ b/src/transformers/integrations/ggml.py @@ -609,7 +609,7 @@ class GGUFLlamaConverter(LlamaConverter): self.additional_kwargs["bos_token"] = eos_token if self.is_llama_3_tokenizer: - self.additional_kwargs["add_prefix_space"] = False + self.additional_kwargs["add_prefix_space"] = None self.additional_kwargs["clean_up_tokenization_spaces"] = True self.additional_kwargs["legacy"] = False diff --git a/tests/quantization/ggml/test_ggml.py b/tests/quantization/ggml/test_ggml.py index db96e9052c..a5866094a1 100644 --- a/tests/quantization/ggml/test_ggml.py +++ b/tests/quantization/ggml/test_ggml.py @@ -174,10 +174,13 @@ class GgufIntegrationTests(unittest.TestCase): self.assertEqual(tokenizer.decode(out[0], skip_special_tokens=True), EXPECTED_TEXT) def test_llama3_q4_0_tokenizer(self): - tokenizer_gguf = AutoTokenizer.from_pretrained(self.llama3_model_id, gguf_file=self.q4_llama3_model_id) - special_sentence = "สวัสดี" - predicted_text = tokenizer_gguf.decode(tokenizer_gguf.encode(special_sentence, return_tensors="pt")[0]) - self.assertEqual(predicted_text, "<|begin_of_text|>" + special_sentence) + tokenizer = AutoTokenizer.from_pretrained(self.llama3_model_id, gguf_file=self.q4_llama3_model_id) + with tempfile.TemporaryDirectory() as tmpdirname: + tokenizer.save_pretrained(tmpdirname) + tokenizer = AutoTokenizer.from_pretrained(tmpdirname) + special_sentence = "สวัสดี" + predicted_text = tokenizer.decode(tokenizer.encode(special_sentence, return_tensors="pt")[0]) + self.assertEqual(predicted_text, "<|begin_of_text|>" + special_sentence) def test_llama3_q4_0(self): tokenizer = AutoTokenizer.from_pretrained(self.llama3_model_id, gguf_file=self.q4_llama3_model_id)