Update-llama-code (#25826)

* some bug fixes * updates * Update code_llama.md Co-authored-by: Omar Sanseviero <osanseviero@users.noreply.github.com> * Add co author Co-authored-by: pcuenca <pedro@latenitesoft.com> * add a test * fixup * nits * some updates * fix-coies * adress comments * nits * nits * fix docsting * Apply suggestions from code review Co-authored-by: amyeroberts <22614925+amyeroberts@users.noreply.github.com> * update * add int for https://huggingface.co/spaces/hf-accelerate/model-memory-usage --------- Co-authored-by: Omar Sanseviero <osanseviero@users.noreply.github.com> Co-authored-by: pcuenca <pedro@latenitesoft.com> Co-authored-by: amyeroberts <22614925+amyeroberts@users.noreply.github.com>
2023-09-01 20:40:40 +02:00
parent 3587769c08
commit a4dd53d88e
4 changed files with 88 additions and 46 deletions
--- a/tests/models/code_llama/test_tokenization_code_llama.py
+++ b/tests/models/code_llama/test_tokenization_code_llama.py
@@ -65,6 +65,11 @@ class CodeLlamaTokenizationTest(TokenizerTesterMixin, unittest.TestCase):
        tokenizer.pad_token = tokenizer.eos_token
        tokenizer.save_pretrained(self.tmpdirname)

+    def test_no_infilling_init(self):
+        tokenizer = CodeLlamaTokenizer(SAMPLE_VOCAB, prefix_token=None, keep_accents=True)
+        with self.assertRaises(ValueError):
+            tokenizer.tokenize("This is <FILL_ME> prefix")
+
    def test_full_tokenizer(self):
        tokenizer = CodeLlamaTokenizer(SAMPLE_VOCAB, keep_accents=True)

@@ -587,8 +592,8 @@ split,
 end
 """,
        ]
-        tokenizer = CodeLlamaTokenizer.from_pretrained("codellama/CodeLlama-7b-hf")
-        tokenizer_fast = CodeLlamaTokenizerFast.from_pretrained("codellama/CodeLlama-7b-hf")
+        tokenizer = CodeLlamaTokenizer.from_pretrained("codellama/CodeLlama-7b-Instruct-hf")
+        tokenizer_fast = CodeLlamaTokenizerFast.from_pretrained("codellama/CodeLlama-7b-Instruct-hf")

        formatted_prompt = tokenizer.tokenize(PROMPTS[0])
        self.assertEqual(formatted_prompt, tokenizer_fast.tokenize(PROMPTS[0]))