Update-llama-code (#25826)
* some bug fixes * updates * Update code_llama.md Co-authored-by: Omar Sanseviero <osanseviero@users.noreply.github.com> * Add co author Co-authored-by: pcuenca <pedro@latenitesoft.com> * add a test * fixup * nits * some updates * fix-copies * address comments * nits * nits * fix docstring * Apply suggestions from code review Co-authored-by: amyeroberts <22614925+amyeroberts@users.noreply.github.com> * update * add int for https://huggingface.co/spaces/hf-accelerate/model-memory-usage --------- Co-authored-by: Omar Sanseviero <osanseviero@users.noreply.github.com> Co-authored-by: pcuenca <pedro@latenitesoft.com> Co-authored-by: amyeroberts <22614925+amyeroberts@users.noreply.github.com>
This commit is contained in:
@@ -65,6 +65,11 @@ class CodeLlamaTokenizationTest(TokenizerTesterMixin, unittest.TestCase):
|
||||
tokenizer.pad_token = tokenizer.eos_token
|
||||
tokenizer.save_pretrained(self.tmpdirname)
|
||||
|
||||
def test_no_infilling_init(self):
|
||||
tokenizer = CodeLlamaTokenizer(SAMPLE_VOCAB, prefix_token=None, keep_accents=True)
|
||||
with self.assertRaises(ValueError):
|
||||
tokenizer.tokenize("This is <FILL_ME> prefix")
|
||||
|
||||
def test_full_tokenizer(self):
|
||||
tokenizer = CodeLlamaTokenizer(SAMPLE_VOCAB, keep_accents=True)
|
||||
|
||||
@@ -587,8 +592,8 @@ split,
|
||||
end
|
||||
""",
|
||||
]
|
||||
tokenizer = CodeLlamaTokenizer.from_pretrained("codellama/CodeLlama-7b-hf")
|
||||
tokenizer_fast = CodeLlamaTokenizerFast.from_pretrained("codellama/CodeLlama-7b-hf")
|
||||
tokenizer = CodeLlamaTokenizer.from_pretrained("codellama/CodeLlama-7b-Instruct-hf")
|
||||
tokenizer_fast = CodeLlamaTokenizerFast.from_pretrained("codellama/CodeLlama-7b-Instruct-hf")
|
||||
|
||||
formatted_prompt = tokenizer.tokenize(PROMPTS[0])
|
||||
self.assertEqual(formatted_prompt, tokenizer_fast.tokenize(PROMPTS[0]))
|
||||
|
||||
Reference in New Issue
Block a user