From 781bbc4d980abd2b21c332fd3122b733dba35d10 Mon Sep 17 00:00:00 2001 From: Ita Zaporozhets <31893021+itazap@users.noreply.github.com> Date: Wed, 11 Sep 2024 05:27:00 -0400 Subject: [PATCH] use diff internal model in tests (#33387) * use diff internal model in tests * use diff internal model in tests --- tests/models/llama/test_tokenization_llama.py | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/tests/models/llama/test_tokenization_llama.py b/tests/models/llama/test_tokenization_llama.py index a4b6c8ebeb..c7e8b5e860 100644 --- a/tests/models/llama/test_tokenization_llama.py +++ b/tests/models/llama/test_tokenization_llama.py @@ -846,7 +846,8 @@ class TikTokenIntegrationTests(unittest.TestCase): """ def test_tiktoken_llama(self): - model_path = "hf-internal-testing/Llama3-Instruct-Internal" + model_path = "hf-internal-testing/llama-3-8b-internal" + subfolder = "original" test_text = "This is a test sentence." test_tokens = [128000, 2028, 374, 264, 1296, 11914, 13, 128001] num_reserved_special_tokens = 256 @@ -866,6 +867,7 @@ class TikTokenIntegrationTests(unittest.TestCase): tiktoken_tokenizer = PreTrainedTokenizerFast.from_pretrained( model_path, + subfolder=subfolder, additional_special_tokens=special_tokens, bos_token="<|begin_of_text|>", eos_token="<|end_of_text|>", @@ -874,7 +876,14 @@ class TikTokenIntegrationTests(unittest.TestCase): self.assertEqual(tokens[0], "<|begin_of_text|>") tiktoken_tokenizer = AutoTokenizer.from_pretrained( - model_path, legacy=False, additional_special_tokens=special_tokens, add_bos_token=True, add_eos_token=True + model_path, + subfolder=subfolder, + legacy=False, + additional_special_tokens=special_tokens, + bos_token="<|begin_of_text|>", + eos_token="<|end_of_text|>", + add_bos_token=True, + add_eos_token=True, ) self.assertTrue(isinstance(tiktoken_tokenizer, PreTrainedTokenizerFast)) @@ -892,7 +901,10 @@ class TikTokenIntegrationTests(unittest.TestCase): tiktoken_tokenizer = AutoTokenizer.from_pretrained( model_path, + subfolder=subfolder, additional_special_tokens=special_tokens, + bos_token="<|begin_of_text|>", + eos_token="<|end_of_text|>", from_slow=True, add_bos_token=True, add_eos_token=True,