* add prefix space ignored in llama #29625
* adding test with add_prefix_space=False
* ruff

---------

Co-authored-by: Ita Zaporozhets <itazaporozhets@Itas-MBP.localdomain>
This commit is contained in:
@@ -163,6 +163,7 @@ class LlamaTokenizerFast(PreTrainedTokenizerFast):
|
|||||||
add_bos_token=add_bos_token,
|
add_bos_token=add_bos_token,
|
||||||
add_eos_token=add_eos_token,
|
add_eos_token=add_eos_token,
|
||||||
use_default_system_prompt=use_default_system_prompt,
|
use_default_system_prompt=use_default_system_prompt,
|
||||||
|
add_prefix_space=add_prefix_space,
|
||||||
legacy=legacy,
|
legacy=legacy,
|
||||||
**kwargs,
|
**kwargs,
|
||||||
)
|
)
|
||||||
|
|||||||
@@ -602,6 +602,10 @@ class LlamaIntegrationTest(unittest.TestCase):
|
|||||||
self.assertEqual(decoded_tokens, "hello")
|
self.assertEqual(decoded_tokens, "hello")
|
||||||
|
|
||||||
def test_no_prefix_space(self):
|
def test_no_prefix_space(self):
|
||||||
|
tokenizer_no_prefix_space = LlamaTokenizerFast.from_pretrained("huggyllama/llama-7b", add_prefix_space=False)
|
||||||
|
no_prefix_space_tokens = tokenizer_no_prefix_space.tokenize("Hey")
|
||||||
|
self.assertEqual(no_prefix_space_tokens, ["H", "ey"])
|
||||||
|
|
||||||
tokenizer = LlamaTokenizerFast.from_pretrained(
|
tokenizer = LlamaTokenizerFast.from_pretrained(
|
||||||
"huggyllama/llama-7b", legacy=False, from_slow=True, add_prefix_space=False
|
"huggyllama/llama-7b", legacy=False, from_slow=True, add_prefix_space=False
|
||||||
)
|
)
|
||||||
|
|||||||
Reference in New Issue
Block a user