[LlamaTokenizerFast] nit update post_processor on the fly (#23855)
* Update the processor when changing add_eos and add_bos * fixup * update * add a test * fix failing tests * fixup
This commit is contained in:
@@ -315,6 +315,39 @@ class LlamaIntegrationTest(unittest.TestCase):
|
||||
},
|
||||
)
|
||||
|
||||
def test_fast_special_tokens(self):
    """Check that the slow and fast Llama tokenizers agree on BOS/EOS handling.

    Covers three situations for the input ``"A sample test"``:

    * the default configuration, where only the leading id ``1`` is
      expected around the text ids ``[319, 4559, 1243]``;
    * toggling ``add_eos_token`` on an already-built tokenizer, which is
      expected to take effect on the next ``encode`` call (trailing id ``2``);
    * building fresh tokenizers with ``add_eos_token=True`` and
      ``add_bos_token=False``, where the leading ``1`` is expected to be
      dropped and the trailing ``2`` added.

    The ``add_eos_token`` flag of ``self.tokenizer`` and
    ``self.rust_tokenizer`` is reset to ``False`` in a ``finally`` clause so
    that a failing assertion cannot leave the fixtures in a modified state.
    NOTE(review): presumably these fixtures are shared with other tests in
    this class -- confirm against the class setup.
    """
    slow_tokenizer = self.tokenizer
    fast_tokenizer = self.rust_tokenizer

    try:
        # Baseline for the slow tokenizer before any flag is toggled.
        slow = slow_tokenizer.encode("A sample test", add_special_tokens=True)
        assert slow == [1, 319, 4559, 1243]

        fast_tokenizer.add_eos_token = False
        fast = fast_tokenizer.encode("A sample test", add_special_tokens=True)
        assert fast == [1, 319, 4559, 1243]

        # Flipping the flag after construction must change the very next encode.
        fast_tokenizer.add_eos_token = True
        fast = fast_tokenizer.encode("A sample test", add_special_tokens=True)
        assert fast == [1, 319, 4559, 1243, 2]

        slow_tokenizer.add_eos_token = True
        slow = slow_tokenizer.encode("A sample test", add_special_tokens=True)
        assert slow == [1, 319, 4559, 1243, 2]

        # Same flags passed at load time instead of being set afterwards.
        fast_no_bos = LlamaTokenizerFast.from_pretrained(
            "hf-internal-testing/llama-tokenizer", add_eos_token=True, add_bos_token=False
        )
        fast = fast_no_bos.encode("A sample test", add_special_tokens=True)
        assert fast == [319, 4559, 1243, 2]

        slow_no_bos = LlamaTokenizer.from_pretrained(
            "hf-internal-testing/llama-tokenizer", add_eos_token=True, add_bos_token=False
        )
        slow = slow_no_bos.encode("A sample test", add_special_tokens=True)
        assert slow == [319, 4559, 1243, 2]
    finally:
        # Always restore the fixtures, even when an assertion above fails.
        self.tokenizer.add_eos_token = False
        self.rust_tokenizer.add_eos_token = False
|
||||
|
||||
@slow
|
||||
def test_conversion(self):
|
||||
# This is excruciatingly slow since it has to recreate the entire merge
|
||||
|
||||
Reference in New Issue
Block a user