[CodeLlamaTokenizer] Nit, update __init__ to make sure the AddedTokens are not normalized because they are special (#27359)

* make sure tokens are properly initialized for codellama slow

* add more pretrained models

* style

* test more tokenizers checkpoints
This commit is contained in:
Arthur
2023-11-09 10:15:10 +01:00
committed by GitHub
parent 7ecd229ba4
commit 085ea7e56c
2 changed files with 5 additions and 3 deletions

View File

@@ -150,6 +150,8 @@ class CodeLlamaTokenizationTest(TokenizerTesterMixin, unittest.TestCase):
self.tokenizers_list = [
(self.rust_tokenizer_class, "hf-internal-testing/llama-code-tokenizer", {}),
(self.tokenizer_class, "hf-internal-testing/llama-code-tokenizer", {}),
(self.tokenizer_class, "codellama/CodeLlama-34b-Instruct-hf", {}),
(self.rust_tokenizer_class, "codellama/CodeLlama-34b-Instruct-hf", {}),
]
for tokenizer, pretrained_name, kwargs in self.tokenizers_list:
with self.subTest(f"{tokenizer.__class__.__name__} ({pretrained_name})"):