VLM: special multimodal Tokenizer (#34461)
* kinda works * update * add tests * update * use special tokens in processors * typo * fix copies * fix * fix moshi after rebase * update * fix tests * update * Update docs/source/en/main_classes/tokenizer.md Co-authored-by: Arthur <48595927+ArthurZucker@users.noreply.github.com> * update docs * test for load time adding tokens * fix some more tests which are now fetched better * one more fix --------- Co-authored-by: Arthur <48595927+ArthurZucker@users.noreply.github.com>
This commit is contained in:
committed by
GitHub
parent
ef976a7e18
commit
187439c3fa
@@ -1537,7 +1537,7 @@ class LayoutLMv3TokenizationTest(TokenizerTesterMixin, unittest.TestCase):
|
||||
special_tokens_map = {}
|
||||
for token in special_tokens_list:
|
||||
# Get the private one to avoid unnecessary warnings.
|
||||
if getattr(tokenizer, f"_{token}") is not None:
|
||||
if getattr(tokenizer, token) is not None:
|
||||
special_token = getattr(tokenizer, token)
|
||||
special_tokens_map[special_token] = f"{special_token}a"
|
||||
|
||||
@@ -1549,7 +1549,7 @@ class LayoutLMv3TokenizationTest(TokenizerTesterMixin, unittest.TestCase):
|
||||
# Check the changes
|
||||
for token in special_tokens_list:
|
||||
# Get the private one to avoid unnecessary warnings.
|
||||
if getattr(tokenizer, f"_{token}") is None:
|
||||
if getattr(tokenizer, token) is None:
|
||||
continue
|
||||
special_token = getattr(tokenizer, token)
|
||||
if special_token in special_tokens_map:
|
||||
|
||||
Reference in New Issue
Block a user