VLM: special multimodal Tokenizer (#34461)
* kinda works
* update
* add tests
* update
* use special tokens in processors
* typo
* fix copies
* fix
* fix moshi after rebase
* update
* fix tests
* update
* Update docs/source/en/main_classes/tokenizer.md

  Co-authored-by: Arthur <48595927+ArthurZucker@users.noreply.github.com>
* update docs
* test for load time adding tokens
* fix some more tests which are now fetched better
* one more fix

---------

Co-authored-by: Arthur <48595927+ArthurZucker@users.noreply.github.com>
This commit is contained in:
committed by
GitHub
parent
ef976a7e18
commit
187439c3fa
@@ -299,7 +299,7 @@ class DataCollatorIntegrationTest(unittest.TestCase):
|
||||
self.assertEqual(batch["input_ids"].shape, torch.Size((2, 16)))
|
||||
self.assertEqual(batch["labels"].shape, torch.Size((2, 16)))
|
||||
|
||||
tokenizer._pad_token = None
|
||||
tokenizer.pad_token = None
|
||||
data_collator = DataCollatorForLanguageModeling(tokenizer, mlm=False)
|
||||
with self.assertRaises(ValueError):
|
||||
# Expect error due to padding token missing
|
||||
@@ -978,7 +978,7 @@ class TFDataCollatorIntegrationTest(unittest.TestCase):
|
||||
self.assertEqual(batch["input_ids"].shape.as_list(), [2, 16])
|
||||
self.assertEqual(batch["labels"].shape.as_list(), [2, 16])
|
||||
|
||||
tokenizer._pad_token = None
|
||||
tokenizer.pad_token = None
|
||||
data_collator = DataCollatorForLanguageModeling(tokenizer, mlm=False, return_tensors="tf")
|
||||
with self.assertRaises(ValueError):
|
||||
# Expect error due to padding token missing
|
||||
@@ -1673,7 +1673,7 @@ class NumpyDataCollatorIntegrationTest(unittest.TestCase):
|
||||
self.assertEqual(batch["input_ids"].shape, (2, 16))
|
||||
self.assertEqual(batch["labels"].shape, (2, 16))
|
||||
|
||||
tokenizer._pad_token = None
|
||||
tokenizer.pad_token = None
|
||||
data_collator = DataCollatorForLanguageModeling(tokenizer, mlm=False, return_tensors="np")
|
||||
with self.assertRaises(ValueError):
|
||||
# Expect error due to padding token missing
|
||||
|
||||
Reference in New Issue
Block a user