add a test checking the format of convert_tokens_to_string's output (#16540)
* add new tests * add comment to overridden tests
This commit is contained in:
@@ -286,3 +286,14 @@ class PerceiverTokenizationTest(TokenizerTesterMixin, unittest.TestCase):
|
||||
# The common test checks every id in the vocab; no vocab exists for this tokenizer, so the test is unnecessary
|
||||
def test_conversion_reversible(self):
|
||||
pass
|
||||
|
||||
def test_convert_tokens_to_string_format(self):
    # Overridden because the default common tokenizer test uses tokens that are invalid for Perceiver, which
    # can only accept one-character strings and special added tokens as tokens.
    for tok in self.get_tokenizers(fast=True, do_lower_case=True):
        with self.subTest(tok.__class__.__name__):
            # Unpacking the sentence yields one token per character, wrapped in the special tokens.
            sample_tokens = ["[CLS]", *"this is a test", "[SEP]"]
            rendered = tok.convert_tokens_to_string(sample_tokens)

            # Only the type of the result is checked here, not its contents.
            self.assertIsInstance(rendered, str)
|
||||
|
||||
Reference in New Issue
Block a user