add a test checking the format of convert_tokens_to_string's output (#16540)

* add new tests

* add comment to overridden tests
This commit is contained in:
SaulLu
2022-04-04 16:57:24 +02:00
committed by GitHub
parent 24a85cca61
commit be9474bd35
5 changed files with 53 additions and 0 deletions

View File

@@ -286,3 +286,14 @@ class PerceiverTokenizationTest(TokenizerTesterMixin, unittest.TestCase):
# tests all ids in vocab => vocab doesn't exist so unnecessary to test
def test_conversion_reversible(self):
pass
def test_convert_tokens_to_string_format(self):
    """Check that ``convert_tokens_to_string`` returns a ``str`` for each tokenizer under test.

    The default common tokenizer test uses tokens that are invalid for Perceiver, which can only accept
    one-character strings and special added tokens as tokens, so the input here is built from those only.
    """
    sentence = "this is a test"
    for tok in self.get_tokenizers(fast=True, do_lower_case=True):
        with self.subTest(tok.__class__.__name__):
            # One token per character of the sentence, wrapped in the special tokens; built anew for each
            # tokenizer so every call receives its own list.
            char_tokens = ["[CLS]", *sentence, "[SEP]"]
            decoded = tok.convert_tokens_to_string(char_tokens)
            self.assertIsInstance(decoded, str)