add a test checking the format of convert_tokens_to_string's output (#16540)

* add new tests * add comment to overridden tests
2022-04-04 16:57:24 +02:00
parent 24a85cca61
commit be9474bd35
5 changed files with 53 additions and 0 deletions
--- a/tests/perceiver/test_tokenization_perceiver.py
+++ b/tests/perceiver/test_tokenization_perceiver.py
@@ -286,3 +286,14 @@ class PerceiverTokenizationTest(TokenizerTesterMixin, unittest.TestCase):
    # tests all ids in vocab => vocab doesn't exist so unnecessary to test
    def test_conversion_reversible(self):
        pass
+
+    def test_convert_tokens_to_string_format(self):
+        # The default common tokenizer test uses tokens that are invalid for Perceiver, which accepts only
+        # one-character strings and special added tokens as tokens
+        tokenizers = self.get_tokenizers(fast=True, do_lower_case=True)
+        for tokenizer in tokenizers:
+            with self.subTest(f"{tokenizer.__class__.__name__}"):
+                tokens = ["[CLS]", "t", "h", "i", "s", " ", "i", "s", " ", "a", " ", "t", "e", "s", "t", "[SEP]"]
+                string = tokenizer.convert_tokens_to_string(tokens)
+
+                self.assertIsInstance(string, str)