Allow usage of TF Text BertTokenizer on TFBertTokenizer to make it servable on TF Serving (#19590)

* add support for non fast tf bert tokenizer * add tests for non fast tf bert tokenizer * fix fast bert tf tokenizer flag * double tokenizers list on tf tokenizers test to avoid breaking zip on test output equivalence * reformat code with black to comply with code quality checks * trigger ci
2022-10-14 11:18:02 -03:00
parent 59b7334c87
commit 0e0b7cb72a
2 changed files with 31 additions and 6 deletions
--- a/tests/models/bert/test_tokenization_bert_tf.py
+++ b/tests/models/bert/test_tokenization_bert_tf.py
@@ -40,8 +40,15 @@ class BertTokenizationTest(unittest.TestCase):
    def setUp(self):
        super().setUp()

-        self.tokenizers = [BertTokenizer.from_pretrained(checkpoint) for checkpoint in TOKENIZER_CHECKPOINTS]
-        self.tf_tokenizers = [TFBertTokenizer.from_pretrained(checkpoint) for checkpoint in TOKENIZER_CHECKPOINTS]
+        self.tokenizers = [
+            BertTokenizer.from_pretrained(checkpoint) for checkpoint in (TOKENIZER_CHECKPOINTS * 2)
+        ]  # repeat for when fast_bert_tokenizer=false
+        self.tf_tokenizers = [TFBertTokenizer.from_pretrained(checkpoint) for checkpoint in TOKENIZER_CHECKPOINTS] + [
+            TFBertTokenizer.from_pretrained(checkpoint, use_fast_bert_tokenizer=False)
+            for checkpoint in TOKENIZER_CHECKPOINTS
+        ]
+        assert len(self.tokenizers) == len(self.tf_tokenizers)
+
        self.test_sentences = [
            "This is a straightforward English test sentence.",
            "This one has some weird characters\rto\nsee\r\nif  those\u00E9break things.",