Updating the TensorFlow models to work as expected with tokenizers v3.0.0 (#3684)

* Updating modeling tf files; adding tests
* Merge `encode_plus` and `batch_encode_plus`
2020-04-08 16:22:44 -04:00
parent 500aa12318
commit 6435b9f908
14 changed files with 129 additions and 12 deletions
--- a/tests/test_tokenization_distilbert.py
+++ b/tests/test_tokenization_distilbert.py
@@ -14,7 +14,7 @@
 # limitations under the License.


-from transformers.tokenization_distilbert import DistilBertTokenizer
+from transformers.tokenization_distilbert import DistilBertTokenizer, DistilBertTokenizerFast

 from .test_tokenization_bert import BertTokenizationTest
 from .utils import slow
@@ -27,6 +27,9 @@ class DistilBertTokenizationTest(BertTokenizationTest):
    def get_tokenizer(self, **kwargs):
        return DistilBertTokenizer.from_pretrained(self.tmpdirname, **kwargs)

+    def get_rust_tokenizer(self, **kwargs):
+        return DistilBertTokenizerFast.from_pretrained(self.tmpdirname, **kwargs)
+
    @slow
    def test_sequence_builders(self):
        tokenizer = DistilBertTokenizer.from_pretrained("distilbert-base-uncased")