fix backend tokenizer args override: key mismatch (#10686)

* fix backend tokenizer args override: key mismatch

* no touching the docs

* fix mpnet

* add mpnet to test

* fix test

Co-authored-by: theo <theo@matussie.re>
This commit is contained in:
Théo Matussière
2021-03-19 03:13:45 +01:00
committed by GitHub
parent 427ea3fecb
commit 117dba9948
3 changed files with 15 additions and 4 deletions

View File

@@ -110,3 +110,14 @@ class AutoTokenizerTest(unittest.TestCase):
def test_from_pretrained_use_fast_toggle(self):
self.assertIsInstance(AutoTokenizer.from_pretrained("bert-base-cased", use_fast=False), BertTokenizer)
self.assertIsInstance(AutoTokenizer.from_pretrained("bert-base-cased"), BertTokenizerFast)
@require_tokenizers
def test_do_lower_case(self):
tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased", do_lower_case=False)
sample = "Hello, world. How are you?"
tokens = tokenizer.tokenize(sample)
self.assertEqual("[UNK]", tokens[0])
tokenizer = AutoTokenizer.from_pretrained("microsoft/mpnet-base", do_lower_case=False)
tokens = tokenizer.tokenize(sample)
self.assertEqual("[UNK]", tokens[0])