MT5 should have an autotokenizer (#8743)

* MT5 should have an autotokenizer

* Different configurations should be able to point to the same tokenizers
This commit is contained in:
Lysandre Debut
2020-11-24 09:50:25 -05:00
committed by GitHub
parent 6fdd0bb231
commit e09e54fd9d
2 changed files with 4 additions and 10 deletions

View File

@@ -99,21 +99,13 @@ class AutoTokenizerTest(unittest.TestCase):
for mapping in mappings:
mapping = tuple(mapping.items())
for index, (child_config, (child_model_py, child_model_fast)) in enumerate(mapping[1:]):
for parent_config, (parent_model_py, parent_model_fast) in mapping[: index + 1]:
for index, (child_config, _) in enumerate(mapping[1:]):
for parent_config, _ in mapping[: index + 1]:
with self.subTest(
msg="Testing if {} is child of {}".format(child_config.__name__, parent_config.__name__)
):
self.assertFalse(issubclass(child_config, parent_config))
# Check for Slow tokenizer implementation if provided
if child_model_py and parent_model_py:
self.assertFalse(issubclass(child_model_py, parent_model_py))
# Check for Fast tokenizer implementation if provided
if child_model_fast and parent_model_fast:
self.assertFalse(issubclass(child_model_fast, parent_model_fast))
@require_tokenizers
def test_from_pretrained_use_fast_toggle(self):
self.assertIsInstance(AutoTokenizer.from_pretrained("bert-base-cased", use_fast=False), BertTokenizer)