MT5 should have an autotokenizer (#8743)
* MT5 should have an autotokenizer
* Different configurations should be able to point to the same tokenizers
This commit is contained in:
@@ -72,6 +72,7 @@ from .configuration_auto import (
|
|||||||
MarianConfig,
|
MarianConfig,
|
||||||
MBartConfig,
|
MBartConfig,
|
||||||
MobileBertConfig,
|
MobileBertConfig,
|
||||||
|
MT5Config,
|
||||||
OpenAIGPTConfig,
|
OpenAIGPTConfig,
|
||||||
PegasusConfig,
|
PegasusConfig,
|
||||||
ProphetNetConfig,
|
ProphetNetConfig,
|
||||||
@@ -173,6 +174,7 @@ TOKENIZER_MAPPING = OrderedDict(
|
|||||||
[
|
[
|
||||||
(RetriBertConfig, (RetriBertTokenizer, RetriBertTokenizerFast)),
|
(RetriBertConfig, (RetriBertTokenizer, RetriBertTokenizerFast)),
|
||||||
(T5Config, (T5Tokenizer, T5TokenizerFast)),
|
(T5Config, (T5Tokenizer, T5TokenizerFast)),
|
||||||
|
(MT5Config, (T5Tokenizer, T5TokenizerFast)),
|
||||||
(MobileBertConfig, (MobileBertTokenizer, MobileBertTokenizerFast)),
|
(MobileBertConfig, (MobileBertTokenizer, MobileBertTokenizerFast)),
|
||||||
(DistilBertConfig, (DistilBertTokenizer, DistilBertTokenizerFast)),
|
(DistilBertConfig, (DistilBertTokenizer, DistilBertTokenizerFast)),
|
||||||
(AlbertConfig, (AlbertTokenizer, AlbertTokenizerFast)),
|
(AlbertConfig, (AlbertTokenizer, AlbertTokenizerFast)),
|
||||||
|
|||||||
@@ -99,21 +99,13 @@ class AutoTokenizerTest(unittest.TestCase):
|
|||||||
|
|
||||||
for mapping in mappings:
|
for mapping in mappings:
|
||||||
mapping = tuple(mapping.items())
|
mapping = tuple(mapping.items())
|
||||||
for index, (child_config, (child_model_py, child_model_fast)) in enumerate(mapping[1:]):
|
for index, (child_config, _) in enumerate(mapping[1:]):
|
||||||
for parent_config, (parent_model_py, parent_model_fast) in mapping[: index + 1]:
|
for parent_config, _ in mapping[: index + 1]:
|
||||||
with self.subTest(
|
with self.subTest(
|
||||||
msg="Testing if {} is child of {}".format(child_config.__name__, parent_config.__name__)
|
msg="Testing if {} is child of {}".format(child_config.__name__, parent_config.__name__)
|
||||||
):
|
):
|
||||||
self.assertFalse(issubclass(child_config, parent_config))
|
self.assertFalse(issubclass(child_config, parent_config))
|
||||||
|
|
||||||
# Check for Slow tokenizer implementation if provided
|
|
||||||
if child_model_py and parent_model_py:
|
|
||||||
self.assertFalse(issubclass(child_model_py, parent_model_py))
|
|
||||||
|
|
||||||
# Check for Fast tokenizer implementation if provided
|
|
||||||
if child_model_fast and parent_model_fast:
|
|
||||||
self.assertFalse(issubclass(child_model_fast, parent_model_fast))
|
|
||||||
|
|
||||||
@require_tokenizers
|
@require_tokenizers
|
||||||
def test_from_pretrained_use_fast_toggle(self):
|
def test_from_pretrained_use_fast_toggle(self):
|
||||||
self.assertIsInstance(AutoTokenizer.from_pretrained("bert-base-cased", use_fast=False), BertTokenizer)
|
self.assertIsInstance(AutoTokenizer.from_pretrained("bert-base-cased", use_fast=False), BertTokenizer)
|
||||||
|
|||||||
Reference in New Issue
Block a user