Flaubert auto tokenizer + tests

cc @julien-c
This commit is contained in:
Lysandre
2020-01-31 14:16:34 -05:00
parent d18d47be67
commit 1e82cd8457
4 changed files with 53 additions and 2 deletions

View File

@@ -39,6 +39,14 @@ if is_torch_available():
BertForQuestionAnswering,
)
from transformers.modeling_bert import BERT_PRETRAINED_MODEL_ARCHIVE_MAP
from transformers.modeling_auto import (
MODEL_MAPPING,
MODEL_FOR_PRETRAINING_MAPPING,
MODEL_FOR_QUESTION_ANSWERING_MAPPING,
MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING,
MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING,
MODEL_WITH_LM_HEAD_MAPPING,
)
@require_torch
@@ -127,3 +135,26 @@ class AutoModelTest(unittest.TestCase):
self.assertIsInstance(model, RobertaForMaskedLM)
self.assertEqual(model.num_parameters(), 14830)
self.assertEqual(model.num_parameters(only_trainable=True), 14830)
def test_parents_and_children_in_mappings(self):
    """Check that no mapping entry is a subclass of an entry listed before it.

    Children must be placed before their parents in the auto-model mappings:
    an `isinstance` check against a parent class listed first would also match
    the child and yield the wrong configuration type when using auto models.
    """
    all_mappings = (
        MODEL_MAPPING,
        MODEL_FOR_PRETRAINING_MAPPING,
        MODEL_FOR_QUESTION_ANSWERING_MAPPING,
        MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING,
        MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING,
        MODEL_WITH_LM_HEAD_MAPPING,
    )

    for auto_mapping in all_mappings:
        entries = tuple(auto_mapping.items())
        # Compare every entry (from the second one on) with all entries that precede it.
        for position in range(1, len(entries)):
            later_config, later_model = entries[position]
            for earlier_config, earlier_model in entries[:position]:
                label = "Testing if {} is child of {}".format(later_config.__name__, earlier_config.__name__)
                with self.subTest(msg=label):
                    # A later entry deriving from an earlier one means the parent comes first.
                    self.assertFalse(issubclass(later_config, earlier_config))
                    self.assertFalse(issubclass(later_model, earlier_model))

View File

@@ -25,6 +25,7 @@ from transformers import (
GPT2Tokenizer,
RobertaTokenizer,
)
from transformers.tokenization_auto import TOKENIZER_MAPPING
from .utils import DUMMY_UNKWOWN_IDENTIFIER, SMALL_MODEL_IDENTIFIER, slow # noqa: F401
@@ -70,3 +71,19 @@ class AutoTokenizerTest(unittest.TestCase):
for tokenizer_class in [BertTokenizer, AutoTokenizer]:
with self.assertRaises(EnvironmentError):
_ = tokenizer_class.from_pretrained("julien-c/herlolip-not-exists")
def test_parents_and_children_in_mappings(self):
    """Check that no `TOKENIZER_MAPPING` entry is a subclass of an entry listed before it.

    Children must be placed before their parents in the mapping: an
    `isinstance` check against a parent class listed first would also match
    the child and yield the wrong type when using the auto classes.
    """
    all_mappings = (TOKENIZER_MAPPING,)

    for auto_mapping in all_mappings:
        entries = tuple(auto_mapping.items())
        # Compare every entry (from the second one on) with all entries that precede it.
        for position in range(1, len(entries)):
            later_key, later_value = entries[position]
            for earlier_key, earlier_value in entries[:position]:
                label = "Testing if {} is child of {}".format(later_key.__name__, earlier_key.__name__)
                with self.subTest(msg=label):
                    # Both the key classes and the value classes must respect the ordering.
                    self.assertFalse(issubclass(later_key, earlier_key))
                    self.assertFalse(issubclass(later_value, earlier_value))