[Almost all TF models] TF clean up: add missing CLM / MLM loss; fix T5 naming and keras compile (#5395)

* add first version of clm tf

* make style

* add more tests for bert

* update tf clm loss

* fix tests

* correct tf ner script

* add mlm loss

* delete bogus file

* clean tf auto model + add tests

* finish adding clm loss everywhere

* fix training in distilbert

* fix flake8

* save intermediate

* fix tf t5 naming

* remove prints

* finish up

* up

* fix tf gpt2

* fix new test utils import

* fix flake8

* keep backward compatibility

* Update src/transformers/modeling_tf_albert.py

Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com>

* Update src/transformers/modeling_tf_auto.py

Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com>

* Update src/transformers/modeling_tf_electra.py

Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com>

* Update src/transformers/modeling_tf_roberta.py

Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com>

* Update src/transformers/modeling_tf_mobilebert.py

Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com>

* Update src/transformers/modeling_tf_auto.py

Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com>

* Update src/transformers/modeling_tf_bert.py

Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com>

* Update src/transformers/modeling_tf_distilbert.py

Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com>

* apply sylvains suggestions

Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com>
This commit is contained in:
Patrick von Platen
2020-07-07 18:15:53 +02:00
committed by GitHub
parent 33e43edddc
commit 4dc65591b5
23 changed files with 1516 additions and 315 deletions

View File

@@ -24,6 +24,8 @@ if is_tf_available():
from transformers import (
AutoConfig,
BertConfig,
GPT2Config,
T5Config,
TFAutoModel,
TFBertModel,
TFAutoModelForPreTraining,
@@ -35,6 +37,25 @@ if is_tf_available():
TFBertForSequenceClassification,
TFAutoModelForQuestionAnswering,
TFBertForQuestionAnswering,
TFAutoModelForCausalLM,
TFGPT2LMHeadModel,
TFAutoModelForMaskedLM,
TFAutoModelForSeq2SeqLM,
TFT5ForConditionalGeneration,
)
from transformers.modeling_tf_bert import TF_BERT_PRETRAINED_MODEL_ARCHIVE_LIST
from transformers.modeling_tf_gpt2 import TF_GPT2_PRETRAINED_MODEL_ARCHIVE_LIST
from transformers.modeling_tf_t5 import TF_T5_PRETRAINED_MODEL_ARCHIVE_LIST
from transformers.modeling_tf_auto import (
TF_MODEL_MAPPING,
TF_MODEL_FOR_PRETRAINING_MAPPING,
TF_MODEL_FOR_QUESTION_ANSWERING_MAPPING,
TF_MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING,
TF_MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING,
TF_MODEL_WITH_LM_HEAD_MAPPING,
TF_MODEL_FOR_CAUSAL_LM_MAPPING,
TF_MODEL_FOR_MASKED_LM_MAPPING,
TF_MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING,
)
@@ -72,10 +93,21 @@ class TFAutoModelTest(unittest.TestCase):
self.assertIsNotNone(model)
self.assertIsInstance(model, TFBertForPreTraining)
@slow
def test_model_for_causal_lm(self):
for model_name in TF_GPT2_PRETRAINED_MODEL_ARCHIVE_LIST[:1]:
config = AutoConfig.from_pretrained(model_name)
self.assertIsNotNone(config)
self.assertIsInstance(config, GPT2Config)
model = TFAutoModelForCausalLM.from_pretrained(model_name)
model, loading_info = TFAutoModelForCausalLM.from_pretrained(model_name, output_loading_info=True)
self.assertIsNotNone(model)
self.assertIsInstance(model, TFGPT2LMHeadModel)
@slow
def test_lmhead_model_from_pretrained(self):
# for model_name in TF_BERT_PRETRAINED_MODEL_ARCHIVE_LIST[:1]:
for model_name in ["bert-base-uncased"]:
for model_name in TF_BERT_PRETRAINED_MODEL_ARCHIVE_LIST[:1]:
config = AutoConfig.from_pretrained(model_name)
self.assertIsNotNone(config)
self.assertIsInstance(config, BertConfig)
@@ -84,6 +116,30 @@ class TFAutoModelTest(unittest.TestCase):
self.assertIsNotNone(model)
self.assertIsInstance(model, TFBertForMaskedLM)
@slow
def test_model_for_masked_lm(self):
for model_name in TF_BERT_PRETRAINED_MODEL_ARCHIVE_LIST[:1]:
config = AutoConfig.from_pretrained(model_name)
self.assertIsNotNone(config)
self.assertIsInstance(config, BertConfig)
model = TFAutoModelForMaskedLM.from_pretrained(model_name)
model, loading_info = TFAutoModelForMaskedLM.from_pretrained(model_name, output_loading_info=True)
self.assertIsNotNone(model)
self.assertIsInstance(model, TFBertForMaskedLM)
@slow
def test_model_for_encoder_decoder_lm(self):
for model_name in TF_T5_PRETRAINED_MODEL_ARCHIVE_LIST[:1]:
config = AutoConfig.from_pretrained(model_name)
self.assertIsNotNone(config)
self.assertIsInstance(config, T5Config)
model = TFAutoModelForSeq2SeqLM.from_pretrained(model_name)
model, loading_info = TFAutoModelForSeq2SeqLM.from_pretrained(model_name, output_loading_info=True)
self.assertIsNotNone(model)
self.assertIsInstance(model, TFT5ForConditionalGeneration)
@slow
def test_sequence_classification_model_from_pretrained(self):
# for model_name in TF_BERT_PRETRAINED_MODEL_ARCHIVE_LIST[:1]:
@@ -119,3 +175,28 @@ class TFAutoModelTest(unittest.TestCase):
self.assertIsInstance(model, TFRobertaForMaskedLM)
self.assertEqual(model.num_parameters(), 14830)
self.assertEqual(model.num_parameters(only_trainable=True), 14830)
def test_parents_and_children_in_mappings(self):
# Test that the children are placed before the parents in the mappings, as the `instanceof` will be triggered
# by the parents and will return the wrong configuration type when using auto models
mappings = (
TF_MODEL_MAPPING,
TF_MODEL_FOR_PRETRAINING_MAPPING,
TF_MODEL_FOR_QUESTION_ANSWERING_MAPPING,
TF_MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING,
TF_MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING,
TF_MODEL_WITH_LM_HEAD_MAPPING,
TF_MODEL_FOR_CAUSAL_LM_MAPPING,
TF_MODEL_FOR_MASKED_LM_MAPPING,
TF_MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING,
)
for mapping in mappings:
mapping = tuple(mapping.items())
for index, (child_config, child_model) in enumerate(mapping[1:]):
for parent_config, parent_model in mapping[: index + 1]:
with self.subTest(
msg="Testing if {} is child of {}".format(child_config.__name__, parent_config.__name__)
):
self.assertFalse(issubclass(child_config, parent_config))
self.assertFalse(issubclass(child_model, parent_model))