From b99098abc75057f9c0026bc51b06b36e4e43417a Mon Sep 17 00:00:00 2001
From: Lysandre Debut <lysandre@huggingface.co>
Date: Mon, 10 Aug 2020 10:39:17 -0400
Subject: [PATCH] Patch models (#6326)

* TFAlbertFor{TokenClassification, MultipleChoice}

* Patch models

* BERT and TF BERT info

* Update check_repo
---
 src/transformers/modeling_bert.py    | 13 +++++++++----
 src/transformers/modeling_tf_bert.py | 13 +++++++++----
 tests/test_modeling_tf_albert.py     | 17 +++++++++++++++++
 tests/test_modeling_tf_bert.py       |  1 +
 tests/test_modeling_tf_electra.py    |  1 +
 utils/check_repo.py                  |  6 ------
 6 files changed, 37 insertions(+), 14 deletions(-)

diff --git a/src/transformers/modeling_bert.py b/src/transformers/modeling_bert.py
index fb2a2a510e..9d46495fbd 100644
--- a/src/transformers/modeling_bert.py
+++ b/src/transformers/modeling_bert.py
@@ -933,7 +933,9 @@ class BertForPreTraining(BertPreTrainedModel):
 class BertLMHeadModel(BertPreTrainedModel):
     def __init__(self, config):
         super().__init__(config)
-        assert config.is_decoder, "If you want to use `BertLMHeadModel` as a standalone, add `is_decoder=True`."
+
+        if not config.is_decoder:  # informational only: a non-decoder config is logged, not rejected
+            logger.info("If you want to use `BertLMHeadModel` as a standalone, add `is_decoder=True`.")
 
         self.bert = BertModel(config)
         self.cls = BertOnlyMLMHead(config)
@@ -1036,9 +1038,12 @@ class BertLMHeadModel(BertPreTrainedModel):
 class BertForMaskedLM(BertPreTrainedModel):
     def __init__(self, config):
         super().__init__(config)
-        assert (
-            not config.is_decoder
-        ), "If you want to use `BertForMaskedLM` make sure `config.is_decoder=False` for bi-directional self-attention."
+
+        if config.is_decoder:  # informational only: a decoder config is logged, not rejected
+            logger.info(
+                "If you want to use `BertForMaskedLM` make sure `config.is_decoder=False` for "
+                "bi-directional self-attention."
+            )
 
         self.bert = BertModel(config)
         self.cls = BertOnlyMLMHead(config)
diff --git a/src/transformers/modeling_tf_bert.py b/src/transformers/modeling_tf_bert.py
index 6768c6765c..532997e097 100644
--- a/src/transformers/modeling_tf_bert.py
+++ b/src/transformers/modeling_tf_bert.py
@@ -860,9 +860,12 @@ class TFBertForPreTraining(TFBertPreTrainedModel):
 class TFBertForMaskedLM(TFBertPreTrainedModel, TFMaskedLanguageModelingLoss):
     def __init__(self, config, *inputs, **kwargs):
         super().__init__(config, *inputs, **kwargs)
-        assert (
-            not config.is_decoder
-        ), "If you want to use `BertForMaskedLM` make sure `config.is_decoder=False` for bi-directional self-attention."
+
+        if config.is_decoder:
+            logger.info(
+                "If you want to use `TFBertForMaskedLM` make sure `config.is_decoder=False` for "
+                "bi-directional self-attention."
+            )
 
         self.bert = TFBertMainLayer(config, name="bert")
         self.mlm = TFBertMLMHead(config, self.bert.embeddings, name="mlm___cls")
@@ -936,7 +939,9 @@ class TFBertForMaskedLM(TFBertPreTrainedModel, TFMaskedLanguageModelingLoss):
 class TFBertLMHeadModel(TFBertPreTrainedModel, TFCausalLanguageModelingLoss):
     def __init__(self, config, *inputs, **kwargs):
         super().__init__(config, *inputs, **kwargs)
-        assert config.is_decoder, "If you want to use `TFBertLMHeadModel` as a standalone, add `is_decoder=True.`"
+
+        if not config.is_decoder:  # informational only: a non-decoder config is logged, not rejected
+            logger.info("If you want to use `TFBertLMHeadModel` as a standalone, add `is_decoder=True`.")
 
         self.bert = TFBertMainLayer(config, name="bert")
         self.mlm = TFBertMLMHead(config, self.bert.embeddings, name="mlm___cls")
diff --git a/tests/test_modeling_tf_albert.py b/tests/test_modeling_tf_albert.py
index 58a832cfdc..6da6556b26 100644
--- a/tests/test_modeling_tf_albert.py
+++ b/tests/test_modeling_tf_albert.py
@@ -32,6 +32,7 @@ if is_tf_available():
         TFAlbertForMultipleChoice,
         TFAlbertForSequenceClassification,
         TFAlbertForQuestionAnswering,
+        TFAlbertForTokenClassification,
         TF_ALBERT_PRETRAINED_MODEL_ARCHIVE_LIST,
     )
 
@@ -109,6 +110,7 @@ class TFAlbertModelTester:
         config = AlbertConfig(
             vocab_size=self.vocab_size,
             hidden_size=self.hidden_size,
+            embedding_size=self.embedding_size,
             num_hidden_layers=self.num_hidden_layers,
             num_attention_heads=self.num_attention_heads,
             intermediate_size=self.intermediate_size,
@@ -198,6 +200,19 @@ class TFAlbertModelTester:
         result = model(inputs)
         self.parent.assertListEqual(list(result["logits"].shape), [self.batch_size, self.num_choices])
 
+    def create_and_check_albert_for_token_classification(
+        self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
+    ):
+        config.num_labels = self.num_labels  # set on the config before the model is built from it
+        model = TFAlbertForTokenClassification(config=config)
+        inputs = {  # the *_labels arguments are accepted but not passed to the model here
+            "input_ids": input_ids,
+            "attention_mask": input_mask,
+            "token_type_ids": token_type_ids,
+        }
+        result = model(inputs)  # logits shape is asserted to be [batch_size, seq_length, num_labels]
+        self.parent.assertListEqual(list(result["logits"].shape), [self.batch_size, self.seq_length, self.num_labels])
+
     def prepare_config_and_inputs_for_common(self):
         config_and_inputs = self.prepare_config_and_inputs()
         (
@@ -223,6 +238,8 @@ class TFAlbertModelTest(TFModelTesterMixin, unittest.TestCase):
             TFAlbertForMaskedLM,
             TFAlbertForSequenceClassification,
             TFAlbertForQuestionAnswering,
+            TFAlbertForTokenClassification,
+            TFAlbertForMultipleChoice,
         )
         if is_tf_available()
         else ()
diff --git a/tests/test_modeling_tf_bert.py b/tests/test_modeling_tf_bert.py
index 5026ce55fb..d759b3cdf3 100644
--- a/tests/test_modeling_tf_bert.py
+++ b/tests/test_modeling_tf_bert.py
@@ -265,6 +265,7 @@ class TFBertModelTest(TFModelTesterMixin, unittest.TestCase):
         (
             TFBertModel,
             TFBertForMaskedLM,
+            TFBertLMHeadModel,
             TFBertForNextSentencePrediction,
             TFBertForPreTraining,
             TFBertForQuestionAnswering,
diff --git a/tests/test_modeling_tf_electra.py b/tests/test_modeling_tf_electra.py
index 90bb7277f4..9422c8794e 100644
--- a/tests/test_modeling_tf_electra.py
+++ b/tests/test_modeling_tf_electra.py
@@ -202,6 +202,7 @@ class TFElectraModelTest(TFModelTesterMixin, unittest.TestCase):
             TFElectraForTokenClassification,
             TFElectraForMultipleChoice,
             TFElectraForSequenceClassification,
+            TFElectraForQuestionAnswering,
         )
         if is_tf_available()
         else ()
diff --git a/utils/check_repo.py b/utils/check_repo.py
index ca3743b265..fa3d042055 100644
--- a/utils/check_repo.py
+++ b/utils/check_repo.py
@@ -18,12 +18,6 @@ IGNORE_NON_TESTED = [
     "DPRSpanPredictor",  # Building part of bigger (tested) model.
     "ReformerForMaskedLM",  # Needs to be setup as decoder.
     "T5Stack",  # Building part of bigger (tested) model.
-    "TFAlbertForMultipleChoice",  # TODO: fix
-    "TFAlbertForTokenClassification",  # TODO: fix
-    "TFBertLMHeadModel",  # TODO: fix
-    "TFElectraForMultipleChoice",  # Fix is in #6284
-    "TFElectraForQuestionAnswering",  # TODO: fix
-    "TFElectraForSequenceClassification",  # Fix is in #6284
     "TFElectraMainLayer",  # Building part of bigger (tested) model (should it be a TFPreTrainedModel ?)
     "TFRobertaForMultipleChoice",  # TODO: fix
 ]