From b99098abc75057f9c0026bc51b06b36e4e43417a Mon Sep 17 00:00:00 2001
From: Lysandre Debut
Date: Mon, 10 Aug 2020 10:39:17 -0400
Subject: [PATCH] Patch models (#6326)

* TFAlbertFor{TokenClassification, MultipleChoice}

* Patch models

* BERT and TF BERT info s

* Update check_repo
---
Reviewer notes (this area is ignored by `git am`):
- The two messages added to src/transformers/modeling_bert.py had been
  copy-pasted from the TF file. The `BertForMaskedLM` message now names
  `BertForMaskedLM` (not `TFBertForMaskedLM`), and the `BertLMHeadModel`
  message keeps the full stop outside the `is_decoder=True` code span, as in
  the assert it replaces.
- The `index` blob ids below are the original ones and no longer match the
  post-image of modeling_bert.py.

 src/transformers/modeling_bert.py    | 13 +++++++++----
 src/transformers/modeling_tf_bert.py | 13 +++++++++----
 tests/test_modeling_tf_albert.py     | 17 +++++++++++++++++
 tests/test_modeling_tf_bert.py       |  1 +
 tests/test_modeling_tf_electra.py    |  1 +
 utils/check_repo.py                  |  6 ------
 6 files changed, 37 insertions(+), 14 deletions(-)

diff --git a/src/transformers/modeling_bert.py b/src/transformers/modeling_bert.py
index fb2a2a510e..9d46495fbd 100644
--- a/src/transformers/modeling_bert.py
+++ b/src/transformers/modeling_bert.py
@@ -933,7 +933,9 @@ class BertForPreTraining(BertPreTrainedModel):
 class BertLMHeadModel(BertPreTrainedModel):
     def __init__(self, config):
         super().__init__(config)
-        assert config.is_decoder, "If you want to use `BertLMHeadModel` as a standalone, add `is_decoder=True`."
+
+        if not config.is_decoder:
+            logger.info("If you want to use `BertLMHeadModel` as a standalone, add `is_decoder=True`.")
 
         self.bert = BertModel(config)
         self.cls = BertOnlyMLMHead(config)
@@ -1036,9 +1038,12 @@ class BertLMHeadModel(BertPreTrainedModel):
 class BertForMaskedLM(BertPreTrainedModel):
     def __init__(self, config):
         super().__init__(config)
-        assert (
-            not config.is_decoder
-        ), "If you want to use `BertForMaskedLM` make sure `config.is_decoder=False` for bi-directional self-attention."
+
+        if config.is_decoder:
+            logger.info(
+                "If you want to use `BertForMaskedLM` make sure `config.is_decoder=False` for "
+                "bi-directional self-attention."
+            )
 
         self.bert = BertModel(config)
         self.cls = BertOnlyMLMHead(config)
diff --git a/src/transformers/modeling_tf_bert.py b/src/transformers/modeling_tf_bert.py
index 6768c6765c..532997e097 100644
--- a/src/transformers/modeling_tf_bert.py
+++ b/src/transformers/modeling_tf_bert.py
@@ -860,9 +860,12 @@ class TFBertForPreTraining(TFBertPreTrainedModel):
 class TFBertForMaskedLM(TFBertPreTrainedModel, TFMaskedLanguageModelingLoss):
     def __init__(self, config, *inputs, **kwargs):
         super().__init__(config, *inputs, **kwargs)
-        assert (
-            not config.is_decoder
-        ), "If you want to use `BertForMaskedLM` make sure `config.is_decoder=False` for bi-directional self-attention."
+
+        if config.is_decoder:
+            logger.info(
+                "If you want to use `TFBertForMaskedLM` make sure `config.is_decoder=False` for "
+                "bi-directional self-attention."
+            )
 
         self.bert = TFBertMainLayer(config, name="bert")
         self.mlm = TFBertMLMHead(config, self.bert.embeddings, name="mlm___cls")
@@ -936,7 +939,9 @@ class TFBertForMaskedLM(TFBertPreTrainedModel, TFMaskedLanguageModelingLoss):
 class TFBertLMHeadModel(TFBertPreTrainedModel, TFCausalLanguageModelingLoss):
     def __init__(self, config, *inputs, **kwargs):
         super().__init__(config, *inputs, **kwargs)
-        assert config.is_decoder, "If you want to use `TFBertLMHeadModel` as a standalone, add `is_decoder=True.`"
+
+        if not config.is_decoder:
+            logger.info("If you want to use `TFBertLMHeadModel` as a standalone, add `is_decoder=True.`")
 
         self.bert = TFBertMainLayer(config, name="bert")
         self.mlm = TFBertMLMHead(config, self.bert.embeddings, name="mlm___cls")
diff --git a/tests/test_modeling_tf_albert.py b/tests/test_modeling_tf_albert.py
index 58a832cfdc..6da6556b26 100644
--- a/tests/test_modeling_tf_albert.py
+++ b/tests/test_modeling_tf_albert.py
@@ -32,6 +32,7 @@ if is_tf_available():
         TFAlbertForMultipleChoice,
         TFAlbertForSequenceClassification,
         TFAlbertForQuestionAnswering,
+        TFAlbertForTokenClassification,
         TF_ALBERT_PRETRAINED_MODEL_ARCHIVE_LIST,
     )
 
@@ -109,6 +110,7 @@ class TFAlbertModelTester:
         config = AlbertConfig(
             vocab_size=self.vocab_size,
             hidden_size=self.hidden_size,
+            embedding_size=self.embedding_size,
             num_hidden_layers=self.num_hidden_layers,
             num_attention_heads=self.num_attention_heads,
             intermediate_size=self.intermediate_size,
@@ -198,6 +200,19 @@ class TFAlbertModelTester:
         result = model(inputs)
         self.parent.assertListEqual(list(result["logits"].shape), [self.batch_size, self.num_choices])
 
+    def create_and_check_albert_for_token_classification(
+        self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
+    ):
+        config.num_labels = self.num_labels
+        model = TFAlbertForTokenClassification(config=config)
+        inputs = {
+            "input_ids": input_ids,
+            "attention_mask": input_mask,
+            "token_type_ids": token_type_ids,
+        }
+        result = model(inputs)
+        self.parent.assertListEqual(list(result["logits"].shape), [self.batch_size, self.seq_length, self.num_labels])
+
     def prepare_config_and_inputs_for_common(self):
         config_and_inputs = self.prepare_config_and_inputs()
         (
@@ -223,6 +238,8 @@ class TFAlbertModelTest(TFModelTesterMixin, unittest.TestCase):
             TFAlbertForMaskedLM,
             TFAlbertForSequenceClassification,
             TFAlbertForQuestionAnswering,
+            TFAlbertForTokenClassification,
+            TFAlbertForMultipleChoice,
         )
         if is_tf_available()
         else ()
diff --git a/tests/test_modeling_tf_bert.py b/tests/test_modeling_tf_bert.py
index 5026ce55fb..d759b3cdf3 100644
--- a/tests/test_modeling_tf_bert.py
+++ b/tests/test_modeling_tf_bert.py
@@ -265,6 +265,7 @@ class TFBertModelTest(TFModelTesterMixin, unittest.TestCase):
         (
             TFBertModel,
             TFBertForMaskedLM,
+            TFBertLMHeadModel,
             TFBertForNextSentencePrediction,
             TFBertForPreTraining,
             TFBertForQuestionAnswering,
diff --git a/tests/test_modeling_tf_electra.py b/tests/test_modeling_tf_electra.py
index 90bb7277f4..9422c8794e 100644
--- a/tests/test_modeling_tf_electra.py
+++ b/tests/test_modeling_tf_electra.py
@@ -202,6 +202,7 @@ class TFElectraModelTest(TFModelTesterMixin, unittest.TestCase):
             TFElectraForTokenClassification,
             TFElectraForMultipleChoice,
             TFElectraForSequenceClassification,
+            TFElectraForQuestionAnswering,
         )
         if is_tf_available()
         else ()
diff --git a/utils/check_repo.py b/utils/check_repo.py
index ca3743b265..fa3d042055 100644
--- a/utils/check_repo.py
+++ b/utils/check_repo.py
@@ -18,12 +18,6 @@ IGNORE_NON_TESTED = [
     "DPRSpanPredictor",  # Building part of bigger (tested) model.
     "ReformerForMaskedLM",  # Needs to be setup as decoder.
     "T5Stack",  # Building part of bigger (tested) model.
-    "TFAlbertForMultipleChoice",  # TODO: fix
-    "TFAlbertForTokenClassification",  # TODO: fix
-    "TFBertLMHeadModel",  # TODO: fix
-    "TFElectraForMultipleChoice",  # Fix is in #6284
-    "TFElectraForQuestionAnswering",  # TODO: fix
-    "TFElectraForSequenceClassification",  # Fix is in #6284
     "TFElectraMainLayer",  # Building part of bigger (tested) model (should it be a TFPreTrainedModel ?)
     "TFRobertaForMultipleChoice",  # TODO: fix
 ]