Patch models (#6326)
* TFAlbertFor{TokenClassification, MultipleChoice}
* Patch models
* BERT and TF BERT infos
* Update check_repo
This commit is contained in:
@@ -933,7 +933,9 @@ class BertForPreTraining(BertPreTrainedModel):
|
|||||||
class BertLMHeadModel(BertPreTrainedModel):
|
class BertLMHeadModel(BertPreTrainedModel):
|
||||||
def __init__(self, config):
|
def __init__(self, config):
|
||||||
super().__init__(config)
|
super().__init__(config)
|
||||||
assert config.is_decoder, "If you want to use `BertLMHeadModel` as a standalone, add `is_decoder=True`."
|
|
||||||
|
if not config.is_decoder:
|
||||||
|
logger.info("If you want to use `BertLMHeadModel` as a standalone, add `is_decoder=True.`")
|
||||||
|
|
||||||
self.bert = BertModel(config)
|
self.bert = BertModel(config)
|
||||||
self.cls = BertOnlyMLMHead(config)
|
self.cls = BertOnlyMLMHead(config)
|
||||||
@@ -1036,9 +1038,12 @@ class BertLMHeadModel(BertPreTrainedModel):
|
|||||||
class BertForMaskedLM(BertPreTrainedModel):
|
class BertForMaskedLM(BertPreTrainedModel):
|
||||||
def __init__(self, config):
|
def __init__(self, config):
|
||||||
super().__init__(config)
|
super().__init__(config)
|
||||||
assert (
|
|
||||||
not config.is_decoder
|
if config.is_decoder:
|
||||||
), "If you want to use `BertForMaskedLM` make sure `config.is_decoder=False` for bi-directional self-attention."
|
logger.info(
|
||||||
|
"If you want to use `TFBertForMaskedLM` make sure `config.is_decoder=False` for "
|
||||||
|
"bi-directional self-attention."
|
||||||
|
)
|
||||||
|
|
||||||
self.bert = BertModel(config)
|
self.bert = BertModel(config)
|
||||||
self.cls = BertOnlyMLMHead(config)
|
self.cls = BertOnlyMLMHead(config)
|
||||||
|
|||||||
@@ -860,9 +860,12 @@ class TFBertForPreTraining(TFBertPreTrainedModel):
|
|||||||
class TFBertForMaskedLM(TFBertPreTrainedModel, TFMaskedLanguageModelingLoss):
|
class TFBertForMaskedLM(TFBertPreTrainedModel, TFMaskedLanguageModelingLoss):
|
||||||
def __init__(self, config, *inputs, **kwargs):
|
def __init__(self, config, *inputs, **kwargs):
|
||||||
super().__init__(config, *inputs, **kwargs)
|
super().__init__(config, *inputs, **kwargs)
|
||||||
assert (
|
|
||||||
not config.is_decoder
|
if config.is_decoder:
|
||||||
), "If you want to use `BertForMaskedLM` make sure `config.is_decoder=False` for bi-directional self-attention."
|
logger.info(
|
||||||
|
"If you want to use `TFBertForMaskedLM` make sure `config.is_decoder=False` for "
|
||||||
|
"bi-directional self-attention."
|
||||||
|
)
|
||||||
|
|
||||||
self.bert = TFBertMainLayer(config, name="bert")
|
self.bert = TFBertMainLayer(config, name="bert")
|
||||||
self.mlm = TFBertMLMHead(config, self.bert.embeddings, name="mlm___cls")
|
self.mlm = TFBertMLMHead(config, self.bert.embeddings, name="mlm___cls")
|
||||||
@@ -936,7 +939,9 @@ class TFBertForMaskedLM(TFBertPreTrainedModel, TFMaskedLanguageModelingLoss):
|
|||||||
class TFBertLMHeadModel(TFBertPreTrainedModel, TFCausalLanguageModelingLoss):
|
class TFBertLMHeadModel(TFBertPreTrainedModel, TFCausalLanguageModelingLoss):
|
||||||
def __init__(self, config, *inputs, **kwargs):
|
def __init__(self, config, *inputs, **kwargs):
|
||||||
super().__init__(config, *inputs, **kwargs)
|
super().__init__(config, *inputs, **kwargs)
|
||||||
assert config.is_decoder, "If you want to use `TFBertLMHeadModel` as a standalone, add `is_decoder=True.`"
|
|
||||||
|
if not config.is_decoder:
|
||||||
|
logger.info("If you want to use `TFBertLMHeadModel` as a standalone, add `is_decoder=True.`")
|
||||||
|
|
||||||
self.bert = TFBertMainLayer(config, name="bert")
|
self.bert = TFBertMainLayer(config, name="bert")
|
||||||
self.mlm = TFBertMLMHead(config, self.bert.embeddings, name="mlm___cls")
|
self.mlm = TFBertMLMHead(config, self.bert.embeddings, name="mlm___cls")
|
||||||
|
|||||||
@@ -32,6 +32,7 @@ if is_tf_available():
|
|||||||
TFAlbertForMultipleChoice,
|
TFAlbertForMultipleChoice,
|
||||||
TFAlbertForSequenceClassification,
|
TFAlbertForSequenceClassification,
|
||||||
TFAlbertForQuestionAnswering,
|
TFAlbertForQuestionAnswering,
|
||||||
|
TFAlbertForTokenClassification,
|
||||||
TF_ALBERT_PRETRAINED_MODEL_ARCHIVE_LIST,
|
TF_ALBERT_PRETRAINED_MODEL_ARCHIVE_LIST,
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -109,6 +110,7 @@ class TFAlbertModelTester:
|
|||||||
config = AlbertConfig(
|
config = AlbertConfig(
|
||||||
vocab_size=self.vocab_size,
|
vocab_size=self.vocab_size,
|
||||||
hidden_size=self.hidden_size,
|
hidden_size=self.hidden_size,
|
||||||
|
embedding_size=self.embedding_size,
|
||||||
num_hidden_layers=self.num_hidden_layers,
|
num_hidden_layers=self.num_hidden_layers,
|
||||||
num_attention_heads=self.num_attention_heads,
|
num_attention_heads=self.num_attention_heads,
|
||||||
intermediate_size=self.intermediate_size,
|
intermediate_size=self.intermediate_size,
|
||||||
@@ -198,6 +200,19 @@ class TFAlbertModelTester:
|
|||||||
result = model(inputs)
|
result = model(inputs)
|
||||||
self.parent.assertListEqual(list(result["logits"].shape), [self.batch_size, self.num_choices])
|
self.parent.assertListEqual(list(result["logits"].shape), [self.batch_size, self.num_choices])
|
||||||
|
|
||||||
|
def create_and_check_albert_for_token_classification(
|
||||||
|
self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
|
||||||
|
):
|
||||||
|
config.num_labels = self.num_labels
|
||||||
|
model = TFAlbertForTokenClassification(config=config)
|
||||||
|
inputs = {
|
||||||
|
"input_ids": input_ids,
|
||||||
|
"attention_mask": input_mask,
|
||||||
|
"token_type_ids": token_type_ids,
|
||||||
|
}
|
||||||
|
result = model(inputs)
|
||||||
|
self.parent.assertListEqual(list(result["logits"].shape), [self.batch_size, self.seq_length, self.num_labels])
|
||||||
|
|
||||||
def prepare_config_and_inputs_for_common(self):
|
def prepare_config_and_inputs_for_common(self):
|
||||||
config_and_inputs = self.prepare_config_and_inputs()
|
config_and_inputs = self.prepare_config_and_inputs()
|
||||||
(
|
(
|
||||||
@@ -223,6 +238,8 @@ class TFAlbertModelTest(TFModelTesterMixin, unittest.TestCase):
|
|||||||
TFAlbertForMaskedLM,
|
TFAlbertForMaskedLM,
|
||||||
TFAlbertForSequenceClassification,
|
TFAlbertForSequenceClassification,
|
||||||
TFAlbertForQuestionAnswering,
|
TFAlbertForQuestionAnswering,
|
||||||
|
TFAlbertForTokenClassification,
|
||||||
|
TFAlbertForMultipleChoice,
|
||||||
)
|
)
|
||||||
if is_tf_available()
|
if is_tf_available()
|
||||||
else ()
|
else ()
|
||||||
|
|||||||
@@ -265,6 +265,7 @@ class TFBertModelTest(TFModelTesterMixin, unittest.TestCase):
|
|||||||
(
|
(
|
||||||
TFBertModel,
|
TFBertModel,
|
||||||
TFBertForMaskedLM,
|
TFBertForMaskedLM,
|
||||||
|
TFBertLMHeadModel,
|
||||||
TFBertForNextSentencePrediction,
|
TFBertForNextSentencePrediction,
|
||||||
TFBertForPreTraining,
|
TFBertForPreTraining,
|
||||||
TFBertForQuestionAnswering,
|
TFBertForQuestionAnswering,
|
||||||
|
|||||||
@@ -202,6 +202,7 @@ class TFElectraModelTest(TFModelTesterMixin, unittest.TestCase):
|
|||||||
TFElectraForTokenClassification,
|
TFElectraForTokenClassification,
|
||||||
TFElectraForMultipleChoice,
|
TFElectraForMultipleChoice,
|
||||||
TFElectraForSequenceClassification,
|
TFElectraForSequenceClassification,
|
||||||
|
TFElectraForQuestionAnswering,
|
||||||
)
|
)
|
||||||
if is_tf_available()
|
if is_tf_available()
|
||||||
else ()
|
else ()
|
||||||
|
|||||||
@@ -18,12 +18,6 @@ IGNORE_NON_TESTED = [
|
|||||||
"DPRSpanPredictor", # Building part of bigger (tested) model.
|
"DPRSpanPredictor", # Building part of bigger (tested) model.
|
||||||
"ReformerForMaskedLM", # Needs to be setup as decoder.
|
"ReformerForMaskedLM", # Needs to be setup as decoder.
|
||||||
"T5Stack", # Building part of bigger (tested) model.
|
"T5Stack", # Building part of bigger (tested) model.
|
||||||
"TFAlbertForMultipleChoice", # TODO: fix
|
|
||||||
"TFAlbertForTokenClassification", # TODO: fix
|
|
||||||
"TFBertLMHeadModel", # TODO: fix
|
|
||||||
"TFElectraForMultipleChoice", # Fix is in #6284
|
|
||||||
"TFElectraForQuestionAnswering", # TODO: fix
|
|
||||||
"TFElectraForSequenceClassification", # Fix is in #6284
|
|
||||||
"TFElectraMainLayer", # Building part of bigger (tested) model (should it be a TFPreTrainedModel ?)
|
"TFElectraMainLayer", # Building part of bigger (tested) model (should it be a TFPreTrainedModel ?)
|
||||||
"TFRobertaForMultipleChoice", # TODO: fix
|
"TFRobertaForMultipleChoice", # TODO: fix
|
||||||
]
|
]
|
||||||
|
|||||||
Reference in New Issue
Block a user