Better booleans handling in the TF models (#8777)

* Apply on BERT and ALBERT

* Update TF Bart

* Add input processing to TF BART

* Add input processing for TF CTRL

* Add input processing to TF Distilbert

* Add input processing to TF DPR

* Add input processing to TF Electra

* Add deprecated arguments

* Add input processing to TF XLM

* Add input processing to TF Funnel

* Add input processing to TF GPT2

* Add input processing to TF Longformer

* Add input processing to TF Lxmert

* Apply style

* Add input processing to TF Mobilebert

* Add input processing to TF GPT

* Add input processing to TF Roberta

* Add input processing to TF T5

* Add input processing to TF TransfoXL

* Apply style

* Rebase on master

* Bug fix

* Retry to bugfix

* Retry bug fix

* Fix wrong model name

* Try another fix

* Fix BART

* Fix input precessing

* Apply style

* Put the deprecated warnings in the input processing function

* Remove the unused imports

* Raise an error when len(kwargs)>0

* test ModelOutput instead of TFBaseModelOutput

* Bug fix

* Address Patrick's comments

* Address Patrick's comments

* Address Sylvain's comments

* Add boolean processing for the inputs

* Apply style

* Missing optional

* Fix missing some input proc

* Update the template

* Fix missing inputs

* Missing input

* Fix args parameter

* Trigger CI

* Trigger CI

* Trigger CI

* Address Patrick's and Sylvain's comments

* Replace warn by warning

* Trigger CI

* Fix XLNET

* Fix detection
This commit is contained in:
Julien Plu
2020-12-04 15:08:29 +01:00
committed by GitHub
parent 4c3d98dddc
commit dcd3046f98
21 changed files with 597 additions and 643 deletions

View File

@@ -550,6 +550,7 @@ class TFBertMainLayer(tf.keras.layers.Layer):
def __init__(self, config, **kwargs):
super().__init__(**kwargs)
self.config = config
self.num_hidden_layers = config.num_hidden_layers
self.initializer_range = config.initializer_range
self.output_attentions = config.output_attentions
@@ -589,6 +590,7 @@ class TFBertMainLayer(tf.keras.layers.Layer):
):
inputs = input_processing(
func=self.call,
config=self.config,
input_ids=input_ids,
attention_mask=attention_mask,
token_type_ids=token_type_ids,
@@ -601,13 +603,6 @@ class TFBertMainLayer(tf.keras.layers.Layer):
training=training,
kwargs_call=kwargs,
)
output_attentions = (
inputs["output_attentions"] if inputs["output_attentions"] is not None else self.output_attentions
)
output_hidden_states = (
inputs["output_hidden_states"] if inputs["output_hidden_states"] is not None else self.output_hidden_states
)
return_dict = inputs["return_dict"] if inputs["return_dict"] is not None else self.return_dict
if inputs["input_ids"] is not None and inputs["inputs_embeds"] is not None:
raise ValueError("You cannot specify both input_ids and inputs_embeds at the same time")
@@ -661,16 +656,16 @@ class TFBertMainLayer(tf.keras.layers.Layer):
embedding_output,
extended_attention_mask,
inputs["head_mask"],
output_attentions,
output_hidden_states,
return_dict,
inputs["output_attentions"],
inputs["output_hidden_states"],
inputs["return_dict"],
training=inputs["training"],
)
sequence_output = encoder_outputs[0]
pooled_output = self.pooler(sequence_output)
if not return_dict:
if not inputs["return_dict"]:
return (
sequence_output,
pooled_output,
@@ -848,6 +843,7 @@ class TFBertModel(TFBertPreTrainedModel):
):
inputs = input_processing(
func=self.call,
config=self.config,
input_ids=input_ids,
attention_mask=attention_mask,
token_type_ids=token_type_ids,
@@ -929,6 +925,7 @@ class TFBertForPreTraining(TFBertPreTrainedModel, TFBertPreTrainingLoss):
"""
inputs = input_processing(
func=self.call,
config=self.config,
input_ids=input_ids,
attention_mask=attention_mask,
token_type_ids=token_type_ids,
@@ -943,7 +940,6 @@ class TFBertForPreTraining(TFBertPreTrainedModel, TFBertPreTrainingLoss):
training=training,
kwargs_call=kwargs,
)
return_dict = inputs["return_dict"] if inputs["return_dict"] is not None else self.bert.return_dict
outputs = self.bert(
inputs["input_ids"],
attention_mask=inputs["attention_mask"],
@@ -953,7 +949,7 @@ class TFBertForPreTraining(TFBertPreTrainedModel, TFBertPreTrainingLoss):
inputs_embeds=inputs["inputs_embeds"],
output_attentions=inputs["output_attentions"],
output_hidden_states=inputs["output_hidden_states"],
return_dict=return_dict,
return_dict=inputs["return_dict"],
training=inputs["training"],
)
sequence_output, pooled_output = outputs[:2]
@@ -966,7 +962,7 @@ class TFBertForPreTraining(TFBertPreTrainedModel, TFBertPreTrainingLoss):
d_labels["next_sentence_label"] = inputs["next_sentence_label"]
total_loss = self.compute_loss(labels=d_labels, logits=(prediction_scores, seq_relationship_score))
if not return_dict:
if not inputs["return_dict"]:
return (prediction_scores, seq_relationship_score) + outputs[2:]
return TFBertForPreTrainingOutput(
@@ -1029,6 +1025,7 @@ class TFBertForMaskedLM(TFBertPreTrainedModel, TFMaskedLanguageModelingLoss):
"""
inputs = input_processing(
func=self.call,
config=self.config,
input_ids=input_ids,
attention_mask=attention_mask,
token_type_ids=token_type_ids,
@@ -1042,7 +1039,6 @@ class TFBertForMaskedLM(TFBertPreTrainedModel, TFMaskedLanguageModelingLoss):
training=training,
kwargs_call=kwargs,
)
return_dict = inputs["return_dict"] if inputs["return_dict"] is not None else self.bert.return_dict
outputs = self.bert(
inputs["input_ids"],
attention_mask=inputs["attention_mask"],
@@ -1052,14 +1048,14 @@ class TFBertForMaskedLM(TFBertPreTrainedModel, TFMaskedLanguageModelingLoss):
inputs_embeds=inputs["inputs_embeds"],
output_attentions=inputs["output_attentions"],
output_hidden_states=inputs["output_hidden_states"],
return_dict=return_dict,
return_dict=inputs["return_dict"],
training=inputs["training"],
)
sequence_output = outputs[0]
prediction_scores = self.mlm(sequence_output, training=inputs["training"])
loss = None if inputs["labels"] is None else self.compute_loss(inputs["labels"], prediction_scores)
if not return_dict:
if not inputs["return_dict"]:
output = (prediction_scores,) + outputs[2:]
return ((loss,) + output) if loss is not None else output
@@ -1116,6 +1112,7 @@ class TFBertLMHeadModel(TFBertPreTrainedModel, TFCausalLanguageModelingLoss):
"""
inputs = input_processing(
func=self.call,
config=self.config,
input_ids=input_ids,
attention_mask=attention_mask,
token_type_ids=token_type_ids,
@@ -1129,7 +1126,6 @@ class TFBertLMHeadModel(TFBertPreTrainedModel, TFCausalLanguageModelingLoss):
training=training,
kwargs_call=kwargs,
)
return_dict = inputs["return_dict"] if inputs["return_dict"] is not None else self.bert.return_dict
outputs = self.bert(
inputs["input_ids"],
attention_mask=inputs["attention_mask"],
@@ -1139,7 +1135,7 @@ class TFBertLMHeadModel(TFBertPreTrainedModel, TFCausalLanguageModelingLoss):
inputs_embeds=inputs["inputs_embeds"],
output_attentions=inputs["output_attentions"],
output_hidden_states=inputs["output_hidden_states"],
return_dict=return_dict,
return_dict=inputs["return_dict"],
training=inputs["training"],
)
sequence_output = outputs[0]
@@ -1152,7 +1148,7 @@ class TFBertLMHeadModel(TFBertPreTrainedModel, TFCausalLanguageModelingLoss):
labels = inputs["labels"][:, 1:]
loss = self.compute_loss(labels, logits)
if not return_dict:
if not inputs["return_dict"]:
output = (logits,) + outputs[2:]
return ((loss,) + output) if loss is not None else output
@@ -1212,6 +1208,7 @@ class TFBertForNextSentencePrediction(TFBertPreTrainedModel, TFNextSentencePredi
"""
inputs = input_processing(
func=self.call,
config=self.config,
input_ids=input_ids,
attention_mask=attention_mask,
token_type_ids=token_type_ids,
@@ -1225,7 +1222,6 @@ class TFBertForNextSentencePrediction(TFBertPreTrainedModel, TFNextSentencePredi
training=training,
kwargs_call=kwargs,
)
return_dict = inputs["return_dict"] if inputs["return_dict"] is not None else self.bert.return_dict
outputs = self.bert(
inputs["input_ids"],
attention_mask=inputs["attention_mask"],
@@ -1235,7 +1231,7 @@ class TFBertForNextSentencePrediction(TFBertPreTrainedModel, TFNextSentencePredi
inputs_embeds=inputs["inputs_embeds"],
output_attentions=inputs["output_attentions"],
output_hidden_states=inputs["output_hidden_states"],
return_dict=return_dict,
return_dict=inputs["return_dict"],
training=inputs["training"],
)
pooled_output = outputs[1]
@@ -1246,7 +1242,7 @@ class TFBertForNextSentencePrediction(TFBertPreTrainedModel, TFNextSentencePredi
else self.compute_loss(labels=inputs["next_sentence_label"], logits=seq_relationship_scores)
)
if not return_dict:
if not inputs["return_dict"]:
output = (seq_relationship_scores,) + outputs[2:]
return ((next_sentence_loss,) + output) if next_sentence_loss is not None else output
@@ -1306,6 +1302,7 @@ class TFBertForSequenceClassification(TFBertPreTrainedModel, TFSequenceClassific
"""
inputs = input_processing(
func=self.call,
config=self.config,
input_ids=input_ids,
attention_mask=attention_mask,
token_type_ids=token_type_ids,
@@ -1319,7 +1316,6 @@ class TFBertForSequenceClassification(TFBertPreTrainedModel, TFSequenceClassific
training=training,
kwargs_call=kwargs,
)
return_dict = inputs["return_dict"] if inputs["return_dict"] is not None else self.bert.return_dict
outputs = self.bert(
inputs["input_ids"],
attention_mask=inputs["attention_mask"],
@@ -1329,7 +1325,7 @@ class TFBertForSequenceClassification(TFBertPreTrainedModel, TFSequenceClassific
inputs_embeds=inputs["inputs_embeds"],
output_attentions=inputs["output_attentions"],
output_hidden_states=inputs["output_hidden_states"],
return_dict=return_dict,
return_dict=inputs["return_dict"],
training=inputs["training"],
)
pooled_output = outputs[1]
@@ -1337,7 +1333,7 @@ class TFBertForSequenceClassification(TFBertPreTrainedModel, TFSequenceClassific
logits = self.classifier(pooled_output)
loss = None if inputs["labels"] is None else self.compute_loss(inputs["labels"], logits)
if not return_dict:
if not inputs["return_dict"]:
output = (logits,) + outputs[2:]
return ((loss,) + output) if loss is not None else output
@@ -1406,6 +1402,7 @@ class TFBertForMultipleChoice(TFBertPreTrainedModel, TFMultipleChoiceLoss):
"""
inputs = input_processing(
func=self.call,
config=self.config,
input_ids=input_ids,
attention_mask=attention_mask,
token_type_ids=token_type_ids,
@@ -1419,7 +1416,6 @@ class TFBertForMultipleChoice(TFBertPreTrainedModel, TFMultipleChoiceLoss):
training=training,
kwargs_call=kwargs,
)
return_dict = inputs["return_dict"] if inputs["return_dict"] is not None else self.bert.return_dict
if inputs["input_ids"] is not None:
num_choices = shape_list(inputs["input_ids"])[1]
@@ -1452,7 +1448,7 @@ class TFBertForMultipleChoice(TFBertPreTrainedModel, TFMultipleChoiceLoss):
flat_inputs_embeds,
inputs["output_attentions"],
inputs["output_hidden_states"],
return_dict=return_dict,
return_dict=inputs["return_dict"],
training=inputs["training"],
)
pooled_output = outputs[1]
@@ -1461,7 +1457,7 @@ class TFBertForMultipleChoice(TFBertPreTrainedModel, TFMultipleChoiceLoss):
reshaped_logits = tf.reshape(logits, (-1, num_choices))
loss = None if inputs["labels"] is None else self.compute_loss(inputs["labels"], reshaped_logits)
if not return_dict:
if not inputs["return_dict"]:
output = (reshaped_logits,) + outputs[2:]
return ((loss,) + output) if loss is not None else output
@@ -1524,6 +1520,7 @@ class TFBertForTokenClassification(TFBertPreTrainedModel, TFTokenClassificationL
"""
inputs = input_processing(
func=self.call,
config=self.config,
input_ids=input_ids,
attention_mask=attention_mask,
token_type_ids=token_type_ids,
@@ -1537,7 +1534,6 @@ class TFBertForTokenClassification(TFBertPreTrainedModel, TFTokenClassificationL
training=training,
kwargs_call=kwargs,
)
return_dict = inputs["return_dict"] if inputs["return_dict"] is not None else self.bert.return_dict
outputs = self.bert(
inputs["input_ids"],
attention_mask=inputs["attention_mask"],
@@ -1547,7 +1543,7 @@ class TFBertForTokenClassification(TFBertPreTrainedModel, TFTokenClassificationL
inputs_embeds=inputs["inputs_embeds"],
output_attentions=inputs["output_attentions"],
output_hidden_states=inputs["output_hidden_states"],
return_dict=return_dict,
return_dict=inputs["return_dict"],
training=inputs["training"],
)
sequence_output = outputs[0]
@@ -1555,7 +1551,7 @@ class TFBertForTokenClassification(TFBertPreTrainedModel, TFTokenClassificationL
logits = self.classifier(sequence_output)
loss = None if inputs["labels"] is None else self.compute_loss(inputs["labels"], logits)
if not return_dict:
if not inputs["return_dict"]:
output = (logits,) + outputs[2:]
return ((loss,) + output) if loss is not None else output
@@ -1623,6 +1619,7 @@ class TFBertForQuestionAnswering(TFBertPreTrainedModel, TFQuestionAnsweringLoss)
"""
inputs = input_processing(
func=self.call,
config=self.config,
input_ids=input_ids,
attention_mask=attention_mask,
token_type_ids=token_type_ids,
@@ -1637,7 +1634,6 @@ class TFBertForQuestionAnswering(TFBertPreTrainedModel, TFQuestionAnsweringLoss)
training=training,
kwargs_call=kwargs,
)
return_dict = inputs["return_dict"] if inputs["return_dict"] is not None else self.bert.return_dict
outputs = self.bert(
inputs["input_ids"],
attention_mask=inputs["attention_mask"],
@@ -1647,7 +1643,7 @@ class TFBertForQuestionAnswering(TFBertPreTrainedModel, TFQuestionAnsweringLoss)
inputs_embeds=inputs["inputs_embeds"],
output_attentions=inputs["output_attentions"],
output_hidden_states=inputs["output_hidden_states"],
return_dict=return_dict,
return_dict=inputs["return_dict"],
training=inputs["training"],
)
sequence_output = outputs[0]
@@ -1662,7 +1658,7 @@ class TFBertForQuestionAnswering(TFBertPreTrainedModel, TFQuestionAnsweringLoss)
labels["end_position"] = inputs["end_positions"]
loss = self.compute_loss(labels, (start_logits, end_logits))
if not return_dict:
if not inputs["return_dict"]:
output = (start_logits, end_logits) + outputs[2:]
return ((loss,) + output) if loss is not None else output