New serving (#9419)
* Add a serving method * Add albert * Add serving for BERT and BART * Add more models * Finish the serving addition * Temp fix * Restore DPR * Fix funnel attribute * Fix attributes GPT2 * Fix OpenAIGPT attribute * Fix T5 attributes * Fix Bart attributes * Fix TransfoXL attributes * Add versioning * better test * Update template * Fix Flaubert * Fix T5 * Apply style * Remove unused imports * Deactivate extra parameters * Remove too long test + saved_model default to False * Ignore the saved model test for some models * Fix some inputs * Fix mpnet serving * Trigger CI * Address all comments
This commit is contained in:
@@ -888,6 +888,17 @@ class TFBertModel(TFBertPreTrainedModel):
|
||||
|
||||
return outputs
|
||||
|
||||
def serving_output(self, output):
|
||||
hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None
|
||||
attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None
|
||||
|
||||
return TFBaseModelOutputWithPooling(
|
||||
last_hidden_state=output.last_hidden_state,
|
||||
pooler_output=output.pooler_output,
|
||||
hidden_states=hs,
|
||||
attentions=attns,
|
||||
)
|
||||
|
||||
|
||||
@add_start_docstrings(
|
||||
"""
|
||||
@@ -999,6 +1010,17 @@ class TFBertForPreTraining(TFBertPreTrainedModel, TFBertPreTrainingLoss):
|
||||
attentions=outputs.attentions,
|
||||
)
|
||||
|
||||
def serving_output(self, output):
|
||||
hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None
|
||||
attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None
|
||||
|
||||
return TFBertForPreTrainingOutput(
|
||||
prediction_logits=output.prediction_logits,
|
||||
seq_relationship_logits=output.seq_relationship_logits,
|
||||
hidden_states=hs,
|
||||
attentions=attns,
|
||||
)
|
||||
|
||||
|
||||
@add_start_docstrings("""Bert Model with a `language modeling` head on top. """, BERT_START_DOCSTRING)
|
||||
class TFBertForMaskedLM(TFBertPreTrainedModel, TFMaskedLanguageModelingLoss):
|
||||
@@ -1102,6 +1124,16 @@ class TFBertForMaskedLM(TFBertPreTrainedModel, TFMaskedLanguageModelingLoss):
|
||||
attentions=outputs.attentions,
|
||||
)
|
||||
|
||||
def serving_output(self, output):
|
||||
hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None
|
||||
attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None
|
||||
|
||||
return TFMaskedLMOutput(
|
||||
logits=output.logits,
|
||||
hidden_states=hs,
|
||||
attentions=attns,
|
||||
)
|
||||
|
||||
|
||||
class TFBertLMHeadModel(TFBertPreTrainedModel, TFCausalLanguageModelingLoss):
|
||||
# names with a '.' represents the authorized unexpected/missing layers when a TF model is loaded from a PT model
|
||||
@@ -1205,6 +1237,16 @@ class TFBertLMHeadModel(TFBertPreTrainedModel, TFCausalLanguageModelingLoss):
|
||||
attentions=outputs.attentions,
|
||||
)
|
||||
|
||||
def serving_output(self, output):
|
||||
hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None
|
||||
attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None
|
||||
|
||||
return TFCausalLMOutput(
|
||||
logits=output.logits,
|
||||
hidden_states=hs,
|
||||
attentions=attns,
|
||||
)
|
||||
|
||||
|
||||
@add_start_docstrings(
|
||||
"""Bert Model with a `next sentence prediction (classification)` head on top. """,
|
||||
@@ -1302,6 +1344,16 @@ class TFBertForNextSentencePrediction(TFBertPreTrainedModel, TFNextSentencePredi
|
||||
attentions=outputs.attentions,
|
||||
)
|
||||
|
||||
def serving_output(self, output):
|
||||
hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None
|
||||
attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None
|
||||
|
||||
return TFNextSentencePredictorOutput(
|
||||
logits=output.logits,
|
||||
hidden_states=hs,
|
||||
attentions=attns,
|
||||
)
|
||||
|
||||
|
||||
@add_start_docstrings(
|
||||
"""
|
||||
@@ -1397,6 +1449,16 @@ class TFBertForSequenceClassification(TFBertPreTrainedModel, TFSequenceClassific
|
||||
attentions=outputs.attentions,
|
||||
)
|
||||
|
||||
def serving_output(self, output):
|
||||
hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None
|
||||
attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None
|
||||
|
||||
return TFSequenceClassifierOutput(
|
||||
logits=output.logits,
|
||||
hidden_states=hs,
|
||||
attentions=attns,
|
||||
)
|
||||
|
||||
|
||||
@add_start_docstrings(
|
||||
"""
|
||||
@@ -1525,6 +1587,30 @@ class TFBertForMultipleChoice(TFBertPreTrainedModel, TFMultipleChoiceLoss):
|
||||
attentions=outputs.attentions,
|
||||
)
|
||||
|
||||
@tf.function(
|
||||
input_signature=[
|
||||
{
|
||||
"input_ids": tf.TensorSpec((None, None, None), tf.int32, name="input_ids"),
|
||||
"attention_mask": tf.TensorSpec((None, None, None), tf.int32, name="attention_mask"),
|
||||
"token_type_ids": tf.TensorSpec((None, None, None), tf.int32, name="token_type_ids"),
|
||||
}
|
||||
]
|
||||
)
|
||||
def serving(self, inputs):
|
||||
output = self.call(inputs)
|
||||
|
||||
return self.serving_output(output)
|
||||
|
||||
def serving_output(self, output):
|
||||
hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None
|
||||
attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None
|
||||
|
||||
return TFMultipleChoiceModelOutput(
|
||||
logits=output.logits,
|
||||
hidden_states=hs,
|
||||
attentions=attns,
|
||||
)
|
||||
|
||||
|
||||
@add_start_docstrings(
|
||||
"""
|
||||
@@ -1625,6 +1711,16 @@ class TFBertForTokenClassification(TFBertPreTrainedModel, TFTokenClassificationL
|
||||
attentions=outputs.attentions,
|
||||
)
|
||||
|
||||
def serving_output(self, output):
|
||||
hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None
|
||||
attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None
|
||||
|
||||
return TFTokenClassifierOutput(
|
||||
logits=output.logits,
|
||||
hidden_states=hs,
|
||||
attentions=attns,
|
||||
)
|
||||
|
||||
|
||||
@add_start_docstrings(
|
||||
"""
|
||||
@@ -1737,3 +1833,14 @@ class TFBertForQuestionAnswering(TFBertPreTrainedModel, TFQuestionAnsweringLoss)
|
||||
hidden_states=outputs.hidden_states,
|
||||
attentions=outputs.attentions,
|
||||
)
|
||||
|
||||
def serving_output(self, output):
|
||||
hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None
|
||||
attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None
|
||||
|
||||
return TFQuestionAnsweringModelOutput(
|
||||
start_logits=output.start_logits,
|
||||
end_logits=output.end_logits,
|
||||
hidden_states=hs,
|
||||
attentions=attns,
|
||||
)
|
||||
|
||||
Reference in New Issue
Block a user