Rename compute_loss in TF models (#15207)

* Rename compute_loss to hf_compute_loss to avoid conflicts with the new Keras method

* make style

* Adding deprecation warning to `compute_loss`

* Fix sneaky reference to compute_loss

* Replace logger.warning with warnings.warn

* Clarifying warning and deprecation timeline
This commit is contained in:
Matt
2022-01-19 13:29:07 +00:00
committed by GitHub
parent d1f5ca1afd
commit 2708bfa127
35 changed files with 166 additions and 126 deletions

View File

@@ -1122,7 +1122,7 @@ class TF{{cookiecutter.camelcase_modelname}}ForMaskedLM(TF{{cookiecutter.camelca
sequence_output = outputs[0]
prediction_scores = self.mlm(sequence_output=sequence_output, training=inputs["training"])
loss = (
None if inputs["labels"] is None else self.compute_loss(labels=inputs["labels"], logits=prediction_scores)
None if inputs["labels"] is None else self.hf_compute_loss(labels=inputs["labels"], logits=prediction_scores)
)
if not inputs["return_dict"]:
@@ -1264,7 +1264,7 @@ class TF{{cookiecutter.camelcase_modelname}}ForCausalLM(TF{{cookiecutter.camelca
# shift labels to the left and cut last logit token
logits = logits[:, :-1]
labels = inputs["labels"][:, 1:]
loss = self.compute_loss(labels=labels, logits=logits)
loss = self.hf_compute_loss(labels=labels, logits=logits)
if not inputs["return_dict"]:
output = (logits,) + outputs[2:]
@@ -1394,7 +1394,7 @@ class TF{{cookiecutter.camelcase_modelname}}ForSequenceClassification(TF{{cookie
training=inputs["training"],
)
logits = self.classifier(hidden_states=outputs[0], training=inputs["training"])
loss = None if inputs["labels"] is None else self.compute_loss(labels=inputs["labels"], logits=logits)
loss = None if inputs["labels"] is None else self.hf_compute_loss(labels=inputs["labels"], logits=logits)
if not inputs["return_dict"]:
output = (logits,) + outputs[1:]
@@ -1534,7 +1534,7 @@ class TF{{cookiecutter.camelcase_modelname}}ForMultipleChoice(TF{{cookiecutter.c
logits = self.sequence_summary(inputs=outputs[0], training=inputs["training"])
logits = self.classifier(inputs=logits)
reshaped_logits = tf.reshape(tensor=logits, shape=(-1, num_choices))
loss = None if inputs["labels"] is None else self.compute_loss(labels=inputs["labels"], logits=reshaped_logits)
loss = None if inputs["labels"] is None else self.hf_compute_loss(labels=inputs["labels"], logits=reshaped_logits)
if not inputs["return_dict"]:
output = (reshaped_logits,) + outputs[1:]
@@ -1642,7 +1642,7 @@ class TF{{cookiecutter.camelcase_modelname}}ForTokenClassification(TF{{cookiecut
sequence_output = outputs[0]
sequence_output = self.dropout(inputs=sequence_output, training=inputs["training"])
logits = self.classifier(inputs=sequence_output)
loss = None if inputs["labels"] is None else self.compute_loss(labels=inputs["labels"], logits=logits)
loss = None if inputs["labels"] is None else self.hf_compute_loss(labels=inputs["labels"], logits=logits)
if not inputs["return_dict"]:
output = (logits,) + outputs[1:]
@@ -1752,7 +1752,7 @@ class TF{{cookiecutter.camelcase_modelname}}ForQuestionAnswering(TF{{cookiecutte
if inputs["start_positions"] is not None and inputs["end_positions"] is not None:
labels = {"start_position": inputs["start_positions"]}
labels["end_position"] = inputs["end_positions"]
loss = self.compute_loss(labels=labels, logits=(start_logits, end_logits))
loss = self.hf_compute_loss(labels=labels, logits=(start_logits, end_logits))
if not inputs["return_dict"]:
output = (start_logits, end_logits) + outputs[2:]
@@ -3152,7 +3152,7 @@ class TF{{cookiecutter.camelcase_modelname}}ForConditionalGeneration(TF{{cookiec
)
lm_logits = self.model.shared(outputs[0], mode="linear")
lm_logits = lm_logits + self.final_logits_bias
masked_lm_loss = None if inputs["labels"] is None else self.compute_loss(inputs["labels"], lm_logits)
masked_lm_loss = None if inputs["labels"] is None else self.hf_compute_loss(inputs["labels"], lm_logits)
if not inputs["return_dict"]:
output = (lm_logits,) + outputs[1:]
@@ -3251,7 +3251,7 @@ class TF{{cookiecutter.camelcase_modelname}}ForConditionalGeneration(TF{{cookiec
)
return (past[0], reordered_past)
def compute_loss(self, labels, logits):
def hf_compute_loss(self, labels, logits):
"""CrossEntropyLoss that ignores pad tokens"""
loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(
from_logits=True,