Rename compute_loss in TF models (#15207)
* Rename compute_loss to hf_compute_loss to avoid conflicts with the new Keras method
* make style
* Adding deprecation warning to `compute_loss`
* Fix sneaky reference to compute_loss
* Replace logger.warning with warnings.warn
* Clarifying warning and deprecation timeline
This commit is contained in:
@@ -1122,7 +1122,7 @@ class TF{{cookiecutter.camelcase_modelname}}ForMaskedLM(TF{{cookiecutter.camelca
|
||||
sequence_output = outputs[0]
|
||||
prediction_scores = self.mlm(sequence_output=sequence_output, training=inputs["training"])
|
||||
loss = (
|
||||
None if inputs["labels"] is None else self.compute_loss(labels=inputs["labels"], logits=prediction_scores)
|
||||
None if inputs["labels"] is None else self.hf_compute_loss(labels=inputs["labels"], logits=prediction_scores)
|
||||
)
|
||||
|
||||
if not inputs["return_dict"]:
|
||||
@@ -1264,7 +1264,7 @@ class TF{{cookiecutter.camelcase_modelname}}ForCausalLM(TF{{cookiecutter.camelca
|
||||
# shift labels to the left and cut last logit token
|
||||
logits = logits[:, :-1]
|
||||
labels = inputs["labels"][:, 1:]
|
||||
loss = self.compute_loss(labels=labels, logits=logits)
|
||||
loss = self.hf_compute_loss(labels=labels, logits=logits)
|
||||
|
||||
if not inputs["return_dict"]:
|
||||
output = (logits,) + outputs[2:]
|
||||
@@ -1394,7 +1394,7 @@ class TF{{cookiecutter.camelcase_modelname}}ForSequenceClassification(TF{{cookie
|
||||
training=inputs["training"],
|
||||
)
|
||||
logits = self.classifier(hidden_states=outputs[0], training=inputs["training"])
|
||||
loss = None if inputs["labels"] is None else self.compute_loss(labels=inputs["labels"], logits=logits)
|
||||
loss = None if inputs["labels"] is None else self.hf_compute_loss(labels=inputs["labels"], logits=logits)
|
||||
|
||||
if not inputs["return_dict"]:
|
||||
output = (logits,) + outputs[1:]
|
||||
@@ -1534,7 +1534,7 @@ class TF{{cookiecutter.camelcase_modelname}}ForMultipleChoice(TF{{cookiecutter.c
|
||||
logits = self.sequence_summary(inputs=outputs[0], training=inputs["training"])
|
||||
logits = self.classifier(inputs=logits)
|
||||
reshaped_logits = tf.reshape(tensor=logits, shape=(-1, num_choices))
|
||||
loss = None if inputs["labels"] is None else self.compute_loss(labels=inputs["labels"], logits=reshaped_logits)
|
||||
loss = None if inputs["labels"] is None else self.hf_compute_loss(labels=inputs["labels"], logits=reshaped_logits)
|
||||
|
||||
if not inputs["return_dict"]:
|
||||
output = (reshaped_logits,) + outputs[1:]
|
||||
@@ -1642,7 +1642,7 @@ class TF{{cookiecutter.camelcase_modelname}}ForTokenClassification(TF{{cookiecut
|
||||
sequence_output = outputs[0]
|
||||
sequence_output = self.dropout(inputs=sequence_output, training=inputs["training"])
|
||||
logits = self.classifier(inputs=sequence_output)
|
||||
loss = None if inputs["labels"] is None else self.compute_loss(labels=inputs["labels"], logits=logits)
|
||||
loss = None if inputs["labels"] is None else self.hf_compute_loss(labels=inputs["labels"], logits=logits)
|
||||
|
||||
if not inputs["return_dict"]:
|
||||
output = (logits,) + outputs[1:]
|
||||
@@ -1752,7 +1752,7 @@ class TF{{cookiecutter.camelcase_modelname}}ForQuestionAnswering(TF{{cookiecutte
|
||||
if inputs["start_positions"] is not None and inputs["end_positions"] is not None:
|
||||
labels = {"start_position": inputs["start_positions"]}
|
||||
labels["end_position"] = inputs["end_positions"]
|
||||
loss = self.compute_loss(labels=labels, logits=(start_logits, end_logits))
|
||||
loss = self.hf_compute_loss(labels=labels, logits=(start_logits, end_logits))
|
||||
|
||||
if not inputs["return_dict"]:
|
||||
output = (start_logits, end_logits) + outputs[2:]
|
||||
@@ -3152,7 +3152,7 @@ class TF{{cookiecutter.camelcase_modelname}}ForConditionalGeneration(TF{{cookiec
|
||||
)
|
||||
lm_logits = self.model.shared(outputs[0], mode="linear")
|
||||
lm_logits = lm_logits + self.final_logits_bias
|
||||
masked_lm_loss = None if inputs["labels"] is None else self.compute_loss(inputs["labels"], lm_logits)
|
||||
masked_lm_loss = None if inputs["labels"] is None else self.hf_compute_loss(inputs["labels"], lm_logits)
|
||||
|
||||
if not inputs["return_dict"]:
|
||||
output = (lm_logits,) + outputs[1:]
|
||||
@@ -3251,7 +3251,7 @@ class TF{{cookiecutter.camelcase_modelname}}ForConditionalGeneration(TF{{cookiec
|
||||
)
|
||||
return (past[0], reordered_past)
|
||||
|
||||
def compute_loss(self, labels, logits):
|
||||
def hf_compute_loss(self, labels, logits):
|
||||
"""CrossEntropyLoss that ignores pad tokens"""
|
||||
loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(
|
||||
from_logits=True,
|
||||
|
||||
Reference in New Issue
Block a user