Question Answering for TF trainer (#4320)
* Add QA trainer example for TF * Make data_dir optional * Fix parameter logic * Fix feature convert * Update the READMEs to add the question-answering task * Apply style * Change 'sequence-classification' to 'text-classification' and prefix with 'eval' all the metric names * Apply style * Apply style
This commit is contained in:
@@ -226,7 +226,11 @@ class TFTrainer:
|
||||
else:
|
||||
metrics = {}
|
||||
|
||||
metrics["loss"] = loss.numpy()
|
||||
metrics["eval_loss"] = loss.numpy()
|
||||
|
||||
for key in list(metrics.keys()):
|
||||
if not key.startswith("eval_"):
|
||||
metrics[f"eval_{key}"] = metrics.pop(key)
|
||||
|
||||
return PredictionOutput(predictions=preds, label_ids=label_ids, metrics=metrics)
|
||||
|
||||
@@ -333,7 +337,7 @@ class TFTrainer:
|
||||
gradients = [(tf.clip_by_value(grad, -self.args.max_grad_norm, self.args.max_grad_norm)) for grad in gradients]
|
||||
vars = self.model.trainable_variables
|
||||
|
||||
if self.args.mode == "token-classification":
|
||||
if self.args.mode in ["token-classification", "question-answering"]:
|
||||
vars = [var for var in self.model.trainable_variables if "pooler" not in var.name]
|
||||
|
||||
self.optimizer.apply_gradients(list(zip(gradients, vars)))
|
||||
@@ -373,7 +377,7 @@ class TFTrainer:
|
||||
per_example_loss, _ = self._run_model(features, labels, True)
|
||||
vars = self.model.trainable_variables
|
||||
|
||||
if self.args.mode == "token-classification":
|
||||
if self.args.mode in ["token-classification", "question-answering"]:
|
||||
vars = [var for var in self.model.trainable_variables if "pooler" not in var.name]
|
||||
|
||||
gradients = self.optimizer.get_gradients(per_example_loss, vars)
|
||||
@@ -390,7 +394,7 @@ class TFTrainer:
|
||||
labels: the batched labels.
|
||||
training: run the model in training mode or not
|
||||
"""
|
||||
if self.args.mode == "sequence-classification" or self.args.mode == "token-classification":
|
||||
if self.args.mode == "text-classification" or self.args.mode == "token-classification":
|
||||
logits = self.model(features, training=training)[0]
|
||||
else:
|
||||
logits = self.model(features, training=training)
|
||||
@@ -400,6 +404,10 @@ class TFTrainer:
|
||||
reduced_logits = tf.boolean_mask(tf.reshape(logits, (-1, shape_list(logits)[2])), active_loss)
|
||||
labels = tf.boolean_mask(tf.reshape(labels, (-1,)), active_loss)
|
||||
loss = self.loss(labels, reduced_logits)
|
||||
elif self.args.mode == "question-answering":
|
||||
start_loss = self.loss(labels["start_position"], logits[0])
|
||||
end_loss = self.loss(labels["end_position"], logits[1])
|
||||
loss = (start_loss + end_loss) / 2.0
|
||||
else:
|
||||
loss = self.loss(labels, logits)
|
||||
|
||||
|
||||
@@ -21,8 +21,8 @@ class TFTrainingArguments(TrainingArguments):
|
||||
},
|
||||
)
|
||||
mode: str = field(
|
||||
default="sequence-classification",
|
||||
metadata={"help": 'Type of task, one of "sequence-classification", "token-classification" '},
|
||||
default="text-classification",
|
||||
metadata={"help": 'Type of task, one of "text-classification", "token-classification", "question-answering"'},
|
||||
)
|
||||
loss_name: str = field(
|
||||
default="SparseCategoricalCrossentropy",
|
||||
|
||||
Reference in New Issue
Block a user