[examples/summarization] deal with max_length and num_beams (#21740)

* Override the decoding parameters of Seq2SeqTrainer

* Fix quality

* Fix max_length parameter

* Fix quality

* Remove redundant parameter max_length

* Separate the preprocess of train and validation to use different max_target_length
This commit is contained in:
bofeng huang
2023-02-27 08:18:14 +01:00
committed by GitHub
parent 9ddf4f4f03
commit 3c0ce60855
2 changed files with 27 additions and 25 deletions

View File

@@ -639,6 +639,16 @@ def main():
result["gen_len"] = np.mean(prediction_lens)
return result
# Override the decoding parameters of Seq2SeqTrainer
training_args.generation_max_length = (
training_args.generation_max_length
if training_args.generation_max_length is not None
else data_args.val_max_target_length
)
training_args.generation_num_beams = (
data_args.num_beams if data_args.num_beams is not None else training_args.generation_num_beams
)
# Initialize our Trainer
trainer = Seq2SeqTrainer(
model=model,
@@ -672,15 +682,9 @@ def main():
# Evaluation
results = {}
max_length = (
training_args.generation_max_length
if training_args.generation_max_length is not None
else data_args.val_max_target_length
)
num_beams = data_args.num_beams if data_args.num_beams is not None else training_args.generation_num_beams
if training_args.do_eval:
logger.info("*** Evaluate ***")
metrics = trainer.evaluate(max_length=max_length, num_beams=num_beams, metric_key_prefix="eval")
metrics = trainer.evaluate(metric_key_prefix="eval")
max_eval_samples = data_args.max_eval_samples if data_args.max_eval_samples is not None else len(eval_dataset)
metrics["eval_samples"] = min(max_eval_samples, len(eval_dataset))
@@ -690,9 +694,7 @@ def main():
if training_args.do_predict:
logger.info("*** Predict ***")
predict_results = trainer.predict(
predict_dataset, metric_key_prefix="predict", max_length=max_length, num_beams=num_beams
)
predict_results = trainer.predict(predict_dataset, metric_key_prefix="predict")
metrics = predict_results.metrics
max_predict_samples = (
data_args.max_predict_samples if data_args.max_predict_samples is not None else len(predict_dataset)