[Examples] Fixes inconsistency around eval vs val and predict vs test (#11380)

* added changes for uniformity

* modified files

* corrected typo

* fixed qa scripts

* fix typos

* fixed predict typo in qa no trainer

* fixed test file

* reverted trainer changes

* reverted trainer changes in custom exmaples

* updated readme

* added changes in deepspeed test

* added changes for predict and eval
This commit is contained in:
Bhadresh Savani
2021-04-26 17:24:31 +01:00
committed by GitHub
parent 7959d83599
commit 1d30ec95c7
19 changed files with 288 additions and 276 deletions

View File

@@ -128,17 +128,17 @@ class DataTrainingArguments:
"value if set."
},
)
max_val_samples: Optional[int] = field(
max_eval_samples: Optional[int] = field(
default=None,
metadata={
"help": "For debugging purposes or quicker training, truncate the number of validation examples to this "
"help": "For debugging purposes or quicker training, truncate the number of evaluation examples to this "
"value if set."
},
)
max_test_samples: Optional[int] = field(
max_predict_samples: Optional[int] = field(
default=None,
metadata={
"help": "For debugging purposes or quicker training, truncate the number of test examples to this "
"help": "For debugging purposes or quicker training, truncate the number of prediction examples to this "
"value if set."
},
)
@@ -363,8 +363,8 @@ def main():
if "validation" not in datasets:
raise ValueError("--do_eval requires a validation dataset")
eval_dataset = datasets["validation"]
if data_args.max_val_samples is not None:
eval_dataset = eval_dataset.select(range(data_args.max_val_samples))
if data_args.max_eval_samples is not None:
eval_dataset = eval_dataset.select(range(data_args.max_eval_samples))
eval_dataset = eval_dataset.map(
tokenize_and_align_labels,
batched=True,
@@ -375,10 +375,10 @@ def main():
if training_args.do_predict:
if "test" not in datasets:
raise ValueError("--do_predict requires a test dataset")
test_dataset = datasets["test"]
if data_args.max_test_samples is not None:
test_dataset = test_dataset.select(range(data_args.max_test_samples))
test_dataset = test_dataset.map(
predict_dataset = datasets["test"]
if data_args.max_predict_samples is not None:
predict_dataset = predict_dataset.select(range(data_args.max_predict_samples))
predict_dataset = predict_dataset.map(
tokenize_and_align_labels,
batched=True,
num_proc=data_args.preprocessing_num_workers,
@@ -462,8 +462,8 @@ def main():
metrics = trainer.evaluate()
max_val_samples = data_args.max_val_samples if data_args.max_val_samples is not None else len(eval_dataset)
metrics["eval_samples"] = min(max_val_samples, len(eval_dataset))
max_eval_samples = data_args.max_eval_samples if data_args.max_eval_samples is not None else len(eval_dataset)
metrics["eval_samples"] = min(max_eval_samples, len(eval_dataset))
trainer.log_metrics("eval", metrics)
trainer.save_metrics("eval", metrics)
@@ -472,7 +472,7 @@ def main():
if training_args.do_predict:
logger.info("*** Predict ***")
predictions, labels, metrics = trainer.predict(test_dataset)
predictions, labels, metrics = trainer.predict(predict_dataset, metric_key_prefix="predict")
predictions = np.argmax(predictions, axis=2)
# Remove ignored index (special tokens)
@@ -481,13 +481,13 @@ def main():
for prediction, label in zip(predictions, labels)
]
trainer.log_metrics("test", metrics)
trainer.save_metrics("test", metrics)
trainer.log_metrics("predict", metrics)
trainer.save_metrics("predict", metrics)
# Save predictions
output_test_predictions_file = os.path.join(training_args.output_dir, "test_predictions.txt")
output_predictions_file = os.path.join(training_args.output_dir, "predictions.txt")
if trainer.is_world_process_zero():
with open(output_test_predictions_file, "w") as writer:
with open(output_predictions_file, "w") as writer:
for prediction in true_predictions:
writer.write(" ".join(prediction) + "\n")