Distributed eval: SequentialDistributedSampler + gather all results (#4243)

* Distributed eval: SequentialDistributedSampler + gather all results

* For consistency only write to disk from world_master

Close https://github.com/huggingface/transformers/issues/4272

* Working distributed eval

* Hook into scripts

* Fix #3721 again

* TPU.mesh_reduce: stay in tensor space

Thanks @jysohn23

* Just a small comment

* whitespace

* torch.hub: pip install packaging

* Add test scenarii
This commit is contained in:
Julien Chaumond
2020-05-18 22:02:39 -04:00
committed by GitHub
parent 4c06893610
commit 5e7fe8b585
7 changed files with 280 additions and 83 deletions

View File

@@ -166,7 +166,7 @@ def main():
# Evaluation
results = {}
if training_args.do_eval and training_args.local_rank in [-1, 0]:
if training_args.do_eval:
logger.info("*** Evaluate ***")
# Loop to handle MNLI double evaluation (matched, mis-matched)
@@ -181,11 +181,12 @@ def main():
output_eval_file = os.path.join(
training_args.output_dir, f"eval_results_{eval_dataset.args.task_name}.txt"
)
with open(output_eval_file, "w") as writer:
logger.info("***** Eval results {} *****".format(eval_dataset.args.task_name))
for key, value in result.items():
logger.info(" %s = %s", key, value)
writer.write("%s = %s\n" % (key, value))
if trainer.is_world_master():
with open(output_eval_file, "w") as writer:
logger.info("***** Eval results {} *****".format(eval_dataset.args.task_name))
for key, value in result.items():
logger.info(" %s = %s", key, value)
writer.write("%s = %s\n" % (key, value))
results.update(result)