Distributed eval: SequentialDistributedSampler + gather all results (#4243)

* Distributed eval: SequentialDistributedSampler + gather all results * For consistency only write to disk from world_master Close https://github.com/huggingface/transformers/issues/4272 * Working distributed eval * Hook into scripts * Fix #3721 again * TPU.mesh_reduce: stay in tensor space Thanks @jysohn23 * Just a small comment * whitespace * torch.hub: pip install packaging * Add test scenarii
2020-05-18 22:02:39 -04:00
parent 4c06893610
commit 5e7fe8b585
7 changed files with 280 additions and 83 deletions
--- a/examples/multiple-choice/run_multiple_choice.py
+++ b/examples/multiple-choice/run_multiple_choice.py
@@ -202,19 +202,20 @@ def main():

    # Evaluation
    results = {}
-    if training_args.do_eval and training_args.local_rank in [-1, 0]:
+    if training_args.do_eval:
        logger.info("*** Evaluate ***")

        result = trainer.evaluate()

        output_eval_file = os.path.join(training_args.output_dir, "eval_results.txt")
-        with open(output_eval_file, "w") as writer:
-            logger.info("***** Eval results *****")
-            for key, value in result.items():
-                logger.info("  %s = %s", key, value)
-                writer.write("%s = %s\n" % (key, value))
+        if trainer.is_world_master():
+            with open(output_eval_file, "w") as writer:
+                logger.info("***** Eval results *****")
+                for key, value in result.items():
+                    logger.info("  %s = %s", key, value)
+                    writer.write("%s = %s\n" % (key, value))

-            results.update(result)
+                results.update(result)

    return results