[s2s] test_distributed_eval (#8315)

Co-authored-by: Sam Shleifer <sshleifer@gmail.com>
2020-11-05 13:01:15 -08:00
parent 04e442d575
commit d787935a14
4 changed files with 56 additions and 8 deletions
--- a/docs/source/testing.rst
+++ b/docs/source/testing.rst
@@ -450,7 +450,8 @@ Inside tests:

 .. code-block:: bash

-   torch.cuda.device_count()
+   from transformers.testing_utils import get_gpu_count
+   n_gpu = get_gpu_count() # works with torch and tf



--- a/examples/seq2seq/test_finetune_trainer.py
+++ b/examples/seq2seq/test_finetune_trainer.py
@@ -2,9 +2,9 @@ import os
 import sys
 from unittest.mock import patch

-from transformers import BertTokenizer, EncoderDecoderModel, is_torch_available
+from transformers import BertTokenizer, EncoderDecoderModel
 from transformers.file_utils import is_datasets_available
-from transformers.testing_utils import TestCasePlus, execute_subprocess_async, slow
+from transformers.testing_utils import TestCasePlus, execute_subprocess_async, get_gpu_count, slow
 from transformers.trainer_callback import TrainerState
 from transformers.trainer_utils import set_seed

@@ -13,9 +13,6 @@ from .seq2seq_trainer import Seq2SeqTrainer
 from .test_seq2seq_examples import MBART_TINY


-if is_torch_available():
-    import torch
-
 set_seed(42)
 MARIAN_MODEL = "sshleifer/student_marian_en_ro_6_1"

@@ -196,7 +193,7 @@ class TestFinetuneTrainer(TestCasePlus):
        """.split()
        # --eval_beams  2

-        n_gpu = torch.cuda.device_count()
+        n_gpu = get_gpu_count()
        if n_gpu > 1:
            distributed_args = f"""
                -m torch.distributed.launch
--- a/examples/seq2seq/test_seq2seq_examples_multi_gpu.py
+++ b/examples/seq2seq/test_seq2seq_examples_multi_gpu.py
@@ -3,7 +3,14 @@
 import os
 import sys

-from transformers.testing_utils import TestCasePlus, execute_subprocess_async, require_torch_multigpu
+from transformers.testing_utils import (
+    TestCasePlus,
+    execute_subprocess_async,
+    get_gpu_count,
+    require_torch_gpu,
+    require_torch_multigpu,
+    slow,
+)

 from .test_seq2seq_examples import CHEAP_ARGS, make_test_data_dir
 from .utils import load_json
@@ -80,3 +87,30 @@ class TestSummarizationDistillerMultiGPU(TestCasePlus):
        self.assertEqual(len(metrics["test"]), 1)
        desired_n_evals = int(args_d["max_epochs"] * (1 / args_d["val_check_interval"]) / 2 + 1)
        self.assertEqual(len(metrics["val"]), desired_n_evals)
+
+    @slow
+    @require_torch_gpu
+    def test_distributed_eval(self):
+        output_dir = self.get_auto_remove_tmp_dir()
+        args = f"""
+            --model_name Helsinki-NLP/opus-mt-en-ro
+            --save_dir {output_dir}
+            --data_dir test_data/wmt_en_ro
+            --num_beams 2
+            --task translation
+        """.split()
+
+        # we want this test to run even if there is only one GPU, but if there are more we use them all
+        n_gpu = get_gpu_count()
+        distributed_args = f"""
+            -m torch.distributed.launch
+            --nproc_per_node={n_gpu}
+            {self.test_file_dir}/run_distributed_eval.py
+        """.split()
+        cmd = [sys.executable] + distributed_args + args
+        execute_subprocess_async(cmd, env=self.get_env())
+
+        metrics_save_path = os.path.join(output_dir, "test_bleu.json")
+        metrics = load_json(metrics_save_path)
+        # the run is expected to leave its scores in test_bleu.json; require a BLEU of at least 25
+        self.assertGreaterEqual(metrics["bleu"], 25)
--- a/src/transformers/testing_utils.py
+++ b/src/transformers/testing_utils.py
@@ -297,6 +297,22 @@ def require_ray(test_case):
        return test_case


+def get_gpu_count():
+    """
+    Return the number of GPUs: torch's CUDA device count if torch is available, else tf's GPU device count, else 0
+    """
+    if _torch_available:
+        import torch
+
+        return torch.cuda.device_count()
+    elif _tf_available:
+        import tensorflow as tf
+
+        return len(tf.config.list_physical_devices("GPU"))
+    else:
+        return 0
+
+
 def get_tests_dir(append_path=None):
    """
    Args: