Fix nn.DataParallel compatibility in PyTorch 1.5 (#4300)

* Test case for #3936 * multigpu tests pass on pytorch 1.4.0 * Fixup * multigpu tests pass on pytorch 1.5.0 * Update src/transformers/modeling_utils.py * Update src/transformers/modeling_utils.py * rename multigpu to require_multigpu * more doc
2020-05-18 20:34:50 -04:00
parent 9de4afa897
commit 4c06893610
12 changed files with 95 additions and 21 deletions
--- a/tests/test_modeling_common.py
+++ b/tests/test_modeling_common.py
@@ -23,7 +23,7 @@ from typing import List

 from transformers import is_torch_available

-from .utils import require_torch, slow, torch_device
+from .utils import require_multigpu, require_torch, slow, torch_device


 if is_torch_available():
@@ -758,6 +758,31 @@ class ModelTesterMixin:
                        return True
        return False

+    @require_multigpu
+    def test_multigpu_data_parallel_forward(self):
+        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
+
+        # some params shouldn't be scattered by nn.DataParallel
+        # so just remove them if they are present.
+        blacklist_non_batched_params = ["head_mask"]
+        for k in blacklist_non_batched_params:
+            inputs_dict.pop(k, None)
+
+        # move input tensors to cuda:0
+        for k, v in inputs_dict.items():
+            if torch.is_tensor(v):
+                inputs_dict[k] = v.to(0)
+
+        for model_class in self.all_model_classes:
+            model = model_class(config=config)
+            model.to(0)
+            model.eval()
+
+            # Wrap model in nn.DataParallel
+            model = torch.nn.DataParallel(model)
+            with torch.no_grad():
+                _ = model(**inputs_dict)
+

 global_rng = random.Random()

--- a/tests/test_modeling_ctrl.py
+++ b/tests/test_modeling_ctrl.py
@@ -41,7 +41,7 @@ class CTRLModelTest(ModelTesterMixin, unittest.TestCase):
        def __init__(
            self,
            parent,
-            batch_size=13,
+            batch_size=14,
            seq_length=7,
            is_training=True,
            use_token_type_ids=True,
--- a/tests/test_modeling_gpt2.py
+++ b/tests/test_modeling_gpt2.py
@@ -46,7 +46,7 @@ class GPT2ModelTest(ModelTesterMixin, unittest.TestCase):
        def __init__(
            self,
            parent,
-            batch_size=13,
+            batch_size=14,
            seq_length=7,
            is_training=True,
            use_token_type_ids=True,
--- a/tests/test_modeling_reformer.py
+++ b/tests/test_modeling_reformer.py
@@ -19,7 +19,7 @@ from transformers import is_torch_available

 from .test_configuration_common import ConfigTester
 from .test_modeling_common import ModelTesterMixin, floats_tensor, ids_tensor
-from .utils import require_torch, slow, torch_device
+from .utils import require_multigpu, require_torch, slow, torch_device


 if is_torch_available():
@@ -448,9 +448,14 @@ class ReformerTesterMixin:
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_reformer_model_fp16_generate(*config_and_inputs)

+    @require_multigpu
+    def test_multigpu_data_parallel_forward(self):
+        # Opt out of this test.
+        pass
+

@require_torch
-class ReformerLocalAttnModelTest(ModelTesterMixin, ReformerTesterMixin, unittest.TestCase):
+class ReformerLocalAttnModelTest(ReformerTesterMixin, ModelTesterMixin, unittest.TestCase):
    all_model_classes = (ReformerModel, ReformerModelWithLMHead) if is_torch_available() else ()
    all_generative_model_classes = (ReformerModelWithLMHead,) if is_torch_available() else ()
    test_pruning = False
@@ -504,7 +509,7 @@ class ReformerLocalAttnModelTest(ModelTesterMixin, ReformerTesterMixin, unittest


@require_torch
-class ReformerLSHAttnModelTest(ModelTesterMixin, unittest.TestCase, ReformerTesterMixin):
+class ReformerLSHAttnModelTest(ReformerTesterMixin, ModelTesterMixin, unittest.TestCase):
    all_model_classes = (ReformerModel, ReformerModelWithLMHead) if is_torch_available() else ()
    all_generative_model_classes = (ReformerModelWithLMHead,) if is_torch_available() else ()
    test_pruning = False
--- a/tests/test_modeling_transfo_xl.py
+++ b/tests/test_modeling_transfo_xl.py
@@ -21,7 +21,7 @@ from transformers import is_torch_available

 from .test_configuration_common import ConfigTester
 from .test_modeling_common import ModelTesterMixin, ids_tensor
-from .utils import require_torch, slow, torch_device
+from .utils import require_multigpu, require_torch, slow, torch_device


 if is_torch_available():
@@ -43,7 +43,7 @@ class TransfoXLModelTest(ModelTesterMixin, unittest.TestCase):
        def __init__(
            self,
            parent,
-            batch_size=13,
+            batch_size=14,
            seq_length=7,
            mem_len=30,
            clamp_len=15,
@@ -207,6 +207,11 @@ class TransfoXLModelTest(ModelTesterMixin, unittest.TestCase):
        output_result = self.model_tester.create_transfo_xl_lm_head(*config_and_inputs)
        self.model_tester.check_transfo_xl_lm_head_output(output_result)

+    @require_multigpu
+    def test_multigpu_data_parallel_forward(self):
+        # Opt out of this test.
+        pass
+
    @slow
    def test_model_from_pretrained(self):
        for model_name in list(TRANSFO_XL_PRETRAINED_MODEL_ARCHIVE_MAP.keys())[:1]:
--- a/tests/test_modeling_xlnet.py
+++ b/tests/test_modeling_xlnet.py
@@ -61,7 +61,7 @@ class XLNetModelTest(ModelTesterMixin, unittest.TestCase):
        def __init__(
            self,
            parent,
-            batch_size=13,
+            batch_size=14,
            seq_length=7,
            mem_len=10,
            clamp_len=-1,
--- a/tests/utils.py
+++ b/tests/utils.py
@@ -94,6 +94,25 @@ def require_tf(test_case):
    return test_case


+def require_multigpu(test_case):
+    """
+    Decorator marking a test that requires a multi-GPU setup (in PyTorch).
+
+    These tests are skipped on a machine without multiple GPUs.
+
+    To run *only* the multigpu tests, assuming all test names contain multigpu:
+    $ pytest -sv ./tests -k "multigpu"
+    """
+    if not _torch_available:
+        return unittest.skip("test requires PyTorch")(test_case)
+
+    import torch
+
+    if torch.cuda.device_count() < 2:
+        return unittest.skip("test requires multiple GPUs")(test_case)
+    return test_case
+
+
 if _torch_available:
    # Set the USE_CUDA environment variable to select a GPU.
    torch_device = "cuda" if parse_flag_from_env("USE_CUDA") else "cpu"