From 952a77b05da2484eb3f5d9d1cd67612dbb949e2d Mon Sep 17 00:00:00 2001 From: Patrick von Platen Date: Mon, 20 Dec 2021 15:22:50 +0100 Subject: [PATCH] [Perceiver] Skip multi-gpu tests for now (#14813) * [Perceiver] Skip multi-gpu tests for now * Update tests/test_modeling_perceiver.py * up * up --- docs/source/model_doc/perceiver.mdx | 6 ++++- docs/source/model_doc/reformer.rst | 5 ++++ docs/source/model_doc/transformerxl.rst | 5 ++++ .../models/perceiver/modeling_perceiver.py | 4 ++- tests/test_modeling_perceiver.py | 26 +++---------------- tests/test_modeling_reformer.py | 4 ++- tests/test_modeling_transfo_xl.py | 4 ++- 7 files changed, 28 insertions(+), 26 deletions(-) diff --git a/docs/source/model_doc/perceiver.mdx b/docs/source/model_doc/perceiver.mdx index ff9bbaedb4..b474074e8b 100644 --- a/docs/source/model_doc/perceiver.mdx +++ b/docs/source/model_doc/perceiver.mdx @@ -86,6 +86,10 @@ is implemented in the library. Note that the models available in the library onl with the Perceiver. There are many more use cases, including question answering, named-entity recognition, object detection, audio classification, video classification, etc. +**Note**: + +- Perceiver does **not** work with `torch.nn.DataParallel` due to a bug in PyTorch, see [issue #36035](https://github.com/pytorch/pytorch/issues/36035) + ## Perceiver specific outputs [[autodoc]] models.perceiver.modeling_perceiver.PerceiverModelOutput @@ -208,4 +212,4 @@ audio classification, video classification, etc. ## PerceiverForMultimodalAutoencoding [[autodoc]] PerceiverForMultimodalAutoencoding - - forward \ No newline at end of file + - forward diff --git a/docs/source/model_doc/reformer.rst b/docs/source/model_doc/reformer.rst index ea48ce5368..6842884d76 100644 --- a/docs/source/model_doc/reformer.rst +++ b/docs/source/model_doc/reformer.rst @@ -35,6 +35,11 @@ while being much more memory-efficient and much faster on long sequences.* This model was contributed by `patrickvonplaten `__. The Authors' code can be found `here `__. +**Note**: + +- Reformer does **not** work with `torch.nn.DataParallel` due to a bug in PyTorch, see `issue #36035 + `__ + Axial Positional Encodings ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/docs/source/model_doc/transformerxl.rst b/docs/source/model_doc/transformerxl.rst index df4ebecbf3..178268f522 100644 --- a/docs/source/model_doc/transformerxl.rst +++ b/docs/source/model_doc/transformerxl.rst @@ -44,6 +44,11 @@ Tips: This model was contributed by `thomwolf `__. The original code can be found `here `__. +**Note**: + +- TransformerXL does **not** work with `torch.nn.DataParallel` due to a bug in PyTorch, see `issue #36035 + `__ + TransfoXLConfig ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/src/transformers/models/perceiver/modeling_perceiver.py b/src/transformers/models/perceiver/modeling_perceiver.py index ede1063130..d3f91c34df 100755 --- a/src/transformers/models/perceiver/modeling_perceiver.py +++ b/src/transformers/models/perceiver/modeling_perceiver.py @@ -2128,7 +2128,9 @@ class PerceiverBasicDecoder(PerceiverAbstractDecoder): # to get the indices for the unflattened array # unravel_index returns a tuple (x_idx, y_idx, ...) # stack to get the [n, d] tensor of coordinates - indices = list(torch.from_numpy(x) for x in np.unravel_index(subsampled_points, self.output_index_dims)) + indices = list( + torch.from_numpy(x) for x in np.unravel_index(subsampled_points.cpu(), self.output_index_dims) + ) pos = torch.stack(indices, dim=1) batch_size = inputs.shape[0] # Map these coordinates to [-1, 1] diff --git a/tests/test_modeling_perceiver.py b/tests/test_modeling_perceiver.py index 4e6e271448..128be2d371 100644 --- a/tests/test_modeling_perceiver.py +++ b/tests/test_modeling_perceiver.py @@ -758,29 +758,11 @@ class PerceiverModelTest(ModelTesterMixin, unittest.TestCase): loss.backward() @require_torch_multi_gpu + @unittest.skip( + reason="Perceiver does not work with data parallel (DP) because of a bug in PyTorch: https://github.com/pytorch/pytorch/issues/36035" + ) def test_multi_gpu_data_parallel_forward(self): - for model_class in self.all_model_classes: - config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_model_class(model_class) - - # some params shouldn't be scattered by nn.DataParallel - # so just remove them if they are present. - blacklist_non_batched_params = ["head_mask", "decoder_head_mask", "cross_attn_head_mask"] - for k in blacklist_non_batched_params: - inputs_dict.pop(k, None) - - # move input tensors to cuda:O - for k, v in inputs_dict.items(): - if torch.is_tensor(v): - inputs_dict[k] = v.to(0) - - model = model_class(config=config) - model.to(0) - model.eval() - - # Wrap model in nn.DataParallel - model = nn.DataParallel(model) - with torch.no_grad(): - _ = model(**self._prepare_for_class(inputs_dict, model_class)) + pass @unittest.skip(reason="Perceiver models don't have a typical head like is the case with BERT") def test_save_load_fast_init_from_base(self): diff --git a/tests/test_modeling_reformer.py b/tests/test_modeling_reformer.py index dff424bb99..4ccaa41245 100644 --- a/tests/test_modeling_reformer.py +++ b/tests/test_modeling_reformer.py @@ -573,8 +573,10 @@ class ReformerTesterMixin: self.model_tester.create_and_check_reformer_model_fp16_generate(*config_and_inputs) @require_torch_multi_gpu + @unittest.skip( + reason="Reformer does not work with data parallel (DP) because of a bug in PyTorch: https://github.com/pytorch/pytorch/issues/36035" + ) def test_multi_gpu_data_parallel_forward(self): - # Opt-out of this test. pass def test_for_sequence_classification(self): diff --git a/tests/test_modeling_transfo_xl.py b/tests/test_modeling_transfo_xl.py index 4885e97329..c69f3b2490 100644 --- a/tests/test_modeling_transfo_xl.py +++ b/tests/test_modeling_transfo_xl.py @@ -232,8 +232,10 @@ class TransfoXLModelTest(ModelTesterMixin, GenerationTesterMixin, unittest.TestC return @require_torch_multi_gpu + @unittest.skip( + reason="Transfo-XL does not work with data parallel (DP) because of a bug in PyTorch: https://github.com/pytorch/pytorch/issues/36035" + ) def test_multi_gpu_data_parallel_forward(self): - # Opt-out of this test. pass @slow