[Perceiver] Skip multi-gpu tests for now (#14813)
* [Perceiver] Skip multi-gpu tests for now * Update tests/test_modeling_perceiver.py * up * up
This commit is contained in:
committed by
GitHub
parent
8a818c26cb
commit
952a77b05d
@@ -86,6 +86,10 @@ is implemented in the library. Note that the models available in the library onl
|
||||
with the Perceiver. There are many more use cases, including question answering, named-entity recognition, object detection,
|
||||
audio classification, video classification, etc.
|
||||
|
||||
**Note**:
|
||||
|
||||
- Perceiver does **not** work with `torch.nn.DataParallel` due to a bug in PyTorch, see [issue #36035](https://github.com/pytorch/pytorch/issues/36035)
|
||||
|
||||
## Perceiver specific outputs
|
||||
|
||||
[[autodoc]] models.perceiver.modeling_perceiver.PerceiverModelOutput
|
||||
@@ -208,4 +212,4 @@ audio classification, video classification, etc.
|
||||
## PerceiverForMultimodalAutoencoding
|
||||
|
||||
[[autodoc]] PerceiverForMultimodalAutoencoding
|
||||
- forward
|
||||
- forward
|
||||
|
||||
@@ -35,6 +35,11 @@ while being much more memory-efficient and much faster on long sequences.*
|
||||
This model was contributed by `patrickvonplaten <https://huggingface.co/patrickvonplaten>`__. The Authors' code can be
|
||||
found `here <https://github.com/google/trax/tree/master/trax/models/reformer>`__.
|
||||
|
||||
**Note**:
|
||||
|
||||
- Reformer does **not** work with ``torch.nn.DataParallel`` due to a bug in PyTorch, see `issue #36035
|
||||
<https://github.com/pytorch/pytorch/issues/36035>`__
|
||||
|
||||
Axial Positional Encodings
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
|
||||
@@ -44,6 +44,11 @@ Tips:
|
||||
This model was contributed by `thomwolf <https://huggingface.co/thomwolf>`__. The original code can be found `here
|
||||
<https://github.com/kimiyoung/transformer-xl>`__.
|
||||
|
||||
**Note**:
|
||||
|
||||
- TransformerXL does **not** work with ``torch.nn.DataParallel`` due to a bug in PyTorch, see `issue #36035
|
||||
<https://github.com/pytorch/pytorch/issues/36035>`__
|
||||
|
||||
|
||||
TransfoXLConfig
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
@@ -2128,7 +2128,9 @@ class PerceiverBasicDecoder(PerceiverAbstractDecoder):
|
||||
# to get the indices for the unflattened array
|
||||
# unravel_index returns a tuple (x_idx, y_idx, ...)
|
||||
# stack to get the [n, d] tensor of coordinates
|
||||
indices = list(torch.from_numpy(x) for x in np.unravel_index(subsampled_points, self.output_index_dims))
|
||||
indices = list(
|
||||
torch.from_numpy(x) for x in np.unravel_index(subsampled_points.cpu(), self.output_index_dims)
|
||||
)
|
||||
pos = torch.stack(indices, dim=1)
|
||||
batch_size = inputs.shape[0]
|
||||
# Map these coordinates to [-1, 1]
|
||||
|
||||
@@ -758,29 +758,11 @@ class PerceiverModelTest(ModelTesterMixin, unittest.TestCase):
|
||||
loss.backward()
|
||||
|
||||
@require_torch_multi_gpu
|
||||
@unittest.skip(
|
||||
reason="Perceiver does not work with data parallel (DP) because of a bug in PyTorch: https://github.com/pytorch/pytorch/issues/36035"
|
||||
)
|
||||
def test_multi_gpu_data_parallel_forward(self):
|
||||
for model_class in self.all_model_classes:
|
||||
config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_model_class(model_class)
|
||||
|
||||
# some params shouldn't be scattered by nn.DataParallel
|
||||
# so just remove them if they are present.
|
||||
blacklist_non_batched_params = ["head_mask", "decoder_head_mask", "cross_attn_head_mask"]
|
||||
for k in blacklist_non_batched_params:
|
||||
inputs_dict.pop(k, None)
|
||||
|
||||
# move input tensors to cuda:0
|
||||
for k, v in inputs_dict.items():
|
||||
if torch.is_tensor(v):
|
||||
inputs_dict[k] = v.to(0)
|
||||
|
||||
model = model_class(config=config)
|
||||
model.to(0)
|
||||
model.eval()
|
||||
|
||||
# Wrap model in nn.DataParallel
|
||||
model = nn.DataParallel(model)
|
||||
with torch.no_grad():
|
||||
_ = model(**self._prepare_for_class(inputs_dict, model_class))
|
||||
pass
|
||||
|
||||
@unittest.skip(reason="Perceiver models don't have a typical head like is the case with BERT")
|
||||
def test_save_load_fast_init_from_base(self):
|
||||
|
||||
@@ -573,8 +573,10 @@ class ReformerTesterMixin:
|
||||
self.model_tester.create_and_check_reformer_model_fp16_generate(*config_and_inputs)
|
||||
|
||||
@require_torch_multi_gpu
|
||||
@unittest.skip(
|
||||
reason="Reformer does not work with data parallel (DP) because of a bug in PyTorch: https://github.com/pytorch/pytorch/issues/36035"
|
||||
)
|
||||
def test_multi_gpu_data_parallel_forward(self):
|
||||
# Opt-out of this test.
|
||||
pass
|
||||
|
||||
def test_for_sequence_classification(self):
|
||||
|
||||
@@ -232,8 +232,10 @@ class TransfoXLModelTest(ModelTesterMixin, GenerationTesterMixin, unittest.TestC
|
||||
return
|
||||
|
||||
@require_torch_multi_gpu
|
||||
@unittest.skip(
|
||||
reason="Transfo-XL does not work with data parallel (DP) because of a bug in PyTorch: https://github.com/pytorch/pytorch/issues/36035"
|
||||
)
|
||||
def test_multi_gpu_data_parallel_forward(self):
|
||||
# Opt-out of this test.
|
||||
pass
|
||||
|
||||
@slow
|
||||
|
||||
Reference in New Issue
Block a user