[Perceiver] Skip multi-gpu tests for now (#14813)
* [Perceiver] Skip multi-gpu tests for now
* Update tests/test_modeling_perceiver.py
* up
* up
This commit is contained in:
committed by
GitHub
parent
8a818c26cb
commit
952a77b05d
@@ -86,6 +86,10 @@ is implemented in the library. Note that the models available in the library onl
|
|||||||
with the Perceiver. There are many more use cases, including question answering, named-entity recognition, object detection,
|
with the Perceiver. There are many more use cases, including question answering, named-entity recognition, object detection,
|
||||||
audio classification, video classification, etc.
|
audio classification, video classification, etc.
|
||||||
|
|
||||||
|
**Note**:
|
||||||
|
|
||||||
|
- Perceiver does **not** work with `torch.nn.DataParallel` due to a bug in PyTorch, see [issue #36035](https://github.com/pytorch/pytorch/issues/36035)
|
||||||
|
|
||||||
## Perceiver specific outputs
|
## Perceiver specific outputs
|
||||||
|
|
||||||
[[autodoc]] models.perceiver.modeling_perceiver.PerceiverModelOutput
|
[[autodoc]] models.perceiver.modeling_perceiver.PerceiverModelOutput
|
||||||
|
|||||||
@@ -35,6 +35,11 @@ while being much more memory-efficient and much faster on long sequences.*
|
|||||||
This model was contributed by `patrickvonplaten <https://huggingface.co/patrickvonplaten>`__. The Authors' code can be
|
This model was contributed by `patrickvonplaten <https://huggingface.co/patrickvonplaten>`__. The Authors' code can be
|
||||||
found `here <https://github.com/google/trax/tree/master/trax/models/reformer>`__.
|
found `here <https://github.com/google/trax/tree/master/trax/models/reformer>`__.
|
||||||
|
|
||||||
|
**Note**:
|
||||||
|
|
||||||
|
- Reformer does **not** work with `torch.nn.DataParallel` due to a bug in PyTorch, see `issue #36035
|
||||||
|
<https://github.com/pytorch/pytorch/issues/36035>`__
|
||||||
|
|
||||||
Axial Positional Encodings
|
Axial Positional Encodings
|
||||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||||
|
|
||||||
|
|||||||
@@ -44,6 +44,11 @@ Tips:
|
|||||||
This model was contributed by `thomwolf <https://huggingface.co/thomwolf>`__. The original code can be found `here
|
This model was contributed by `thomwolf <https://huggingface.co/thomwolf>`__. The original code can be found `here
|
||||||
<https://github.com/kimiyoung/transformer-xl>`__.
|
<https://github.com/kimiyoung/transformer-xl>`__.
|
||||||
|
|
||||||
|
**Note**:
|
||||||
|
|
||||||
|
- TransformerXL does **not** work with `torch.nn.DataParallel` due to a bug in PyTorch, see `issue #36035
|
||||||
|
<https://github.com/pytorch/pytorch/issues/36035>`__
|
||||||
|
|
||||||
|
|
||||||
TransfoXLConfig
|
TransfoXLConfig
|
||||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||||
|
|||||||
@@ -2128,7 +2128,9 @@ class PerceiverBasicDecoder(PerceiverAbstractDecoder):
|
|||||||
# to get the indices for the unflattened array
|
# to get the indices for the unflattened array
|
||||||
# unravel_index returns a tuple (x_idx, y_idx, ...)
|
# unravel_index returns a tuple (x_idx, y_idx, ...)
|
||||||
# stack to get the [n, d] tensor of coordinates
|
# stack to get the [n, d] tensor of coordinates
|
||||||
indices = list(torch.from_numpy(x) for x in np.unravel_index(subsampled_points, self.output_index_dims))
|
indices = list(
|
||||||
|
torch.from_numpy(x) for x in np.unravel_index(subsampled_points.cpu(), self.output_index_dims)
|
||||||
|
)
|
||||||
pos = torch.stack(indices, dim=1)
|
pos = torch.stack(indices, dim=1)
|
||||||
batch_size = inputs.shape[0]
|
batch_size = inputs.shape[0]
|
||||||
# Map these coordinates to [-1, 1]
|
# Map these coordinates to [-1, 1]
|
||||||
|
|||||||
@@ -758,29 +758,11 @@ class PerceiverModelTest(ModelTesterMixin, unittest.TestCase):
|
|||||||
loss.backward()
|
loss.backward()
|
||||||
|
|
||||||
@require_torch_multi_gpu
|
@require_torch_multi_gpu
|
||||||
|
@unittest.skip(
|
||||||
|
reason="Perceiver does not work with data parallel (DP) because of a bug in PyTorch: https://github.com/pytorch/pytorch/issues/36035"
|
||||||
|
)
|
||||||
def test_multi_gpu_data_parallel_forward(self):
|
def test_multi_gpu_data_parallel_forward(self):
|
||||||
for model_class in self.all_model_classes:
|
pass
|
||||||
config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_model_class(model_class)
|
|
||||||
|
|
||||||
# some params shouldn't be scattered by nn.DataParallel
|
|
||||||
# so just remove them if they are present.
|
|
||||||
blacklist_non_batched_params = ["head_mask", "decoder_head_mask", "cross_attn_head_mask"]
|
|
||||||
for k in blacklist_non_batched_params:
|
|
||||||
inputs_dict.pop(k, None)
|
|
||||||
|
|
||||||
# move input tensors to cuda:O
|
|
||||||
for k, v in inputs_dict.items():
|
|
||||||
if torch.is_tensor(v):
|
|
||||||
inputs_dict[k] = v.to(0)
|
|
||||||
|
|
||||||
model = model_class(config=config)
|
|
||||||
model.to(0)
|
|
||||||
model.eval()
|
|
||||||
|
|
||||||
# Wrap model in nn.DataParallel
|
|
||||||
model = nn.DataParallel(model)
|
|
||||||
with torch.no_grad():
|
|
||||||
_ = model(**self._prepare_for_class(inputs_dict, model_class))
|
|
||||||
|
|
||||||
@unittest.skip(reason="Perceiver models don't have a typical head like is the case with BERT")
|
@unittest.skip(reason="Perceiver models don't have a typical head like is the case with BERT")
|
||||||
def test_save_load_fast_init_from_base(self):
|
def test_save_load_fast_init_from_base(self):
|
||||||
|
|||||||
@@ -573,8 +573,10 @@ class ReformerTesterMixin:
|
|||||||
self.model_tester.create_and_check_reformer_model_fp16_generate(*config_and_inputs)
|
self.model_tester.create_and_check_reformer_model_fp16_generate(*config_and_inputs)
|
||||||
|
|
||||||
@require_torch_multi_gpu
|
@require_torch_multi_gpu
|
||||||
|
@unittest.skip(
|
||||||
|
reason="Reformer does not work with data parallel (DP) because of a bug in PyTorch: https://github.com/pytorch/pytorch/issues/36035"
|
||||||
|
)
|
||||||
def test_multi_gpu_data_parallel_forward(self):
|
def test_multi_gpu_data_parallel_forward(self):
|
||||||
# Opt-out of this test.
|
|
||||||
pass
|
pass
|
||||||
|
|
||||||
def test_for_sequence_classification(self):
|
def test_for_sequence_classification(self):
|
||||||
|
|||||||
@@ -232,8 +232,10 @@ class TransfoXLModelTest(ModelTesterMixin, GenerationTesterMixin, unittest.TestC
|
|||||||
return
|
return
|
||||||
|
|
||||||
@require_torch_multi_gpu
|
@require_torch_multi_gpu
|
||||||
|
@unittest.skip(
|
||||||
|
reason="Transfo-XL does not work with data parallel (DP) because of a bug in PyTorch: https://github.com/pytorch/pytorch/issues/36035"
|
||||||
|
)
|
||||||
def test_multi_gpu_data_parallel_forward(self):
|
def test_multi_gpu_data_parallel_forward(self):
|
||||||
# Opt-out of this test.
|
|
||||||
pass
|
pass
|
||||||
|
|
||||||
@slow
|
@slow
|
||||||
|
|||||||
Reference in New Issue
Block a user