From 505f2d749eb52f4b8b803d8c9a5f04442446e6c2 Mon Sep 17 00:00:00 2001 From: Patrick von Platen Date: Thu, 20 Aug 2020 19:23:47 +0200 Subject: [PATCH] [Tests] fix attention masks in Tests (#6621) * fix distilbert * fix typo --- tests/test_modeling_albert.py | 4 ++-- tests/test_modeling_bert.py | 4 ++-- tests/test_modeling_common.py | 10 +++++++--- tests/test_modeling_ctrl.py | 4 ++-- tests/test_modeling_distilbert.py | 4 ++-- tests/test_modeling_dpr.py | 4 ++-- tests/test_modeling_electra.py | 4 ++-- tests/test_modeling_flaubert.py | 4 ++-- tests/test_modeling_gpt2.py | 4 ++-- tests/test_modeling_longformer.py | 4 ++-- tests/test_modeling_mobilebert.py | 4 ++-- tests/test_modeling_reformer.py | 4 ++-- tests/test_modeling_roberta.py | 4 ++-- tests/test_modeling_xlm.py | 4 ++-- tests/test_modeling_xlnet.py | 4 ++-- 15 files changed, 35 insertions(+), 31 deletions(-) diff --git a/tests/test_modeling_albert.py b/tests/test_modeling_albert.py index 7abda85600..15459703bd 100644 --- a/tests/test_modeling_albert.py +++ b/tests/test_modeling_albert.py @@ -20,7 +20,7 @@ from transformers import is_torch_available from transformers.testing_utils import require_torch, slow, torch_device from .test_configuration_common import ConfigTester -from .test_modeling_common import ModelTesterMixin, ids_tensor +from .test_modeling_common import ModelTesterMixin, ids_tensor, random_attention_mask if is_torch_available(): @@ -71,7 +71,7 @@ class AlbertModelTester: input_mask = None if self.use_input_mask: - input_mask = ids_tensor([self.batch_size, self.seq_length], vocab_size=2) + input_mask = random_attention_mask([self.batch_size, self.seq_length]) token_type_ids = None if self.use_token_type_ids: diff --git a/tests/test_modeling_bert.py b/tests/test_modeling_bert.py index fe336df743..0e640f5f35 100755 --- a/tests/test_modeling_bert.py +++ b/tests/test_modeling_bert.py @@ -20,7 +20,7 @@ from transformers import is_torch_available from transformers.testing_utils import require_torch, slow, torch_device from .test_configuration_common import ConfigTester -from .test_modeling_common import ModelTesterMixin, floats_tensor, ids_tensor +from .test_modeling_common import ModelTesterMixin, floats_tensor, ids_tensor, random_attention_mask if is_torch_available(): @@ -93,7 +93,7 @@ class BertModelTester: input_mask = None if self.use_input_mask: - input_mask = ids_tensor([self.batch_size, self.seq_length], vocab_size=2) + input_mask = random_attention_mask([self.batch_size, self.seq_length]) token_type_ids = None if self.use_token_type_ids: diff --git a/tests/test_modeling_common.py b/tests/test_modeling_common.py index 1cab7b1970..9186ce548d 100755 --- a/tests/test_modeling_common.py +++ b/tests/test_modeling_common.py @@ -704,9 +704,6 @@ class ModelTesterMixin: recursive_check(tuple_iterable_value, dict_iterable_value) elif tuple_object is None: return - elif torch.isinf(tuple_object).any() and torch.isinf(dict_object).any(): - # TODO: (Lysandre) - maybe take a look if that's ok here - return else: self.assertTrue( torch.allclose(tuple_object, dict_object, atol=1e-5), @@ -937,6 +934,13 @@ def ids_tensor(shape, vocab_size, rng=None, name=None): return torch.tensor(data=values, dtype=torch.long, device=torch_device).view(shape).contiguous() +def random_attention_mask(shape, rng=None, name=None): + attn_mask = ids_tensor(shape, vocab_size=2, rng=None, name=None) + # make sure that at least one token is attended to for each batch + attn_mask[:, -1] = 1 + return attn_mask + + def floats_tensor(shape, scale=1.0, rng=None, name=None): """Creates a random float32 tensor""" if rng is None: diff --git a/tests/test_modeling_ctrl.py b/tests/test_modeling_ctrl.py index 9920cde031..0cf997a1ab 100644 --- a/tests/test_modeling_ctrl.py +++ b/tests/test_modeling_ctrl.py @@ -19,7 +19,7 @@ from transformers import is_torch_available from transformers.testing_utils import require_torch, slow, torch_device from .test_configuration_common import ConfigTester -from .test_modeling_common import ModelTesterMixin, ids_tensor +from .test_modeling_common import ModelTesterMixin, ids_tensor, random_attention_mask if is_torch_available(): @@ -60,7 +60,7 @@ class CTRLModelTester: input_mask = None if self.use_input_mask: - input_mask = ids_tensor([self.batch_size, self.seq_length], vocab_size=2) + input_mask = random_attention_mask([self.batch_size, self.seq_length]) token_type_ids = None if self.use_token_type_ids: diff --git a/tests/test_modeling_distilbert.py b/tests/test_modeling_distilbert.py index 8e76e23dd2..1a1dd4d7e9 100644 --- a/tests/test_modeling_distilbert.py +++ b/tests/test_modeling_distilbert.py @@ -20,7 +20,7 @@ from transformers import is_torch_available from transformers.testing_utils import require_torch, slow, torch_device from .test_configuration_common import ConfigTester -from .test_modeling_common import ModelTesterMixin, ids_tensor +from .test_modeling_common import ModelTesterMixin, ids_tensor, random_attention_mask if is_torch_available(): @@ -89,7 +89,7 @@ if is_torch_available(): input_mask = None if self.use_input_mask: - input_mask = ids_tensor([self.batch_size, self.seq_length], vocab_size=2) + input_mask = random_attention_mask([self.batch_size, self.seq_length]) sequence_labels = None token_labels = None diff --git a/tests/test_modeling_dpr.py b/tests/test_modeling_dpr.py index d6206f1717..666c7898bf 100644 --- a/tests/test_modeling_dpr.py +++ b/tests/test_modeling_dpr.py @@ -20,7 +20,7 @@ from transformers import is_torch_available from transformers.testing_utils import require_torch, slow, torch_device from .test_configuration_common import ConfigTester -from .test_modeling_common import ModelTesterMixin, ids_tensor +from .test_modeling_common import ModelTesterMixin, ids_tensor, random_attention_mask if is_torch_available(): @@ -88,7 +88,7 @@ class DPRModelTester: input_mask = None if self.use_input_mask: - input_mask = ids_tensor([self.batch_size, self.seq_length], vocab_size=2) + input_mask = random_attention_mask([self.batch_size, self.seq_length]) token_type_ids = None if self.use_token_type_ids: diff --git a/tests/test_modeling_electra.py b/tests/test_modeling_electra.py index 88c0eafa57..ae13e682aa 100644 --- a/tests/test_modeling_electra.py +++ b/tests/test_modeling_electra.py @@ -20,7 +20,7 @@ from transformers import is_torch_available from transformers.testing_utils import require_torch, slow, torch_device from .test_configuration_common import ConfigTester -from .test_modeling_common import ModelTesterMixin, ids_tensor +from .test_modeling_common import ModelTesterMixin, ids_tensor, random_attention_mask if is_torch_available(): @@ -69,7 +69,7 @@ class ElectraModelTester: input_mask = None if self.use_input_mask: - input_mask = ids_tensor([self.batch_size, self.seq_length], vocab_size=2) + input_mask = random_attention_mask([self.batch_size, self.seq_length]) token_type_ids = None if self.use_token_type_ids: diff --git a/tests/test_modeling_flaubert.py b/tests/test_modeling_flaubert.py index aaecafc435..b748945618 100644 --- a/tests/test_modeling_flaubert.py +++ b/tests/test_modeling_flaubert.py @@ -20,7 +20,7 @@ from transformers import is_torch_available from transformers.testing_utils import require_torch, slow, torch_device from .test_configuration_common import ConfigTester -from .test_modeling_common import ModelTesterMixin, ids_tensor +from .test_modeling_common import ModelTesterMixin, ids_tensor, random_attention_mask if is_torch_available(): @@ -72,7 +72,7 @@ class FlaubertModelTester(object): def prepare_config_and_inputs(self): input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size) - input_mask = ids_tensor([self.batch_size, self.seq_length], 2).float() + input_mask = random_attention_mask([self.batch_size, self.seq_length]) input_lengths = None if self.use_input_lengths: diff --git a/tests/test_modeling_gpt2.py b/tests/test_modeling_gpt2.py index dd4ca1d304..19193a31df 100644 --- a/tests/test_modeling_gpt2.py +++ b/tests/test_modeling_gpt2.py @@ -20,7 +20,7 @@ from transformers import is_torch_available from transformers.testing_utils import require_torch, slow, torch_device from .test_configuration_common import ConfigTester -from .test_modeling_common import ModelTesterMixin, floats_tensor, ids_tensor +from .test_modeling_common import ModelTesterMixin, floats_tensor, ids_tensor, random_attention_mask if is_torch_available(): @@ -92,7 +92,7 @@ class GPT2ModelTester: input_mask = None if self.use_input_mask: - input_mask = ids_tensor([self.batch_size, self.seq_length], vocab_size=2) + input_mask = random_attention_mask([self.batch_size, self.seq_length]) token_type_ids = None if self.use_token_type_ids: diff --git a/tests/test_modeling_longformer.py b/tests/test_modeling_longformer.py index 0730dce654..30f38f7776 100644 --- a/tests/test_modeling_longformer.py +++ b/tests/test_modeling_longformer.py @@ -20,7 +20,7 @@ from transformers import is_torch_available from transformers.testing_utils import require_torch, slow, torch_device from .test_configuration_common import ConfigTester -from .test_modeling_common import ModelTesterMixin, ids_tensor +from .test_modeling_common import ModelTesterMixin, ids_tensor, random_attention_mask if is_torch_available(): @@ -82,7 +82,7 @@ class LongformerModelTester: input_mask = None if self.use_input_mask: - input_mask = ids_tensor([self.batch_size, self.seq_length], vocab_size=2) + input_mask = random_attention_mask([self.batch_size, self.seq_length]) token_type_ids = None if self.use_token_type_ids: diff --git a/tests/test_modeling_mobilebert.py b/tests/test_modeling_mobilebert.py index cedc075b9f..a10fb376a1 100644 --- a/tests/test_modeling_mobilebert.py +++ b/tests/test_modeling_mobilebert.py @@ -20,7 +20,7 @@ from transformers import is_torch_available from transformers.testing_utils import require_torch, slow, torch_device from .test_configuration_common import ConfigTester -from .test_modeling_common import ModelTesterMixin, floats_tensor, ids_tensor +from .test_modeling_common import ModelTesterMixin, floats_tensor, ids_tensor, random_attention_mask if is_torch_available(): @@ -94,7 +94,7 @@ class MobileBertModelTester: input_mask = None if self.use_input_mask: - input_mask = ids_tensor([self.batch_size, self.seq_length], vocab_size=2) + input_mask = random_attention_mask([self.batch_size, self.seq_length]) token_type_ids = None if self.use_token_type_ids: diff --git a/tests/test_modeling_reformer.py b/tests/test_modeling_reformer.py index b6e3df069f..ca4bd1b5f9 100644 --- a/tests/test_modeling_reformer.py +++ b/tests/test_modeling_reformer.py @@ -19,7 +19,7 @@ from transformers import is_torch_available from transformers.testing_utils import require_multigpu, require_torch, slow, torch_device from .test_configuration_common import ConfigTester -from .test_modeling_common import ModelTesterMixin, floats_tensor, ids_tensor +from .test_modeling_common import ModelTesterMixin, floats_tensor, ids_tensor, random_attention_mask if is_torch_available(): @@ -133,7 +133,7 @@ class ReformerModelTester: input_mask = None if self.use_input_mask: - input_mask = ids_tensor([self.batch_size, self.seq_length], vocab_size=2) + input_mask = random_attention_mask([self.batch_size, self.seq_length]) choice_labels = None if self.use_labels: diff --git a/tests/test_modeling_roberta.py b/tests/test_modeling_roberta.py index ddf4695127..7c432a9b87 100644 --- a/tests/test_modeling_roberta.py +++ b/tests/test_modeling_roberta.py @@ -20,7 +20,7 @@ from transformers import is_torch_available from transformers.testing_utils import require_torch, slow, torch_device from .test_configuration_common import ConfigTester -from .test_modeling_common import ModelTesterMixin, floats_tensor, ids_tensor +from .test_modeling_common import ModelTesterMixin, floats_tensor, ids_tensor, random_attention_mask if is_torch_available(): @@ -71,7 +71,7 @@ class RobertaModelTester: input_mask = None if self.use_input_mask: - input_mask = ids_tensor([self.batch_size, self.seq_length], vocab_size=2) + input_mask = random_attention_mask([self.batch_size, self.seq_length]) token_type_ids = None if self.use_token_type_ids: diff --git a/tests/test_modeling_xlm.py b/tests/test_modeling_xlm.py index 8114cd6ad8..dc5d89cec8 100644 --- a/tests/test_modeling_xlm.py +++ b/tests/test_modeling_xlm.py @@ -20,7 +20,7 @@ from transformers import is_torch_available from transformers.testing_utils import require_torch, slow, torch_device from .test_configuration_common import ConfigTester -from .test_modeling_common import ModelTesterMixin, ids_tensor +from .test_modeling_common import ModelTesterMixin, ids_tensor, random_attention_mask if is_torch_available(): @@ -73,7 +73,7 @@ class XLMModelTester: def prepare_config_and_inputs(self): input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size) - input_mask = ids_tensor([self.batch_size, self.seq_length], 2).float() + input_mask = random_attention_mask([self.batch_size, self.seq_length]) input_lengths = None if self.use_input_lengths: diff --git a/tests/test_modeling_xlnet.py b/tests/test_modeling_xlnet.py index 0408b25f63..31738a5bfd 100644 --- a/tests/test_modeling_xlnet.py +++ b/tests/test_modeling_xlnet.py @@ -21,7 +21,7 @@ from transformers import is_torch_available from transformers.testing_utils import require_torch, slow, torch_device from .test_configuration_common import ConfigTester -from .test_modeling_common import ModelTesterMixin, ids_tensor +from .test_modeling_common import ModelTesterMixin, ids_tensor, random_attention_mask if is_torch_available(): @@ -100,7 +100,7 @@ class XLNetModelTester: input_ids_1 = ids_tensor([self.batch_size, self.seq_length], self.vocab_size) input_ids_2 = ids_tensor([self.batch_size, self.seq_length], self.vocab_size) segment_ids = ids_tensor([self.batch_size, self.seq_length], self.type_vocab_size) - input_mask = ids_tensor([self.batch_size, self.seq_length], 2).float() + input_mask = random_attention_mask([self.batch_size, self.seq_length]) input_ids_q = ids_tensor([self.batch_size, self.seq_length + 1], self.vocab_size) perm_mask = torch.zeros(