From 2de2c9eccaa579be7fb2c13690772159d5577fac Mon Sep 17 00:00:00 2001 From: NielsRogge <48327001+NielsRogge@users.noreply.github.com> Date: Mon, 2 May 2022 16:28:58 +0200 Subject: [PATCH] Clean up vision tests (#17024) * Clean up tests * Make fixup Co-authored-by: Niels Rogge --- tests/beit/test_modeling_beit.py | 134 +++-------------------- tests/beit/test_modeling_flax_beit.py | 84 +-------------- tests/deit/test_modeling_deit.py | 105 ++---------------- tests/dpt/test_modeling_dpt.py | 101 +----------------- tests/vit/test_modeling_flax_vit.py | 79 +------------- tests/vit/test_modeling_tf_vit.py | 147 ++------------------------ tests/vit/test_modeling_vit.py | 97 +---------------- 7 files changed, 48 insertions(+), 699 deletions(-) diff --git a/tests/beit/test_modeling_beit.py b/tests/beit/test_modeling_beit.py index 59776f8355..ee75019435 100644 --- a/tests/beit/test_modeling_beit.py +++ b/tests/beit/test_modeling_beit.py @@ -96,9 +96,9 @@ class BeitModelTester: self.out_indices = out_indices self.num_labels = num_labels - # in BeiT, the expected seq_len equals the number of patches + 1 (we add 1 for the [CLS] token) + # in BeiT, the seq length equals the number of patches + 1 (we add 1 for the [CLS] token) num_patches = (image_size // patch_size) ** 2 - self.expected_seq_length = num_patches + 1 + self.seq_length = num_patches + 1 def prepare_config_and_inputs(self): pixel_values = floats_tensor([self.batch_size, self.num_channels, self.image_size, self.image_size]) @@ -136,16 +136,14 @@ class BeitModelTester: model.to(torch_device) model.eval() result = model(pixel_values) - self.parent.assertEqual( - result.last_hidden_state.shape, (self.batch_size, self.expected_seq_length, self.hidden_size) - ) + self.parent.assertEqual(result.last_hidden_state.shape, (self.batch_size, self.seq_length, self.hidden_size)) def create_and_check_for_masked_lm(self, config, pixel_values, labels, pixel_labels): model = BeitForMaskedImageModeling(config=config) model.to(torch_device) model.eval() result = model(pixel_values) - self.parent.assertEqual(result.logits.shape, (self.batch_size, self.expected_seq_length - 1, self.vocab_size)) + self.parent.assertEqual(result.logits.shape, (self.batch_size, self.seq_length - 1, self.vocab_size)) def create_and_check_for_image_classification(self, config, pixel_values, labels, pixel_labels): config.num_labels = self.type_sequence_label_size @@ -155,7 +153,7 @@ class BeitModelTester: result = model(pixel_values, labels=labels) self.parent.assertEqual(result.logits.shape, (self.batch_size, self.type_sequence_label_size)) - def create_and_check_for_image_segmentation(self, config, pixel_values, labels, pixel_labels): + def create_and_check_for_semantic_segmentation(self, config, pixel_values, labels, pixel_labels): config.num_labels = self.num_labels model = BeitForSemanticSegmentation(config) model.to(torch_device) @@ -200,8 +198,8 @@ class BeitModelTest(ModelTesterMixin, unittest.TestCase): def test_config(self): self.config_tester.run_common_tests() + @unittest.skip(reason="BEiT does not use inputs_embeds") def test_inputs_embeds(self): - # BEiT does not use inputs_embeds pass def test_model_common_attributes(self): @@ -229,9 +227,17 @@ class BeitModelTest(ModelTesterMixin, unittest.TestCase): config_and_inputs = self.model_tester.prepare_config_and_inputs() self.model_tester.create_and_check_model(*config_and_inputs) - def test_for_image_segmentation(self): + def test_for_masked_lm(self): config_and_inputs = self.model_tester.prepare_config_and_inputs() - self.model_tester.create_and_check_for_image_segmentation(*config_and_inputs) + self.model_tester.create_and_check_for_masked_lm(*config_and_inputs) + + def test_for_image_classification(self): + config_and_inputs = self.model_tester.prepare_config_and_inputs() + self.model_tester.create_and_check_for_image_classification(*config_and_inputs) + + def test_for_semantic_segmentation(self): + config_and_inputs = self.model_tester.prepare_config_and_inputs() + self.model_tester.create_and_check_for_semantic_segmentation(*config_and_inputs) def test_training(self): if not self.model_tester.is_training: @@ -267,13 +273,7 @@ class BeitModelTest(ModelTesterMixin, unittest.TestCase): or not model_class.supports_gradient_checkpointing ): continue - # TODO: remove the following 3 lines once we have a MODEL_FOR_SEMANTIC_SEGMENTATION_MAPPING - # this can then be incorporated into _prepare_for_class in test_modeling_common.py - elif model_class.__name__ == "BeitForSemanticSegmentation": - batch_size, num_channels, height, width = inputs_dict["pixel_values"].shape - inputs_dict["labels"] = torch.zeros( - [self.model_tester.batch_size, height, width], device=torch_device - ).long() + model = model_class(config) model.gradient_checkpointing_enable() model.to(torch_device) @@ -300,106 +300,6 @@ class BeitModelTest(ModelTesterMixin, unittest.TestCase): msg=f"Parameter {name} of model {model_class} seems not properly initialized", ) - def test_attention_outputs(self): - config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common() - config.return_dict = True - - # BEiT has a different seq_length - seq_len = self.model_tester.expected_seq_length - - for model_class in self.all_model_classes: - inputs_dict["output_attentions"] = True - inputs_dict["output_hidden_states"] = False - config.return_dict = True - model = model_class(config) - model.to(torch_device) - model.eval() - with torch.no_grad(): - outputs = model(**self._prepare_for_class(inputs_dict, model_class)) - attentions = outputs.attentions - self.assertEqual(len(attentions), self.model_tester.num_hidden_layers) - - # check that output_attentions also work using config - del inputs_dict["output_attentions"] - config.output_attentions = True - model = model_class(config) - model.to(torch_device) - model.eval() - with torch.no_grad(): - outputs = model(**self._prepare_for_class(inputs_dict, model_class)) - - attentions = outputs.attentions - self.assertEqual(len(attentions), self.model_tester.num_hidden_layers) - - self.assertListEqual( - list(attentions[0].shape[-3:]), - [self.model_tester.num_attention_heads, seq_len, seq_len], - ) - out_len = len(outputs) - - # Check attention is always last and order is fine - inputs_dict["output_attentions"] = True - inputs_dict["output_hidden_states"] = True - model = model_class(config) - model.to(torch_device) - model.eval() - with torch.no_grad(): - outputs = model(**self._prepare_for_class(inputs_dict, model_class)) - - self.assertEqual(out_len + 1, len(outputs)) - - self_attentions = outputs.attentions - - self.assertEqual(len(self_attentions), self.model_tester.num_hidden_layers) - self.assertListEqual( - list(self_attentions[0].shape[-3:]), - [self.model_tester.num_attention_heads, seq_len, seq_len], - ) - - def test_hidden_states_output(self): - def check_hidden_states_output(inputs_dict, config, model_class): - model = model_class(config) - model.to(torch_device) - model.eval() - - with torch.no_grad(): - outputs = model(**self._prepare_for_class(inputs_dict, model_class)) - - hidden_states = outputs.hidden_states - - expected_num_layers = getattr( - self.model_tester, "expected_num_hidden_layers", self.model_tester.num_hidden_layers + 1 - ) - self.assertEqual(len(hidden_states), expected_num_layers) - - # BEiT has a different seq_length - seq_length = self.model_tester.expected_seq_length - - self.assertListEqual( - list(hidden_states[0].shape[-2:]), - [seq_length, self.model_tester.hidden_size], - ) - - config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common() - - for model_class in self.all_model_classes: - inputs_dict["output_hidden_states"] = True - check_hidden_states_output(inputs_dict, config, model_class) - - # check that output_hidden_states also work using config - del inputs_dict["output_hidden_states"] - config.output_hidden_states = True - - check_hidden_states_output(inputs_dict, config, model_class) - - def test_for_masked_lm(self): - config_and_inputs = self.model_tester.prepare_config_and_inputs() - self.model_tester.create_and_check_for_masked_lm(*config_and_inputs) - - def test_for_image_classification(self): - config_and_inputs = self.model_tester.prepare_config_and_inputs() - self.model_tester.create_and_check_for_image_classification(*config_and_inputs) - @slow def test_model_from_pretrained(self): for model_name in BEIT_PRETRAINED_MODEL_ARCHIVE_LIST[:1]: diff --git a/tests/beit/test_modeling_flax_beit.py b/tests/beit/test_modeling_flax_beit.py index 8977ab6542..a8a53eda6e 100644 --- a/tests/beit/test_modeling_flax_beit.py +++ b/tests/beit/test_modeling_flax_beit.py @@ -75,9 +75,9 @@ class FlaxBeitModelTester(unittest.TestCase): self.type_sequence_label_size = type_sequence_label_size self.initializer_range = initializer_range - # in BeiT, the expected seq_len equals the number of patches + 1 (we add 1 for the [CLS] token) + # in BeiT, the seq length equals the number of patches + 1 (we add 1 for the [CLS] token) num_patches = (image_size // patch_size) ** 2 - self.expected_seq_length = num_patches + 1 + self.seq_length = num_patches + 1 def prepare_config_and_inputs(self): pixel_values = floats_tensor([self.batch_size, self.num_channels, self.image_size, self.image_size]) @@ -108,14 +108,12 @@ class FlaxBeitModelTester(unittest.TestCase): model = FlaxBeitModel(config=config) result = model(pixel_values) - self.parent.assertEqual( - result.last_hidden_state.shape, (self.batch_size, self.expected_seq_length, self.hidden_size) - ) + self.parent.assertEqual(result.last_hidden_state.shape, (self.batch_size, self.seq_length, self.hidden_size)) def create_and_check_for_masked_lm(self, config, pixel_values, labels): model = FlaxBeitForMaskedImageModeling(config=config) result = model(pixel_values) - self.parent.assertEqual(result.logits.shape, (self.batch_size, self.expected_seq_length - 1, self.vocab_size)) + self.parent.assertEqual(result.logits.shape, (self.batch_size, self.seq_length - 1, self.vocab_size)) def create_and_check_for_image_classification(self, config, pixel_values, labels): config.num_labels = self.type_sequence_label_size @@ -148,51 +146,7 @@ class FlaxBeitModelTest(FlaxModelTesterMixin, unittest.TestCase): def test_config(self): self.config_tester.run_common_tests() - # We need to override this test because in Beit, the seq_len equals the number of patches + 1 - def test_attention_outputs(self): - config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common() - config.return_dict = True - - seq_length = self.model_tester.expected_seq_length - - for model_class in self.all_model_classes: - inputs_dict["output_attentions"] = True - inputs_dict["output_hidden_states"] = False - model = model_class(config) - outputs = model(**self._prepare_for_class(inputs_dict, model_class)) - attentions = outputs.attentions - self.assertEqual(len(attentions), self.model_tester.num_hidden_layers) - - # check that output_attentions also work using config - del inputs_dict["output_attentions"] - config.output_attentions = True - model = model_class(config) - outputs = model(**self._prepare_for_class(inputs_dict, model_class)) - attentions = outputs.attentions - self.assertEqual(len(attentions), self.model_tester.num_hidden_layers) - - self.assertListEqual( - list(attentions[0].shape[-3:]), - [self.model_tester.num_attention_heads, seq_length, seq_length], - ) - out_len = len(outputs) - - # Check attention is always last and order is fine - inputs_dict["output_attentions"] = True - inputs_dict["output_hidden_states"] = True - model = model_class(config) - outputs = model(**self._prepare_for_class(inputs_dict, model_class)) - - added_hidden_states = 1 - self.assertEqual(out_len + added_hidden_states, len(outputs)) - - self.assertEqual(len(attentions), self.model_tester.num_hidden_layers) - self.assertListEqual( - list(attentions[0].shape[-3:]), - [self.model_tester.num_attention_heads, seq_length, seq_length], - ) - - # We neeed to override this test because Beit's forward signature is different than text models. + # We need to override this test because Beit's forward signature is different than text models. def test_forward_signature(self): config, _ = self.model_tester.prepare_config_and_inputs_for_common() @@ -229,34 +183,6 @@ class FlaxBeitModelTest(FlaxModelTesterMixin, unittest.TestCase): for jitted_output, output in zip(jitted_outputs, outputs): self.assertEqual(jitted_output.shape, output.shape) - # We need to override this test because in Beit, the seq_len equals the number of patches + 1 - def test_hidden_states_output(self): - def check_hidden_states_output(inputs_dict, config, model_class): - model = model_class(config) - seq_length = self.model_tester.expected_seq_length - - outputs = model(**self._prepare_for_class(inputs_dict, model_class)) - hidden_states = outputs.hidden_states - - self.assertEqual(len(hidden_states), self.model_tester.num_hidden_layers + 1) - - self.assertListEqual( - list(hidden_states[0].shape[-2:]), - [seq_length, self.model_tester.hidden_size], - ) - - config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common() - - for model_class in self.all_model_classes: - inputs_dict["output_hidden_states"] = True - check_hidden_states_output(inputs_dict, config, model_class) - - # check that output_hidden_states also work using config - del inputs_dict["output_hidden_states"] - config.output_hidden_states = True - - check_hidden_states_output(inputs_dict, config, model_class) - def test_model(self): config_and_inputs = self.model_tester.prepare_config_and_inputs() self.model_tester.create_and_check_model(*config_and_inputs) diff --git a/tests/deit/test_modeling_deit.py b/tests/deit/test_modeling_deit.py index f8723c1875..5876b051fe 100644 --- a/tests/deit/test_modeling_deit.py +++ b/tests/deit/test_modeling_deit.py @@ -92,9 +92,9 @@ class DeiTModelTester: self.scope = scope self.encoder_stride = encoder_stride - # in DeiT, the expected seq_len equals the number of patches + 2 (we add 2 for the [CLS] and distilation tokens) + # in DeiT, the seq length equals the number of patches + 2 (we add 2 for the [CLS] and distilation tokens) num_patches = (image_size // patch_size) ** 2 - self.expected_seq_length = num_patches + 2 + self.seq_length = num_patches + 2 def prepare_config_and_inputs(self): pixel_values = floats_tensor([self.batch_size, self.num_channels, self.image_size, self.image_size]) @@ -129,9 +129,7 @@ class DeiTModelTester: model.to(torch_device) model.eval() result = model(pixel_values) - self.parent.assertEqual( - result.last_hidden_state.shape, (self.batch_size, self.expected_seq_length, self.hidden_size) - ) + self.parent.assertEqual(result.last_hidden_state.shape, (self.batch_size, self.seq_length, self.hidden_size)) def create_and_check_for_image_classification(self, config, pixel_values, labels): config.num_labels = self.type_sequence_label_size @@ -181,8 +179,8 @@ class DeiTModelTest(ModelTesterMixin, unittest.TestCase): def test_config(self): self.config_tester.run_common_tests() + @unittest.skip(reason="DeiT does not use inputs_embeds") def test_inputs_embeds(self): - # DeiT does not use inputs_embeds pass def test_model_common_attributes(self): @@ -210,94 +208,9 @@ class DeiTModelTest(ModelTesterMixin, unittest.TestCase): config_and_inputs = self.model_tester.prepare_config_and_inputs() self.model_tester.create_and_check_model(*config_and_inputs) - def test_attention_outputs(self): - config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common() - config.return_dict = True - - seq_len = self.model_tester.expected_seq_length - - for model_class in self.all_model_classes: - inputs_dict["output_attentions"] = True - inputs_dict["output_hidden_states"] = False - config.return_dict = True - model = model_class(config) - model.to(torch_device) - model.eval() - with torch.no_grad(): - outputs = model(**self._prepare_for_class(inputs_dict, model_class)) - attentions = outputs.attentions - self.assertEqual(len(attentions), self.model_tester.num_hidden_layers) - - # check that output_attentions also work using config - del inputs_dict["output_attentions"] - config.output_attentions = True - model = model_class(config) - model.to(torch_device) - model.eval() - with torch.no_grad(): - outputs = model(**self._prepare_for_class(inputs_dict, model_class)) - attentions = outputs.attentions - self.assertEqual(len(attentions), self.model_tester.num_hidden_layers) - - self.assertListEqual( - list(attentions[0].shape[-3:]), - [self.model_tester.num_attention_heads, seq_len, seq_len], - ) - out_len = len(outputs) - - # Check attention is always last and order is fine - inputs_dict["output_attentions"] = True - inputs_dict["output_hidden_states"] = True - model = model_class(config) - model.to(torch_device) - model.eval() - with torch.no_grad(): - outputs = model(**self._prepare_for_class(inputs_dict, model_class)) - - self.assertEqual(out_len + 1, len(outputs)) - - self_attentions = outputs.attentions - - self.assertEqual(len(self_attentions), self.model_tester.num_hidden_layers) - self.assertListEqual( - list(self_attentions[0].shape[-3:]), - [self.model_tester.num_attention_heads, seq_len, seq_len], - ) - - def test_hidden_states_output(self): - def check_hidden_states_output(inputs_dict, config, model_class): - model = model_class(config) - model.to(torch_device) - model.eval() - - with torch.no_grad(): - outputs = model(**self._prepare_for_class(inputs_dict, model_class)) - - hidden_states = outputs.hidden_states - - expected_num_layers = getattr( - self.model_tester, "expected_num_hidden_layers", self.model_tester.num_hidden_layers + 1 - ) - self.assertEqual(len(hidden_states), expected_num_layers) - - seq_length = self.model_tester.expected_seq_length - - self.assertListEqual( - list(hidden_states[0].shape[-2:]), - [seq_length, self.model_tester.hidden_size], - ) - - config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common() - - for model_class in self.all_model_classes: - inputs_dict["output_hidden_states"] = True - check_hidden_states_output(inputs_dict, config, model_class) - - # check that output_hidden_states also work using config - del inputs_dict["output_hidden_states"] - config.output_hidden_states = True - - check_hidden_states_output(inputs_dict, config, model_class) + def test_for_image_classification(self): + config_and_inputs = self.model_tester.prepare_config_and_inputs() + self.model_tester.create_and_check_for_image_classification(*config_and_inputs) # special case for DeiTForImageClassificationWithTeacher model def _prepare_for_class(self, inputs_dict, model_class, return_labels=False): @@ -403,10 +316,6 @@ class DeiTModelTest(ModelTesterMixin, unittest.TestCase): loss.backward() - def test_for_image_classification(self): - config_and_inputs = self.model_tester.prepare_config_and_inputs() - self.model_tester.create_and_check_for_image_classification(*config_and_inputs) - @slow def test_model_from_pretrained(self): for model_name in DEIT_PRETRAINED_MODEL_ARCHIVE_LIST[:1]: diff --git a/tests/dpt/test_modeling_dpt.py b/tests/dpt/test_modeling_dpt.py index 08bb550e0e..f908bd8dc1 100644 --- a/tests/dpt/test_modeling_dpt.py +++ b/tests/dpt/test_modeling_dpt.py @@ -81,9 +81,9 @@ class DPTModelTester: self.initializer_range = initializer_range self.num_labels = num_labels self.scope = scope - # expected sequence length of DPT = num_patches + 1 (we add 1 for the [CLS] token) + # sequence length of DPT = num_patches + 1 (we add 1 for the [CLS] token) num_patches = (image_size // patch_size) ** 2 - self.expected_seq_length = num_patches + 1 + self.seq_length = num_patches + 1 def prepare_config_and_inputs(self): pixel_values = floats_tensor([self.batch_size, self.num_channels, self.image_size, self.image_size]) @@ -118,9 +118,7 @@ class DPTModelTester: model.to(torch_device) model.eval() result = model(pixel_values) - self.parent.assertEqual( - result.last_hidden_state.shape, (self.batch_size, self.expected_seq_length, self.hidden_size) - ) + self.parent.assertEqual(result.last_hidden_state.shape, (self.batch_size, self.seq_length, self.hidden_size)) def create_and_check_for_depth_estimation(self, config, pixel_values, labels): config.num_labels = self.num_labels @@ -167,8 +165,8 @@ class DPTModelTest(ModelTesterMixin, unittest.TestCase): def test_config(self): self.config_tester.run_common_tests() + @unittest.skip(reason="DPT does not use inputs_embeds") def test_inputs_embeds(self): - # DPT does not use inputs_embeds pass def test_model_common_attributes(self): @@ -204,97 +202,6 @@ class DPTModelTest(ModelTesterMixin, unittest.TestCase): config_and_inputs = self.model_tester.prepare_config_and_inputs() self.model_tester.create_and_check_for_semantic_segmentation(*config_and_inputs) - def test_attention_outputs(self): - config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common() - config.return_dict = True - - # in DPT, the seq_len equals the number of patches + 1 (we add 1 for the [CLS] token) - seq_len = self.model_tester.expected_seq_length - - for model_class in self.all_model_classes: - inputs_dict["output_attentions"] = True - inputs_dict["output_hidden_states"] = False - config.return_dict = True - model = model_class(config) - model.to(torch_device) - model.eval() - with torch.no_grad(): - outputs = model(**self._prepare_for_class(inputs_dict, model_class)) - - self.assertEqual(len(outputs.attentions), self.model_tester.num_hidden_layers) - - # check that output_attentions also work using config - del inputs_dict["output_attentions"] - config.output_attentions = True - model = model_class(config) - model.to(torch_device) - model.eval() - with torch.no_grad(): - outputs = model(**self._prepare_for_class(inputs_dict, model_class)) - attentions = outputs.attentions - self.assertEqual(len(attentions), self.model_tester.num_hidden_layers) - - self.assertListEqual( - list(attentions[0].shape[-3:]), - [self.model_tester.num_attention_heads, seq_len, seq_len], - ) - out_len = len(outputs) - - # Check attention is always last and order is fine - inputs_dict["output_attentions"] = True - inputs_dict["output_hidden_states"] = True - model = model_class(config) - model.to(torch_device) - model.eval() - with torch.no_grad(): - outputs = model(**self._prepare_for_class(inputs_dict, model_class)) - - self.assertEqual(out_len + 1, len(outputs)) - - self_attentions = outputs.attentions - - self.assertEqual(len(self_attentions), self.model_tester.num_hidden_layers) - self.assertListEqual( - list(self_attentions[0].shape[-3:]), - [self.model_tester.num_attention_heads, seq_len, seq_len], - ) - - def test_hidden_states_output(self): - def check_hidden_states_output(inputs_dict, config, model_class): - model = model_class(config) - model.to(torch_device) - model.eval() - - with torch.no_grad(): - outputs = model(**self._prepare_for_class(inputs_dict, model_class)) - - hidden_states = outputs.hidden_states - - expected_num_layers = getattr( - self.model_tester, "expected_num_hidden_layers", self.model_tester.num_hidden_layers + 1 - ) - self.assertEqual(len(hidden_states), expected_num_layers) - - # DPT has a different seq_length - seq_len = self.model_tester.expected_seq_length - - self.assertListEqual( - list(hidden_states[0].shape[-2:]), - [seq_len, self.model_tester.hidden_size], - ) - - config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common() - - for model_class in self.all_model_classes: - inputs_dict["output_hidden_states"] = True - check_hidden_states_output(inputs_dict, config, model_class) - - # check that output_hidden_states also work using config - del inputs_dict["output_hidden_states"] - config.output_hidden_states = True - - check_hidden_states_output(inputs_dict, config, model_class) - def test_training(self): for model_class in self.all_model_classes: if model_class.__name__ == "DPTForDepthEstimation": diff --git a/tests/vit/test_modeling_flax_vit.py b/tests/vit/test_modeling_flax_vit.py index 0af2123c90..9fc734005f 100644 --- a/tests/vit/test_modeling_flax_vit.py +++ b/tests/vit/test_modeling_flax_vit.py @@ -67,9 +67,9 @@ class FlaxViTModelTester(unittest.TestCase): self.type_sequence_label_size = type_sequence_label_size self.initializer_range = initializer_range - # in ViT, the expected seq_len equals the number of patches + 1 (we add 1 for the [CLS] token) + # in ViT, the seq length equals the number of patches + 1 (we add 1 for the [CLS] token) num_patches = (image_size // patch_size) ** 2 - self.expected_seq_length = num_patches + 1 + self.seq_length = num_patches + 1 def prepare_config_and_inputs(self): pixel_values = floats_tensor([self.batch_size, self.num_channels, self.image_size, self.image_size]) @@ -123,50 +123,6 @@ class FlaxViTModelTest(FlaxModelTesterMixin, unittest.TestCase): def test_config(self): self.config_tester.run_common_tests() - # We need to override this test because in ViT, the seq_len equals the number of patches + 1 - def test_attention_outputs(self): - config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common() - config.return_dict = True - - seq_length = self.model_tester.expected_seq_length - - for model_class in self.all_model_classes: - inputs_dict["output_attentions"] = True - inputs_dict["output_hidden_states"] = False - model = model_class(config) - outputs = model(**self._prepare_for_class(inputs_dict, model_class)) - attentions = outputs.attentions - self.assertEqual(len(attentions), self.model_tester.num_hidden_layers) - - # check that output_attentions also work using config - del inputs_dict["output_attentions"] - config.output_attentions = True - model = model_class(config) - outputs = model(**self._prepare_for_class(inputs_dict, model_class)) - attentions = outputs.attentions - self.assertEqual(len(attentions), self.model_tester.num_hidden_layers) - - self.assertListEqual( - list(attentions[0].shape[-3:]), - [self.model_tester.num_attention_heads, seq_length, seq_length], - ) - out_len = len(outputs) - - # Check attention is always last and order is fine - inputs_dict["output_attentions"] = True - inputs_dict["output_hidden_states"] = True - model = model_class(config) - outputs = model(**self._prepare_for_class(inputs_dict, model_class)) - - added_hidden_states = 1 - self.assertEqual(out_len + added_hidden_states, len(outputs)) - - self.assertEqual(len(attentions), self.model_tester.num_hidden_layers) - self.assertListEqual( - list(attentions[0].shape[-3:]), - [self.model_tester.num_attention_heads, seq_length, seq_length], - ) - # We neeed to override this test because ViT's forward signature is different than text models. def test_forward_signature(self): config, _ = self.model_tester.prepare_config_and_inputs_for_common() @@ -180,7 +136,7 @@ class FlaxViTModelTest(FlaxModelTesterMixin, unittest.TestCase): expected_arg_names = ["pixel_values"] self.assertListEqual(arg_names[:1], expected_arg_names) - # We neeed to override this test because ViT expects pixel_values instead of input_ids + # We need to override this test because ViT expects pixel_values instead of input_ids def test_jit_compilation(self): config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common() @@ -204,35 +160,6 @@ class FlaxViTModelTest(FlaxModelTesterMixin, unittest.TestCase): for jitted_output, output in zip(jitted_outputs, outputs): self.assertEqual(jitted_output.shape, output.shape) - # We need to override this test because in ViT, the seq_len equals the number of patches + 1 - def test_hidden_states_output(self): - def check_hidden_states_output(inputs_dict, config, model_class): - model = model_class(config) - - seq_length = self.model_tester.expected_seq_length - - outputs = model(**self._prepare_for_class(inputs_dict, model_class)) - hidden_states = outputs.hidden_states - - self.assertEqual(len(hidden_states), self.model_tester.num_hidden_layers + 1) - - self.assertListEqual( - list(hidden_states[0].shape[-2:]), - [seq_length, self.model_tester.hidden_size], - ) - - config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common() - - for model_class in self.all_model_classes: - inputs_dict["output_hidden_states"] = True - check_hidden_states_output(inputs_dict, config, model_class) - - # check that output_hidden_states also work using config - del inputs_dict["output_hidden_states"] - config.output_hidden_states = True - - check_hidden_states_output(inputs_dict, config, model_class) - @slow def test_model_from_pretrained(self): for model_class_name in self.all_model_classes: diff --git a/tests/vit/test_modeling_tf_vit.py b/tests/vit/test_modeling_tf_vit.py index 9ad64e8237..e8bbd55cbb 100644 --- a/tests/vit/test_modeling_tf_vit.py +++ b/tests/vit/test_modeling_tf_vit.py @@ -16,12 +16,10 @@ import inspect -import os -import tempfile import unittest from transformers import ViTConfig -from transformers.testing_utils import require_tf, require_vision, slow, tooslow +from transformers.testing_utils import require_tf, require_vision, slow from transformers.utils import cached_property, is_tf_available, is_vision_available from ..test_configuration_common import ConfigTester @@ -80,9 +78,9 @@ class TFViTModelTester: self.initializer_range = initializer_range self.scope = scope - # in ViT, the expected seq_len equals the number of patches + 1 (we add 1 for the [CLS] token) + # in ViT, the seq length equals the number of patches + 1 (we add 1 for the [CLS] token) num_patches = (image_size // patch_size) ** 2 - self.expected_seq_length = num_patches + 1 + self.seq_length = num_patches + 1 def prepare_config_and_inputs(self): pixel_values = floats_tensor([self.batch_size, self.num_channels, self.image_size, self.image_size]) @@ -114,18 +112,14 @@ class TFViTModelTester: def create_and_check_model(self, config, pixel_values, labels): model = TFViTModel(config=config) result = model(pixel_values, training=False) - self.parent.assertEqual( - result.last_hidden_state.shape, (self.batch_size, self.expected_seq_length, self.hidden_size) - ) + self.parent.assertEqual(result.last_hidden_state.shape, (self.batch_size, self.seq_length, self.hidden_size)) # Test with an image with different size than the one specified in config. image_size = self.image_size // 2 pixel_values = pixel_values[:, :, :image_size, :image_size] result = model(pixel_values, interpolate_pos_encoding=True, training=False) - expected_seq_length = (image_size // self.patch_size) ** 2 + 1 - self.parent.assertEqual( - result.last_hidden_state.shape, (self.batch_size, expected_seq_length, self.hidden_size) - ) + seq_length = (image_size // self.patch_size) ** 2 + 1 + self.parent.assertEqual(result.last_hidden_state.shape, (self.batch_size, seq_length, self.hidden_size)) def create_and_check_for_image_classification(self, config, pixel_values, labels): config.num_labels = self.type_sequence_label_size @@ -166,12 +160,12 @@ class TFViTModelTest(TFModelTesterMixin, unittest.TestCase): def test_config(self): self.config_tester.run_common_tests() + @unittest.skip(reason="ViT does not use inputs_embeds") def test_inputs_embeds(self): - # ViT does not use inputs_embeds pass + @unittest.skip(reason="ViT does not use inputs_embeds") def test_graph_mode_with_inputs_embeds(self): - # ViT does not use inputs_embeds pass def test_model_common_attributes(self): @@ -199,131 +193,6 @@ class TFViTModelTest(TFModelTesterMixin, unittest.TestCase): config_and_inputs = self.model_tester.prepare_config_and_inputs() self.model_tester.create_and_check_model(*config_and_inputs) - # overwrite from common since `encoder_seq_length` and `encoder_key_length` are calculated - # in a different way than in text models. - @tooslow - def test_saved_model_creation_extended(self): - config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common() - config.output_hidden_states = True - config.output_attentions = True - - if hasattr(config, "use_cache"): - config.use_cache = True - - # in ViT, the seq_len equals the number of patches + 1 (we add 1 for the [CLS] token) - seq_len = self.model_tester.expected_seq_length - - for model_class in self.all_model_classes: - class_inputs_dict = self._prepare_for_class(inputs_dict, model_class) - model = model_class(config) - num_out = len(model(class_inputs_dict)) - - with tempfile.TemporaryDirectory() as tmpdirname: - model.save_pretrained(tmpdirname, saved_model=True) - saved_model_dir = os.path.join(tmpdirname, "saved_model", "1") - model = tf.keras.models.load_model(saved_model_dir) - outputs = model(class_inputs_dict) - - output_hidden_states = outputs["hidden_states"] - output_attentions = outputs["attentions"] - - self.assertEqual(len(outputs), num_out) - - expected_num_layers = getattr( - self.model_tester, "expected_num_hidden_layers", self.model_tester.num_hidden_layers + 1 - ) - - self.assertEqual(len(output_hidden_states), expected_num_layers) - self.assertListEqual( - list(output_hidden_states[0].shape[-2:]), - [seq_len, self.model_tester.hidden_size], - ) - - self.assertEqual(len(output_attentions), self.model_tester.num_hidden_layers) - self.assertListEqual( - list(output_attentions[0].shape[-3:]), - [self.model_tester.num_attention_heads, seq_len, seq_len], - ) - - def test_attention_outputs(self): - config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common() - config.return_dict = True - - # in ViT, the seq_len equals the number of patches + 1 (we add 1 for the [CLS] token) - seq_len = self.model_tester.expected_seq_length - - for model_class in self.all_model_classes: - inputs_dict["output_attentions"] = True - inputs_dict["output_hidden_states"] = False - config.return_dict = True - model = model_class(config) - outputs = model(**self._prepare_for_class(inputs_dict, model_class), training=False) - attentions = outputs.attentions - self.assertEqual(len(attentions), self.model_tester.num_hidden_layers) - - # check that output_attentions also work using config - del inputs_dict["output_attentions"] - config.output_attentions = True - model = model_class(config) - outputs = model(**self._prepare_for_class(inputs_dict, model_class), training=False) - attentions = outputs.attentions - self.assertEqual(len(attentions), self.model_tester.num_hidden_layers) - - self.assertListEqual( - list(attentions[0].shape[-3:]), - [self.model_tester.num_attention_heads, seq_len, seq_len], - ) - out_len = len(outputs) - - # Check attention is always last and order is fine - inputs_dict["output_attentions"] = True - inputs_dict["output_hidden_states"] = True - model = model_class(config) - outputs = model(**self._prepare_for_class(inputs_dict, model_class), training=False) - - self.assertEqual(out_len + 1, len(outputs)) - - self_attentions = outputs.attentions - - self.assertEqual(len(self_attentions), self.model_tester.num_hidden_layers) - self.assertListEqual( - list(self_attentions[0].shape[-3:]), - [self.model_tester.num_attention_heads, seq_len, seq_len], - ) - - def test_hidden_states_output(self): - def check_hidden_states_output(inputs_dict, config, model_class): - model = model_class(config) - - outputs = model(**self._prepare_for_class(inputs_dict, model_class)) - - hidden_states = outputs.hidden_states - - expected_num_layers = getattr( - self.model_tester, "expected_num_hidden_layers", self.model_tester.num_hidden_layers + 1 - ) - self.assertEqual(len(hidden_states), expected_num_layers) - - # ViT has a different seq_length - seq_length = self.model_tester.expected_seq_length - - self.assertListEqual( - list(hidden_states[0].shape[-2:]), - [seq_length, self.model_tester.hidden_size], - ) - - config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common() - - for model_class in self.all_model_classes: - inputs_dict["output_hidden_states"] = True - check_hidden_states_output(inputs_dict, config, model_class) - - # check that output_hidden_states also work using config - del inputs_dict["output_hidden_states"] - config.output_hidden_states = True - - check_hidden_states_output(inputs_dict, config, model_class) - def test_for_image_classification(self): config_and_inputs = self.model_tester.prepare_config_and_inputs() self.model_tester.create_and_check_for_image_classification(*config_and_inputs) diff --git a/tests/vit/test_modeling_vit.py b/tests/vit/test_modeling_vit.py index 117815fa6d..6f88463b0e 100644 --- a/tests/vit/test_modeling_vit.py +++ b/tests/vit/test_modeling_vit.py @@ -81,9 +81,9 @@ class ViTModelTester: self.scope = scope self.encoder_stride = encoder_stride - # in ViT, the expected seq_len equals the number of patches + 1 (we add 1 for the [CLS] token) + # in ViT, the seq length equals the number of patches + 1 (we add 1 for the [CLS] token) num_patches = (image_size // patch_size) ** 2 - self.expected_seq_length = num_patches + 1 + self.seq_length = num_patches + 1 def prepare_config_and_inputs(self): pixel_values = floats_tensor([self.batch_size, self.num_channels, self.image_size, self.image_size]) @@ -118,9 +118,7 @@ class ViTModelTester: model.to(torch_device) model.eval() result = model(pixel_values) - self.parent.assertEqual( - result.last_hidden_state.shape, (self.batch_size, self.expected_seq_length, self.hidden_size) - ) + self.parent.assertEqual(result.last_hidden_state.shape, (self.batch_size, self.seq_length, self.hidden_size)) def create_and_check_for_image_classification(self, config, pixel_values, labels): config.num_labels = self.type_sequence_label_size @@ -169,8 +167,8 @@ class ViTModelTest(ModelTesterMixin, unittest.TestCase): def test_config(self): self.config_tester.run_common_tests() + @unittest.skip(reason="ViT does not use inputs_embeds") def test_inputs_embeds(self): - # ViT does not use inputs_embeds pass def test_model_common_attributes(self): @@ -198,93 +196,6 @@ class ViTModelTest(ModelTesterMixin, unittest.TestCase): config_and_inputs = self.model_tester.prepare_config_and_inputs() self.model_tester.create_and_check_model(*config_and_inputs) - def test_attention_outputs(self): - config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common() - config.return_dict = True - - seq_len = self.model_tester.expected_seq_length - - for model_class in self.all_model_classes: - inputs_dict["output_attentions"] = True - inputs_dict["output_hidden_states"] = False - config.return_dict = True - model = model_class(config) - model.to(torch_device) - model.eval() - with torch.no_grad(): - outputs = model(**self._prepare_for_class(inputs_dict, model_class)) - attentions = outputs.attentions - self.assertEqual(len(attentions), self.model_tester.num_hidden_layers) - - # check that output_attentions also work using config - del inputs_dict["output_attentions"] - config.output_attentions = True - model = model_class(config) - model.to(torch_device) - model.eval() - with torch.no_grad(): - outputs = model(**self._prepare_for_class(inputs_dict, model_class)) - attentions = outputs.attentions - self.assertEqual(len(attentions), self.model_tester.num_hidden_layers) - - self.assertListEqual( - list(attentions[0].shape[-3:]), - [self.model_tester.num_attention_heads, seq_len, seq_len], - ) - out_len = len(outputs) - - # Check attention is always last and order is fine - inputs_dict["output_attentions"] = True - inputs_dict["output_hidden_states"] = True - model = model_class(config) - model.to(torch_device) - model.eval() - with torch.no_grad(): - outputs = model(**self._prepare_for_class(inputs_dict, model_class)) - - self.assertEqual(out_len + 1, len(outputs)) - - self_attentions = outputs.attentions - - self.assertEqual(len(self_attentions), self.model_tester.num_hidden_layers) - self.assertListEqual( - list(self_attentions[0].shape[-3:]), - [self.model_tester.num_attention_heads, seq_len, seq_len], - ) - - def test_hidden_states_output(self): - def check_hidden_states_output(inputs_dict, config, model_class): - model = model_class(config) - model.to(torch_device) - model.eval() - - with torch.no_grad(): - outputs = model(**self._prepare_for_class(inputs_dict, model_class)) - - hidden_states = outputs.hidden_states - - expected_num_layers = getattr( - self.model_tester, "expected_num_hidden_layers", self.model_tester.num_hidden_layers + 1 - ) - self.assertEqual(len(hidden_states), expected_num_layers) - - self.assertListEqual( - list(hidden_states[0].shape[-2:]), - [self.model_tester.expected_seq_length, self.model_tester.hidden_size], - ) - - config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common() - - for model_class in self.all_model_classes: - inputs_dict["output_hidden_states"] = True - check_hidden_states_output(inputs_dict, config, model_class) - - # check that output_hidden_states also work using config - del inputs_dict["output_hidden_states"] - config.output_hidden_states = True - - check_hidden_states_output(inputs_dict, config, model_class) - def test_for_image_classification(self): config_and_inputs = self.model_tester.prepare_config_and_inputs() self.model_tester.create_and_check_for_image_classification(*config_and_inputs)