From 374a2f693f75305eded1a2bb7a7e452f0ab33fad Mon Sep 17 00:00:00 2001 From: NielsRogge <48327001+NielsRogge@users.noreply.github.com> Date: Tue, 24 May 2022 14:51:26 +0200 Subject: [PATCH] Clean up CLIP tests (#17380) Co-authored-by: Niels Rogge --- tests/models/clip/test_modeling_clip.py | 123 +++--------------------- 1 file changed, 14 insertions(+), 109 deletions(-) diff --git a/tests/models/clip/test_modeling_clip.py b/tests/models/clip/test_modeling_clip.py index 7ae1146159..02b982e4ef 100644 --- a/tests/models/clip/test_modeling_clip.py +++ b/tests/models/clip/test_modeling_clip.py @@ -100,6 +100,10 @@ class CLIPVisionModelTester: self.initializer_range = initializer_range self.scope = scope + # in ViT, the seq length equals the number of patches + 1 (we add 1 for the [CLS] token) + num_patches = (image_size // patch_size) ** 2 + self.seq_length = num_patches + 1 + def prepare_config_and_inputs(self): pixel_values = floats_tensor([self.batch_size, self.num_channels, self.image_size, self.image_size]) config = self.get_config() @@ -160,8 +164,8 @@ class CLIPVisionModelTest(ModelTesterMixin, unittest.TestCase): def test_config(self): self.config_tester.run_common_tests() + @unittest.skip(reason="CLIP does not use inputs_embeds") def test_inputs_embeds(self): - # CLIP does not use inputs_embeds pass def test_model_common_attributes(self): @@ -189,114 +193,17 @@ class CLIPVisionModelTest(ModelTesterMixin, unittest.TestCase): config_and_inputs = self.model_tester.prepare_config_and_inputs() self.model_tester.create_and_check_model(*config_and_inputs) - def test_attention_outputs(self): - config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common() - config.return_dict = True - - # in CLIP, the seq_len equals the number of patches + 1 (we add 1 for the [CLS] token) - image_size = (self.model_tester.image_size, self.model_tester.image_size) - patch_size = (self.model_tester.patch_size, self.model_tester.patch_size) - num_patches = (image_size[1] // patch_size[1]) * (image_size[0] // patch_size[0]) - seq_len = num_patches + 1 - - for model_class in self.all_model_classes: - inputs_dict["output_attentions"] = True - inputs_dict["output_hidden_states"] = False - config.return_dict = True - model = model_class(config) - model.to(torch_device) - model.eval() - with torch.no_grad(): - outputs = model(**self._prepare_for_class(inputs_dict, model_class)) - attentions = outputs.attentions - self.assertEqual(len(attentions), self.model_tester.num_hidden_layers) - - # check that output_attentions also work using config - del inputs_dict["output_attentions"] - config.output_attentions = True - model = model_class(config) - model.to(torch_device) - model.eval() - with torch.no_grad(): - outputs = model(**self._prepare_for_class(inputs_dict, model_class)) - attentions = outputs.attentions - self.assertEqual(len(attentions), self.model_tester.num_hidden_layers) - - out_len = len(outputs) - - # Check attention is always last and order is fine - inputs_dict["output_attentions"] = True - inputs_dict["output_hidden_states"] = True - model = model_class(config) - model.to(torch_device) - model.eval() - with torch.no_grad(): - outputs = model(**self._prepare_for_class(inputs_dict, model_class)) - - added_hidden_states = 1 - self.assertEqual(out_len + added_hidden_states, len(outputs)) - - self_attentions = outputs.attentions - - self.assertEqual(len(self_attentions), self.model_tester.num_hidden_layers) - - self.assertListEqual( - list(self_attentions[0].shape[-3:]), - [self.model_tester.num_attention_heads, seq_len, seq_len], - ) - - def test_hidden_states_output(self): - def check_hidden_states_output(inputs_dict, config, model_class): - model = model_class(config) - model.to(torch_device) - model.eval() - - with torch.no_grad(): - outputs = model(**self._prepare_for_class(inputs_dict, model_class)) - - hidden_states = outputs.encoder_hidden_states if config.is_encoder_decoder else outputs.hidden_states - - expected_num_layers = getattr( - self.model_tester, "expected_num_hidden_layers", self.model_tester.num_hidden_layers + 1 - ) - self.assertEqual(len(hidden_states), expected_num_layers) - - # CLIP has a different seq_length - image_size = (self.model_tester.image_size, self.model_tester.image_size) - patch_size = (self.model_tester.patch_size, self.model_tester.patch_size) - num_patches = (image_size[1] // patch_size[1]) * (image_size[0] // patch_size[0]) - seq_length = num_patches + 1 - - self.assertListEqual( - list(hidden_states[0].shape[-2:]), - [seq_length, self.model_tester.hidden_size], - ) - - config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common() - - for model_class in self.all_model_classes: - inputs_dict["output_hidden_states"] = True - check_hidden_states_output(inputs_dict, config, model_class) - - # check that output_hidden_states also work using config - del inputs_dict["output_hidden_states"] - config.output_hidden_states = True - - check_hidden_states_output(inputs_dict, config, model_class) - def test_training(self): pass def test_training_gradient_checkpointing(self): pass - # skip this test as CLIPVisionModel has no base class and is - # not available in MODEL_MAPPING + @unittest.skip(reason="CLIPVisionModel has no base class and is not available in MODEL_MAPPING") def test_save_load_fast_init_from_base(self): pass - # skip this test as CLIPVisionModel has no base class and is - # not available in MODEL_MAPPING + @unittest.skip(reason="CLIPVisionModel has no base class and is not available in MODEL_MAPPING") def test_save_load_fast_init_to_base(self): pass @@ -416,17 +323,15 @@ class CLIPTextModelTest(ModelTesterMixin, unittest.TestCase): def test_training_gradient_checkpointing(self): pass + @unittest.skip(reason="CLIP does not use inputs_embeds") def test_inputs_embeds(self): - # CLIP does not use inputs_embeds pass - # skip this test as CLIPTextModel has no base class and is - # not available in MODEL_MAPPING + @unittest.skip(reason="CLIPTextModel has no base class and is not available in MODEL_MAPPING") def test_save_load_fast_init_from_base(self): pass - # skip this test as CLIPTextModel has no base class and is - # not available in MODEL_MAPPING + @unittest.skip(reason="CLIPTextModel has no base class and is not available in MODEL_MAPPING") def test_save_load_fast_init_to_base(self): pass @@ -495,19 +400,19 @@ class CLIPModelTest(ModelTesterMixin, unittest.TestCase): config_and_inputs = self.model_tester.prepare_config_and_inputs() self.model_tester.create_and_check_model(*config_and_inputs) - # hidden_states are tested in individual model tests + @unittest.skip(reason="Hidden_states is tested in individual model tests") def test_hidden_states_output(self): pass - # input_embeds are tested in individual model tests + @unittest.skip(reason="Inputs_embeds is tested in individual model tests") def test_inputs_embeds(self): pass - # tested in individual model tests + @unittest.skip(reason="Retain_grad is tested in individual model tests") def test_retain_grad_hidden_states_attentions(self): pass - # CLIPModel does not have input/output embeddings + @unittest.skip(reason="CLIPModel does not have input/output embeddings") def test_model_common_attributes(self): pass