Clean up vision tests (#17024)
* Clean up tests
* Make fixup

Co-authored-by: Niels Rogge <nielsrogge@Nielss-MacBook-Pro.local>
This commit is contained in:
@@ -96,9 +96,9 @@ class BeitModelTester:
|
|||||||
self.out_indices = out_indices
|
self.out_indices = out_indices
|
||||||
self.num_labels = num_labels
|
self.num_labels = num_labels
|
||||||
|
|
||||||
# in BeiT, the expected seq_len equals the number of patches + 1 (we add 1 for the [CLS] token)
|
# in BeiT, the seq length equals the number of patches + 1 (we add 1 for the [CLS] token)
|
||||||
num_patches = (image_size // patch_size) ** 2
|
num_patches = (image_size // patch_size) ** 2
|
||||||
self.expected_seq_length = num_patches + 1
|
self.seq_length = num_patches + 1
|
||||||
|
|
||||||
def prepare_config_and_inputs(self):
|
def prepare_config_and_inputs(self):
|
||||||
pixel_values = floats_tensor([self.batch_size, self.num_channels, self.image_size, self.image_size])
|
pixel_values = floats_tensor([self.batch_size, self.num_channels, self.image_size, self.image_size])
|
||||||
@@ -136,16 +136,14 @@ class BeitModelTester:
|
|||||||
model.to(torch_device)
|
model.to(torch_device)
|
||||||
model.eval()
|
model.eval()
|
||||||
result = model(pixel_values)
|
result = model(pixel_values)
|
||||||
self.parent.assertEqual(
|
self.parent.assertEqual(result.last_hidden_state.shape, (self.batch_size, self.seq_length, self.hidden_size))
|
||||||
result.last_hidden_state.shape, (self.batch_size, self.expected_seq_length, self.hidden_size)
|
|
||||||
)
|
|
||||||
|
|
||||||
def create_and_check_for_masked_lm(self, config, pixel_values, labels, pixel_labels):
|
def create_and_check_for_masked_lm(self, config, pixel_values, labels, pixel_labels):
|
||||||
model = BeitForMaskedImageModeling(config=config)
|
model = BeitForMaskedImageModeling(config=config)
|
||||||
model.to(torch_device)
|
model.to(torch_device)
|
||||||
model.eval()
|
model.eval()
|
||||||
result = model(pixel_values)
|
result = model(pixel_values)
|
||||||
self.parent.assertEqual(result.logits.shape, (self.batch_size, self.expected_seq_length - 1, self.vocab_size))
|
self.parent.assertEqual(result.logits.shape, (self.batch_size, self.seq_length - 1, self.vocab_size))
|
||||||
|
|
||||||
def create_and_check_for_image_classification(self, config, pixel_values, labels, pixel_labels):
|
def create_and_check_for_image_classification(self, config, pixel_values, labels, pixel_labels):
|
||||||
config.num_labels = self.type_sequence_label_size
|
config.num_labels = self.type_sequence_label_size
|
||||||
@@ -155,7 +153,7 @@ class BeitModelTester:
|
|||||||
result = model(pixel_values, labels=labels)
|
result = model(pixel_values, labels=labels)
|
||||||
self.parent.assertEqual(result.logits.shape, (self.batch_size, self.type_sequence_label_size))
|
self.parent.assertEqual(result.logits.shape, (self.batch_size, self.type_sequence_label_size))
|
||||||
|
|
||||||
def create_and_check_for_image_segmentation(self, config, pixel_values, labels, pixel_labels):
|
def create_and_check_for_semantic_segmentation(self, config, pixel_values, labels, pixel_labels):
|
||||||
config.num_labels = self.num_labels
|
config.num_labels = self.num_labels
|
||||||
model = BeitForSemanticSegmentation(config)
|
model = BeitForSemanticSegmentation(config)
|
||||||
model.to(torch_device)
|
model.to(torch_device)
|
||||||
@@ -200,8 +198,8 @@ class BeitModelTest(ModelTesterMixin, unittest.TestCase):
|
|||||||
def test_config(self):
|
def test_config(self):
|
||||||
self.config_tester.run_common_tests()
|
self.config_tester.run_common_tests()
|
||||||
|
|
||||||
|
@unittest.skip(reason="BEiT does not use inputs_embeds")
|
||||||
def test_inputs_embeds(self):
|
def test_inputs_embeds(self):
|
||||||
# BEiT does not use inputs_embeds
|
|
||||||
pass
|
pass
|
||||||
|
|
||||||
def test_model_common_attributes(self):
|
def test_model_common_attributes(self):
|
||||||
@@ -229,9 +227,17 @@ class BeitModelTest(ModelTesterMixin, unittest.TestCase):
|
|||||||
config_and_inputs = self.model_tester.prepare_config_and_inputs()
|
config_and_inputs = self.model_tester.prepare_config_and_inputs()
|
||||||
self.model_tester.create_and_check_model(*config_and_inputs)
|
self.model_tester.create_and_check_model(*config_and_inputs)
|
||||||
|
|
||||||
def test_for_image_segmentation(self):
|
def test_for_masked_lm(self):
|
||||||
config_and_inputs = self.model_tester.prepare_config_and_inputs()
|
config_and_inputs = self.model_tester.prepare_config_and_inputs()
|
||||||
self.model_tester.create_and_check_for_image_segmentation(*config_and_inputs)
|
self.model_tester.create_and_check_for_masked_lm(*config_and_inputs)
|
||||||
|
|
||||||
|
def test_for_image_classification(self):
|
||||||
|
config_and_inputs = self.model_tester.prepare_config_and_inputs()
|
||||||
|
self.model_tester.create_and_check_for_image_classification(*config_and_inputs)
|
||||||
|
|
||||||
|
def test_for_semantic_segmentation(self):
|
||||||
|
config_and_inputs = self.model_tester.prepare_config_and_inputs()
|
||||||
|
self.model_tester.create_and_check_for_semantic_segmentation(*config_and_inputs)
|
||||||
|
|
||||||
def test_training(self):
|
def test_training(self):
|
||||||
if not self.model_tester.is_training:
|
if not self.model_tester.is_training:
|
||||||
@@ -267,13 +273,7 @@ class BeitModelTest(ModelTesterMixin, unittest.TestCase):
|
|||||||
or not model_class.supports_gradient_checkpointing
|
or not model_class.supports_gradient_checkpointing
|
||||||
):
|
):
|
||||||
continue
|
continue
|
||||||
# TODO: remove the following 3 lines once we have a MODEL_FOR_SEMANTIC_SEGMENTATION_MAPPING
|
|
||||||
# this can then be incorporated into _prepare_for_class in test_modeling_common.py
|
|
||||||
elif model_class.__name__ == "BeitForSemanticSegmentation":
|
|
||||||
batch_size, num_channels, height, width = inputs_dict["pixel_values"].shape
|
|
||||||
inputs_dict["labels"] = torch.zeros(
|
|
||||||
[self.model_tester.batch_size, height, width], device=torch_device
|
|
||||||
).long()
|
|
||||||
model = model_class(config)
|
model = model_class(config)
|
||||||
model.gradient_checkpointing_enable()
|
model.gradient_checkpointing_enable()
|
||||||
model.to(torch_device)
|
model.to(torch_device)
|
||||||
@@ -300,106 +300,6 @@ class BeitModelTest(ModelTesterMixin, unittest.TestCase):
|
|||||||
msg=f"Parameter {name} of model {model_class} seems not properly initialized",
|
msg=f"Parameter {name} of model {model_class} seems not properly initialized",
|
||||||
)
|
)
|
||||||
|
|
||||||
def test_attention_outputs(self):
|
|
||||||
config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
|
|
||||||
config.return_dict = True
|
|
||||||
|
|
||||||
# BEiT has a different seq_length
|
|
||||||
seq_len = self.model_tester.expected_seq_length
|
|
||||||
|
|
||||||
for model_class in self.all_model_classes:
|
|
||||||
inputs_dict["output_attentions"] = True
|
|
||||||
inputs_dict["output_hidden_states"] = False
|
|
||||||
config.return_dict = True
|
|
||||||
model = model_class(config)
|
|
||||||
model.to(torch_device)
|
|
||||||
model.eval()
|
|
||||||
with torch.no_grad():
|
|
||||||
outputs = model(**self._prepare_for_class(inputs_dict, model_class))
|
|
||||||
attentions = outputs.attentions
|
|
||||||
self.assertEqual(len(attentions), self.model_tester.num_hidden_layers)
|
|
||||||
|
|
||||||
# check that output_attentions also work using config
|
|
||||||
del inputs_dict["output_attentions"]
|
|
||||||
config.output_attentions = True
|
|
||||||
model = model_class(config)
|
|
||||||
model.to(torch_device)
|
|
||||||
model.eval()
|
|
||||||
with torch.no_grad():
|
|
||||||
outputs = model(**self._prepare_for_class(inputs_dict, model_class))
|
|
||||||
|
|
||||||
attentions = outputs.attentions
|
|
||||||
self.assertEqual(len(attentions), self.model_tester.num_hidden_layers)
|
|
||||||
|
|
||||||
self.assertListEqual(
|
|
||||||
list(attentions[0].shape[-3:]),
|
|
||||||
[self.model_tester.num_attention_heads, seq_len, seq_len],
|
|
||||||
)
|
|
||||||
out_len = len(outputs)
|
|
||||||
|
|
||||||
# Check attention is always last and order is fine
|
|
||||||
inputs_dict["output_attentions"] = True
|
|
||||||
inputs_dict["output_hidden_states"] = True
|
|
||||||
model = model_class(config)
|
|
||||||
model.to(torch_device)
|
|
||||||
model.eval()
|
|
||||||
with torch.no_grad():
|
|
||||||
outputs = model(**self._prepare_for_class(inputs_dict, model_class))
|
|
||||||
|
|
||||||
self.assertEqual(out_len + 1, len(outputs))
|
|
||||||
|
|
||||||
self_attentions = outputs.attentions
|
|
||||||
|
|
||||||
self.assertEqual(len(self_attentions), self.model_tester.num_hidden_layers)
|
|
||||||
self.assertListEqual(
|
|
||||||
list(self_attentions[0].shape[-3:]),
|
|
||||||
[self.model_tester.num_attention_heads, seq_len, seq_len],
|
|
||||||
)
|
|
||||||
|
|
||||||
def test_hidden_states_output(self):
|
|
||||||
def check_hidden_states_output(inputs_dict, config, model_class):
|
|
||||||
model = model_class(config)
|
|
||||||
model.to(torch_device)
|
|
||||||
model.eval()
|
|
||||||
|
|
||||||
with torch.no_grad():
|
|
||||||
outputs = model(**self._prepare_for_class(inputs_dict, model_class))
|
|
||||||
|
|
||||||
hidden_states = outputs.hidden_states
|
|
||||||
|
|
||||||
expected_num_layers = getattr(
|
|
||||||
self.model_tester, "expected_num_hidden_layers", self.model_tester.num_hidden_layers + 1
|
|
||||||
)
|
|
||||||
self.assertEqual(len(hidden_states), expected_num_layers)
|
|
||||||
|
|
||||||
# BEiT has a different seq_length
|
|
||||||
seq_length = self.model_tester.expected_seq_length
|
|
||||||
|
|
||||||
self.assertListEqual(
|
|
||||||
list(hidden_states[0].shape[-2:]),
|
|
||||||
[seq_length, self.model_tester.hidden_size],
|
|
||||||
)
|
|
||||||
|
|
||||||
config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
|
|
||||||
|
|
||||||
for model_class in self.all_model_classes:
|
|
||||||
inputs_dict["output_hidden_states"] = True
|
|
||||||
check_hidden_states_output(inputs_dict, config, model_class)
|
|
||||||
|
|
||||||
# check that output_hidden_states also work using config
|
|
||||||
del inputs_dict["output_hidden_states"]
|
|
||||||
config.output_hidden_states = True
|
|
||||||
|
|
||||||
check_hidden_states_output(inputs_dict, config, model_class)
|
|
||||||
|
|
||||||
def test_for_masked_lm(self):
|
|
||||||
config_and_inputs = self.model_tester.prepare_config_and_inputs()
|
|
||||||
self.model_tester.create_and_check_for_masked_lm(*config_and_inputs)
|
|
||||||
|
|
||||||
def test_for_image_classification(self):
|
|
||||||
config_and_inputs = self.model_tester.prepare_config_and_inputs()
|
|
||||||
self.model_tester.create_and_check_for_image_classification(*config_and_inputs)
|
|
||||||
|
|
||||||
@slow
|
@slow
|
||||||
def test_model_from_pretrained(self):
|
def test_model_from_pretrained(self):
|
||||||
for model_name in BEIT_PRETRAINED_MODEL_ARCHIVE_LIST[:1]:
|
for model_name in BEIT_PRETRAINED_MODEL_ARCHIVE_LIST[:1]:
|
||||||
|
|||||||
@@ -75,9 +75,9 @@ class FlaxBeitModelTester(unittest.TestCase):
|
|||||||
self.type_sequence_label_size = type_sequence_label_size
|
self.type_sequence_label_size = type_sequence_label_size
|
||||||
self.initializer_range = initializer_range
|
self.initializer_range = initializer_range
|
||||||
|
|
||||||
# in BeiT, the expected seq_len equals the number of patches + 1 (we add 1 for the [CLS] token)
|
# in BeiT, the seq length equals the number of patches + 1 (we add 1 for the [CLS] token)
|
||||||
num_patches = (image_size // patch_size) ** 2
|
num_patches = (image_size // patch_size) ** 2
|
||||||
self.expected_seq_length = num_patches + 1
|
self.seq_length = num_patches + 1
|
||||||
|
|
||||||
def prepare_config_and_inputs(self):
|
def prepare_config_and_inputs(self):
|
||||||
pixel_values = floats_tensor([self.batch_size, self.num_channels, self.image_size, self.image_size])
|
pixel_values = floats_tensor([self.batch_size, self.num_channels, self.image_size, self.image_size])
|
||||||
@@ -108,14 +108,12 @@ class FlaxBeitModelTester(unittest.TestCase):
|
|||||||
|
|
||||||
model = FlaxBeitModel(config=config)
|
model = FlaxBeitModel(config=config)
|
||||||
result = model(pixel_values)
|
result = model(pixel_values)
|
||||||
self.parent.assertEqual(
|
self.parent.assertEqual(result.last_hidden_state.shape, (self.batch_size, self.seq_length, self.hidden_size))
|
||||||
result.last_hidden_state.shape, (self.batch_size, self.expected_seq_length, self.hidden_size)
|
|
||||||
)
|
|
||||||
|
|
||||||
def create_and_check_for_masked_lm(self, config, pixel_values, labels):
|
def create_and_check_for_masked_lm(self, config, pixel_values, labels):
|
||||||
model = FlaxBeitForMaskedImageModeling(config=config)
|
model = FlaxBeitForMaskedImageModeling(config=config)
|
||||||
result = model(pixel_values)
|
result = model(pixel_values)
|
||||||
self.parent.assertEqual(result.logits.shape, (self.batch_size, self.expected_seq_length - 1, self.vocab_size))
|
self.parent.assertEqual(result.logits.shape, (self.batch_size, self.seq_length - 1, self.vocab_size))
|
||||||
|
|
||||||
def create_and_check_for_image_classification(self, config, pixel_values, labels):
|
def create_and_check_for_image_classification(self, config, pixel_values, labels):
|
||||||
config.num_labels = self.type_sequence_label_size
|
config.num_labels = self.type_sequence_label_size
|
||||||
@@ -148,51 +146,7 @@ class FlaxBeitModelTest(FlaxModelTesterMixin, unittest.TestCase):
|
|||||||
def test_config(self):
|
def test_config(self):
|
||||||
self.config_tester.run_common_tests()
|
self.config_tester.run_common_tests()
|
||||||
|
|
||||||
# We need to override this test because in Beit, the seq_len equals the number of patches + 1
|
# We need to override this test because Beit's forward signature is different than text models.
|
||||||
def test_attention_outputs(self):
|
|
||||||
config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
|
|
||||||
config.return_dict = True
|
|
||||||
|
|
||||||
seq_length = self.model_tester.expected_seq_length
|
|
||||||
|
|
||||||
for model_class in self.all_model_classes:
|
|
||||||
inputs_dict["output_attentions"] = True
|
|
||||||
inputs_dict["output_hidden_states"] = False
|
|
||||||
model = model_class(config)
|
|
||||||
outputs = model(**self._prepare_for_class(inputs_dict, model_class))
|
|
||||||
attentions = outputs.attentions
|
|
||||||
self.assertEqual(len(attentions), self.model_tester.num_hidden_layers)
|
|
||||||
|
|
||||||
# check that output_attentions also work using config
|
|
||||||
del inputs_dict["output_attentions"]
|
|
||||||
config.output_attentions = True
|
|
||||||
model = model_class(config)
|
|
||||||
outputs = model(**self._prepare_for_class(inputs_dict, model_class))
|
|
||||||
attentions = outputs.attentions
|
|
||||||
self.assertEqual(len(attentions), self.model_tester.num_hidden_layers)
|
|
||||||
|
|
||||||
self.assertListEqual(
|
|
||||||
list(attentions[0].shape[-3:]),
|
|
||||||
[self.model_tester.num_attention_heads, seq_length, seq_length],
|
|
||||||
)
|
|
||||||
out_len = len(outputs)
|
|
||||||
|
|
||||||
# Check attention is always last and order is fine
|
|
||||||
inputs_dict["output_attentions"] = True
|
|
||||||
inputs_dict["output_hidden_states"] = True
|
|
||||||
model = model_class(config)
|
|
||||||
outputs = model(**self._prepare_for_class(inputs_dict, model_class))
|
|
||||||
|
|
||||||
added_hidden_states = 1
|
|
||||||
self.assertEqual(out_len + added_hidden_states, len(outputs))
|
|
||||||
|
|
||||||
self.assertEqual(len(attentions), self.model_tester.num_hidden_layers)
|
|
||||||
self.assertListEqual(
|
|
||||||
list(attentions[0].shape[-3:]),
|
|
||||||
[self.model_tester.num_attention_heads, seq_length, seq_length],
|
|
||||||
)
|
|
||||||
|
|
||||||
# We neeed to override this test because Beit's forward signature is different than text models.
|
|
||||||
def test_forward_signature(self):
|
def test_forward_signature(self):
|
||||||
config, _ = self.model_tester.prepare_config_and_inputs_for_common()
|
config, _ = self.model_tester.prepare_config_and_inputs_for_common()
|
||||||
|
|
||||||
@@ -229,34 +183,6 @@ class FlaxBeitModelTest(FlaxModelTesterMixin, unittest.TestCase):
|
|||||||
for jitted_output, output in zip(jitted_outputs, outputs):
|
for jitted_output, output in zip(jitted_outputs, outputs):
|
||||||
self.assertEqual(jitted_output.shape, output.shape)
|
self.assertEqual(jitted_output.shape, output.shape)
|
||||||
|
|
||||||
# We need to override this test because in Beit, the seq_len equals the number of patches + 1
|
|
||||||
def test_hidden_states_output(self):
|
|
||||||
def check_hidden_states_output(inputs_dict, config, model_class):
|
|
||||||
model = model_class(config)
|
|
||||||
seq_length = self.model_tester.expected_seq_length
|
|
||||||
|
|
||||||
outputs = model(**self._prepare_for_class(inputs_dict, model_class))
|
|
||||||
hidden_states = outputs.hidden_states
|
|
||||||
|
|
||||||
self.assertEqual(len(hidden_states), self.model_tester.num_hidden_layers + 1)
|
|
||||||
|
|
||||||
self.assertListEqual(
|
|
||||||
list(hidden_states[0].shape[-2:]),
|
|
||||||
[seq_length, self.model_tester.hidden_size],
|
|
||||||
)
|
|
||||||
|
|
||||||
config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
|
|
||||||
|
|
||||||
for model_class in self.all_model_classes:
|
|
||||||
inputs_dict["output_hidden_states"] = True
|
|
||||||
check_hidden_states_output(inputs_dict, config, model_class)
|
|
||||||
|
|
||||||
# check that output_hidden_states also work using config
|
|
||||||
del inputs_dict["output_hidden_states"]
|
|
||||||
config.output_hidden_states = True
|
|
||||||
|
|
||||||
check_hidden_states_output(inputs_dict, config, model_class)
|
|
||||||
|
|
||||||
def test_model(self):
|
def test_model(self):
|
||||||
config_and_inputs = self.model_tester.prepare_config_and_inputs()
|
config_and_inputs = self.model_tester.prepare_config_and_inputs()
|
||||||
self.model_tester.create_and_check_model(*config_and_inputs)
|
self.model_tester.create_and_check_model(*config_and_inputs)
|
||||||
|
|||||||
@@ -92,9 +92,9 @@ class DeiTModelTester:
|
|||||||
self.scope = scope
|
self.scope = scope
|
||||||
self.encoder_stride = encoder_stride
|
self.encoder_stride = encoder_stride
|
||||||
|
|
||||||
# in DeiT, the expected seq_len equals the number of patches + 2 (we add 2 for the [CLS] and distilation tokens)
|
# in DeiT, the seq length equals the number of patches + 2 (we add 2 for the [CLS] and distilation tokens)
|
||||||
num_patches = (image_size // patch_size) ** 2
|
num_patches = (image_size // patch_size) ** 2
|
||||||
self.expected_seq_length = num_patches + 2
|
self.seq_length = num_patches + 2
|
||||||
|
|
||||||
def prepare_config_and_inputs(self):
|
def prepare_config_and_inputs(self):
|
||||||
pixel_values = floats_tensor([self.batch_size, self.num_channels, self.image_size, self.image_size])
|
pixel_values = floats_tensor([self.batch_size, self.num_channels, self.image_size, self.image_size])
|
||||||
@@ -129,9 +129,7 @@ class DeiTModelTester:
|
|||||||
model.to(torch_device)
|
model.to(torch_device)
|
||||||
model.eval()
|
model.eval()
|
||||||
result = model(pixel_values)
|
result = model(pixel_values)
|
||||||
self.parent.assertEqual(
|
self.parent.assertEqual(result.last_hidden_state.shape, (self.batch_size, self.seq_length, self.hidden_size))
|
||||||
result.last_hidden_state.shape, (self.batch_size, self.expected_seq_length, self.hidden_size)
|
|
||||||
)
|
|
||||||
|
|
||||||
def create_and_check_for_image_classification(self, config, pixel_values, labels):
|
def create_and_check_for_image_classification(self, config, pixel_values, labels):
|
||||||
config.num_labels = self.type_sequence_label_size
|
config.num_labels = self.type_sequence_label_size
|
||||||
@@ -181,8 +179,8 @@ class DeiTModelTest(ModelTesterMixin, unittest.TestCase):
|
|||||||
def test_config(self):
|
def test_config(self):
|
||||||
self.config_tester.run_common_tests()
|
self.config_tester.run_common_tests()
|
||||||
|
|
||||||
|
@unittest.skip(reason="DeiT does not use inputs_embeds")
|
||||||
def test_inputs_embeds(self):
|
def test_inputs_embeds(self):
|
||||||
# DeiT does not use inputs_embeds
|
|
||||||
pass
|
pass
|
||||||
|
|
||||||
def test_model_common_attributes(self):
|
def test_model_common_attributes(self):
|
||||||
@@ -210,94 +208,9 @@ class DeiTModelTest(ModelTesterMixin, unittest.TestCase):
|
|||||||
config_and_inputs = self.model_tester.prepare_config_and_inputs()
|
config_and_inputs = self.model_tester.prepare_config_and_inputs()
|
||||||
self.model_tester.create_and_check_model(*config_and_inputs)
|
self.model_tester.create_and_check_model(*config_and_inputs)
|
||||||
|
|
||||||
def test_attention_outputs(self):
|
def test_for_image_classification(self):
|
||||||
config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
|
config_and_inputs = self.model_tester.prepare_config_and_inputs()
|
||||||
config.return_dict = True
|
self.model_tester.create_and_check_for_image_classification(*config_and_inputs)
|
||||||
|
|
||||||
seq_len = self.model_tester.expected_seq_length
|
|
||||||
|
|
||||||
for model_class in self.all_model_classes:
|
|
||||||
inputs_dict["output_attentions"] = True
|
|
||||||
inputs_dict["output_hidden_states"] = False
|
|
||||||
config.return_dict = True
|
|
||||||
model = model_class(config)
|
|
||||||
model.to(torch_device)
|
|
||||||
model.eval()
|
|
||||||
with torch.no_grad():
|
|
||||||
outputs = model(**self._prepare_for_class(inputs_dict, model_class))
|
|
||||||
attentions = outputs.attentions
|
|
||||||
self.assertEqual(len(attentions), self.model_tester.num_hidden_layers)
|
|
||||||
|
|
||||||
# check that output_attentions also work using config
|
|
||||||
del inputs_dict["output_attentions"]
|
|
||||||
config.output_attentions = True
|
|
||||||
model = model_class(config)
|
|
||||||
model.to(torch_device)
|
|
||||||
model.eval()
|
|
||||||
with torch.no_grad():
|
|
||||||
outputs = model(**self._prepare_for_class(inputs_dict, model_class))
|
|
||||||
attentions = outputs.attentions
|
|
||||||
self.assertEqual(len(attentions), self.model_tester.num_hidden_layers)
|
|
||||||
|
|
||||||
self.assertListEqual(
|
|
||||||
list(attentions[0].shape[-3:]),
|
|
||||||
[self.model_tester.num_attention_heads, seq_len, seq_len],
|
|
||||||
)
|
|
||||||
out_len = len(outputs)
|
|
||||||
|
|
||||||
# Check attention is always last and order is fine
|
|
||||||
inputs_dict["output_attentions"] = True
|
|
||||||
inputs_dict["output_hidden_states"] = True
|
|
||||||
model = model_class(config)
|
|
||||||
model.to(torch_device)
|
|
||||||
model.eval()
|
|
||||||
with torch.no_grad():
|
|
||||||
outputs = model(**self._prepare_for_class(inputs_dict, model_class))
|
|
||||||
|
|
||||||
self.assertEqual(out_len + 1, len(outputs))
|
|
||||||
|
|
||||||
self_attentions = outputs.attentions
|
|
||||||
|
|
||||||
self.assertEqual(len(self_attentions), self.model_tester.num_hidden_layers)
|
|
||||||
self.assertListEqual(
|
|
||||||
list(self_attentions[0].shape[-3:]),
|
|
||||||
[self.model_tester.num_attention_heads, seq_len, seq_len],
|
|
||||||
)
|
|
||||||
|
|
||||||
def test_hidden_states_output(self):
|
|
||||||
def check_hidden_states_output(inputs_dict, config, model_class):
|
|
||||||
model = model_class(config)
|
|
||||||
model.to(torch_device)
|
|
||||||
model.eval()
|
|
||||||
|
|
||||||
with torch.no_grad():
|
|
||||||
outputs = model(**self._prepare_for_class(inputs_dict, model_class))
|
|
||||||
|
|
||||||
hidden_states = outputs.hidden_states
|
|
||||||
|
|
||||||
expected_num_layers = getattr(
|
|
||||||
self.model_tester, "expected_num_hidden_layers", self.model_tester.num_hidden_layers + 1
|
|
||||||
)
|
|
||||||
self.assertEqual(len(hidden_states), expected_num_layers)
|
|
||||||
|
|
||||||
seq_length = self.model_tester.expected_seq_length
|
|
||||||
|
|
||||||
self.assertListEqual(
|
|
||||||
list(hidden_states[0].shape[-2:]),
|
|
||||||
[seq_length, self.model_tester.hidden_size],
|
|
||||||
)
|
|
||||||
|
|
||||||
config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
|
|
||||||
|
|
||||||
for model_class in self.all_model_classes:
|
|
||||||
inputs_dict["output_hidden_states"] = True
|
|
||||||
check_hidden_states_output(inputs_dict, config, model_class)
|
|
||||||
|
|
||||||
# check that output_hidden_states also work using config
|
|
||||||
del inputs_dict["output_hidden_states"]
|
|
||||||
config.output_hidden_states = True
|
|
||||||
|
|
||||||
check_hidden_states_output(inputs_dict, config, model_class)
|
|
||||||
|
|
||||||
# special case for DeiTForImageClassificationWithTeacher model
|
# special case for DeiTForImageClassificationWithTeacher model
|
||||||
def _prepare_for_class(self, inputs_dict, model_class, return_labels=False):
|
def _prepare_for_class(self, inputs_dict, model_class, return_labels=False):
|
||||||
@@ -403,10 +316,6 @@ class DeiTModelTest(ModelTesterMixin, unittest.TestCase):
|
|||||||
|
|
||||||
loss.backward()
|
loss.backward()
|
||||||
|
|
||||||
def test_for_image_classification(self):
|
|
||||||
config_and_inputs = self.model_tester.prepare_config_and_inputs()
|
|
||||||
self.model_tester.create_and_check_for_image_classification(*config_and_inputs)
|
|
||||||
|
|
||||||
@slow
|
@slow
|
||||||
def test_model_from_pretrained(self):
|
def test_model_from_pretrained(self):
|
||||||
for model_name in DEIT_PRETRAINED_MODEL_ARCHIVE_LIST[:1]:
|
for model_name in DEIT_PRETRAINED_MODEL_ARCHIVE_LIST[:1]:
|
||||||
|
|||||||
@@ -81,9 +81,9 @@ class DPTModelTester:
|
|||||||
self.initializer_range = initializer_range
|
self.initializer_range = initializer_range
|
||||||
self.num_labels = num_labels
|
self.num_labels = num_labels
|
||||||
self.scope = scope
|
self.scope = scope
|
||||||
# expected sequence length of DPT = num_patches + 1 (we add 1 for the [CLS] token)
|
# sequence length of DPT = num_patches + 1 (we add 1 for the [CLS] token)
|
||||||
num_patches = (image_size // patch_size) ** 2
|
num_patches = (image_size // patch_size) ** 2
|
||||||
self.expected_seq_length = num_patches + 1
|
self.seq_length = num_patches + 1
|
||||||
|
|
||||||
def prepare_config_and_inputs(self):
|
def prepare_config_and_inputs(self):
|
||||||
pixel_values = floats_tensor([self.batch_size, self.num_channels, self.image_size, self.image_size])
|
pixel_values = floats_tensor([self.batch_size, self.num_channels, self.image_size, self.image_size])
|
||||||
@@ -118,9 +118,7 @@ class DPTModelTester:
|
|||||||
model.to(torch_device)
|
model.to(torch_device)
|
||||||
model.eval()
|
model.eval()
|
||||||
result = model(pixel_values)
|
result = model(pixel_values)
|
||||||
self.parent.assertEqual(
|
self.parent.assertEqual(result.last_hidden_state.shape, (self.batch_size, self.seq_length, self.hidden_size))
|
||||||
result.last_hidden_state.shape, (self.batch_size, self.expected_seq_length, self.hidden_size)
|
|
||||||
)
|
|
||||||
|
|
||||||
def create_and_check_for_depth_estimation(self, config, pixel_values, labels):
|
def create_and_check_for_depth_estimation(self, config, pixel_values, labels):
|
||||||
config.num_labels = self.num_labels
|
config.num_labels = self.num_labels
|
||||||
@@ -167,8 +165,8 @@ class DPTModelTest(ModelTesterMixin, unittest.TestCase):
|
|||||||
def test_config(self):
|
def test_config(self):
|
||||||
self.config_tester.run_common_tests()
|
self.config_tester.run_common_tests()
|
||||||
|
|
||||||
|
@unittest.skip(reason="DPT does not use inputs_embeds")
|
||||||
def test_inputs_embeds(self):
|
def test_inputs_embeds(self):
|
||||||
# DPT does not use inputs_embeds
|
|
||||||
pass
|
pass
|
||||||
|
|
||||||
def test_model_common_attributes(self):
|
def test_model_common_attributes(self):
|
||||||
@@ -204,97 +202,6 @@ class DPTModelTest(ModelTesterMixin, unittest.TestCase):
|
|||||||
config_and_inputs = self.model_tester.prepare_config_and_inputs()
|
config_and_inputs = self.model_tester.prepare_config_and_inputs()
|
||||||
self.model_tester.create_and_check_for_semantic_segmentation(*config_and_inputs)
|
self.model_tester.create_and_check_for_semantic_segmentation(*config_and_inputs)
|
||||||
|
|
||||||
def test_attention_outputs(self):
|
|
||||||
config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
|
|
||||||
config.return_dict = True
|
|
||||||
|
|
||||||
# in DPT, the seq_len equals the number of patches + 1 (we add 1 for the [CLS] token)
|
|
||||||
seq_len = self.model_tester.expected_seq_length
|
|
||||||
|
|
||||||
for model_class in self.all_model_classes:
|
|
||||||
inputs_dict["output_attentions"] = True
|
|
||||||
inputs_dict["output_hidden_states"] = False
|
|
||||||
config.return_dict = True
|
|
||||||
model = model_class(config)
|
|
||||||
model.to(torch_device)
|
|
||||||
model.eval()
|
|
||||||
with torch.no_grad():
|
|
||||||
outputs = model(**self._prepare_for_class(inputs_dict, model_class))
|
|
||||||
|
|
||||||
self.assertEqual(len(outputs.attentions), self.model_tester.num_hidden_layers)
|
|
||||||
|
|
||||||
# check that output_attentions also work using config
|
|
||||||
del inputs_dict["output_attentions"]
|
|
||||||
config.output_attentions = True
|
|
||||||
model = model_class(config)
|
|
||||||
model.to(torch_device)
|
|
||||||
model.eval()
|
|
||||||
with torch.no_grad():
|
|
||||||
outputs = model(**self._prepare_for_class(inputs_dict, model_class))
|
|
||||||
attentions = outputs.attentions
|
|
||||||
self.assertEqual(len(attentions), self.model_tester.num_hidden_layers)
|
|
||||||
|
|
||||||
self.assertListEqual(
|
|
||||||
list(attentions[0].shape[-3:]),
|
|
||||||
[self.model_tester.num_attention_heads, seq_len, seq_len],
|
|
||||||
)
|
|
||||||
out_len = len(outputs)
|
|
||||||
|
|
||||||
# Check attention is always last and order is fine
|
|
||||||
inputs_dict["output_attentions"] = True
|
|
||||||
inputs_dict["output_hidden_states"] = True
|
|
||||||
model = model_class(config)
|
|
||||||
model.to(torch_device)
|
|
||||||
model.eval()
|
|
||||||
with torch.no_grad():
|
|
||||||
outputs = model(**self._prepare_for_class(inputs_dict, model_class))
|
|
||||||
|
|
||||||
self.assertEqual(out_len + 1, len(outputs))
|
|
||||||
|
|
||||||
self_attentions = outputs.attentions
|
|
||||||
|
|
||||||
self.assertEqual(len(self_attentions), self.model_tester.num_hidden_layers)
|
|
||||||
self.assertListEqual(
|
|
||||||
list(self_attentions[0].shape[-3:]),
|
|
||||||
[self.model_tester.num_attention_heads, seq_len, seq_len],
|
|
||||||
)
|
|
||||||
|
|
||||||
def test_hidden_states_output(self):
|
|
||||||
def check_hidden_states_output(inputs_dict, config, model_class):
|
|
||||||
model = model_class(config)
|
|
||||||
model.to(torch_device)
|
|
||||||
model.eval()
|
|
||||||
|
|
||||||
with torch.no_grad():
|
|
||||||
outputs = model(**self._prepare_for_class(inputs_dict, model_class))
|
|
||||||
|
|
||||||
hidden_states = outputs.hidden_states
|
|
||||||
|
|
||||||
expected_num_layers = getattr(
|
|
||||||
self.model_tester, "expected_num_hidden_layers", self.model_tester.num_hidden_layers + 1
|
|
||||||
)
|
|
||||||
self.assertEqual(len(hidden_states), expected_num_layers)
|
|
||||||
|
|
||||||
# DPT has a different seq_length
|
|
||||||
seq_len = self.model_tester.expected_seq_length
|
|
||||||
|
|
||||||
self.assertListEqual(
|
|
||||||
list(hidden_states[0].shape[-2:]),
|
|
||||||
[seq_len, self.model_tester.hidden_size],
|
|
||||||
)
|
|
||||||
|
|
||||||
config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
|
|
||||||
|
|
||||||
for model_class in self.all_model_classes:
|
|
||||||
inputs_dict["output_hidden_states"] = True
|
|
||||||
check_hidden_states_output(inputs_dict, config, model_class)
|
|
||||||
|
|
||||||
# check that output_hidden_states also work using config
|
|
||||||
del inputs_dict["output_hidden_states"]
|
|
||||||
config.output_hidden_states = True
|
|
||||||
|
|
||||||
check_hidden_states_output(inputs_dict, config, model_class)
|
|
||||||
|
|
||||||
def test_training(self):
|
def test_training(self):
|
||||||
for model_class in self.all_model_classes:
|
for model_class in self.all_model_classes:
|
||||||
if model_class.__name__ == "DPTForDepthEstimation":
|
if model_class.__name__ == "DPTForDepthEstimation":
|
||||||
|
|||||||
@@ -67,9 +67,9 @@ class FlaxViTModelTester(unittest.TestCase):
|
|||||||
self.type_sequence_label_size = type_sequence_label_size
|
self.type_sequence_label_size = type_sequence_label_size
|
||||||
self.initializer_range = initializer_range
|
self.initializer_range = initializer_range
|
||||||
|
|
||||||
# in ViT, the expected seq_len equals the number of patches + 1 (we add 1 for the [CLS] token)
|
# in ViT, the seq length equals the number of patches + 1 (we add 1 for the [CLS] token)
|
||||||
num_patches = (image_size // patch_size) ** 2
|
num_patches = (image_size // patch_size) ** 2
|
||||||
self.expected_seq_length = num_patches + 1
|
self.seq_length = num_patches + 1
|
||||||
|
|
||||||
def prepare_config_and_inputs(self):
|
def prepare_config_and_inputs(self):
|
||||||
pixel_values = floats_tensor([self.batch_size, self.num_channels, self.image_size, self.image_size])
|
pixel_values = floats_tensor([self.batch_size, self.num_channels, self.image_size, self.image_size])
|
||||||
@@ -123,50 +123,6 @@ class FlaxViTModelTest(FlaxModelTesterMixin, unittest.TestCase):
|
|||||||
def test_config(self):
|
def test_config(self):
|
||||||
self.config_tester.run_common_tests()
|
self.config_tester.run_common_tests()
|
||||||
|
|
||||||
# We need to override this test because in ViT, the seq_len equals the number of patches + 1
|
|
||||||
def test_attention_outputs(self):
|
|
||||||
config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
|
|
||||||
config.return_dict = True
|
|
||||||
|
|
||||||
seq_length = self.model_tester.expected_seq_length
|
|
||||||
|
|
||||||
for model_class in self.all_model_classes:
|
|
||||||
inputs_dict["output_attentions"] = True
|
|
||||||
inputs_dict["output_hidden_states"] = False
|
|
||||||
model = model_class(config)
|
|
||||||
outputs = model(**self._prepare_for_class(inputs_dict, model_class))
|
|
||||||
attentions = outputs.attentions
|
|
||||||
self.assertEqual(len(attentions), self.model_tester.num_hidden_layers)
|
|
||||||
|
|
||||||
# check that output_attentions also work using config
|
|
||||||
del inputs_dict["output_attentions"]
|
|
||||||
config.output_attentions = True
|
|
||||||
model = model_class(config)
|
|
||||||
outputs = model(**self._prepare_for_class(inputs_dict, model_class))
|
|
||||||
attentions = outputs.attentions
|
|
||||||
self.assertEqual(len(attentions), self.model_tester.num_hidden_layers)
|
|
||||||
|
|
||||||
self.assertListEqual(
|
|
||||||
list(attentions[0].shape[-3:]),
|
|
||||||
[self.model_tester.num_attention_heads, seq_length, seq_length],
|
|
||||||
)
|
|
||||||
out_len = len(outputs)
|
|
||||||
|
|
||||||
# Check attention is always last and order is fine
|
|
||||||
inputs_dict["output_attentions"] = True
|
|
||||||
inputs_dict["output_hidden_states"] = True
|
|
||||||
model = model_class(config)
|
|
||||||
outputs = model(**self._prepare_for_class(inputs_dict, model_class))
|
|
||||||
|
|
||||||
added_hidden_states = 1
|
|
||||||
self.assertEqual(out_len + added_hidden_states, len(outputs))
|
|
||||||
|
|
||||||
self.assertEqual(len(attentions), self.model_tester.num_hidden_layers)
|
|
||||||
self.assertListEqual(
|
|
||||||
list(attentions[0].shape[-3:]),
|
|
||||||
[self.model_tester.num_attention_heads, seq_length, seq_length],
|
|
||||||
)
|
|
||||||
|
|
||||||
# We neeed to override this test because ViT's forward signature is different than text models.
|
# We neeed to override this test because ViT's forward signature is different than text models.
|
||||||
def test_forward_signature(self):
|
def test_forward_signature(self):
|
||||||
config, _ = self.model_tester.prepare_config_and_inputs_for_common()
|
config, _ = self.model_tester.prepare_config_and_inputs_for_common()
|
||||||
@@ -180,7 +136,7 @@ class FlaxViTModelTest(FlaxModelTesterMixin, unittest.TestCase):
|
|||||||
expected_arg_names = ["pixel_values"]
|
expected_arg_names = ["pixel_values"]
|
||||||
self.assertListEqual(arg_names[:1], expected_arg_names)
|
self.assertListEqual(arg_names[:1], expected_arg_names)
|
||||||
|
|
||||||
# We neeed to override this test because ViT expects pixel_values instead of input_ids
|
# We need to override this test because ViT expects pixel_values instead of input_ids
|
||||||
def test_jit_compilation(self):
|
def test_jit_compilation(self):
|
||||||
config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
|
config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
|
||||||
|
|
||||||
@@ -204,35 +160,6 @@ class FlaxViTModelTest(FlaxModelTesterMixin, unittest.TestCase):
|
|||||||
for jitted_output, output in zip(jitted_outputs, outputs):
|
for jitted_output, output in zip(jitted_outputs, outputs):
|
||||||
self.assertEqual(jitted_output.shape, output.shape)
|
self.assertEqual(jitted_output.shape, output.shape)
|
||||||
|
|
||||||
# We need to override this test because in ViT, the seq_len equals the number of patches + 1
|
|
||||||
def test_hidden_states_output(self):
|
|
||||||
def check_hidden_states_output(inputs_dict, config, model_class):
|
|
||||||
model = model_class(config)
|
|
||||||
|
|
||||||
seq_length = self.model_tester.expected_seq_length
|
|
||||||
|
|
||||||
outputs = model(**self._prepare_for_class(inputs_dict, model_class))
|
|
||||||
hidden_states = outputs.hidden_states
|
|
||||||
|
|
||||||
self.assertEqual(len(hidden_states), self.model_tester.num_hidden_layers + 1)
|
|
||||||
|
|
||||||
self.assertListEqual(
|
|
||||||
list(hidden_states[0].shape[-2:]),
|
|
||||||
[seq_length, self.model_tester.hidden_size],
|
|
||||||
)
|
|
||||||
|
|
||||||
config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
|
|
||||||
|
|
||||||
for model_class in self.all_model_classes:
|
|
||||||
inputs_dict["output_hidden_states"] = True
|
|
||||||
check_hidden_states_output(inputs_dict, config, model_class)
|
|
||||||
|
|
||||||
# check that output_hidden_states also work using config
|
|
||||||
del inputs_dict["output_hidden_states"]
|
|
||||||
config.output_hidden_states = True
|
|
||||||
|
|
||||||
check_hidden_states_output(inputs_dict, config, model_class)
|
|
||||||
|
|
||||||
@slow
|
@slow
|
||||||
def test_model_from_pretrained(self):
|
def test_model_from_pretrained(self):
|
||||||
for model_class_name in self.all_model_classes:
|
for model_class_name in self.all_model_classes:
|
||||||
|
|||||||
@@ -16,12 +16,10 @@
|
|||||||
|
|
||||||
|
|
||||||
import inspect
|
import inspect
|
||||||
import os
|
|
||||||
import tempfile
|
|
||||||
import unittest
|
import unittest
|
||||||
|
|
||||||
from transformers import ViTConfig
|
from transformers import ViTConfig
|
||||||
from transformers.testing_utils import require_tf, require_vision, slow, tooslow
|
from transformers.testing_utils import require_tf, require_vision, slow
|
||||||
from transformers.utils import cached_property, is_tf_available, is_vision_available
|
from transformers.utils import cached_property, is_tf_available, is_vision_available
|
||||||
|
|
||||||
from ..test_configuration_common import ConfigTester
|
from ..test_configuration_common import ConfigTester
|
||||||
@@ -80,9 +78,9 @@ class TFViTModelTester:
|
|||||||
self.initializer_range = initializer_range
|
self.initializer_range = initializer_range
|
||||||
self.scope = scope
|
self.scope = scope
|
||||||
|
|
||||||
# in ViT, the expected seq_len equals the number of patches + 1 (we add 1 for the [CLS] token)
|
# in ViT, the seq length equals the number of patches + 1 (we add 1 for the [CLS] token)
|
||||||
num_patches = (image_size // patch_size) ** 2
|
num_patches = (image_size // patch_size) ** 2
|
||||||
self.expected_seq_length = num_patches + 1
|
self.seq_length = num_patches + 1
|
||||||
|
|
||||||
def prepare_config_and_inputs(self):
|
def prepare_config_and_inputs(self):
|
||||||
pixel_values = floats_tensor([self.batch_size, self.num_channels, self.image_size, self.image_size])
|
pixel_values = floats_tensor([self.batch_size, self.num_channels, self.image_size, self.image_size])
|
||||||
@@ -114,18 +112,14 @@ class TFViTModelTester:
|
|||||||
def create_and_check_model(self, config, pixel_values, labels):
|
def create_and_check_model(self, config, pixel_values, labels):
|
||||||
model = TFViTModel(config=config)
|
model = TFViTModel(config=config)
|
||||||
result = model(pixel_values, training=False)
|
result = model(pixel_values, training=False)
|
||||||
self.parent.assertEqual(
|
self.parent.assertEqual(result.last_hidden_state.shape, (self.batch_size, self.seq_length, self.hidden_size))
|
||||||
result.last_hidden_state.shape, (self.batch_size, self.expected_seq_length, self.hidden_size)
|
|
||||||
)
|
|
||||||
|
|
||||||
# Test with an image with different size than the one specified in config.
|
# Test with an image with different size than the one specified in config.
|
||||||
image_size = self.image_size // 2
|
image_size = self.image_size // 2
|
||||||
pixel_values = pixel_values[:, :, :image_size, :image_size]
|
pixel_values = pixel_values[:, :, :image_size, :image_size]
|
||||||
result = model(pixel_values, interpolate_pos_encoding=True, training=False)
|
result = model(pixel_values, interpolate_pos_encoding=True, training=False)
|
||||||
expected_seq_length = (image_size // self.patch_size) ** 2 + 1
|
seq_length = (image_size // self.patch_size) ** 2 + 1
|
||||||
self.parent.assertEqual(
|
self.parent.assertEqual(result.last_hidden_state.shape, (self.batch_size, seq_length, self.hidden_size))
|
||||||
result.last_hidden_state.shape, (self.batch_size, expected_seq_length, self.hidden_size)
|
|
||||||
)
|
|
||||||
|
|
||||||
def create_and_check_for_image_classification(self, config, pixel_values, labels):
|
def create_and_check_for_image_classification(self, config, pixel_values, labels):
|
||||||
config.num_labels = self.type_sequence_label_size
|
config.num_labels = self.type_sequence_label_size
|
||||||
@@ -166,12 +160,12 @@ class TFViTModelTest(TFModelTesterMixin, unittest.TestCase):
|
|||||||
def test_config(self):
|
def test_config(self):
|
||||||
self.config_tester.run_common_tests()
|
self.config_tester.run_common_tests()
|
||||||
|
|
||||||
|
@unittest.skip(reason="ViT does not use inputs_embeds")
|
||||||
def test_inputs_embeds(self):
|
def test_inputs_embeds(self):
|
||||||
# ViT does not use inputs_embeds
|
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
@unittest.skip(reason="ViT does not use inputs_embeds")
|
||||||
def test_graph_mode_with_inputs_embeds(self):
|
def test_graph_mode_with_inputs_embeds(self):
|
||||||
# ViT does not use inputs_embeds
|
|
||||||
pass
|
pass
|
||||||
|
|
||||||
def test_model_common_attributes(self):
|
def test_model_common_attributes(self):
|
||||||
@@ -199,131 +193,6 @@ class TFViTModelTest(TFModelTesterMixin, unittest.TestCase):
|
|||||||
config_and_inputs = self.model_tester.prepare_config_and_inputs()
|
config_and_inputs = self.model_tester.prepare_config_and_inputs()
|
||||||
self.model_tester.create_and_check_model(*config_and_inputs)
|
self.model_tester.create_and_check_model(*config_and_inputs)
|
||||||
|
|
||||||
# overwrite from common since `encoder_seq_length` and `encoder_key_length` are calculated
|
|
||||||
# in a different way than in text models.
|
|
||||||
@tooslow
|
|
||||||
def test_saved_model_creation_extended(self):
|
|
||||||
config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
|
|
||||||
config.output_hidden_states = True
|
|
||||||
config.output_attentions = True
|
|
||||||
|
|
||||||
if hasattr(config, "use_cache"):
|
|
||||||
config.use_cache = True
|
|
||||||
|
|
||||||
# in ViT, the seq_len equals the number of patches + 1 (we add 1 for the [CLS] token)
|
|
||||||
seq_len = self.model_tester.expected_seq_length
|
|
||||||
|
|
||||||
for model_class in self.all_model_classes:
|
|
||||||
class_inputs_dict = self._prepare_for_class(inputs_dict, model_class)
|
|
||||||
model = model_class(config)
|
|
||||||
num_out = len(model(class_inputs_dict))
|
|
||||||
|
|
||||||
with tempfile.TemporaryDirectory() as tmpdirname:
|
|
||||||
model.save_pretrained(tmpdirname, saved_model=True)
|
|
||||||
saved_model_dir = os.path.join(tmpdirname, "saved_model", "1")
|
|
||||||
model = tf.keras.models.load_model(saved_model_dir)
|
|
||||||
outputs = model(class_inputs_dict)
|
|
||||||
|
|
||||||
output_hidden_states = outputs["hidden_states"]
|
|
||||||
output_attentions = outputs["attentions"]
|
|
||||||
|
|
||||||
self.assertEqual(len(outputs), num_out)
|
|
||||||
|
|
||||||
expected_num_layers = getattr(
|
|
||||||
self.model_tester, "expected_num_hidden_layers", self.model_tester.num_hidden_layers + 1
|
|
||||||
)
|
|
||||||
|
|
||||||
self.assertEqual(len(output_hidden_states), expected_num_layers)
|
|
||||||
self.assertListEqual(
|
|
||||||
list(output_hidden_states[0].shape[-2:]),
|
|
||||||
[seq_len, self.model_tester.hidden_size],
|
|
||||||
)
|
|
||||||
|
|
||||||
self.assertEqual(len(output_attentions), self.model_tester.num_hidden_layers)
|
|
||||||
self.assertListEqual(
|
|
||||||
list(output_attentions[0].shape[-3:]),
|
|
||||||
[self.model_tester.num_attention_heads, seq_len, seq_len],
|
|
||||||
)
|
|
||||||
|
|
||||||
def test_attention_outputs(self):
|
|
||||||
config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
|
|
||||||
config.return_dict = True
|
|
||||||
|
|
||||||
# in ViT, the seq_len equals the number of patches + 1 (we add 1 for the [CLS] token)
|
|
||||||
seq_len = self.model_tester.expected_seq_length
|
|
||||||
|
|
||||||
for model_class in self.all_model_classes:
|
|
||||||
inputs_dict["output_attentions"] = True
|
|
||||||
inputs_dict["output_hidden_states"] = False
|
|
||||||
config.return_dict = True
|
|
||||||
model = model_class(config)
|
|
||||||
outputs = model(**self._prepare_for_class(inputs_dict, model_class), training=False)
|
|
||||||
attentions = outputs.attentions
|
|
||||||
self.assertEqual(len(attentions), self.model_tester.num_hidden_layers)
|
|
||||||
|
|
||||||
# check that output_attentions also work using config
|
|
||||||
del inputs_dict["output_attentions"]
|
|
||||||
config.output_attentions = True
|
|
||||||
model = model_class(config)
|
|
||||||
outputs = model(**self._prepare_for_class(inputs_dict, model_class), training=False)
|
|
||||||
attentions = outputs.attentions
|
|
||||||
self.assertEqual(len(attentions), self.model_tester.num_hidden_layers)
|
|
||||||
|
|
||||||
self.assertListEqual(
|
|
||||||
list(attentions[0].shape[-3:]),
|
|
||||||
[self.model_tester.num_attention_heads, seq_len, seq_len],
|
|
||||||
)
|
|
||||||
out_len = len(outputs)
|
|
||||||
|
|
||||||
# Check attention is always last and order is fine
|
|
||||||
inputs_dict["output_attentions"] = True
|
|
||||||
inputs_dict["output_hidden_states"] = True
|
|
||||||
model = model_class(config)
|
|
||||||
outputs = model(**self._prepare_for_class(inputs_dict, model_class), training=False)
|
|
||||||
|
|
||||||
self.assertEqual(out_len + 1, len(outputs))
|
|
||||||
|
|
||||||
self_attentions = outputs.attentions
|
|
||||||
|
|
||||||
self.assertEqual(len(self_attentions), self.model_tester.num_hidden_layers)
|
|
||||||
self.assertListEqual(
|
|
||||||
list(self_attentions[0].shape[-3:]),
|
|
||||||
[self.model_tester.num_attention_heads, seq_len, seq_len],
|
|
||||||
)
|
|
||||||
|
|
||||||
def test_hidden_states_output(self):
|
|
||||||
def check_hidden_states_output(inputs_dict, config, model_class):
|
|
||||||
model = model_class(config)
|
|
||||||
|
|
||||||
outputs = model(**self._prepare_for_class(inputs_dict, model_class))
|
|
||||||
|
|
||||||
hidden_states = outputs.hidden_states
|
|
||||||
|
|
||||||
expected_num_layers = getattr(
|
|
||||||
self.model_tester, "expected_num_hidden_layers", self.model_tester.num_hidden_layers + 1
|
|
||||||
)
|
|
||||||
self.assertEqual(len(hidden_states), expected_num_layers)
|
|
||||||
|
|
||||||
# ViT has a different seq_length
|
|
||||||
seq_length = self.model_tester.expected_seq_length
|
|
||||||
|
|
||||||
self.assertListEqual(
|
|
||||||
list(hidden_states[0].shape[-2:]),
|
|
||||||
[seq_length, self.model_tester.hidden_size],
|
|
||||||
)
|
|
||||||
|
|
||||||
config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
|
|
||||||
|
|
||||||
for model_class in self.all_model_classes:
|
|
||||||
inputs_dict["output_hidden_states"] = True
|
|
||||||
check_hidden_states_output(inputs_dict, config, model_class)
|
|
||||||
|
|
||||||
# check that output_hidden_states also work using config
|
|
||||||
del inputs_dict["output_hidden_states"]
|
|
||||||
config.output_hidden_states = True
|
|
||||||
|
|
||||||
check_hidden_states_output(inputs_dict, config, model_class)
|
|
||||||
|
|
||||||
def test_for_image_classification(self):
|
def test_for_image_classification(self):
|
||||||
config_and_inputs = self.model_tester.prepare_config_and_inputs()
|
config_and_inputs = self.model_tester.prepare_config_and_inputs()
|
||||||
self.model_tester.create_and_check_for_image_classification(*config_and_inputs)
|
self.model_tester.create_and_check_for_image_classification(*config_and_inputs)
|
||||||
|
|||||||
@@ -81,9 +81,9 @@ class ViTModelTester:
|
|||||||
self.scope = scope
|
self.scope = scope
|
||||||
self.encoder_stride = encoder_stride
|
self.encoder_stride = encoder_stride
|
||||||
|
|
||||||
# in ViT, the expected seq_len equals the number of patches + 1 (we add 1 for the [CLS] token)
|
# in ViT, the seq length equals the number of patches + 1 (we add 1 for the [CLS] token)
|
||||||
num_patches = (image_size // patch_size) ** 2
|
num_patches = (image_size // patch_size) ** 2
|
||||||
self.expected_seq_length = num_patches + 1
|
self.seq_length = num_patches + 1
|
||||||
|
|
||||||
def prepare_config_and_inputs(self):
|
def prepare_config_and_inputs(self):
|
||||||
pixel_values = floats_tensor([self.batch_size, self.num_channels, self.image_size, self.image_size])
|
pixel_values = floats_tensor([self.batch_size, self.num_channels, self.image_size, self.image_size])
|
||||||
@@ -118,9 +118,7 @@ class ViTModelTester:
|
|||||||
model.to(torch_device)
|
model.to(torch_device)
|
||||||
model.eval()
|
model.eval()
|
||||||
result = model(pixel_values)
|
result = model(pixel_values)
|
||||||
self.parent.assertEqual(
|
self.parent.assertEqual(result.last_hidden_state.shape, (self.batch_size, self.seq_length, self.hidden_size))
|
||||||
result.last_hidden_state.shape, (self.batch_size, self.expected_seq_length, self.hidden_size)
|
|
||||||
)
|
|
||||||
|
|
||||||
def create_and_check_for_image_classification(self, config, pixel_values, labels):
|
def create_and_check_for_image_classification(self, config, pixel_values, labels):
|
||||||
config.num_labels = self.type_sequence_label_size
|
config.num_labels = self.type_sequence_label_size
|
||||||
@@ -169,8 +167,8 @@ class ViTModelTest(ModelTesterMixin, unittest.TestCase):
|
|||||||
def test_config(self):
|
def test_config(self):
|
||||||
self.config_tester.run_common_tests()
|
self.config_tester.run_common_tests()
|
||||||
|
|
||||||
|
@unittest.skip(reason="ViT does not use inputs_embeds")
|
||||||
def test_inputs_embeds(self):
|
def test_inputs_embeds(self):
|
||||||
# ViT does not use inputs_embeds
|
|
||||||
pass
|
pass
|
||||||
|
|
||||||
def test_model_common_attributes(self):
|
def test_model_common_attributes(self):
|
||||||
@@ -198,93 +196,6 @@ class ViTModelTest(ModelTesterMixin, unittest.TestCase):
|
|||||||
config_and_inputs = self.model_tester.prepare_config_and_inputs()
|
config_and_inputs = self.model_tester.prepare_config_and_inputs()
|
||||||
self.model_tester.create_and_check_model(*config_and_inputs)
|
self.model_tester.create_and_check_model(*config_and_inputs)
|
||||||
|
|
||||||
def test_attention_outputs(self):
|
|
||||||
config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
|
|
||||||
config.return_dict = True
|
|
||||||
|
|
||||||
seq_len = self.model_tester.expected_seq_length
|
|
||||||
|
|
||||||
for model_class in self.all_model_classes:
|
|
||||||
inputs_dict["output_attentions"] = True
|
|
||||||
inputs_dict["output_hidden_states"] = False
|
|
||||||
config.return_dict = True
|
|
||||||
model = model_class(config)
|
|
||||||
model.to(torch_device)
|
|
||||||
model.eval()
|
|
||||||
with torch.no_grad():
|
|
||||||
outputs = model(**self._prepare_for_class(inputs_dict, model_class))
|
|
||||||
attentions = outputs.attentions
|
|
||||||
self.assertEqual(len(attentions), self.model_tester.num_hidden_layers)
|
|
||||||
|
|
||||||
# check that output_attentions also work using config
|
|
||||||
del inputs_dict["output_attentions"]
|
|
||||||
config.output_attentions = True
|
|
||||||
model = model_class(config)
|
|
||||||
model.to(torch_device)
|
|
||||||
model.eval()
|
|
||||||
with torch.no_grad():
|
|
||||||
outputs = model(**self._prepare_for_class(inputs_dict, model_class))
|
|
||||||
attentions = outputs.attentions
|
|
||||||
self.assertEqual(len(attentions), self.model_tester.num_hidden_layers)
|
|
||||||
|
|
||||||
self.assertListEqual(
|
|
||||||
list(attentions[0].shape[-3:]),
|
|
||||||
[self.model_tester.num_attention_heads, seq_len, seq_len],
|
|
||||||
)
|
|
||||||
out_len = len(outputs)
|
|
||||||
|
|
||||||
# Check attention is always last and order is fine
|
|
||||||
inputs_dict["output_attentions"] = True
|
|
||||||
inputs_dict["output_hidden_states"] = True
|
|
||||||
model = model_class(config)
|
|
||||||
model.to(torch_device)
|
|
||||||
model.eval()
|
|
||||||
with torch.no_grad():
|
|
||||||
outputs = model(**self._prepare_for_class(inputs_dict, model_class))
|
|
||||||
|
|
||||||
self.assertEqual(out_len + 1, len(outputs))
|
|
||||||
|
|
||||||
self_attentions = outputs.attentions
|
|
||||||
|
|
||||||
self.assertEqual(len(self_attentions), self.model_tester.num_hidden_layers)
|
|
||||||
self.assertListEqual(
|
|
||||||
list(self_attentions[0].shape[-3:]),
|
|
||||||
[self.model_tester.num_attention_heads, seq_len, seq_len],
|
|
||||||
)
|
|
||||||
|
|
||||||
def test_hidden_states_output(self):
|
|
||||||
def check_hidden_states_output(inputs_dict, config, model_class):
|
|
||||||
model = model_class(config)
|
|
||||||
model.to(torch_device)
|
|
||||||
model.eval()
|
|
||||||
|
|
||||||
with torch.no_grad():
|
|
||||||
outputs = model(**self._prepare_for_class(inputs_dict, model_class))
|
|
||||||
|
|
||||||
hidden_states = outputs.hidden_states
|
|
||||||
|
|
||||||
expected_num_layers = getattr(
|
|
||||||
self.model_tester, "expected_num_hidden_layers", self.model_tester.num_hidden_layers + 1
|
|
||||||
)
|
|
||||||
self.assertEqual(len(hidden_states), expected_num_layers)
|
|
||||||
|
|
||||||
self.assertListEqual(
|
|
||||||
list(hidden_states[0].shape[-2:]),
|
|
||||||
[self.model_tester.expected_seq_length, self.model_tester.hidden_size],
|
|
||||||
)
|
|
||||||
|
|
||||||
config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
|
|
||||||
|
|
||||||
for model_class in self.all_model_classes:
|
|
||||||
inputs_dict["output_hidden_states"] = True
|
|
||||||
check_hidden_states_output(inputs_dict, config, model_class)
|
|
||||||
|
|
||||||
# check that output_hidden_states also work using config
|
|
||||||
del inputs_dict["output_hidden_states"]
|
|
||||||
config.output_hidden_states = True
|
|
||||||
|
|
||||||
check_hidden_states_output(inputs_dict, config, model_class)
|
|
||||||
|
|
||||||
def test_for_image_classification(self):
|
def test_for_image_classification(self):
|
||||||
config_and_inputs = self.model_tester.prepare_config_and_inputs()
|
config_and_inputs = self.model_tester.prepare_config_and_inputs()
|
||||||
self.model_tester.create_and_check_for_image_classification(*config_and_inputs)
|
self.model_tester.create_and_check_for_image_classification(*config_and_inputs)
|
||||||
|
|||||||
Reference in New Issue
Block a user