CI with num_hidden_layers=2 🚀🚀🚀 (#25266)
* CI with layers=2

---------

Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
This commit is contained in:
@@ -1017,7 +1017,8 @@ class ModelTesterMixin:
|
||||
attentions = outputs[-1]
|
||||
|
||||
self.assertEqual(attentions[0].shape[-3], 1)
|
||||
self.assertEqual(attentions[1].shape[-3], self.model_tester.num_attention_heads)
|
||||
# TODO: To have this check, we will need at least 3 layers. Do we really need it?
|
||||
# self.assertEqual(attentions[1].shape[-3], self.model_tester.num_attention_heads)
|
||||
self.assertEqual(attentions[-1].shape[-3], self.model_tester.num_attention_heads - 1)
|
||||
|
||||
def test_head_pruning_save_load_from_pretrained(self):
|
||||
@@ -1053,7 +1054,8 @@ class ModelTesterMixin:
|
||||
outputs = model(**self._prepare_for_class(inputs_dict, model_class))
|
||||
attentions = outputs[-1]
|
||||
self.assertEqual(attentions[0].shape[-3], 1)
|
||||
self.assertEqual(attentions[1].shape[-3], self.model_tester.num_attention_heads)
|
||||
# TODO: To have this check, we will need at least 3 layers. Do we really need it?
|
||||
# self.assertEqual(attentions[1].shape[-3], self.model_tester.num_attention_heads)
|
||||
self.assertEqual(attentions[-1].shape[-3], self.model_tester.num_attention_heads - 1)
|
||||
|
||||
def test_head_pruning_save_load_from_config_init(self):
|
||||
@@ -1087,7 +1089,8 @@ class ModelTesterMixin:
|
||||
attentions = outputs[-1]
|
||||
|
||||
self.assertEqual(attentions[0].shape[-3], 1)
|
||||
self.assertEqual(attentions[1].shape[-3], self.model_tester.num_attention_heads)
|
||||
# TODO: To have this check, we will need at least 3 layers. Do we really need it?
|
||||
# self.assertEqual(attentions[1].shape[-3], self.model_tester.num_attention_heads)
|
||||
self.assertEqual(attentions[-1].shape[-3], self.model_tester.num_attention_heads - 1)
|
||||
|
||||
def test_head_pruning_integration(self):
|
||||
@@ -1106,7 +1109,7 @@ class ModelTesterMixin:
|
||||
inputs_dict["output_attentions"] = True
|
||||
config.output_hidden_states = False
|
||||
|
||||
heads_to_prune = {0: [0], 1: [1, 2]}
|
||||
heads_to_prune = {1: [1, 2]}
|
||||
config.pruned_heads = heads_to_prune
|
||||
|
||||
model = model_class(config=config)
|
||||
@@ -1117,10 +1120,8 @@ class ModelTesterMixin:
|
||||
outputs = model(**self._prepare_for_class(inputs_dict, model_class))
|
||||
attentions = outputs[-1]
|
||||
|
||||
self.assertEqual(attentions[0].shape[-3], self.model_tester.num_attention_heads - 1)
|
||||
self.assertEqual(attentions[0].shape[-3], self.model_tester.num_attention_heads - 0)
|
||||
self.assertEqual(attentions[1].shape[-3], self.model_tester.num_attention_heads - 2)
|
||||
self.assertEqual(attentions[2].shape[-3], self.model_tester.num_attention_heads)
|
||||
self.assertEqual(attentions[3].shape[-3], self.model_tester.num_attention_heads)
|
||||
|
||||
with tempfile.TemporaryDirectory() as temp_dir_name:
|
||||
model.save_pretrained(temp_dir_name)
|
||||
@@ -1131,12 +1132,10 @@ class ModelTesterMixin:
|
||||
outputs = model(**self._prepare_for_class(inputs_dict, model_class))
|
||||
attentions = outputs[-1]
|
||||
|
||||
self.assertEqual(attentions[0].shape[-3], self.model_tester.num_attention_heads - 1)
|
||||
self.assertEqual(attentions[0].shape[-3], self.model_tester.num_attention_heads - 0)
|
||||
self.assertEqual(attentions[1].shape[-3], self.model_tester.num_attention_heads - 2)
|
||||
self.assertEqual(attentions[2].shape[-3], self.model_tester.num_attention_heads)
|
||||
self.assertEqual(attentions[3].shape[-3], self.model_tester.num_attention_heads)
|
||||
|
||||
heads_to_prune = {0: [0], 2: [1, 2]}
|
||||
heads_to_prune = {0: [0], 1: [1, 2]}
|
||||
model.prune_heads(heads_to_prune)
|
||||
|
||||
with torch.no_grad():
|
||||
@@ -1145,10 +1144,8 @@ class ModelTesterMixin:
|
||||
|
||||
self.assertEqual(attentions[0].shape[-3], self.model_tester.num_attention_heads - 1)
|
||||
self.assertEqual(attentions[1].shape[-3], self.model_tester.num_attention_heads - 2)
|
||||
self.assertEqual(attentions[2].shape[-3], self.model_tester.num_attention_heads - 2)
|
||||
self.assertEqual(attentions[3].shape[-3], self.model_tester.num_attention_heads)
|
||||
|
||||
self.assertDictEqual(model.config.pruned_heads, {0: [0], 1: [1, 2], 2: [1, 2]})
|
||||
self.assertDictEqual(model.config.pruned_heads, {0: [0], 1: [1, 2]})
|
||||
|
||||
def test_hidden_states_output(self):
|
||||
def check_hidden_states_output(inputs_dict, config, model_class):
|
||||
|
||||
Reference in New Issue
Block a user