Pruning for GPT and GPT-2
This commit is contained in:
@@ -453,6 +453,12 @@ class GPT2Model(GPT2PreTrainedModel):
|
|||||||
self.h = nn.ModuleList([Block(config.n_ctx, config, scale=True) for _ in range(config.n_layer)])
|
self.h = nn.ModuleList([Block(config.n_ctx, config, scale=True) for _ in range(config.n_layer)])
|
||||||
self.ln_f = LayerNorm(config.n_embd, eps=config.layer_norm_epsilon)
|
self.ln_f = LayerNorm(config.n_embd, eps=config.layer_norm_epsilon)
|
||||||
|
|
||||||
|
if hasattr(config, "pruned_heads"):
|
||||||
|
pruned_heads = config.pruned_heads.copy().items()
|
||||||
|
for layer, heads in pruned_heads:
|
||||||
|
if self.h[int(layer)].attn.n_head == config.n_head:
|
||||||
|
self.prune_heads({int(layer): list(map(int, heads))})
|
||||||
|
|
||||||
self.apply(self.init_weights)
|
self.apply(self.init_weights)
|
||||||
|
|
||||||
def _resize_token_embeddings(self, new_num_tokens):
|
def _resize_token_embeddings(self, new_num_tokens):
|
||||||
|
|||||||
@@ -456,6 +456,12 @@ class OpenAIGPTModel(OpenAIGPTPreTrainedModel):
|
|||||||
self.drop = nn.Dropout(config.embd_pdrop)
|
self.drop = nn.Dropout(config.embd_pdrop)
|
||||||
self.h = nn.ModuleList([Block(config.n_ctx, config, scale=True) for _ in range(config.n_layer)])
|
self.h = nn.ModuleList([Block(config.n_ctx, config, scale=True) for _ in range(config.n_layer)])
|
||||||
|
|
||||||
|
if hasattr(config, "pruned_heads"):
|
||||||
|
pruned_heads = config.pruned_heads.copy().items()
|
||||||
|
for layer, heads in pruned_heads:
|
||||||
|
if self.h[int(layer)].attn.n_head == config.n_head:
|
||||||
|
self.prune_heads({int(layer): list(map(int, heads))})
|
||||||
|
|
||||||
self.apply(self.init_weights)
|
self.apply(self.init_weights)
|
||||||
|
|
||||||
def _resize_token_embeddings(self, new_num_tokens):
|
def _resize_token_embeddings(self, new_num_tokens):
|
||||||
|
|||||||
@@ -213,13 +213,12 @@ class CommonTestCases:
|
|||||||
if not self.test_pruning:
|
if not self.test_pruning:
|
||||||
return
|
return
|
||||||
|
|
||||||
config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
|
|
||||||
|
|
||||||
if "head_mask" in inputs_dict:
|
|
||||||
del inputs_dict["head_mask"]
|
|
||||||
|
|
||||||
for model_class in self.all_model_classes:
|
for model_class in self.all_model_classes:
|
||||||
config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
|
config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
|
||||||
|
|
||||||
|
if "head_mask" in inputs_dict:
|
||||||
|
del inputs_dict["head_mask"]
|
||||||
|
|
||||||
config.output_attentions = True
|
config.output_attentions = True
|
||||||
config.output_hidden_states = False
|
config.output_hidden_states = False
|
||||||
model = model_class(config=config)
|
model = model_class(config=config)
|
||||||
@@ -244,6 +243,10 @@ class CommonTestCases:
|
|||||||
|
|
||||||
for model_class in self.all_model_classes:
|
for model_class in self.all_model_classes:
|
||||||
config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
|
config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
|
||||||
|
|
||||||
|
if "head_mask" in inputs_dict:
|
||||||
|
del inputs_dict["head_mask"]
|
||||||
|
|
||||||
config.output_attentions = True
|
config.output_attentions = True
|
||||||
config.output_hidden_states = False
|
config.output_hidden_states = False
|
||||||
model = model_class(config=config)
|
model = model_class(config=config)
|
||||||
@@ -274,6 +277,10 @@ class CommonTestCases:
|
|||||||
|
|
||||||
for model_class in self.all_model_classes:
|
for model_class in self.all_model_classes:
|
||||||
config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
|
config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
|
||||||
|
|
||||||
|
if "head_mask" in inputs_dict:
|
||||||
|
del inputs_dict["head_mask"]
|
||||||
|
|
||||||
config.output_attentions = True
|
config.output_attentions = True
|
||||||
config.output_hidden_states = False
|
config.output_hidden_states = False
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user