Check layer types for Optimizer construction (#10598)

* Check layer types for Optimizer construction

* Duplicate class
This commit is contained in:
Sylvain Gugger
2021-03-08 16:40:11 -05:00
committed by GitHub
parent 821d518e03
commit 3ced9b3eb9
4 changed files with 71 additions and 3 deletions

View File

@@ -80,6 +80,7 @@ from .trainer_pt_utils import (
SequentialDistributedSampler,
distributed_broadcast_scalars,
distributed_concat,
get_parameter_names,
nested_concat,
nested_detach,
nested_numpify,
@@ -613,14 +614,15 @@ class Trainer:
Trainer's init through :obj:`optimizers`, or subclass and override this method in a subclass.
"""
if self.optimizer is None:
no_decay = ["bias", "LayerNorm.weight"]
decay_parameters = get_parameter_names(self.model, [torch.nn.LayerNorm])
decay_parameters = [name for name in decay_parameters if "bias" not in name]
optimizer_grouped_parameters = [
{
"params": [p for n, p in self.model.named_parameters() if not any(nd in n for nd in no_decay)],
"params": [p for n, p in self.model.named_parameters() if n in decay_parameters],
"weight_decay": self.args.weight_decay,
},
{
"params": [p for n, p in self.model.named_parameters() if any(nd in n for nd in no_decay)],
"params": [p for n, p in self.model.named_parameters() if n not in decay_parameters],
"weight_decay": 0.0,
},
]

View File

@@ -672,3 +672,19 @@ def save_state(self):
path = os.path.join(self.args.output_dir, "trainer_state.json")
self.state.save_to_json(path)
def get_parameter_names(model, forbidden_layer_types):
"""
Returns the names of the model parameters that are not inside a forbidden layer.
"""
result = []
for name, child in model.named_children():
result += [
f"{name}.{n}"
for n in get_parameter_names(child, forbidden_layer_types)
if not isinstance(child, tuple(forbidden_layer_types))
]
# Add model specific parameters (defined with nn.Parameter) since they are not in any child.
result += list(model._parameters.keys())
return result