Fix incorrect T5 weight decay in Trainer and official summarization example (#18002)

* Add ALL_LAYERNORM_LAYERS for LayerNorm

* Fix bug in appending layer norm
This commit is contained in:
ADAning
2022-07-06 21:44:19 +08:00
committed by GitHub
parent 22edb68d49
commit bf37e5c7f6
5 changed files with 12 additions and 4 deletions

View File

@@ -526,7 +526,7 @@ def main():
# Optimizer
# Split weights in two groups, one with weight decay and the other not.
no_decay = ["bias", "LayerNorm.weight"]
no_decay = ["bias", "LayerNorm.weight", "layer_norm.weight"]
optimizer_grouped_parameters = [
{
"params": [p for n, p in model.named_parameters() if not any(nd in n for nd in no_decay)],