Fix T5 incorrect weight decay in Trainer and official summarization example (#18002)
* Add ALL_LAYERNORM_LAYERS for LayerNorm * fix bug of appending layer norm
This commit is contained in:
@@ -526,7 +526,7 @@ def main():
|
||||
|
||||
# Optimizer
|
||||
# Split weights in two groups, one with weight decay and the other not.
|
||||
no_decay = ["bias", "LayerNorm.weight"]
|
||||
no_decay = ["bias", "LayerNorm.weight", "layer_norm.weight"]
|
||||
optimizer_grouped_parameters = [
|
||||
{
|
||||
"params": [p for n, p in model.named_parameters() if not any(nd in n for nd in no_decay)],
|
||||
|
||||
Reference in New Issue
Block a user