From 5444687f0f10319897d09e087258700252b1ca74 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ivan=20Agarsk=C3=BD?=
Date: Mon, 21 Feb 2022 12:41:27 +0100
Subject: [PATCH] Fix minor comment typos (#15740)

---
 examples/research_projects/distillation/train.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/examples/research_projects/distillation/train.py b/examples/research_projects/distillation/train.py
index 0d21ae04f8..6385c885a9 100644
--- a/examples/research_projects/distillation/train.py
+++ b/examples/research_projects/distillation/train.py
@@ -133,7 +133,7 @@ def main():
         "--alpha_mlm",
         default=0.0,
         type=float,
-        help="Linear weight for the MLM loss. Must be >=0. Should be used in coonjunction with `mlm` flag.",
+        help="Linear weight for the MLM loss. Must be >=0. Should be used in conjunction with `mlm` flag.",
     )
     parser.add_argument("--alpha_clm", default=0.5, type=float, help="Linear weight for the CLM loss. Must be >=0.")
     parser.add_argument("--alpha_mse", default=0.0, type=float, help="Linear weight of the MSE loss. Must be >=0.")
@@ -164,7 +164,7 @@ def main():
     parser.add_argument(
         "--restrict_ce_to_mask",
         action="store_true",
-        help="If true, compute the distilation loss only the [MLM] prediction distribution.",
+        help="If true, compute the distillation loss only the [MLM] prediction distribution.",
     )
     parser.add_argument(
         "--freeze_pos_embs",
@@ -192,7 +192,7 @@ def main():
         help="Gradient accumulation for larger training batches.",
     )
     parser.add_argument("--warmup_prop", default=0.05, type=float, help="Linear warmup proportion.")
-    parser.add_argument("--weight_decay", default=0.0, type=float, help="Weight deay if we apply some.")
+    parser.add_argument("--weight_decay", default=0.0, type=float, help="Weight decay if we apply some.")
     parser.add_argument("--learning_rate", default=5e-4, type=float, help="The initial learning rate for Adam.")
     parser.add_argument("--adam_epsilon", default=1e-6, type=float, help="Epsilon for Adam optimizer.")
     parser.add_argument("--max_grad_norm", default=5.0, type=float, help="Max gradient norm.")