From 5444687f0f10319897d09e087258700252b1ca74 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ivan=20Agarsk=C3=BD?= <agarskyivan@gmail.com>
Date: Mon, 21 Feb 2022 12:41:27 +0100
Subject: [PATCH] Fix minor comment typos (#15740)

---
 examples/research_projects/distillation/train.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/examples/research_projects/distillation/train.py b/examples/research_projects/distillation/train.py
index 0d21ae04f8..6385c885a9 100644
--- a/examples/research_projects/distillation/train.py
+++ b/examples/research_projects/distillation/train.py
@@ -133,7 +133,7 @@ def main():
         "--alpha_mlm",
         default=0.0,
         type=float,
-        help="Linear weight for the MLM loss. Must be >=0. Should be used in coonjunction with `mlm` flag.",
+        help="Linear weight for the MLM loss. Must be >=0. Should be used in conjunction with `mlm` flag.",
     )
     parser.add_argument("--alpha_clm", default=0.5, type=float, help="Linear weight for the CLM loss. Must be >=0.")
     parser.add_argument("--alpha_mse", default=0.0, type=float, help="Linear weight of the MSE loss. Must be >=0.")
@@ -164,7 +164,7 @@ def main():
     parser.add_argument(
         "--restrict_ce_to_mask",
         action="store_true",
-        help="If true, compute the distilation loss only the [MLM] prediction distribution.",
+        help="If true, compute the distillation loss only over the [MLM] prediction distribution.",
     )
     parser.add_argument(
         "--freeze_pos_embs",
@@ -192,7 +192,7 @@ def main():
         help="Gradient accumulation for larger training batches.",
     )
     parser.add_argument("--warmup_prop", default=0.05, type=float, help="Linear warmup proportion.")
-    parser.add_argument("--weight_decay", default=0.0, type=float, help="Weight deay if we apply some.")
+    parser.add_argument("--weight_decay", default=0.0, type=float, help="Weight decay if we apply some.")
     parser.add_argument("--learning_rate", default=5e-4, type=float, help="The initial learning rate for Adam.")
     parser.add_argument("--adam_epsilon", default=1e-6, type=float, help="Epsilon for Adam optimizer.")
     parser.add_argument("--max_grad_norm", default=5.0, type=float, help="Max gradient norm.")