diff --git a/examples/ner/run_ner.py b/examples/ner/run_ner.py
index 08330dba7f..442fa97109 100644
--- a/examples/ner/run_ner.py
+++ b/examples/ner/run_ner.py
@@ -468,23 +468,15 @@ def main():
     parser.add_argument(
         "--do_lower_case", action="store_true", help="Set this flag if you are using an uncased model."
     )
-
     parser.add_argument(
         "--keep_accents", action="store_const", const=True, help="Set this flag if model is trained with accents."
     )
-
     parser.add_argument(
         "--strip_accents", action="store_const", const=True, help="Set this flag if model is trained without accents."
     )
-
     parser.add_argument(
-        "--nouse_fast",
-        action="store_const",
-        dest="use_fast",
-        const=False,
-        help="Set this flag to not use fast tokenization.",
+        "--use_fast", action="store_const", const=True, help="Set this flag to use fast tokenization."
     )
-
     parser.add_argument("--per_gpu_train_batch_size", default=8, type=int, help="Batch size per GPU/CPU for training.")
     parser.add_argument(
         "--per_gpu_eval_batch_size", default=8, type=int, help="Batch size per GPU/CPU for evaluation."
diff --git a/src/transformers/modeling_albert.py b/src/transformers/modeling_albert.py
index dbaa50f565..2a1269b4a9 100644
--- a/src/transformers/modeling_albert.py
+++ b/src/transformers/modeling_albert.py
@@ -600,7 +600,7 @@ class AlbertMLMHead(nn.Module):
         hidden_states = self.LayerNorm(hidden_states)
         hidden_states = self.decoder(hidden_states)
 
-        prediction_scores = hidden_states + self.bias
+        prediction_scores = hidden_states
 
         return prediction_scores