diff --git a/examples/ner/run_ner.py b/examples/ner/run_ner.py index 08330dba7f..442fa97109 100644 --- a/examples/ner/run_ner.py +++ b/examples/ner/run_ner.py @@ -468,23 +468,15 @@ def main(): parser.add_argument( "--do_lower_case", action="store_true", help="Set this flag if you are using an uncased model." ) - parser.add_argument( "--keep_accents", action="store_const", const=True, help="Set this flag if model is trained with accents." ) - parser.add_argument( "--strip_accents", action="store_const", const=True, help="Set this flag if model is trained without accents." ) - parser.add_argument( - "--nouse_fast", - action="store_const", - dest="use_fast", - const=False, - help="Set this flag to not use fast tokenization.", + "--use_fast", action="store_const", const=True, help="Set this flag to use fast tokenization." ) - parser.add_argument("--per_gpu_train_batch_size", default=8, type=int, help="Batch size per GPU/CPU for training.") parser.add_argument( "--per_gpu_eval_batch_size", default=8, type=int, help="Batch size per GPU/CPU for evaluation." diff --git a/src/transformers/modeling_albert.py b/src/transformers/modeling_albert.py index dbaa50f565..2a1269b4a9 100644 --- a/src/transformers/modeling_albert.py +++ b/src/transformers/modeling_albert.py @@ -600,7 +600,7 @@ class AlbertMLMHead(nn.Module): hidden_states = self.LayerNorm(hidden_states) hidden_states = self.decoder(hidden_states) - prediction_scores = hidden_states + self.bias + prediction_scores = hidden_states return prediction_scores