From 0541442558edb3771ec469b46bf236c0679a1cb2 Mon Sep 17 00:00:00 2001 From: thomwolf Date: Fri, 30 Nov 2018 13:47:33 +0100 Subject: [PATCH] add do_lower_case in examples --- examples/extract_features.py | 3 ++- examples/run_classifier.py | 6 +++++- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/examples/extract_features.py b/examples/extract_features.py index abe7fdffe7..dbab934c08 100644 --- a/examples/extract_features.py +++ b/examples/extract_features.py @@ -199,6 +199,7 @@ def main(): "bert-large-uncased, bert-base-cased, bert-base-multilingual, bert-base-chinese.") ## Other parameters + parser.add_argument("--do_lower_case", default=False, action='store_true', help="Set this flag if you are using an uncased model.") parser.add_argument("--layers", default="-1,-2,-3,-4", type=str) parser.add_argument("--max_seq_length", default=128, type=int, help="The maximum total input sequence length after WordPiece tokenization. Sequences longer " @@ -227,7 +228,7 @@ def main(): layer_indexes = [int(x) for x in args.layers.split(",")] - tokenizer = BertTokenizer.from_pretrained(args.bert_model) + tokenizer = BertTokenizer.from_pretrained(args.bert_model, do_lower_case=args.do_lower_case) examples = read_examples(args.input_file) diff --git a/examples/run_classifier.py b/examples/run_classifier.py index 2c83b4fe49..52f3cd752d 100644 --- a/examples/run_classifier.py +++ b/examples/run_classifier.py @@ -376,6 +376,10 @@ def main(): default=False, action='store_true', help="Whether to run eval on the dev set.") + parser.add_argument("--do_lower_case", + default=False, + action='store_true', + help="Set this flag if you are using an uncased model.") parser.add_argument("--train_batch_size", default=32, type=int, @@ -473,7 +477,7 @@ def main(): processor = processors[task_name]() label_list = processor.get_labels() - tokenizer = BertTokenizer.from_pretrained(args.bert_model) + tokenizer = BertTokenizer.from_pretrained(args.bert_model, do_lower_case=args.do_lower_case) train_examples = None num_train_steps = None