Merge pull request #1055 from qipeng/run_squad_fix

Fix #1015 (tokenizer defaults to do_lower_case=True when loading from trained models)
2019-08-21 01:20:46 +02:00
parent e753f249e1 3bffd2e8e5
commit 9beaa85b07
2 changed files with 3 additions and 3 deletions
--- a/examples/run_glue.py
+++ b/examples/run_glue.py
@@ -467,7 +467,7 @@ def main():
        # Load a trained model and vocabulary that you have fine-tuned
        model = model_class.from_pretrained(args.output_dir)
-        tokenizer = tokenizer_class.from_pretrained(args.output_dir)
+        tokenizer = tokenizer_class.from_pretrained(args.output_dir, do_lower_case=args.do_lower_case)
        model.to(args.device)
--- a/examples/run_squad.py
+++ b/examples/run_squad.py
@@ -481,7 +481,7 @@ def main():
    # Save the trained model and the tokenizer
-    if args.do_train and args.local_rank == -1 or torch.distributed.get_rank() == 0:
+    if args.do_train and (args.local_rank == -1 or torch.distributed.get_rank() == 0):
        # Create output directory if needed
        if not os.path.exists(args.output_dir) and args.local_rank in [-1, 0]:
            os.makedirs(args.output_dir)
@@ -498,7 +498,7 @@ def main():
        # Load a trained model and vocabulary that you have fine-tuned
        model = model_class.from_pretrained(args.output_dir)
-        tokenizer = tokenizer_class.from_pretrained(args.output_dir)
+        tokenizer = tokenizer_class.from_pretrained(args.output_dir, do_lower_case=args.do_lower_case)
        model.to(args.device)