From 350bb6bffab9655c98fa31d5b3d1f778f73d685a Mon Sep 17 00:00:00 2001 From: Rabeeh KARIMI Date: Fri, 30 Aug 2019 15:34:28 +0200 Subject: [PATCH] updated tokenizer loading for addressing reproducibility issues --- examples/run_glue.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/run_glue.py b/examples/run_glue.py index c221b0eb7a..9c23878884 100644 --- a/examples/run_glue.py +++ b/examples/run_glue.py @@ -448,13 +448,14 @@ def main(): # Load a trained model and vocabulary that you have fine-tuned model = model_class.from_pretrained(args.output_dir) - tokenizer = tokenizer_class.from_pretrained(args.output_dir) + tokenizer = tokenizer_class.from_pretrained(args.output_dir, do_lower_case=args.do_lower_case) model.to(args.device) # Evaluation results = {} if args.do_eval and args.local_rank in [-1, 0]: + tokenizer = tokenizer_class.from_pretrained(args.output_dir, do_lower_case=args.do_lower_case) checkpoints = [args.output_dir] if args.eval_all_checkpoints: checkpoints = list(os.path.dirname(c) for c in sorted(glob.glob(args.output_dir + '/**/' + WEIGHTS_NAME, recursive=True))) @@ -463,7 +464,6 @@ def main(): for checkpoint in checkpoints: global_step = checkpoint.split('-')[-1] if len(checkpoints) > 1 else "" model = model_class.from_pretrained(checkpoint) - tokenizer = tokenizer_class.from_pretrained(checkpoint) model.to(args.device) result = evaluate(args, model, tokenizer, prefix=global_step) result = dict((k + '_{}'.format(global_step), v) for k, v in result.items())