From 2eaa8b6e566b6ead6f1d4a2f53cf14b27ae10298 Mon Sep 17 00:00:00 2001 From: Julien Chaumond Date: Sat, 18 Jan 2020 01:23:56 +0000 Subject: [PATCH] Easier to not support this, as it could be confusing cc @lysandrejik --- examples/run_lm_finetuning.py | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/examples/run_lm_finetuning.py b/examples/run_lm_finetuning.py index 8e345ae8a2..c9a4bf2ecc 100644 --- a/examples/run_lm_finetuning.py +++ b/examples/run_lm_finetuning.py @@ -486,12 +486,6 @@ def main(): type=str, help="Optional pretrained tokenizer name or path if not the same as model_name_or_path. If both are None, initialize a new tokenizer.", ) - parser.add_argument( - "--tokenizer_init_args", - default="", - type=str, - help="If instantiating a new tokenizer, comma-separated list of input args to feed the constructor.", - ) parser.add_argument( "--cache_dir", default=None, @@ -661,11 +655,10 @@ def main(): elif args.model_name_or_path: tokenizer = tokenizer_class.from_pretrained(args.model_name_or_path, cache_dir=args.cache_dir) else: - logger.warning( - "You are instantiating a new {} tokenizer from scratch. Are you sure this is what you meant to do?" - "To specifiy a pretrained tokenizer name, use --tokenizer_name".format(tokenizer_class.__name__) + raise ValueError( + "You are instantiating a new {} tokenizer. This is not supported, but you can do it from another script, save it," + "and load it from here, using --tokenizer_name".format(tokenizer_class.__name__) ) - tokenizer = tokenizer_class(*args.tokenizer_init_args.split(",")) if args.block_size <= 0: args.block_size = tokenizer.max_len_single_sentence