From 2eaa8b6e566b6ead6f1d4a2f53cf14b27ae10298 Mon Sep 17 00:00:00 2001
From: Julien Chaumond <chaumond@gmail.com>
Date: Sat, 18 Jan 2020 01:23:56 +0000
Subject: [PATCH] Easier to not support this, as it could be confusing

cc @lysandrejik
---
 examples/run_lm_finetuning.py | 13 +++----------
 1 file changed, 3 insertions(+), 10 deletions(-)

diff --git a/examples/run_lm_finetuning.py b/examples/run_lm_finetuning.py
index 8e345ae8a2..c9a4bf2ecc 100644
--- a/examples/run_lm_finetuning.py
+++ b/examples/run_lm_finetuning.py
@@ -486,12 +486,6 @@ def main():
         type=str,
         help="Optional pretrained tokenizer name or path if not the same as model_name_or_path. If both are None, initialize a new tokenizer.",
     )
-    parser.add_argument(
-        "--tokenizer_init_args",
-        default="",
-        type=str,
-        help="If instantiating a new tokenizer, comma-separated list of input args to feed the constructor.",
-    )
     parser.add_argument(
         "--cache_dir",
         default=None,
@@ -661,11 +655,10 @@ def main():
     elif args.model_name_or_path:
         tokenizer = tokenizer_class.from_pretrained(args.model_name_or_path, cache_dir=args.cache_dir)
     else:
-        logger.warning(
-            "You are instantiating a new {} tokenizer from scratch. Are you sure this is what you meant to do?"
-            "To specifiy a pretrained tokenizer name, use --tokenizer_name".format(tokenizer_class.__name__)
+        raise ValueError(
+            "You are instantiating a new {} tokenizer. This is not supported, but you can do it from another script, save it, "
+            "and load it from here, using --tokenizer_name".format(tokenizer_class.__name__)
         )
-        tokenizer = tokenizer_class(*args.tokenizer_init_args.split(","))
 
     if args.block_size <= 0:
         args.block_size = tokenizer.max_len_single_sentence