Code parrot minor fixes/niceties (#14666)

* Add some nicety flags for better controlling evaluation.
* Fix dependency issue with outdated requirement
* Add additional flag to example to ensure eval is done
* Wrap code into main function for accelerate launcher to find
* Fix valid batch size flag in readme
* Add note to install git-lfs when initializing/training the model
* Update examples/research_projects/codeparrot/scripts/arguments.py
  Co-authored-by: Leandro von Werra <lvwerra@users.noreply.github.com>
* Update examples/research_projects/codeparrot/README.md
  Co-authored-by: Leandro von Werra <lvwerra@users.noreply.github.com>
* Revert "Wrap code into main function for accelerate launcher to find"
  This reverts commit ff11df1c810d4df198d04b827538eb4572147ba3.
* Fix formatting issue
* Move git-lfs instructions to installation section
* Add a quick check before code generation for code evaluation
* Fix styling issue
* Update examples/research_projects/codeparrot/scripts/human_eval.py
  Co-authored-by: Leandro von Werra <lvwerra@users.noreply.github.com>
* Make iterable dataset use passed in tokenizer rather than globally defined one

Co-authored-by: Leandro von Werra <lvwerra@users.noreply.github.com>
Co-authored-by: ncoop57 <nac33@students.uwf.edu>
2021-12-13 03:30:50 -05:00
parent 91f3dfbfdd
commit 48bf7e47a0
5 changed files with 28 additions and 6 deletions
--- a/examples/research_projects/codeparrot/scripts/human_eval.py
+++ b/examples/research_projects/codeparrot/scripts/human_eval.py
@@ -51,14 +51,23 @@ def main():
    # Load model and tokenizer
    tokenizer = AutoTokenizer.from_pretrained(args.model_ckpt)
    model = AutoModelForCausalLM.from_pretrained(args.model_ckpt)
-    pipe = pipeline("text-generation", model=model, tokenizer=tokenizer, device=-1)
+    pipe = pipeline("text-generation", model=model, tokenizer=tokenizer, device=args.device_int)

    # Load evaluation dataset and metric
    human_eval = load_dataset("openai_humaneval")
    code_eval_metric = load_metric("code_eval")

+    # Run a quick test to see if code evaluation is enabled
+    try:
+        _ = code_eval_metric.compute(references=[""], predictions=[[""]])
+    except ValueError as exception:
+        print(
+            'Code evaluation not enabled. Read the warning below carefully and then use `--HF_ALLOW_CODE_EVAL="1"` flag to enable code evaluation.'
+        )
+        raise exception
+
    # Generate completions for evaluation set
-    n_tasks = 4  # len(human_eval["test"])
+    n_tasks = args.num_tasks if args.num_tasks is not None else len(human_eval["test"])
    generations, references = [], []
    for task in tqdm(range(n_tasks)):
        task_generations = []