From c98a6ac2117136f34c36b684adf661f89e6d1e50 Mon Sep 17 00:00:00 2001 From: Sylvain Gugger <35901082+sgugger@users.noreply.github.com> Date: Fri, 28 Jan 2022 18:34:10 -0500 Subject: [PATCH] Use argument for preprocessing workers in run_summarization (#15394) --- examples/pytorch/summarization/run_summarization_no_trainer.py | 1 + 1 file changed, 1 insertion(+) diff --git a/examples/pytorch/summarization/run_summarization_no_trainer.py b/examples/pytorch/summarization/run_summarization_no_trainer.py index 7c36898fa5..2036d57e55 100644 --- a/examples/pytorch/summarization/run_summarization_no_trainer.py +++ b/examples/pytorch/summarization/run_summarization_no_trainer.py @@ -443,6 +443,7 @@ def main(): processed_datasets = raw_datasets.map( preprocess_function, batched=True, + num_proc=args.preprocessing_num_workers, remove_columns=column_names, load_from_cache_file=not args.overwrite_cache, desc="Running tokenizer on dataset",