Use argument for preprocessing workers in run_summarization (#15394)
This commit is contained in:
@@ -443,6 +443,7 @@ def main():
|
|||||||
processed_datasets = raw_datasets.map(
|
processed_datasets = raw_datasets.map(
|
||||||
preprocess_function,
|
preprocess_function,
|
||||||
batched=True,
|
batched=True,
|
||||||
|
num_proc=args.preprocessing_num_workers,
|
||||||
remove_columns=column_names,
|
remove_columns=column_names,
|
||||||
load_from_cache_file=not args.overwrite_cache,
|
load_from_cache_file=not args.overwrite_cache,
|
||||||
desc="Running tokenizer on dataset",
|
desc="Running tokenizer on dataset",
|
||||||
|
|||||||
Reference in New Issue
Block a user