Use argument for preprocessing workers in run_summarization (#15394)

This commit is contained in:
Sylvain Gugger
2022-01-28 18:34:10 -05:00
committed by GitHub
parent db07956740
commit c98a6ac211

View File

@@ -443,6 +443,7 @@ def main():
processed_datasets = raw_datasets.map(
preprocess_function,
batched=True,
num_proc=args.preprocessing_num_workers,
remove_columns=column_names,
load_from_cache_file=not args.overwrite_cache,
desc="Running tokenizer on dataset",