Quick fix for concatenating texts to support more datasets (#8474)

This commit is contained in:
zeyuyun1
2020-11-12 06:47:08 -08:00
committed by GitHub
parent 17b1fd804f
commit 924c624a46
3 changed files with 3 additions and 3 deletions

View File

@@ -254,7 +254,7 @@ def main():
tokenize_function, tokenize_function,
batched=True, batched=True,
num_proc=data_args.preprocessing_num_workers, num_proc=data_args.preprocessing_num_workers,
remove_columns=[text_column_name], remove_columns=column_names,
load_from_cache_file=not data_args.overwrite_cache, load_from_cache_file=not data_args.overwrite_cache,
) )

View File

@@ -292,7 +292,7 @@ def main():
tokenize_function, tokenize_function,
batched=True, batched=True,
num_proc=data_args.preprocessing_num_workers, num_proc=data_args.preprocessing_num_workers,
remove_columns=[text_column_name], remove_columns=column_names,
load_from_cache_file=not data_args.overwrite_cache, load_from_cache_file=not data_args.overwrite_cache,
) )

View File

@@ -279,7 +279,7 @@ def main():
tokenize_function, tokenize_function,
batched=True, batched=True,
num_proc=data_args.preprocessing_num_workers, num_proc=data_args.preprocessing_num_workers,
remove_columns=[text_column_name], remove_columns=column_names,
load_from_cache_file=not data_args.overwrite_cache, load_from_cache_file=not data_args.overwrite_cache,
) )