quick fix on concatenating text to support more datasets (#8474)
This commit is contained in:
@@ -254,7 +254,7 @@ def main():
|
|||||||
tokenize_function,
|
tokenize_function,
|
||||||
batched=True,
|
batched=True,
|
||||||
num_proc=data_args.preprocessing_num_workers,
|
num_proc=data_args.preprocessing_num_workers,
|
||||||
remove_columns=[text_column_name],
|
remove_columns=column_names,
|
||||||
load_from_cache_file=not data_args.overwrite_cache,
|
load_from_cache_file=not data_args.overwrite_cache,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|||||||
@@ -292,7 +292,7 @@ def main():
|
|||||||
tokenize_function,
|
tokenize_function,
|
||||||
batched=True,
|
batched=True,
|
||||||
num_proc=data_args.preprocessing_num_workers,
|
num_proc=data_args.preprocessing_num_workers,
|
||||||
remove_columns=[text_column_name],
|
remove_columns=column_names,
|
||||||
load_from_cache_file=not data_args.overwrite_cache,
|
load_from_cache_file=not data_args.overwrite_cache,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|||||||
@@ -279,7 +279,7 @@ def main():
|
|||||||
tokenize_function,
|
tokenize_function,
|
||||||
batched=True,
|
batched=True,
|
||||||
num_proc=data_args.preprocessing_num_workers,
|
num_proc=data_args.preprocessing_num_workers,
|
||||||
remove_columns=[text_column_name],
|
remove_columns=column_names,
|
||||||
load_from_cache_file=not data_args.overwrite_cache,
|
load_from_cache_file=not data_args.overwrite_cache,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user