From 924c624a465933d4b3e3ca878a4c7cb5cae91bac Mon Sep 17 00:00:00 2001 From: zeyuyun1 <43428393+zeyuyun1@users.noreply.github.com> Date: Thu, 12 Nov 2020 06:47:08 -0800 Subject: [PATCH] quick fix on concatenating text to support more datasets (#8474) --- examples/language-modeling/run_clm.py | 2 +- examples/language-modeling/run_mlm.py | 2 +- examples/language-modeling/run_plm.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/examples/language-modeling/run_clm.py b/examples/language-modeling/run_clm.py index d2231e1703..9d9fd30b63 100644 --- a/examples/language-modeling/run_clm.py +++ b/examples/language-modeling/run_clm.py @@ -254,7 +254,7 @@ def main(): tokenize_function, batched=True, num_proc=data_args.preprocessing_num_workers, - remove_columns=[text_column_name], + remove_columns=column_names, load_from_cache_file=not data_args.overwrite_cache, ) diff --git a/examples/language-modeling/run_mlm.py b/examples/language-modeling/run_mlm.py index cd1cc3f26d..bf15b00d83 100644 --- a/examples/language-modeling/run_mlm.py +++ b/examples/language-modeling/run_mlm.py @@ -292,7 +292,7 @@ def main(): tokenize_function, batched=True, num_proc=data_args.preprocessing_num_workers, - remove_columns=[text_column_name], + remove_columns=column_names, load_from_cache_file=not data_args.overwrite_cache, ) diff --git a/examples/language-modeling/run_plm.py b/examples/language-modeling/run_plm.py index 337ebb3e7e..bc1c3fd28e 100644 --- a/examples/language-modeling/run_plm.py +++ b/examples/language-modeling/run_plm.py @@ -279,7 +279,7 @@ def main(): tokenize_function, batched=True, num_proc=data_args.preprocessing_num_workers, - remove_columns=[text_column_name], + remove_columns=column_names, load_from_cache_file=not data_args.overwrite_cache, )