From b74a955325ef78c6d07b62c4f9be13ef0df170da Mon Sep 17 00:00:00 2001 From: Wonjae Kim Date: Tue, 19 Apr 2022 22:38:25 +0900 Subject: [PATCH] fix `run_clm.py` seeking text column name twice (#16624) --- examples/tensorflow/language-modeling/run_clm.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/examples/tensorflow/language-modeling/run_clm.py b/examples/tensorflow/language-modeling/run_clm.py index 84e71efe50..3598ad668a 100755 --- a/examples/tensorflow/language-modeling/run_clm.py +++ b/examples/tensorflow/language-modeling/run_clm.py @@ -347,10 +347,6 @@ def main(): column_names = raw_datasets["train"].column_names text_column_name = "text" if "text" in column_names else column_names[0] - # First we tokenize all the texts. - column_names = raw_datasets["train"].column_names - text_column_name = "text" if "text" in column_names else column_names[0] - def tokenize_function(examples): return tokenizer(examples[text_column_name])