fix run_clm.py seeking text column name twice (#16624)

This commit is contained in:
Wonjae Kim
2022-04-19 22:38:25 +09:00
committed by GitHub
parent 3663fca41b
commit b74a955325

View File

@@ -347,10 +347,6 @@ def main():
column_names = raw_datasets["train"].column_names column_names = raw_datasets["train"].column_names
text_column_name = "text" if "text" in column_names else column_names[0] text_column_name = "text" if "text" in column_names else column_names[0]
# First we tokenize all the texts.
column_names = raw_datasets["train"].column_names
text_column_name = "text" if "text" in column_names else column_names[0]
def tokenize_function(examples): def tokenize_function(examples):
return tokenizer(examples[text_column_name]) return tokenizer(examples[text_column_name])