Fix run_clm.py looking up the text column name twice (#16624)
This commit is contained in:
@@ -347,10 +347,6 @@ def main():
|
|||||||
column_names = raw_datasets["train"].column_names
|
column_names = raw_datasets["train"].column_names
|
||||||
text_column_name = "text" if "text" in column_names else column_names[0]
|
text_column_name = "text" if "text" in column_names else column_names[0]
|
||||||
|
|
||||||
# First we tokenize all the texts.
|
|
||||||
column_names = raw_datasets["train"].column_names
|
|
||||||
text_column_name = "text" if "text" in column_names else column_names[0]
|
|
||||||
|
|
||||||
def tokenize_function(examples):
|
def tokenize_function(examples):
|
||||||
return tokenizer(examples[text_column_name])
|
return tokenizer(examples[text_column_name])
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user