Switch from using sum for flattening lists of lists in group_texts (#14472)
* remove sum for list flattening
* change to chain(*)
* make chain object a list
* delete empty lines per sgugger's suggestions

Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com>
Co-authored-by: Nicholas Broad <nicholas@nmbroad.com>
Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com>
This commit is contained in:
@@ -22,6 +22,7 @@ import logging
|
||||
import os
|
||||
import sys
|
||||
from dataclasses import dataclass, field
|
||||
from itertools import chain
|
||||
from typing import Optional, Union
|
||||
|
||||
import datasets
|
||||
@@ -185,7 +186,7 @@ class DataCollatorForMultipleChoice:
|
||||
flattened_features = [
|
||||
[{k: v[i] for k, v in feature.items()} for i in range(num_choices)] for feature in features
|
||||
]
|
||||
flattened_features = sum(flattened_features, [])
|
||||
flattened_features = list(chain(*flattened_features))
|
||||
|
||||
batch = self.tokenizer.pad(
|
||||
flattened_features,
|
||||
@@ -333,8 +334,8 @@ def main():
|
||||
]
|
||||
|
||||
# Flatten out
|
||||
first_sentences = sum(first_sentences, [])
|
||||
second_sentences = sum(second_sentences, [])
|
||||
first_sentences = list(chain(*first_sentences))
|
||||
second_sentences = list(chain(*second_sentences))
|
||||
|
||||
# Tokenize
|
||||
tokenized_examples = tokenizer(
|
||||
|
||||
@@ -24,6 +24,7 @@ import math
|
||||
import os
|
||||
import random
|
||||
from dataclasses import dataclass
|
||||
from itertools import chain
|
||||
from pathlib import Path
|
||||
from typing import Optional, Union
|
||||
|
||||
@@ -224,7 +225,7 @@ class DataCollatorForMultipleChoice:
|
||||
flattened_features = [
|
||||
[{k: v[i] for k, v in feature.items()} for i in range(num_choices)] for feature in features
|
||||
]
|
||||
flattened_features = sum(flattened_features, [])
|
||||
flattened_features = list(chain(*flattened_features))
|
||||
|
||||
batch = self.tokenizer.pad(
|
||||
flattened_features,
|
||||
@@ -365,8 +366,8 @@ def main():
|
||||
labels = examples[label_column_name]
|
||||
|
||||
# Flatten out
|
||||
first_sentences = sum(first_sentences, [])
|
||||
second_sentences = sum(second_sentences, [])
|
||||
first_sentences = list(chain(*first_sentences))
|
||||
second_sentences = list(chain(*second_sentences))
|
||||
|
||||
# Tokenize
|
||||
tokenized_examples = tokenizer(
|
||||
|
||||
Reference in New Issue
Block a user