[examples/summarization] deal with None in data records (#14816)
* [examples/summarization] deal with None in data records
* rewrite to use a simpler (slower) variant
This commit is contained in:
@@ -436,8 +436,14 @@ def main():
|
|||||||
)
|
)
|
||||||
|
|
||||||
def preprocess_function(examples):
|
def preprocess_function(examples):
|
||||||
inputs = examples[text_column]
|
|
||||||
targets = examples[summary_column]
|
# remove pairs where at least one record is None
|
||||||
|
inputs, targets = [], []
|
||||||
|
for i in range(len(examples[text_column])):
|
||||||
|
if examples[text_column][i] is not None and examples[summary_column][i] is not None:
|
||||||
|
inputs.append(examples[text_column][i])
|
||||||
|
targets.append(examples[summary_column][i])
|
||||||
|
|
||||||
inputs = [prefix + inp for inp in inputs]
|
inputs = [prefix + inp for inp in inputs]
|
||||||
model_inputs = tokenizer(inputs, max_length=data_args.max_source_length, padding=padding, truncation=True)
|
model_inputs = tokenizer(inputs, max_length=data_args.max_source_length, padding=padding, truncation=True)
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user