From 033c3ed95a14b58f5a657f5124bc5988e4109c9f Mon Sep 17 00:00:00 2001 From: Stas Bekman Date: Tue, 21 Dec 2021 09:17:28 -0800 Subject: [PATCH] [examples/summarization] deal with None in data records (#14816) * [examples/summarization] deal with None in data records * rewrite to use a simpler (slower) variant --- examples/pytorch/summarization/run_summarization.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/examples/pytorch/summarization/run_summarization.py b/examples/pytorch/summarization/run_summarization.py index 658c241141..bc1af32823 100755 --- a/examples/pytorch/summarization/run_summarization.py +++ b/examples/pytorch/summarization/run_summarization.py @@ -436,8 +436,14 @@ def main(): ) def preprocess_function(examples): - inputs = examples[text_column] - targets = examples[summary_column] + + # remove pairs where at least one record is None + inputs, targets = [], [] + for i in range(len(examples[text_column])): + if examples[text_column][i] is not None and examples[summary_column][i] is not None: + inputs.append(examples[text_column][i]) + targets.append(examples[summary_column][i]) + inputs = [prefix + inp for inp in inputs] model_inputs = tokenizer(inputs, max_length=data_args.max_source_length, padding=padding, truncation=True)