From eab980fd684d82e7e2f9e045e570389fe4f7e693 Mon Sep 17 00:00:00 2001
From: searchivarius
Date: Mon, 9 Sep 2019 19:58:08 -0400
Subject: [PATCH] Fix to prevent crashing on assert len(tokens_b)>=1

---
 examples/lm_finetuning/pregenerate_training_data.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/examples/lm_finetuning/pregenerate_training_data.py b/examples/lm_finetuning/pregenerate_training_data.py
index ff40d95f75..2906c6fd2b 100644
--- a/examples/lm_finetuning/pregenerate_training_data.py
+++ b/examples/lm_finetuning/pregenerate_training_data.py
@@ -329,7 +329,8 @@ def main():
                     doc = []
                 else:
                     tokens = tokenizer.tokenize(line)
-                    doc.append(tokens)
+                    if tokens:
+                        doc.append(tokens)
             if doc:
                 docs.add_document(doc)  # If the last doc didn't end on a newline, make sure it still gets added
         if len(docs) <= 1: