From 9e376e156a78aa08f802d569d829064aff930c58 Mon Sep 17 00:00:00 2001
From: Donna Choi <54914459+choidongyeon@users.noreply.github.com>
Date: Wed, 16 Sep 2020 06:15:10 -0700
Subject: [PATCH] Add condition (#7161)

---
 src/transformers/data/data_collator.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/src/transformers/data/data_collator.py b/src/transformers/data/data_collator.py
index 9f30335f86..a2d001034b 100644
--- a/src/transformers/data/data_collator.py
+++ b/src/transformers/data/data_collator.py
@@ -505,10 +505,11 @@ class DataCollatorForNextSentencePrediction:
                         # This should rarely go for more than one iteration for large
                         # corpora. However, just to be careful, we try to make sure that
                         # the random document is not the same as the document
-                        # we're processing.
+                        # we're processing. Also check to make sure that the random document
+                        # is not empty.
                         for _ in range(10):
                             random_document_index = random.randint(0, len(examples) - 1)
-                            if random_document_index != doc_index:
+                            if random_document_index != doc_index and len(examples[random_document_index]) > 0:
                                 break
 
                         random_document = examples[random_document_index]