Merge pull request #427 from jeonsworld/patch-1

fix sample_doc
This commit is contained in:
Thomas Wolf
2019-04-03 11:26:58 +02:00
committed by GitHub

View File

@@ -49,7 +49,7 @@ class DocumentDatabase:
self._precalculate_doc_weights()
rand_start = self.doc_cumsum[current_idx]
rand_end = rand_start + self.cumsum_max - self.doc_lengths[current_idx]
sentence_index = randint(rand_start, rand_end) % self.cumsum_max
sentence_index = randint(rand_start, rand_end-1) % self.cumsum_max
sampled_doc_index = np.searchsorted(self.doc_cumsum, sentence_index, side='right')
else:
# If we don't use sentence weighting, then every doc has an equal chance to be chosen