Merge remote-tracking branch 'upstream/master'
This commit is contained in:
@@ -57,7 +57,7 @@ class InputFeatures(object):
|
||||
|
||||
|
||||
def convert_examples_to_features(examples, seq_length, tokenizer):
|
||||
"""Loads a data file into a list of `InputBatch`s."""
|
||||
"""Loads a data file into a list of `InputFeature`s."""
|
||||
|
||||
features = []
|
||||
for (ex_index, example) in enumerate(examples):
|
||||
|
||||
@@ -49,7 +49,7 @@ class DocumentDatabase:
|
||||
self._precalculate_doc_weights()
|
||||
rand_start = self.doc_cumsum[current_idx]
|
||||
rand_end = rand_start + self.cumsum_max - self.doc_lengths[current_idx]
|
||||
sentence_index = randint(rand_start, rand_end) % self.cumsum_max
|
||||
sentence_index = randint(rand_start, rand_end-1) % self.cumsum_max
|
||||
sampled_doc_index = np.searchsorted(self.doc_cumsum, sentence_index, side='right')
|
||||
else:
|
||||
# If we don't use sentence weighting, then every doc has an equal chance to be chosen
|
||||
|
||||
@@ -442,7 +442,7 @@ def convert_examples_to_features(examples, label_list, max_seq_length,
|
||||
# sequence or the second sequence. The embedding vectors for `type=0` and
|
||||
# `type=1` were learned during pre-training and are added to the wordpiece
|
||||
# embedding vector (and position vector). This is not *strictly* necessary
|
||||
# since the [SEP] token unambigiously separates the sequences, but it makes
|
||||
# since the [SEP] token unambiguously separates the sequences, but it makes
|
||||
# it easier for the model to learn the concept of sequences.
|
||||
#
|
||||
# For classification tasks, the first vector (corresponding to [CLS]) is
|
||||
|
||||
@@ -85,9 +85,9 @@ class SquadExample(object):
|
||||
s += ", doc_tokens: [%s]" % (" ".join(self.doc_tokens))
|
||||
if self.start_position:
|
||||
s += ", start_position: %d" % (self.start_position)
|
||||
if self.start_position:
|
||||
if self.end_position:
|
||||
s += ", end_position: %d" % (self.end_position)
|
||||
if self.start_position:
|
||||
if self.is_impossible:
|
||||
s += ", is_impossible: %r" % (self.is_impossible)
|
||||
return s
|
||||
|
||||
|
||||
Reference in New Issue
Block a user