Merge remote-tracking branch 'upstream/master'

2019-04-07 16:59:36 +05:30
parent 4d3cf0d602 94980b529f
commit 6c4c7be282
11 changed files with 56 additions and 20 deletions
--- a/examples/extract_features.py
+++ b/examples/extract_features.py
@@ -57,7 +57,7 @@ class InputFeatures(object):


 def convert_examples_to_features(examples, seq_length, tokenizer):
-    """Loads a data file into a list of `InputBatch`s."""
+    """Loads a data file into a list of `InputFeature`s."""

    features = []
    for (ex_index, example) in enumerate(examples):
--- a/examples/lm_finetuning/pregenerate_training_data.py
+++ b/examples/lm_finetuning/pregenerate_training_data.py
@@ -49,7 +49,7 @@ class DocumentDatabase:
                self._precalculate_doc_weights()
            rand_start = self.doc_cumsum[current_idx]
            rand_end = rand_start + self.cumsum_max - self.doc_lengths[current_idx]
-            sentence_index = randint(rand_start, rand_end) % self.cumsum_max
+            sentence_index = randint(rand_start, rand_end-1) % self.cumsum_max
            sampled_doc_index = np.searchsorted(self.doc_cumsum, sentence_index, side='right')
        else:
            # If we don't use sentence weighting, then every doc has an equal chance to be chosen
--- a/examples/run_classifier.py
+++ b/examples/run_classifier.py
@@ -442,7 +442,7 @@ def convert_examples_to_features(examples, label_list, max_seq_length,
        # sequence or the second sequence. The embedding vectors for `type=0` and
        # `type=1` were learned during pre-training and are added to the wordpiece
        # embedding vector (and position vector). This is not *strictly* necessary
-        # since the [SEP] token unambigiously separates the sequences, but it makes
+        # since the [SEP] token unambiguously separates the sequences, but it makes
        # it easier for the model to learn the concept of sequences.
        #
        # For classification tasks, the first vector (corresponding to [CLS]) is
--- a/examples/run_squad.py
+++ b/examples/run_squad.py
@@ -85,9 +85,9 @@ class SquadExample(object):
        s += ", doc_tokens: [%s]" % (" ".join(self.doc_tokens))
        if self.start_position:
            s += ", start_position: %d" % (self.start_position)
-        if self.start_position:
+        if self.end_position:
            s += ", end_position: %d" % (self.end_position)
-        if self.start_position:
+        if self.is_impossible:
            s += ", is_impossible: %r" % (self.is_impossible)
        return s