Updated doc for InputExample and InputFeatures

2019-09-25 08:39:33 -04:00
parent ad4a393e2e
commit 36f592cc82
2 changed files with 30 additions and 14 deletions
--- a/docs/source/main_classes/processors.rst
+++ b/docs/source/main_classes/processors.rst
@@ -9,7 +9,9 @@ Processors

 All processors follow the same architecture which is that of the
 :class:`~pytorch_transformers.data.processors.utils.DataProcessor`. The processor returns a list
-of :class:`~pytorch_transformers.data.processors.utils.InputExample`.
+of :class:`~pytorch_transformers.data.processors.utils.InputExample`. These
+:class:`~pytorch_transformers.data.processors.utils.InputExample` instances can be converted to
+:class:`~pytorch_transformers.data.processors.utils.InputFeatures` in order to be fed to the model.

 .. autoclass:: pytorch_transformers.data.processors.utils.DataProcessor
    :members:
@@ -19,6 +21,10 @@ of :class:`~pytorch_transformers.data.processors.utils.InputExample`.
    :members:


+.. autoclass:: pytorch_transformers.data.processors.utils.InputFeatures
+    :members:
+
+
 GLUE
 ~~~~~~~~~~~~~~~~~~~~~

--- a/transformers/data/processors/utils.py
+++ b/transformers/data/processors/utils.py
@@ -20,19 +20,19 @@ import copy
 import json

 class InputExample(object):
-    """A single training/test example for simple sequence classification."""
-    def __init__(self, guid, text_a, text_b=None, label=None):
-        """Constructs a InputExample.
+    """
+    A single training/test example for simple sequence classification.

-        Args:
-            guid: Unique id for the example.
-            text_a: string. The untokenized text of the first sequence. For single
-            sequence tasks, only this sequence must be specified.
-            text_b: (Optional) string. The untokenized text of the second sequence.
-            Only must be specified for sequence pair tasks.
-            label: (Optional) string. The label of the example. This should be
-            specified for train and dev examples, but not for test examples.
-        """
+    Args:
+        guid: Unique id for the example.
+        text_a: string. The untokenized text of the first sequence. For single
+            sequence tasks, only this sequence must be specified.
+        text_b: (Optional) string. The untokenized text of the second sequence.
+            Must only be specified for sequence pair tasks.
+        label: (Optional) string. The label of the example. This should be
+            specified for train and dev examples, but not for test examples.
+    """
+    def __init__(self, guid, text_a, text_b=None, label=None):
        self.guid = guid
        self.text_a = text_a
        self.text_b = text_b
@@ -52,7 +52,17 @@ class InputExample(object):


 class InputFeatures(object):
-    """A single set of features of data."""
+    """
+    A single set of features of data.
+
+    Args:
+        input_ids: Indices of input sequence tokens in the vocabulary.
+        attention_mask: Mask to avoid performing attention on padding token indices.
+            Mask values selected in ``[0, 1]``:
+            Usually ``1`` for tokens that are NOT MASKED, ``0`` for MASKED (padded) tokens.
+        token_type_ids: Segment token indices to indicate first and second portions of the inputs.
+        label: Label corresponding to the input.
+    """

    def __init__(self, input_ids, attention_mask, token_type_ids, label):
        self.input_ids = input_ids