Updated doc for InputExample and InputFeatures

This commit is contained in:
LysandreJik
2019-09-25 08:39:33 -04:00
committed by Lysandre Debut
parent ad4a393e2e
commit 36f592cc82
2 changed files with 30 additions and 14 deletions

View File

@@ -9,7 +9,9 @@ Processors
All processors follow the same architecture, which is that of the
:class:`~pytorch_transformers.data.processors.utils.DataProcessor`. The processor returns a list
of :class:`~pytorch_transformers.data.processors.utils.InputExample`.
of :class:`~pytorch_transformers.data.processors.utils.InputExample`. These
:class:`~pytorch_transformers.data.processors.utils.InputExample` objects can be converted to
:class:`~pytorch_transformers.data.processors.utils.InputFeatures` in order to be fed to the model.
.. autoclass:: pytorch_transformers.data.processors.utils.DataProcessor
:members:
@@ -19,6 +21,10 @@ of :class:`~pytorch_transformers.data.processors.utils.InputExample`.
:members:
.. autoclass:: pytorch_transformers.data.processors.utils.InputFeatures
:members:
GLUE
~~~~~~~~~~~~~~~~~~~~~

View File

@@ -20,9 +20,8 @@ import copy
import json
class InputExample(object):
"""A single training/test example for simple sequence classification."""
def __init__(self, guid, text_a, text_b=None, label=None):
"""Constructs a InputExample.
"""
A single training/test example for simple sequence classification.
Args:
guid: Unique id for the example.
@@ -33,6 +32,7 @@ class InputExample(object):
label: (Optional) string. The label of the example. This should be
specified for train and dev examples, but not for test examples.
"""
def __init__(self, guid, text_a, text_b=None, label=None):
self.guid = guid
self.text_a = text_a
self.text_b = text_b
@@ -52,7 +52,17 @@ class InputExample(object):
class InputFeatures(object):
"""A single set of features of data."""
"""
A single set of features of data.
Args:
input_ids: Indices of input sequence tokens in the vocabulary.
attention_mask: Mask to avoid performing attention on padding token indices.
Mask values selected in ``[0, 1]``:
Usually ``1`` for tokens that are NOT MASKED, ``0`` for MASKED (padded) tokens.
token_type_ids: Segment token indices to indicate first and second portions of the inputs.
label: Label corresponding to the input
"""
def __init__(self, input_ids, attention_mask, token_type_ids, label):
self.input_ids = input_ids