Updated doc for InputExample and InputFeatures
This commit is contained in:
committed by
Lysandre Debut
parent
ad4a393e2e
commit
36f592cc82
@@ -9,7 +9,9 @@ Processors
|
||||
|
||||
All processors follow the same architecture which is that of the
|
||||
:class:`~pytorch_transformers.data.processors.utils.DataProcessor`. The processor returns a list
|
||||
of :class:`~pytorch_transformers.data.processors.utils.InputExample`.
|
||||
of :class:`~pytorch_transformers.data.processors.utils.InputExample`. These
|
||||
:class:`~pytorch_transformers.data.processors.utils.InputExample` objects can be converted to
|
||||
:class:`~pytorch_transformers.data.processors.utils.InputFeatures` in order to be fed to the model.
|
||||
|
||||
.. autoclass:: pytorch_transformers.data.processors.utils.DataProcessor
|
||||
:members:
|
||||
@@ -19,6 +21,10 @@ of :class:`~pytorch_transformers.data.processors.utils.InputExample`.
|
||||
:members:
|
||||
|
||||
|
||||
.. autoclass:: pytorch_transformers.data.processors.utils.InputFeatures
|
||||
:members:
|
||||
|
||||
|
||||
GLUE
|
||||
~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
|
||||
@@ -20,19 +20,19 @@ import copy
|
||||
import json
|
||||
|
||||
class InputExample(object):
|
||||
"""A single training/test example for simple sequence classification."""
|
||||
def __init__(self, guid, text_a, text_b=None, label=None):
|
||||
"""Constructs a InputExample.
|
||||
"""
|
||||
A single training/test example for simple sequence classification.
|
||||
|
||||
Args:
|
||||
guid: Unique id for the example.
|
||||
text_a: string. The untokenized text of the first sequence. For single
|
||||
sequence tasks, only this sequence must be specified.
|
||||
text_b: (Optional) string. The untokenized text of the second sequence.
|
||||
Only must be specified for sequence pair tasks.
|
||||
label: (Optional) string. The label of the example. This should be
|
||||
specified for train and dev examples, but not for test examples.
|
||||
"""
|
||||
Args:
|
||||
guid: Unique id for the example.
|
||||
text_a: string. The untokenized text of the first sequence. For single
|
||||
sequence tasks, only this sequence must be specified.
|
||||
text_b: (Optional) string. The untokenized text of the second sequence.
|
||||
Must be specified only for sequence pair tasks.
|
||||
label: (Optional) string. The label of the example. This should be
|
||||
specified for train and dev examples, but not for test examples.
|
||||
"""
|
||||
def __init__(self, guid, text_a, text_b=None, label=None):
|
||||
self.guid = guid
|
||||
self.text_a = text_a
|
||||
self.text_b = text_b
|
||||
@@ -52,7 +52,17 @@ class InputExample(object):
|
||||
|
||||
|
||||
class InputFeatures(object):
|
||||
"""A single set of features of data."""
|
||||
"""
|
||||
A single set of features of data.
|
||||
|
||||
Args:
|
||||
input_ids: Indices of input sequence tokens in the vocabulary.
|
||||
attention_mask: Mask to avoid performing attention on padding token indices.
|
||||
Mask values selected in ``[0, 1]``:
|
||||
Usually ``1`` for tokens that are NOT MASKED, ``0`` for MASKED (padded) tokens.
|
||||
token_type_ids: Segment token indices to indicate first and second portions of the inputs.
|
||||
label: Label corresponding to the input.
|
||||
"""
|
||||
|
||||
def __init__(self, input_ids, attention_mask, token_type_ids, label):
|
||||
self.input_ids = input_ids
|
||||
|
||||
Reference in New Issue
Block a user