Changed processor documentation architecture. Added documentation for GLUE
This commit is contained in:
committed by
Lysandre Debut
parent
c4ac7a76db
commit
ad4a393e2e
@@ -4,7 +4,22 @@ Processors
|
|||||||
This library includes processors for several traditional tasks. These processors can be used to process a dataset into
|
This library includes processors for several traditional tasks. These processors can be used to process a dataset into
|
||||||
examples that can be fed to a model.
|
examples that can be fed to a model.
|
||||||
|
|
||||||
``GLUE``
|
Processors
|
||||||
|
~~~~~~~~~~~~~~~~~~~~~
|
||||||
|
|
||||||
|
All processors follow the same architecture which is that of the
|
||||||
|
:class:`~pytorch_transformers.data.processors.utils.DataProcessor`. The processor returns a list
|
||||||
|
of :class:`~pytorch_transformers.data.processors.utils.InputExample`.
|
||||||
|
|
||||||
|
.. autoclass:: pytorch_transformers.data.processors.utils.DataProcessor
|
||||||
|
:members:
|
||||||
|
|
||||||
|
|
||||||
|
.. autoclass:: pytorch_transformers.data.processors.utils.InputExample
|
||||||
|
:members:
|
||||||
|
|
||||||
|
|
||||||
|
GLUE
|
||||||
~~~~~~~~~~~~~~~~~~~~~
|
~~~~~~~~~~~~~~~~~~~~~
|
||||||
|
|
||||||
`General Language Understanding Evaluation (GLUE) <https://gluebenchmark.com/>`__ is a benchmark that evaluates
|
`General Language Understanding Evaluation (GLUE) <https://gluebenchmark.com/>`__ is a benchmark that evaluates
|
||||||
@@ -14,32 +29,21 @@ the performance of models across a diverse set of existing NLU tasks. It was rel
|
|||||||
This library hosts a total of 10 processors for the following tasks: MRPC, MNLI, MNLI (mismatched),
|
This library hosts a total of 10 processors for the following tasks: MRPC, MNLI, MNLI (mismatched),
|
||||||
CoLA, SST2, STSB, QQP, QNLI, RTE and WNLI.
|
CoLA, SST2, STSB, QQP, QNLI, RTE and WNLI.
|
||||||
|
|
||||||
.. autoclass:: pytorch_transformers.data.processors.glue.MrpcProcessor
|
Those processors are:
|
||||||
:members:
|
- :class:`~pytorch_transformers.data.processors.glue.MrpcProcessor`
|
||||||
|
- :class:`~pytorch_transformers.data.processors.glue.MnliProcessor`
|
||||||
|
- :class:`~pytorch_transformers.data.processors.glue.MnliMismatchedProcessor`
|
||||||
|
- :class:`~pytorch_transformers.data.processors.glue.Sst2Processor`
|
||||||
|
- :class:`~pytorch_transformers.data.processors.glue.StsbProcessor`
|
||||||
|
- :class:`~pytorch_transformers.data.processors.glue.QqpProcessor`
|
||||||
|
- :class:`~pytorch_transformers.data.processors.glue.QnliProcessor`
|
||||||
|
- :class:`~pytorch_transformers.data.processors.glue.RteProcessor`
|
||||||
|
- :class:`~pytorch_transformers.data.processors.glue.WnliProcessor`
|
||||||
|
|
||||||
.. autoclass:: pytorch_transformers.data.processors.glue.MnliProcessor
|
Additionally, the following method can be used to load values from a data file and convert them to a list of
|
||||||
:members:
|
:class:`~pytorch_transformers.data.processors.utils.InputFeatures`.
|
||||||
|
|
||||||
.. autoclass:: pytorch_transformers.data.processors.glue.MnliMismatchedProcessor
|
.. autofunction:: pytorch_transformers.data.processors.glue.glue_convert_examples_to_features
|
||||||
:members:
|
|
||||||
|
|
||||||
.. autoclass:: pytorch_transformers.data.processors.glue.ColaProcessor
|
Example usage
|
||||||
:members:
|
^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||||
|
|
||||||
.. autoclass:: pytorch_transformers.data.processors.glue.Sst2Processor
|
|
||||||
:members:
|
|
||||||
|
|
||||||
.. autoclass:: pytorch_transformers.data.processors.glue.StsbProcessor
|
|
||||||
:members:
|
|
||||||
|
|
||||||
.. autoclass:: pytorch_transformers.data.processors.glue.QqpProcessor
|
|
||||||
:members:
|
|
||||||
|
|
||||||
.. autoclass:: pytorch_transformers.data.processors.glue.QnliProcessor
|
|
||||||
:members:
|
|
||||||
|
|
||||||
.. autoclass:: pytorch_transformers.data.processors.glue.RteProcessor
|
|
||||||
:members:
|
|
||||||
|
|
||||||
.. autoclass:: pytorch_transformers.data.processors.glue.WnliProcessor
|
|
||||||
:members:
|
|
||||||
|
|||||||
@@ -26,6 +26,7 @@ if is_tf_available():
|
|||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
def glue_convert_examples_to_features(examples, tokenizer,
|
def glue_convert_examples_to_features(examples, tokenizer,
|
||||||
max_length=512,
|
max_length=512,
|
||||||
task=None,
|
task=None,
|
||||||
@@ -36,7 +37,27 @@ def glue_convert_examples_to_features(examples, tokenizer,
|
|||||||
pad_token_segment_id=0,
|
pad_token_segment_id=0,
|
||||||
mask_padding_with_zero=True):
|
mask_padding_with_zero=True):
|
||||||
"""
|
"""
|
||||||
Loads a data file into a list of `InputBatch`s
|
Loads a data file into a list of ``InputFeatures``
|
||||||
|
|
||||||
|
Args:
|
||||||
|
examples: List of ``InputExample`` instances or a ``tf.data.Dataset`` containing the examples.
|
||||||
|
tokenizer: Instance of a tokenizer that will tokenize the examples
|
||||||
|
max_length: Maximum example length
|
||||||
|
task: GLUE task
|
||||||
|
label_list: List of labels. Can be obtained from the processor using the ``processor.get_labels()`` method
|
||||||
|
output_mode: String indicating the output mode. Either ``regression`` or ``classification``
|
||||||
|
pad_on_left: If set to ``True``, the examples will be padded on the left rather than on the right (default)
|
||||||
|
pad_token: Padding token
|
||||||
|
pad_token_segment_id: The segment ID for the padding token (It is usually 0, but can vary such as for XLNet where it is 4)
|
||||||
|
mask_padding_with_zero: If set to ``True``, the attention mask will be filled by ``1`` for actual values
|
||||||
|
and by ``0`` for padded values. If set to ``False``, inverts it (``1`` for padded values, ``0`` for
|
||||||
|
actual values)
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
If the ``examples`` input is a ``tf.data.Dataset``, will return a ``tf.data.Dataset``
|
||||||
|
containing the task-specific features. If the input is a list of ``InputExample`` instances, will return
|
||||||
|
a list of task-specific ``InputFeatures`` which can be fed to the model.
|
||||||
|
|
||||||
"""
|
"""
|
||||||
is_tf_dataset = False
|
is_tf_dataset = False
|
||||||
if is_tf_available() and isinstance(examples, tf.data.Dataset):
|
if is_tf_available() and isinstance(examples, tf.data.Dataset):
|
||||||
|
|||||||
Reference in New Issue
Block a user