From ad4a393e2e59951a0edbec0b9b3be852dd086cc7 Mon Sep 17 00:00:00 2001
From: LysandreJik <lysandre.debut@reseau.eseo.fr>
Date: Wed, 25 Sep 2019 08:30:07 -0400
Subject: [PATCH] Changed processor documentation architecture. Added
 documentation for GLUE

---
 docs/source/main_classes/processors.rst | 62 +++++++++++++------------
 transformers/data/processors/glue.py    | 23 ++++++++-
 2 files changed, 55 insertions(+), 30 deletions(-)

diff --git a/docs/source/main_classes/processors.rst b/docs/source/main_classes/processors.rst
index 12e5339ddb..d65f48af83 100644
--- a/docs/source/main_classes/processors.rst
+++ b/docs/source/main_classes/processors.rst
@@ -4,42 +4,46 @@ Processors
 This library includes processors for several traditional tasks. These processors can be used to process a dataset into
 examples that can be fed to a model.
 
-``GLUE``
+Processors
 ~~~~~~~~~~~~~~~~~~~~~
 
-`General Language Understanding Evaluation (GLUE)<https://gluebenchmark.com/>`__ is a benchmark that evaluates
+All processors follow the same architecture, which is that of the
+:class:`~pytorch_transformers.data.processors.utils.DataProcessor`. The processor returns a list
+of :class:`~pytorch_transformers.data.processors.utils.InputExample`.
+
+.. autoclass:: pytorch_transformers.data.processors.utils.DataProcessor
+    :members:
+
+
+.. autoclass:: pytorch_transformers.data.processors.utils.InputExample
+    :members:
+
+
+GLUE
+~~~~~~~~~~~~~~~~~~~~~
+
+`General Language Understanding Evaluation (GLUE) <https://gluebenchmark.com/>`__ is a benchmark that evaluates
 the performance of models across a diverse set of existing NLU tasks. It was released together with the paper
-`GLUE: A multi-task benchmark and analysis platform for natural language understanding<https://openreview.net/pdf?id=rJ4km2R5t7>`__
+`GLUE: A multi-task benchmark and analysis platform for natural language understanding <https://openreview.net/pdf?id=rJ4km2R5t7>`__
 
 This library hosts a total of 10 processors for the following tasks: MRPC, MNLI, MNLI (mismatched),
 CoLA, SST2, STSB, QQP, QNLI, RTE and WNLI.
 
-.. autoclass:: pytorch_transformers.data.processors.glue.MrpcProcessor
-    :members:
+Those processors are:
+    - :class:`~pytorch_transformers.data.processors.glue.MrpcProcessor`
+    - :class:`~pytorch_transformers.data.processors.glue.MnliProcessor`
+    - :class:`~pytorch_transformers.data.processors.glue.MnliMismatchedProcessor`
+    - :class:`~pytorch_transformers.data.processors.glue.Sst2Processor`
+    - :class:`~pytorch_transformers.data.processors.glue.StsbProcessor`
+    - :class:`~pytorch_transformers.data.processors.glue.QqpProcessor`
+    - :class:`~pytorch_transformers.data.processors.glue.QnliProcessor`
+    - :class:`~pytorch_transformers.data.processors.glue.RteProcessor`
+    - :class:`~pytorch_transformers.data.processors.glue.WnliProcessor`
 
-.. autoclass:: pytorch_transformers.data.processors.glue.MnliProcessor
-    :members:
+Additionally, the following function can be used to convert a list of examples into a list of
+:class:`~pytorch_transformers.data.processors.utils.InputFeatures`.
 
-.. autoclass:: pytorch_transformers.data.processors.glue.MnliMismatchedProcessor
-    :members:
+.. autofunction:: pytorch_transformers.data.processors.glue.glue_convert_examples_to_features
 
-.. autoclass:: pytorch_transformers.data.processors.glue.ColaProcessor
-    :members:
-
-.. autoclass:: pytorch_transformers.data.processors.glue.Sst2Processor
-    :members:
-
-.. autoclass:: pytorch_transformers.data.processors.glue.StsbProcessor
-    :members:
-
-.. autoclass:: pytorch_transformers.data.processors.glue.QqpProcessor
-    :members:
-
-.. autoclass:: pytorch_transformers.data.processors.glue.QnliProcessor
-    :members:
-
-.. autoclass:: pytorch_transformers.data.processors.glue.RteProcessor
-    :members:
-
-.. autoclass:: pytorch_transformers.data.processors.glue.WnliProcessor
-    :members:
+Example usage
+^^^^^^^^^^^^^^^^^^^^^^^^^
diff --git a/transformers/data/processors/glue.py b/transformers/data/processors/glue.py
index 3010ce9840..2322f58604 100644
--- a/transformers/data/processors/glue.py
+++ b/transformers/data/processors/glue.py
@@ -26,6 +26,7 @@ if is_tf_available():
 
 logger = logging.getLogger(__name__)
 
+
 def glue_convert_examples_to_features(examples, tokenizer,
                                       max_length=512,
                                       task=None,
@@ -36,7 +37,27 @@ def glue_convert_examples_to_features(examples, tokenizer,
                                       pad_token_segment_id=0,
                                       mask_padding_with_zero=True):
     """
-    Loads a data file into a list of `InputBatch`s
+    Loads a data file into a list of ``InputFeatures``
+
+    Args:
+        examples: List of ``InputExamples`` or ``tf.data.Dataset`` containing the examples.
+        tokenizer: Instance of a tokenizer that will tokenize the examples
+        max_length: Maximum example length
+        task: GLUE task
+        label_list: List of labels. Can be obtained from the processor using the ``processor.get_labels()`` method
+        output_mode: String indicating the output mode. Either ``regression`` or ``classification``
+        pad_on_left: If set to ``True``, the examples will be padded on the left rather than on the right (default)
+        pad_token: Padding token
+        pad_token_segment_id: The segment ID for the padding token (It is usually 0, but can vary such as for XLNet where it is 4)
+        mask_padding_with_zero: If set to ``True``, the attention mask will be filled by ``1`` for actual values
+            and by ``0`` for padded values. If set to ``False``, inverts it (``1`` for padded values, ``0`` for
+            actual values)
+
+    Returns:
+        If the ``examples`` input is a ``tf.data.Dataset``, will return a ``tf.data.Dataset``
+        containing the task-specific features. If the input is a list of ``InputExamples``, will return
+        a list of task-specific ``InputFeatures`` which can be fed to the model.
+
     """
     is_tf_dataset = False
     if is_tf_available() and isinstance(examples, tf.data.Dataset):