Doc styling (#8067)

* Important files * Styling them all * Revert "Styling them all" This reverts commit 7d029395fdae8513b8281cbc2a6c239f8093503e. * Styling them for realsies * Fix syntax error * Fix benchmark_utils * More fixes * Fix modeling auto and script * Remove new line * Fixes * More fixes * Fix more files * Style * Add FSMT * More fixes * More fixes * More fixes * More fixes * Fixes * More fixes * More fixes * Last fixes * Make sphinx happy
2020-10-26 18:26:02 -04:00
parent 04a17f8550
commit 08f534d2da
271 changed files with 9726 additions and 8991 deletions
--- a/src/transformers/data/data_collator.py
+++ b/src/transformers/data/data_collator.py
@@ -11,21 +11,22 @@ from ..tokenization_utils_base import BatchEncoding, PaddingStrategy, PreTrained
 InputDataClass = NewType("InputDataClass", Any)

 """
-A DataCollator is a function that takes a list of samples from a Dataset
-and collate them into a batch, as a dictionary of Tensors.
+A DataCollator is a function that takes a list of samples from a Dataset and collates them into a batch, as a
+dictionary of Tensors.
 """
 DataCollator = NewType("DataCollator", Callable[[List[InputDataClass]], Dict[str, torch.Tensor]])


 def default_data_collator(features: List[InputDataClass]) -> Dict[str, torch.Tensor]:
    """
-    Very simple data collator that simply collates batches of dict-like objects and erforms special handling for potential keys named:
+    Very simple data collator that simply collates batches of dict-like objects and performs special handling for
+    potential keys named:

        - ``label``: handles a single value (int or float) per object
        - ``label_ids``: handles a list of values per object

-    Des not do any additional preprocessing: property names of the input object will be used as corresponding inputs to the model.
-    See glue and ner for example of how it's useful.
+    Does not do any additional preprocessing: property names of the input object will be used as corresponding inputs
+    to the model. See glue and ner for examples of how it's useful.
    """

    # In this function we'll make the assumption that all `features` in the batch
@@ -73,11 +74,11 @@ class DataCollatorWithPadding:
        tokenizer (:class:`~transformers.PreTrainedTokenizer` or :class:`~transformers.PreTrainedTokenizerFast`):
            The tokenizer used for encoding the data.
        padding (:obj:`bool`, :obj:`str` or :class:`~transformers.tokenization_utils_base.PaddingStrategy`, `optional`, defaults to :obj:`True`):
-            Select a strategy to pad the returned sequences (according to the model's padding side and padding
-            index) among:
+            Select a strategy to pad the returned sequences (according to the model's padding side and padding index)
+            among:

-            * :obj:`True` or :obj:`'longest'`: Pad to the longest sequence in the batch (or no padding if only a
-              single sequence if provided).
+            * :obj:`True` or :obj:`'longest'`: Pad to the longest sequence in the batch (or no padding if only a single
+              sequence is provided).
            * :obj:`'max_length'`: Pad to a maximum length specified with the argument :obj:`max_length` or to the
              maximum acceptable input length for the model if that argument is not provided.
            * :obj:`False` or :obj:`'do_not_pad'` (default): No padding (i.e., can output a batch with sequences of
@@ -87,8 +88,8 @@ class DataCollatorWithPadding:
        pad_to_multiple_of (:obj:`int`, `optional`):
            If set will pad the sequence to a multiple of the provided value.

-            This is especially useful to enable the use of Tensor Cores on NVIDIA hardware with compute capability
-            >= 7.5 (Volta).
+            This is especially useful to enable the use of Tensor Cores on NVIDIA hardware with compute capability >=
+            7.5 (Volta).
    """

    tokenizer: PreTrainedTokenizerBase
@@ -117,6 +118,7 @@ class DataCollatorWithPadding:
 class DataCollatorForLanguageModeling:
    """
    Data collator used for language modeling.
+
    - collates batches of tensors, honoring their tokenizer's pad_token
    - preprocesses batches for masked language modeling
    """
@@ -198,6 +200,7 @@ class DataCollatorForLanguageModeling:
 class DataCollatorForWholeWordMask(DataCollatorForLanguageModeling):
    """
    Data collator used for language modeling.
+
    - collates batches of tensors, honoring their tokenizer's pad_token
    - preprocesses batches for masked language modeling
    """
@@ -275,8 +278,8 @@ class DataCollatorForWholeWordMask(DataCollatorForLanguageModeling):

    def mask_tokens(self, inputs: torch.Tensor, mask_labels: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]:
        """
-        Prepare masked tokens inputs/labels for masked language modeling: 80% MASK, 10% random, 10% original.
-        Set 'mask_labels' means we use whole word mask (wwm), we directly mask idxs according to it's ref.
+        Prepare masked tokens inputs/labels for masked language modeling: 80% MASK, 10% random, 10% original. Set
+        'mask_labels' means we use whole word mask (wwm), we directly mask idxs according to its ref.
        """

        if self.tokenizer.mask_token is None:
@@ -316,6 +319,7 @@ class DataCollatorForWholeWordMask(DataCollatorForLanguageModeling):
 class DataCollatorForSOP(DataCollatorForLanguageModeling):
    """
    Data collator used for sentence order prediction task.
+
    - collates batches of tensors, honoring their tokenizer's pad_token
    - preprocesses batches for both masked language modeling and sentence order prediction
    """
@@ -342,8 +346,8 @@ class DataCollatorForSOP(DataCollatorForLanguageModeling):

    def mask_tokens(self, inputs: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
        """
-        Prepare masked tokens inputs/labels/attention_mask for masked language modeling: 80% MASK, 10% random, 10% original.
-        N-gram not applied yet.
+        Prepare masked tokens inputs/labels/attention_mask for masked language modeling: 80% MASK, 10% random, 10%
+        original. N-gram not applied yet.
        """
        if self.tokenizer.mask_token is None:
            raise ValueError(
@@ -385,6 +389,7 @@ class DataCollatorForSOP(DataCollatorForLanguageModeling):
 class DataCollatorForPermutationLanguageModeling:
    """
    Data collator used for permutation language modeling.
+
    - collates batches of tensors, honoring their tokenizer's pad_token
    - preprocesses batches for permutation language modeling with procedures specific to XLNet
    """
@@ -425,10 +430,14 @@ class DataCollatorForPermutationLanguageModeling:
        The masked tokens to be predicted for a particular sequence are determined by the following algorithm:

            0. Start from the beginning of the sequence by setting ``cur_len = 0`` (number of tokens processed so far).
-            1. Sample a ``span_length`` from the interval ``[1, max_span_length]`` (length of span of tokens to be masked)
-            2. Reserve a context of length ``context_length = span_length / plm_probability`` to surround span to be masked
-            3. Sample a starting point ``start_index`` from the interval ``[cur_len, cur_len + context_length - span_length]`` and mask tokens ``start_index:start_index + span_length``
-            4. Set ``cur_len = cur_len + context_length``. If ``cur_len < max_len`` (i.e. there are tokens remaining in the sequence to be processed), repeat from Step 1.
+            1. Sample a ``span_length`` from the interval ``[1, max_span_length]`` (length of span of tokens to be
+               masked)
+            2. Reserve a context of length ``context_length = span_length / plm_probability`` to surround span to be
+               masked
+            3. Sample a starting point ``start_index`` from the interval ``[cur_len, cur_len + context_length -
+               span_length]`` and mask tokens ``start_index:start_index + span_length``
+            4. Set ``cur_len = cur_len + context_length``. If ``cur_len < max_len`` (i.e. there are tokens remaining in
+               the sequence to be processed), repeat from Step 1.
        """

        if self.tokenizer.mask_token is None:
@@ -517,8 +526,9 @@ class DataCollatorForPermutationLanguageModeling:
@dataclass
 class DataCollatorForNextSentencePrediction:
    """
    Data collator used for next sentence prediction.
+
    - collates examples which contains pre-generated negative examples
    - preprocesses batches for masked language modeling
    """

@@ -531,9 +541,12 @@ class DataCollatorForNextSentencePrediction:

    def __call__(self, examples: List[Dict[str, torch.Tensor]]) -> Dict[str, torch.Tensor]:
        """
-        The input should contain negative examples, :class:`~transformers.DataCollatorForNextSentencePrediction` will not generate any negative examples.
+        The input should contain negative examples, :class:`~transformers.DataCollatorForNextSentencePrediction` will
+        not generate any negative examples.
+
        Args:
            examples (:obj:`List[Dict]`): Each dictionary should have the following keys:
+
                  - ``tokens_a``: A sequence of tokens, which should appear before ``tokens_b`` in the text.
                  - ``tokens_b``: A sequence of tokens, which should appear after ``tokens_a`` in the text.
                  - ``is_random_next``: 1 if this pair is generated randomly, else 0.
--- a/src/transformers/data/datasets/glue.py
+++ b/src/transformers/data/datasets/glue.py
@@ -23,9 +23,8 @@ class GlueDataTrainingArguments:
    """
    Arguments pertaining to what data we are going to input our model for training and eval.

-    Using `HfArgumentParser` we can turn this class
-    into argparse arguments to be able to specify them on
-    the command line.
+    Using `HfArgumentParser` we can turn this class into argparse arguments to be able to specify them on the command
+    line.
    """

    task_name: str = field(metadata={"help": "The name of the task to train on: " + ", ".join(glue_processors.keys())})
@@ -55,8 +54,7 @@ class Split(Enum):

 class GlueDataset(Dataset):
    """
-    This will be superseded by a framework-agnostic approach
-    soon.
+    This will be superseded by a framework-agnostic approach soon.
    """

    args: GlueDataTrainingArguments
--- a/src/transformers/data/datasets/language_modeling.py
+++ b/src/transformers/data/datasets/language_modeling.py
@@ -19,8 +19,7 @@ logger = logging.get_logger(__name__)

 class TextDataset(Dataset):
    """
-    This will be superseded by a framework-agnostic approach
-    soon.
+    This will be superseded by a framework-agnostic approach soon.
    """

    def __init__(
@@ -91,8 +90,7 @@ class TextDataset(Dataset):

 class LineByLineTextDataset(Dataset):
    """
-    This will be superseded by a framework-agnostic approach
-    soon.
+    This will be superseded by a framework-agnostic approach soon.
    """

    def __init__(self, tokenizer: PreTrainedTokenizer, file_path: str, block_size: int):
@@ -118,8 +116,7 @@ class LineByLineTextDataset(Dataset):

 class LineByLineWithRefDataset(Dataset):
    """
-    This will be superseded by a framework-agnostic approach
-    soon.
+    This will be superseded by a framework-agnostic approach soon.
    """

    def __init__(self, tokenizer: PreTrainedTokenizer, file_path: str, block_size: int, ref_path: str):
@@ -294,8 +291,7 @@ class LineByLineWithSOPTextDataset(Dataset):

 class TextDatasetForNextSentencePrediction(Dataset):
    """
-    This will be superseded by a framework-agnostic approach
-    soon.
+    This will be superseded by a framework-agnostic approach soon.
    """

    def __init__(
--- a/src/transformers/data/datasets/squad.py
+++ b/src/transformers/data/datasets/squad.py
@@ -86,8 +86,7 @@ class Split(Enum):

 class SquadDataset(Dataset):
    """
-    This will be superseded by a framework-agnostic approach
-    soon.
+    This will be superseded by a framework-agnostic approach soon.
    """

    args: SquadDataTrainingArguments
--- a/src/transformers/data/metrics/squad_metrics.py
+++ b/src/transformers/data/metrics/squad_metrics.py
@@ -1,10 +1,10 @@
-""" Very heavily inspired by the official evaluation script for SQuAD version 2.0 which was
-modified by XLNet authors to update `find_best_threshold` scripts for SQuAD V2.0
+"""
+Very heavily inspired by the official evaluation script for SQuAD version 2.0 which was modified by XLNet authors to
+update `find_best_threshold` scripts for SQuAD V2.0.

-In addition to basic functionality, we also compute additional statistics and
-plot precision-recall curves if an additional na_prob.json file is provided.
-This file is expected to map question ID's to the model's predicted probability
-that a question is unanswerable.
+In addition to basic functionality, we also compute additional statistics and plot precision-recall curves if an
+additional na_prob.json file is provided. This file is expected to map question ID's to the model's predicted
+probability that a question is unanswerable.
 """


@@ -589,8 +589,9 @@ def compute_predictions_log_probs(
    tokenizer,
    verbose_logging,
 ):
-    """XLNet write prediction logic (more complex than Bert's).
-    Write final predictions to the json file and log-odds of null if needed.
+    """
+    XLNet write prediction logic (more complex than Bert's). Write final predictions to the json file and log-odds of
+    null if needed.

    Requires utils_squad_evaluate.py
    """
--- a/src/transformers/data/processors/glue.py
+++ b/src/transformers/data/processors/glue.py
@@ -52,9 +52,9 @@ def glue_convert_examples_to_features(
        output_mode: String indicating the output mode. Either ``regression`` or ``classification``

    Returns:
-        If the ``examples`` input is a ``tf.data.Dataset``, will return a ``tf.data.Dataset``
-        containing the task-specific features. If the input is a list of ``InputExamples``, will return
-        a list of task-specific ``InputFeatures`` which can be fed to the model.
+        If the ``examples`` input is a ``tf.data.Dataset``, will return a ``tf.data.Dataset`` containing the
+        task-specific features. If the input is a list of ``InputExamples``, will return a list of task-specific
+        ``InputFeatures`` which can be fed to the model.

    """
    if is_tf_available() and isinstance(examples, tf.data.Dataset):
--- a/src/transformers/data/processors/squad.py
+++ b/src/transformers/data/processors/squad.py
@@ -314,8 +314,8 @@ def squad_convert_examples_to_features(
    tqdm_enabled=True,
 ):
    """
-    Converts a list of examples into a list of features that can be directly given as input to a model.
-    It is model-dependant and takes advantage of many of the tokenizer's features to create the model's inputs.
+    Converts a list of examples into a list of features that can be directly given as input to a model. It is
+    model-dependent and takes advantage of many of the tokenizer's features to create the model's inputs.

    Args:
        examples: list of :class:`~transformers.data.processors.squad.SquadExample`
@@ -326,8 +326,7 @@ def squad_convert_examples_to_features(
        is_training: whether to create features for model evaluation or model training.
        padding_strategy: Default to "max_length". Which padding strategy to use
        return_dataset: Default False. Either 'pt' or 'tf'.
-            if 'pt': returns a torch.data.TensorDataset,
-            if 'tf': returns a tf.data.Dataset
+            if 'pt': returns a torch.data.TensorDataset, if 'tf': returns a tf.data.Dataset
        threads: multiple processing threadsa-smi


@@ -528,8 +527,8 @@ def squad_convert_examples_to_features(

 class SquadProcessor(DataProcessor):
    """
-    Processor for the SQuAD data set.
-    Overriden by SquadV1Processor and SquadV2Processor, used by the version 1.1 and version 2.0 of SQuAD, respectively.
+    Processor for the SQuAD data set. Overridden by SquadV1Processor and SquadV2Processor, used by the version 1.1 and
+    version 2.0 of SQuAD, respectively.
    """

    train_file = None
@@ -745,9 +744,9 @@ class SquadExample:

 class SquadFeatures:
    """
-    Single squad example features to be fed to a model.
-    Those features are model-specific and can be crafted from :class:`~transformers.data.processors.squad.SquadExample`
-    using the :method:`~transformers.data.processors.squad.squad_convert_examples_to_features` method.
+    Single squad example features to be fed to a model. Those features are model-specific and can be crafted from
+    :class:`~transformers.data.processors.squad.SquadExample` using the
+    :func:`~transformers.data.processors.squad.squad_convert_examples_to_features` function.

    Args:
        input_ids: Indices of input sequence tokens in the vocabulary.
--- a/src/transformers/data/processors/utils.py
+++ b/src/transformers/data/processors/utils.py
@@ -55,14 +55,13 @@ class InputExample:
@dataclass(frozen=True)
 class InputFeatures:
    """
-    A single set of features of data.
-    Property names are the same names as the corresponding inputs to a model.
+    A single set of features of data. Property names are the same names as the corresponding inputs to a model.

    Args:
        input_ids: Indices of input sequence tokens in the vocabulary.
        attention_mask: Mask to avoid performing attention on padding token indices.
-            Mask values selected in ``[0, 1]``:
-            Usually  ``1`` for tokens that are NOT MASKED, ``0`` for MASKED (padded) tokens.
+            Mask values selected in ``[0, 1]``: Usually ``1`` for tokens that are NOT MASKED, ``0`` for MASKED (padded)
+            tokens.
        token_type_ids: (Optional) Segment token indices to indicate first and second
            portions of the inputs. Only some models use them.
        label: (Optional) Label corresponding to the input. Int for classification problems,
@@ -83,7 +82,8 @@ class DataProcessor:
    """Base class for data converters for sequence classification data sets."""

    def get_example_from_tensor_dict(self, tensor_dict):
-        """Gets an example from a dict with tensorflow tensors.
+        """
+        Gets an example from a dict with tensorflow tensors.

        Args:
            tensor_dict: Keys and values should match the corresponding Glue
@@ -108,8 +108,10 @@ class DataProcessor:
        raise NotImplementedError()

    def tfds_map(self, example):
-        """Some tensorflow_datasets datasets are not formatted the same way the GLUE datasets are.
-        This method converts examples to the correct format."""
+        """
+        Some tensorflow_datasets datasets are not formatted the same way the GLUE datasets are. This method converts
+        examples to the correct format.
+        """
        if len(self.get_labels()) > 1:
            example.label = self.get_labels()[int(example.label)]
        return example
@@ -253,9 +255,9 @@ class SingleSentenceClassificationProcessor(DataProcessor):
                actual values)

        Returns:
-            If the ``examples`` input is a ``tf.data.Dataset``, will return a ``tf.data.Dataset``
-            containing the task-specific features. If the input is a list of ``InputExamples``, will return
-            a list of task-specific ``InputFeatures`` which can be fed to the model.
+            If the ``examples`` input is a ``tf.data.Dataset``, will return a ``tf.data.Dataset`` containing the
+            task-specific features. If the input is a list of ``InputExamples``, will return a list of task-specific
+            ``InputFeatures`` which can be fed to the model.

        """
        if max_length is None:
--- a/src/transformers/data/processors/xnli.py
+++ b/src/transformers/data/processors/xnli.py
@@ -26,8 +26,10 @@ logger = logging.get_logger(__name__)


 class XnliProcessor(DataProcessor):
-    """Processor for the XNLI dataset.
-    Adapted from https://github.com/google-research/bert/blob/f39e881b169b9d53bea03d2d341b31707a6c052b/run_classifier.py#L207"""
+    """
+    Processor for the XNLI dataset. Adapted from
+    https://github.com/google-research/bert/blob/f39e881b169b9d53bea03d2d341b31707a6c052b/run_classifier.py#L207
+    """

    def __init__(self, language, train_language=None):
        self.language = language