Upgrade styler to better handle lists (#9423)

* Add missing lines before a new list.
* Update doc styler and restyle some files.
* Fix docstrings of LED and Longformer
2021-01-06 07:46:17 -05:00
parent b7e548976f
commit bcb55d33ce
10 changed files with 66 additions and 32 deletions
--- a/src/transformers/models/dpr/modeling_tf_dpr.py
+++ b/src/transformers/models/dpr/modeling_tf_dpr.py
@@ -443,16 +443,22 @@ TF_DPR_START_DOCSTRING = r"""

    .. note::

-        TF 2.0 models accepts two formats as inputs: - having all inputs as keyword arguments (like PyTorch models), or
-        - having all inputs as a list, tuple or dict in the first positional arguments. This second option is useful
-        when using :meth:`tf.keras.Model.fit` method which currently requires having all the tensors in the first
-        argument of the model call function: :obj:`model(inputs)`. If you choose this second option, there are three
-        possibilities you can use to gather all the input Tensors in the first positional argument : - a single Tensor
-        with :obj:`input_ids` only and nothing else: :obj:`model(inputs_ids)` - a list of varying length with one or
-        several input Tensors IN THE ORDER given in the docstring: :obj:`model([input_ids, attention_mask])` or
-        :obj:`model([input_ids, attention_mask, token_type_ids])` - a dictionary with one or several input Tensors
-        associated to the input names given in the docstring: :obj:`model({"input_ids": input_ids, "token_type_ids":
-        token_type_ids})`
+        TF 2.0 models accept two formats as inputs:
+
+        - having all inputs as keyword arguments (like PyTorch models), or
+        - having all inputs as a list, tuple or dict in the first positional arguments.
+
+        This second option is useful when using :meth:`tf.keras.Model.fit` method which currently requires having all
+        the tensors in the first argument of the model call function: :obj:`model(inputs)`.
+
+        If you choose this second option, there are three possibilities you can use to gather all the input Tensors in
+        the first positional argument:
+
+        - a single Tensor with :obj:`input_ids` only and nothing else: :obj:`model(input_ids)`
+        - a list of varying length with one or several input Tensors IN THE ORDER given in the docstring:
+          :obj:`model([input_ids, attention_mask])` or :obj:`model([input_ids, attention_mask, token_type_ids])`
+        - a dictionary with one or several input Tensors associated to the input names given in the docstring:
+          :obj:`model({"input_ids": input_ids, "token_type_ids": token_type_ids})`

    Parameters:
        config (:class:`~transformers.DPRConfig`): Model configuration class with all the parameters of the model.
--- a/src/transformers/models/layoutlm/modeling_layoutlm.py
+++ b/src/transformers/models/layoutlm/modeling_layoutlm.py
@@ -638,8 +638,8 @@ LAYOUTLM_INPUTS_DOCSTRING = r"""

            `What are input IDs? <../glossary.html#input-ids>`__
        bbox (:obj:`torch.LongTensor` of shape :obj:`({0}, 4)`, `optional`):
-            Bounding Boxes of each input sequence tokens. Selected in the range ``[0, config.max_2d_position_embeddings
-            - 1]``.
+            Bounding Boxes of each input sequence tokens. Selected in the range ``[0,
+            config.max_2d_position_embeddings-1]``.
        attention_mask (:obj:`torch.FloatTensor` of shape :obj:`({0})`, `optional`):
            Mask to avoid performing attention on padding token indices. Mask values selected in ``[0, 1]``: ``1`` for
            tokens that are NOT MASKED, ``0`` for MASKED tokens.
--- a/src/transformers/models/led/modeling_led.py
+++ b/src/transformers/models/led/modeling_led.py
@@ -172,11 +172,11 @@ class LEDEncoderSelfAttention(nn.Module):
        :class:`LEDEncoderSelfAttention` expects `len(hidden_states)` to be multiple of `attention_window`. Padding to
        `attention_window` happens in :meth:`LEDEncoderModel.forward` to avoid redoing the padding on each layer.

-        The `attention_mask` is changed in :meth:`BertModel.forward` from 0, 1, 2 to -ve: no attention
-
-              0: local attention
-            +ve: global attention
+        The `attention_mask` is changed in :meth:`LEDEncoderModel.forward` from 0, 1, 2 to:

+            * -10000: no attention
+            * 0: local attention
+            * +10000: global attention
        """
        hidden_states = hidden_states.transpose(0, 1)

--- a/src/transformers/models/led/modeling_tf_led.py
+++ b/src/transformers/models/led/modeling_tf_led.py
@@ -190,11 +190,11 @@ class TFLEDEncoderSelfAttention(tf.keras.layers.Layer):
        LongformerSelfAttention expects `len(hidden_states)` to be multiple of `attention_window`. Padding to
        `attention_window` happens in LongformerModel.forward to avoid redoing the padding on each layer.

-        The `attention_mask` is changed in `BertModel.forward` from 0, 1, 2 to -ve: no attention
-
-              0: local attention
-            +ve: global attention
+        The `attention_mask` is changed in :meth:`LongformerModel.forward` from 0, 1, 2 to:

+            * -10000: no attention
+            * 0: local attention
+            * +10000: global attention
        """
        # retrieve input args
        (
--- a/src/transformers/models/longformer/modeling_longformer.py
+++ b/src/transformers/models/longformer/modeling_longformer.py
@@ -561,11 +561,11 @@ class LongformerSelfAttention(nn.Module):
        :class:`LongformerSelfAttention` expects `len(hidden_states)` to be multiple of `attention_window`. Padding to
        `attention_window` happens in :meth:`LongformerModel.forward` to avoid redoing the padding on each layer.

-        The `attention_mask` is changed in :meth:`BertModel.forward` from 0, 1, 2 to -ve: no attention
-
-              0: local attention
-            +ve: global attention
+        The `attention_mask` is changed in :meth:`LongformerModel.forward` from 0, 1, 2 to:

+            * -10000: no attention
+            * 0: local attention
+            * +10000: global attention
        """
        hidden_states = hidden_states.transpose(0, 1)

--- a/src/transformers/models/longformer/modeling_tf_longformer.py
+++ b/src/transformers/models/longformer/modeling_tf_longformer.py
@@ -768,11 +768,11 @@ class TFLongformerSelfAttention(tf.keras.layers.Layer):
        LongformerSelfAttention expects `len(hidden_states)` to be multiple of `attention_window`. Padding to
        `attention_window` happens in LongformerModel.forward to avoid redoing the padding on each layer.

-        The `attention_mask` is changed in `BertModel.forward` from 0, 1, 2 to -ve: no attention
-
-              0: local attention
-            +ve: global attention
+        The `attention_mask` is changed in :meth:`LongformerModel.forward` from 0, 1, 2 to:

+            * -10000: no attention
+            * 0: local attention
+            * +10000: global attention
        """
        # retrieve input args
        (
--- a/src/transformers/testing_utils.py
+++ b/src/transformers/testing_utils.py
@@ -522,6 +522,7 @@ class CaptureLogger:
    Context manager to capture `logging` streams

    Args:
+
    - logger: `logging` logger object

    Results:
@@ -851,9 +852,10 @@ def pytest_terminal_summary_main(tr, id):
    there.

    Args:
+
    - tr: `terminalreporter` passed from `conftest.py`
-    - id: unique id like `tests` or `examples` that will be incorporated into the final reports
-      filenames - this is needed as some jobs have multiple runs of pytest, so we can't have them overwrite each other.
+    - id: unique id like `tests` or `examples` that will be incorporated into the final reports filenames - this is
+      needed as some jobs have multiple runs of pytest, so we can't have them overwrite each other.

    NB: this function taps into a private _pytest API and while unlikely, it could break should
    pytest do internal changes - also it calls default internal methods of terminalreporter which
--- a/src/transformers/trainer_utils.py
+++ b/src/transformers/trainer_utils.py
@@ -191,6 +191,7 @@ def speed_metrics(split, start_time, num_samples=None):
    should be run immediately after the operation to be measured has completed.

    Args:
+
    - split: name to prefix metric (like train, eval, test...)
    - start_time: operation start time
    - num_samples: number of samples processed