Upgrade styler to better handle lists (#9423)
* Add missing lines before a new list.
* Update doc styler and restyle some files.
* Fix docstrings of LED and Longformer.
This commit is contained in:
@@ -40,7 +40,6 @@ Tips:
|
|||||||
*Longformer*'s *chunked self-attention* layer. :class:`~transformers.LEDTokenizer` is an alias of
|
*Longformer*'s *chunked self-attention* layer. :class:`~transformers.LEDTokenizer` is an alias of
|
||||||
:class:`~transformers.BartTokenizer`.
|
:class:`~transformers.BartTokenizer`.
|
||||||
- LED works very well on long-range *sequence-to-sequence* tasks where the ``input_ids`` largely exceed a length of
|
- LED works very well on long-range *sequence-to-sequence* tasks where the ``input_ids`` largely exceed a length of
|
||||||
|
|
||||||
1024 tokens.
|
1024 tokens.
|
||||||
- LED pads the ``input_ids`` to be a multiple of ``config.attention_window`` if required. Therefore a small speed-up is
|
- LED pads the ``input_ids`` to be a multiple of ``config.attention_window`` if required. Therefore a small speed-up is
|
||||||
gained, when :class:`~transformers.LEDTokenizer` is used with the ``pad_to_multiple_of`` argument.
|
gained, when :class:`~transformers.LEDTokenizer` is used with the ``pad_to_multiple_of`` argument.
|
||||||
|
|||||||
@@ -443,16 +443,22 @@ TF_DPR_START_DOCSTRING = r"""
|
|||||||
|
|
||||||
.. note::
|
.. note::
|
||||||
|
|
||||||
TF 2.0 models accept two formats as inputs: - having all inputs as keyword arguments (like PyTorch models), or
|
TF 2.0 models accept two formats as inputs:
|
||||||
- having all inputs as a list, tuple or dict in the first positional arguments. This second option is useful
|
|
||||||
when using :meth:`tf.keras.Model.fit` method which currently requires having all the tensors in the first
|
- having all inputs as keyword arguments (like PyTorch models), or
|
||||||
argument of the model call function: :obj:`model(inputs)`. If you choose this second option, there are three
|
- having all inputs as a list, tuple or dict in the first positional arguments.
|
||||||
possibilities you can use to gather all the input Tensors in the first positional argument : - a single Tensor
|
|
||||||
with :obj:`input_ids` only and nothing else: :obj:`model(inputs_ids)` - a list of varying length with one or
|
This second option is useful when using :meth:`tf.keras.Model.fit` method which currently requires having all
|
||||||
several input Tensors IN THE ORDER given in the docstring: :obj:`model([input_ids, attention_mask])` or
|
the tensors in the first argument of the model call function: :obj:`model(inputs)`.
|
||||||
:obj:`model([input_ids, attention_mask, token_type_ids])` - a dictionary with one or several input Tensors
|
|
||||||
associated to the input names given in the docstring: :obj:`model({"input_ids": input_ids, "token_type_ids":
|
If you choose this second option, there are three possibilities you can use to gather all the input Tensors in
|
||||||
token_type_ids})`
|
the first positional argument :
|
||||||
|
|
||||||
|
- a single Tensor with :obj:`input_ids` only and nothing else: :obj:`model(inputs_ids)`
|
||||||
|
- a list of varying length with one or several input Tensors IN THE ORDER given in the docstring:
|
||||||
|
:obj:`model([input_ids, attention_mask])` or :obj:`model([input_ids, attention_mask, token_type_ids])`
|
||||||
|
- a dictionary with one or several input Tensors associated to the input names given in the docstring:
|
||||||
|
:obj:`model({"input_ids": input_ids, "token_type_ids": token_type_ids})`
|
||||||
|
|
||||||
Parameters:
|
Parameters:
|
||||||
config (:class:`~transformers.DPRConfig`): Model configuration class with all the parameters of the model.
|
config (:class:`~transformers.DPRConfig`): Model configuration class with all the parameters of the model.
|
||||||
|
|||||||
@@ -638,8 +638,8 @@ LAYOUTLM_INPUTS_DOCSTRING = r"""
|
|||||||
|
|
||||||
`What are input IDs? <../glossary.html#input-ids>`__
|
`What are input IDs? <../glossary.html#input-ids>`__
|
||||||
bbox (:obj:`torch.LongTensor` of shape :obj:`({0}, 4)`, `optional`):
|
bbox (:obj:`torch.LongTensor` of shape :obj:`({0}, 4)`, `optional`):
|
||||||
Bounding Boxes of each input sequence tokens. Selected in the range ``[0, config.max_2d_position_embeddings
|
Bounding Boxes of each input sequence tokens. Selected in the range ``[0,
|
||||||
- 1]``.
|
config.max_2d_position_embeddings-1]``.
|
||||||
attention_mask (:obj:`torch.FloatTensor` of shape :obj:`({0})`, `optional`):
|
attention_mask (:obj:`torch.FloatTensor` of shape :obj:`({0})`, `optional`):
|
||||||
Mask to avoid performing attention on padding token indices. Mask values selected in ``[0, 1]``: ``1`` for
|
Mask to avoid performing attention on padding token indices. Mask values selected in ``[0, 1]``: ``1`` for
|
||||||
tokens that are NOT MASKED, ``0`` for MASKED tokens.
|
tokens that are NOT MASKED, ``0`` for MASKED tokens.
|
||||||
|
|||||||
@@ -172,11 +172,11 @@ class LEDEncoderSelfAttention(nn.Module):
|
|||||||
:class:`LEDEncoderSelfAttention` expects `len(hidden_states)` to be multiple of `attention_window`. Padding to
|
:class:`LEDEncoderSelfAttention` expects `len(hidden_states)` to be multiple of `attention_window`. Padding to
|
||||||
`attention_window` happens in :meth:`LEDEncoderModel.forward` to avoid redoing the padding on each layer.
|
`attention_window` happens in :meth:`LEDEncoderModel.forward` to avoid redoing the padding on each layer.
|
||||||
|
|
||||||
The `attention_mask` is changed in :meth:`BertModel.forward` from 0, 1, 2 to -ve: no attention
|
The `attention_mask` is changed in :meth:`LEDEncoderModel.forward` from 0, 1, 2 to:
|
||||||
|
|
||||||
0: local attention
|
|
||||||
+ve: global attention
|
|
||||||
|
|
||||||
|
* -10000: no attention
|
||||||
|
* 0: local attention
|
||||||
|
* +10000: global attention
|
||||||
"""
|
"""
|
||||||
hidden_states = hidden_states.transpose(0, 1)
|
hidden_states = hidden_states.transpose(0, 1)
|
||||||
|
|
||||||
|
|||||||
@@ -190,11 +190,11 @@ class TFLEDEncoderSelfAttention(tf.keras.layers.Layer):
|
|||||||
LongformerSelfAttention expects `len(hidden_states)` to be multiple of `attention_window`. Padding to
|
LongformerSelfAttention expects `len(hidden_states)` to be multiple of `attention_window`. Padding to
|
||||||
`attention_window` happens in LongformerModel.forward to avoid redoing the padding on each layer.
|
`attention_window` happens in LongformerModel.forward to avoid redoing the padding on each layer.
|
||||||
|
|
||||||
The `attention_mask` is changed in `BertModel.forward` from 0, 1, 2 to -ve: no attention
|
The `attention_mask` is changed in :meth:`LongformerModel.forward` from 0, 1, 2 to:
|
||||||
|
|
||||||
0: local attention
|
|
||||||
+ve: global attention
|
|
||||||
|
|
||||||
|
* -10000: no attention
|
||||||
|
* 0: local attention
|
||||||
|
* +10000: global attention
|
||||||
"""
|
"""
|
||||||
# retrieve input args
|
# retrieve input args
|
||||||
(
|
(
|
||||||
|
|||||||
@@ -561,11 +561,11 @@ class LongformerSelfAttention(nn.Module):
|
|||||||
:class:`LongformerSelfAttention` expects `len(hidden_states)` to be multiple of `attention_window`. Padding to
|
:class:`LongformerSelfAttention` expects `len(hidden_states)` to be multiple of `attention_window`. Padding to
|
||||||
`attention_window` happens in :meth:`LongformerModel.forward` to avoid redoing the padding on each layer.
|
`attention_window` happens in :meth:`LongformerModel.forward` to avoid redoing the padding on each layer.
|
||||||
|
|
||||||
The `attention_mask` is changed in :meth:`BertModel.forward` from 0, 1, 2 to -ve: no attention
|
The `attention_mask` is changed in :meth:`LongformerModel.forward` from 0, 1, 2 to:
|
||||||
|
|
||||||
0: local attention
|
|
||||||
+ve: global attention
|
|
||||||
|
|
||||||
|
* -10000: no attention
|
||||||
|
* 0: local attention
|
||||||
|
* +10000: global attention
|
||||||
"""
|
"""
|
||||||
hidden_states = hidden_states.transpose(0, 1)
|
hidden_states = hidden_states.transpose(0, 1)
|
||||||
|
|
||||||
|
|||||||
@@ -768,11 +768,11 @@ class TFLongformerSelfAttention(tf.keras.layers.Layer):
|
|||||||
LongformerSelfAttention expects `len(hidden_states)` to be multiple of `attention_window`. Padding to
|
LongformerSelfAttention expects `len(hidden_states)` to be multiple of `attention_window`. Padding to
|
||||||
`attention_window` happens in LongformerModel.forward to avoid redoing the padding on each layer.
|
`attention_window` happens in LongformerModel.forward to avoid redoing the padding on each layer.
|
||||||
|
|
||||||
The `attention_mask` is changed in `BertModel.forward` from 0, 1, 2 to -ve: no attention
|
The `attention_mask` is changed in :meth:`LongformerModel.forward` from 0, 1, 2 to:
|
||||||
|
|
||||||
0: local attention
|
|
||||||
+ve: global attention
|
|
||||||
|
|
||||||
|
* -10000: no attention
|
||||||
|
* 0: local attention
|
||||||
|
* +10000: global attention
|
||||||
"""
|
"""
|
||||||
# retrieve input args
|
# retrieve input args
|
||||||
(
|
(
|
||||||
|
|||||||
@@ -522,6 +522,7 @@ class CaptureLogger:
|
|||||||
Context manager to capture `logging` streams
|
Context manager to capture `logging` streams
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
|
|
||||||
- logger: `logging` logger object
|
- logger: `logging` logger object
|
||||||
|
|
||||||
Results:
|
Results:
|
||||||
@@ -851,9 +852,10 @@ def pytest_terminal_summary_main(tr, id):
|
|||||||
there.
|
there.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
|
|
||||||
- tr: `terminalreporter` passed from `conftest.py`
|
- tr: `terminalreporter` passed from `conftest.py`
|
||||||
- id: unique id like `tests` or `examples` that will be incorporated into the final reports
|
- id: unique id like `tests` or `examples` that will be incorporated into the final reports filenames - this is
|
||||||
filenames - this is needed as some jobs have multiple runs of pytest, so we can't have them overwrite each other.
|
needed as some jobs have multiple runs of pytest, so we can't have them overwrite each other.
|
||||||
|
|
||||||
NB: this function taps into a private _pytest API and while unlikely, it could break should
|
NB: this function taps into a private _pytest API and while unlikely, it could break should
|
||||||
pytest do internal changes - also it calls default internal methods of terminalreporter which
|
pytest do internal changes - also it calls default internal methods of terminalreporter which
|
||||||
|
|||||||
@@ -191,6 +191,7 @@ def speed_metrics(split, start_time, num_samples=None):
|
|||||||
should be run immediately after the operation to be measured has completed.
|
should be run immediately after the operation to be measured has completed.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
|
|
||||||
- split: name to prefix metric (like train, eval, test...)
|
- split: name to prefix metric (like train, eval, test...)
|
||||||
- start_time: operation start time
|
- start_time: operation start time
|
||||||
- num_samples: number of samples processed
|
- num_samples: number of samples processed
|
||||||
|
|||||||
@@ -42,7 +42,7 @@ DOC_SPECIAL_WORD = [
|
|||||||
# Matches any declaration of textual block, like `.. note::`. (ignore case to avoid writing all versions in the list)
|
# Matches any declaration of textual block, like `.. note::`. (ignore case to avoid writing all versions in the list)
|
||||||
_re_textual_blocks = re.compile(r"^\s*\.\.\s+(" + "|".join(TEXTUAL_BLOCKS) + r")\s*::\s*$", re.IGNORECASE)
|
_re_textual_blocks = re.compile(r"^\s*\.\.\s+(" + "|".join(TEXTUAL_BLOCKS) + r")\s*::\s*$", re.IGNORECASE)
|
||||||
# Matches list introduction in rst.
|
# Matches list introduction in rst.
|
||||||
_re_list = re.compile(r"^(\s*-\s+|\s*\*\s+|\s*\d+.\s+)")
|
_re_list = re.compile(r"^(\s*-\s+|\s*\*\s+|\s*\d+\.\s+)")
|
||||||
# Matches the indent in a line.
|
# Matches the indent in a line.
|
||||||
_re_indent = re.compile(r"^(\s*)\S")
|
_re_indent = re.compile(r"^(\s*)\S")
|
||||||
# Matches a table declaration in rst.
|
# Matches a table declaration in rst.
|
||||||
@@ -355,10 +355,34 @@ rst_styler = CodeStyler()
|
|||||||
doc_styler = DocstringStyler()
|
doc_styler = DocstringStyler()
|
||||||
|
|
||||||
|
|
||||||
|
def _add_new_lines_before_list(text):
|
||||||
|
"""Add a new empty line before a list begins."""
|
||||||
|
lines = text.split("\n")
|
||||||
|
new_lines = []
|
||||||
|
in_list = False
|
||||||
|
for idx, line in enumerate(lines):
|
||||||
|
# Detect if the line is the start of a new list.
|
||||||
|
if _re_list.search(line) is not None and not in_list:
|
||||||
|
current_indent = get_indent(line)
|
||||||
|
in_list = True
|
||||||
|
# If the line before is non empty, add an extra new line.
|
||||||
|
if idx > 0 and len(lines[idx - 1]) != 0:
|
||||||
|
new_lines.append("")
|
||||||
|
# Detect if we're out of the current list.
|
||||||
|
if in_list and not line.startswith(current_indent) and _re_list.search(line) is None:
|
||||||
|
in_list = False
|
||||||
|
new_lines.append(line)
|
||||||
|
return "\n".join(new_lines)
|
||||||
|
|
||||||
|
|
||||||
def style_rst_file(doc_file, max_len=119, check_only=False):
|
def style_rst_file(doc_file, max_len=119, check_only=False):
|
||||||
""" Style one rst file `doc_file` to `max_len`."""
|
""" Style one rst file `doc_file` to `max_len`."""
|
||||||
with open(doc_file, "r", encoding="utf-8", newline="\n") as f:
|
with open(doc_file, "r", encoding="utf-8", newline="\n") as f:
|
||||||
doc = f.read()
|
doc = f.read()
|
||||||
|
|
||||||
|
# Add missing new lines before lists
|
||||||
|
doc = _add_new_lines_before_list(doc)
|
||||||
|
# Style
|
||||||
clean_doc = rst_styler.style(doc, max_len=max_len)
|
clean_doc = rst_styler.style(doc, max_len=max_len)
|
||||||
|
|
||||||
diff = clean_doc != doc
|
diff = clean_doc != doc
|
||||||
@@ -391,6 +415,8 @@ def style_docstring(docstring, max_len=119):
|
|||||||
|
|
||||||
# Add missing new lines before Args/Returns etc.
|
# Add missing new lines before Args/Returns etc.
|
||||||
docstring = _re_any_doc_special_word.sub(r"\n\n\1\2\3\n", docstring)
|
docstring = _re_any_doc_special_word.sub(r"\n\n\1\2\3\n", docstring)
|
||||||
|
# Add missing new lines before lists
|
||||||
|
docstring = _add_new_lines_before_list(docstring)
|
||||||
# Style
|
# Style
|
||||||
styled_doc = doc_styler.style(docstring, max_len=max_len, min_indent=indent)
|
styled_doc = doc_styler.style(docstring, max_len=max_len, min_indent=indent)
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user