diff --git a/docs/source/model_doc/led.rst b/docs/source/model_doc/led.rst index cb7a46ab6e..4dbdbbaeb3 100644 --- a/docs/source/model_doc/led.rst +++ b/docs/source/model_doc/led.rst @@ -40,7 +40,6 @@ Tips: *Longformer*'s *chunked self-attention* layer. :class:`~transformers.LEDTokenizer` is an alias of :class:`~transformers.BartTokenizer`. - LED works very well on long-range *sequence-to-sequence* tasks where the ``input_ids`` largely exceed a length of - 1024 tokens. - LED pads the ``input_ids`` to be a multiple of ``config.attention_window`` if required. Therefore a small speed-up is gained, when :class:`~transformers.LEDTokenizer` is used with the ``pad_to_multiple_of`` argument. diff --git a/src/transformers/models/dpr/modeling_tf_dpr.py b/src/transformers/models/dpr/modeling_tf_dpr.py index 03033d7792..2cee9a1256 100644 --- a/src/transformers/models/dpr/modeling_tf_dpr.py +++ b/src/transformers/models/dpr/modeling_tf_dpr.py @@ -443,16 +443,22 @@ TF_DPR_START_DOCSTRING = r""" .. note:: - TF 2.0 models accepts two formats as inputs: - having all inputs as keyword arguments (like PyTorch models), or - - having all inputs as a list, tuple or dict in the first positional arguments. This second option is useful - when using :meth:`tf.keras.Model.fit` method which currently requires having all the tensors in the first - argument of the model call function: :obj:`model(inputs)`. If you choose this second option, there are three - possibilities you can use to gather all the input Tensors in the first positional argument : - a single Tensor - with :obj:`input_ids` only and nothing else: :obj:`model(inputs_ids)` - a list of varying length with one or - several input Tensors IN THE ORDER given in the docstring: :obj:`model([input_ids, attention_mask])` or - :obj:`model([input_ids, attention_mask, token_type_ids])` - a dictionary with one or several input Tensors - associated to the input names given in the docstring: :obj:`model({"input_ids": input_ids, "token_type_ids": - token_type_ids})` + TF 2.0 models accepts two formats as inputs: + + - having all inputs as keyword arguments (like PyTorch models), or + - having all inputs as a list, tuple or dict in the first positional arguments. + + This second option is useful when using :meth:`tf.keras.Model.fit` method which currently requires having all + the tensors in the first argument of the model call function: :obj:`model(inputs)`. + + If you choose this second option, there are three possibilities you can use to gather all the input Tensors in + the first positional argument : + + - a single Tensor with :obj:`input_ids` only and nothing else: :obj:`model(inputs_ids)` + - a list of varying length with one or several input Tensors IN THE ORDER given in the docstring: + :obj:`model([input_ids, attention_mask])` or :obj:`model([input_ids, attention_mask, token_type_ids])` + - a dictionary with one or several input Tensors associated to the input names given in the docstring: + :obj:`model({"input_ids": input_ids, "token_type_ids": token_type_ids})` Parameters: config (:class:`~transformers.DPRConfig`): Model configuration class with all the parameters of the model. diff --git a/src/transformers/models/layoutlm/modeling_layoutlm.py b/src/transformers/models/layoutlm/modeling_layoutlm.py index 751a65959e..3e3318121a 100644 --- a/src/transformers/models/layoutlm/modeling_layoutlm.py +++ b/src/transformers/models/layoutlm/modeling_layoutlm.py @@ -638,8 +638,8 @@ LAYOUTLM_INPUTS_DOCSTRING = r""" `What are input IDs? <../glossary.html#input-ids>`__ bbox (:obj:`torch.LongTensor` of shape :obj:`({0}, 4)`, `optional`): - Bounding Boxes of each input sequence tokens. Selected in the range ``[0, config.max_2d_position_embeddings - - 1]``. + Bounding Boxes of each input sequence tokens. Selected in the range ``[0, + config.max_2d_position_embeddings-1]``. attention_mask (:obj:`torch.FloatTensor` of shape :obj:`({0})`, `optional`): Mask to avoid performing attention on padding token indices. Mask values selected in ``[0, 1]``: ``1`` for tokens that are NOT MASKED, ``0`` for MASKED tokens. diff --git a/src/transformers/models/led/modeling_led.py b/src/transformers/models/led/modeling_led.py index 7f86d261e9..098a54818c 100755 --- a/src/transformers/models/led/modeling_led.py +++ b/src/transformers/models/led/modeling_led.py @@ -172,11 +172,11 @@ class LEDEncoderSelfAttention(nn.Module): :class:`LEDEncoderSelfAttention` expects `len(hidden_states)` to be multiple of `attention_window`. Padding to `attention_window` happens in :meth:`LEDEncoderModel.forward` to avoid redoing the padding on each layer. - The `attention_mask` is changed in :meth:`BertModel.forward` from 0, 1, 2 to -ve: no attention - - 0: local attention - +ve: global attention + The `attention_mask` is changed in :meth:`LEDEncoderModel.forward` from 0, 1, 2 to: + * -10000: no attention + * 0: local attention + * +10000: global attention """ hidden_states = hidden_states.transpose(0, 1) diff --git a/src/transformers/models/led/modeling_tf_led.py b/src/transformers/models/led/modeling_tf_led.py index 6ead22d27f..c9d72f3f1d 100644 --- a/src/transformers/models/led/modeling_tf_led.py +++ b/src/transformers/models/led/modeling_tf_led.py @@ -190,11 +190,11 @@ class TFLEDEncoderSelfAttention(tf.keras.layers.Layer): LongformerSelfAttention expects `len(hidden_states)` to be multiple of `attention_window`. Padding to `attention_window` happens in LongformerModel.forward to avoid redoing the padding on each layer. - The `attention_mask` is changed in `BertModel.forward` from 0, 1, 2 to -ve: no attention - - 0: local attention - +ve: global attention + The `attention_mask` is changed in :meth:`LongformerModel.forward` from 0, 1, 2 to: + * -10000: no attention + * 0: local attention + * +10000: global attention """ # retrieve input args ( diff --git a/src/transformers/models/longformer/modeling_longformer.py b/src/transformers/models/longformer/modeling_longformer.py index 0675b9f266..8ffaab5d79 100755 --- a/src/transformers/models/longformer/modeling_longformer.py +++ b/src/transformers/models/longformer/modeling_longformer.py @@ -561,11 +561,11 @@ class LongformerSelfAttention(nn.Module): :class:`LongformerSelfAttention` expects `len(hidden_states)` to be multiple of `attention_window`. Padding to `attention_window` happens in :meth:`LongformerModel.forward` to avoid redoing the padding on each layer. - The `attention_mask` is changed in :meth:`BertModel.forward` from 0, 1, 2 to -ve: no attention - - 0: local attention - +ve: global attention + The `attention_mask` is changed in :meth:`LongformerModel.forward` from 0, 1, 2 to: + * -10000: no attention + * 0: local attention + * +10000: global attention """ hidden_states = hidden_states.transpose(0, 1) diff --git a/src/transformers/models/longformer/modeling_tf_longformer.py b/src/transformers/models/longformer/modeling_tf_longformer.py index 5afe0bf05b..e429f58416 100644 --- a/src/transformers/models/longformer/modeling_tf_longformer.py +++ b/src/transformers/models/longformer/modeling_tf_longformer.py @@ -768,11 +768,11 @@ class TFLongformerSelfAttention(tf.keras.layers.Layer): LongformerSelfAttention expects `len(hidden_states)` to be multiple of `attention_window`. Padding to `attention_window` happens in LongformerModel.forward to avoid redoing the padding on each layer. - The `attention_mask` is changed in `BertModel.forward` from 0, 1, 2 to -ve: no attention - - 0: local attention - +ve: global attention + The `attention_mask` is changed in :meth:`LongformerModel.forward` from 0, 1, 2 to: + * -10000: no attention + * 0: local attention + * +10000: global attention """ # retrieve input args ( diff --git a/src/transformers/testing_utils.py b/src/transformers/testing_utils.py index defb68b397..aa92b1771e 100644 --- a/src/transformers/testing_utils.py +++ b/src/transformers/testing_utils.py @@ -522,6 +522,7 @@ class CaptureLogger: Context manager to capture `logging` streams Args: + - logger: 'logging` logger object Results: @@ -851,9 +852,10 @@ def pytest_terminal_summary_main(tr, id): there. Args: + - tr: `terminalreporter` passed from `conftest.py` - - id: unique id like `tests` or `examples` that will be incorporated into the final reports - filenames - this is needed as some jobs have multiple runs of pytest, so we can't have them overwrite each other. + - id: unique id like `tests` or `examples` that will be incorporated into the final reports filenames - this is + needed as some jobs have multiple runs of pytest, so we can't have them overwrite each other. NB: this functions taps into a private _pytest API and while unlikely, it could break should pytest do internal changes - also it calls default internal methods of terminalreporter which diff --git a/src/transformers/trainer_utils.py b/src/transformers/trainer_utils.py index da59d534b1..ff41391437 100644 --- a/src/transformers/trainer_utils.py +++ b/src/transformers/trainer_utils.py @@ -191,6 +191,7 @@ def speed_metrics(split, start_time, num_samples=None): should be run immediately after the operation to be measured has completed. Args: + - split: name to prefix metric (like train, eval, test...) - start_time: operation start time - num_samples: number of samples processed diff --git a/utils/style_doc.py b/utils/style_doc.py index 6469e602e8..da251b8fb3 100644 --- a/utils/style_doc.py +++ b/utils/style_doc.py @@ -42,7 +42,7 @@ DOC_SPECIAL_WORD = [ # Matches any declaration of textual block, like `.. note::`. (ignore case to avoid writing all versions in the list) _re_textual_blocks = re.compile(r"^\s*\.\.\s+(" + "|".join(TEXTUAL_BLOCKS) + r")\s*::\s*$", re.IGNORECASE) # Matches list introduction in rst. -_re_list = re.compile(r"^(\s*-\s+|\s*\*\s+|\s*\d+.\s+)") +_re_list = re.compile(r"^(\s*-\s+|\s*\*\s+|\s*\d+\.\s+)") # Matches the indent in a line. _re_indent = re.compile(r"^(\s*)\S") # Matches a table declaration in rst. @@ -355,10 +355,34 @@ rst_styler = CodeStyler() doc_styler = DocstringStyler() +def _add_new_lines_before_list(text): + """Add a new empty line before a list begins.""" + lines = text.split("\n") + new_lines = [] + in_list = False + for idx, line in enumerate(lines): + # Detect if the line is the start of a new list. + if _re_list.search(line) is not None and not in_list: + current_indent = get_indent(line) + in_list = True + # If the line before is non empty, add an extra new line. + if idx > 0 and len(lines[idx - 1]) != 0: + new_lines.append("") + # Detect if we're out of the current list. + if in_list and not line.startswith(current_indent) and _re_list.search(line) is None: + in_list = False + new_lines.append(line) + return "\n".join(new_lines) + + def style_rst_file(doc_file, max_len=119, check_only=False): """ Style one rst file `doc_file` to `max_len`.""" with open(doc_file, "r", encoding="utf-8", newline="\n") as f: doc = f.read() + + # Add missing new lines before lists + doc = _add_new_lines_before_list(doc) + # Style clean_doc = rst_styler.style(doc, max_len=max_len) diff = clean_doc != doc @@ -391,6 +415,8 @@ def style_docstring(docstring, max_len=119): # Add missing new lines before Args/Returns etc. docstring = _re_any_doc_special_word.sub(r"\n\n\1\2\3\n", docstring) + # Add missing new lines before lists + docstring = _add_new_lines_before_list(docstring) # Style styled_doc = doc_styler.style(docstring, max_len=max_len, min_indent=indent)