From d03695f3a2c7abe9f790dc86492697dd7d9e539e Mon Sep 17 00:00:00 2001 From: Patrick von Platen Date: Fri, 26 Feb 2021 17:53:28 +0300 Subject: [PATCH] [LED] Correct Docs (#10419) * correct docs * correct tf model docs as well --- src/transformers/models/led/modeling_led.py | 13 +++++++++++-- src/transformers/models/led/modeling_tf_led.py | 15 ++++++++++++--- 2 files changed, 23 insertions(+), 5 deletions(-) diff --git a/src/transformers/models/led/modeling_led.py b/src/transformers/models/led/modeling_led.py index 9d8db25796..e055ac2142 100755 --- a/src/transformers/models/led/modeling_led.py +++ b/src/transformers/models/led/modeling_led.py @@ -1516,8 +1516,17 @@ LED_INPUTS_DOCSTRING = r""" `What are attention masks? <../glossary.html#attention-mask>`__ decoder_input_ids (:obj:`torch.LongTensor` of shape :obj:`(batch_size, target_sequence_length)`, `optional`): - Provide for translation and summarization training. By default, the model will create this tensor by - shifting the :obj:`input_ids` to the right, following the paper. + Indices of decoder input sequence tokens in the vocabulary. + + Indices can be obtained using :class:`~transformers.LEDTokenizer`. See + :meth:`transformers.PreTrainedTokenizer.encode` and :meth:`transformers.PreTrainedTokenizer.__call__` for + details. + + `What are input IDs? <../glossary.html#input-ids>`__ + + LED uses the :obj:`eos_token_id` as the starting token for :obj:`decoder_input_ids` generation. If + :obj:`past_key_values` is used, optionally only the last :obj:`decoder_input_ids` have to be input (see + :obj:`past_key_values`). decoder_attention_mask (:obj:`torch.LongTensor` of shape :obj:`(batch_size, target_sequence_length)`, `optional`): Default behavior: generate a tensor that ignores pad tokens in :obj:`decoder_input_ids`. Causal mask will also be used by default.
diff --git a/src/transformers/models/led/modeling_tf_led.py b/src/transformers/models/led/modeling_tf_led.py index cdc5619671..6ed546ec53 100644 --- a/src/transformers/models/led/modeling_tf_led.py +++ b/src/transformers/models/led/modeling_tf_led.py @@ -1533,9 +1533,18 @@ LED_INPUTS_DOCSTRING = r""" - 0 for tokens that are **masked**. `What are attention masks? <../glossary.html#attention-mask>`__ - decoder_input_ids (:obj:`tf.Tensor` of shape :obj:`(batch_size, target_sequence_length)`, `optional`): - Provide for translation and summarization training. By default, the model will create this tensor by - shifting the input_ids right, following the paper. + decoder_input_ids (:obj:`tf.Tensor` of shape :obj:`(batch_size, target_sequence_length)`, `optional`): + Indices of decoder input sequence tokens in the vocabulary. + + Indices can be obtained using :class:`~transformers.LEDTokenizer`. See + :meth:`transformers.PreTrainedTokenizer.encode` and :meth:`transformers.PreTrainedTokenizer.__call__` for + details. + + `What are input IDs? <../glossary.html#input-ids>`__ + + LED uses the :obj:`eos_token_id` as the starting token for :obj:`decoder_input_ids` generation. If + :obj:`past_key_values` is used, optionally only the last :obj:`decoder_input_ids` have to be input (see + :obj:`past_key_values`). decoder_attention_mask (:obj:`tf.Tensor` of shape :obj:`(batch_size, target_sequence_length)`, `optional`): will be made by default and ignore pad tokens. It is not recommended to set this for most use cases. head_mask (:obj:`tf.Tensor` of shape :obj:`(encoder_layers, encoder_attention_heads)`, `optional`):