From 9bd67ac7bba33329ad50e519908bca4c1ad4686f Mon Sep 17 00:00:00 2001 From: Suraj Patil Date: Thu, 12 May 2022 19:25:16 +0100 Subject: [PATCH] update BART docs (#17212) --- src/transformers/models/bart/modeling_bart.py | 6 +++--- .../models/bigbird_pegasus/modeling_bigbird_pegasus.py | 4 ++-- src/transformers/models/opt/modeling_opt.py | 6 +++--- .../models/speech_to_text/modeling_speech_to_text.py | 6 +++--- .../modeling_{{cookiecutter.lowercase_modelname}}.py | 2 +- 5 files changed, 12 insertions(+), 12 deletions(-) diff --git a/src/transformers/models/bart/modeling_bart.py b/src/transformers/models/bart/modeling_bart.py index 9b8e4399f2..fa9b659bee 100755 --- a/src/transformers/models/bart/modeling_bart.py +++ b/src/transformers/models/bart/modeling_bart.py @@ -624,9 +624,9 @@ BART_INPUTS_DOCSTRING = r""" Default behavior: generate a tensor that ignores pad tokens in `decoder_input_ids`. Causal mask will also be used by default. - If you want to change padding behavior, you should read [`modeling_bart._prepare_decoder_inputs`] and - modify to your needs. See diagram 1 in [the paper](https://arxiv.org/abs/1910.13461) for more information - on the default strategy. + If you want to change padding behavior, you should read [`modeling_bart._prepare_decoder_attention_mask`] + and modify to your needs. See diagram 1 in [the paper](https://arxiv.org/abs/1910.13461) for more + information on the default strategy. head_mask (`torch.Tensor` of shape `(encoder_layers, encoder_attention_heads)`, *optional*): Mask to nullify selected heads of the attention modules in the encoder. Mask values selected in `[0, 1]`: diff --git a/src/transformers/models/bigbird_pegasus/modeling_bigbird_pegasus.py b/src/transformers/models/bigbird_pegasus/modeling_bigbird_pegasus.py index 248d4b480c..4a4bb96375 100755 --- a/src/transformers/models/bigbird_pegasus/modeling_bigbird_pegasus.py +++ b/src/transformers/models/bigbird_pegasus/modeling_bigbird_pegasus.py @@ -1685,8 +1685,8 @@ BIGBIRD_PEGASUS_INPUTS_DOCSTRING = r""" be used by default. If you want to change padding behavior, you should read - [`modeling_bigbird_pegasus._prepare_decoder_inputs`] and modify to your needs. See diagram 1 in [the - paper](https://arxiv.org/abs/1910.13461) for more information on the default strategy. + [`modeling_bigbird_pegasus._prepare_decoder_attention_mask`] and modify to your needs. See diagram 1 in + [the paper](https://arxiv.org/abs/1910.13461) for more information on the default strategy. decoder_head_mask (`torch.Tensor` of shape `(num_layers, num_heads)`, *optional*): Mask to nullify selected heads of the attention modules in the decoder. Mask values selected in `[0, 1]`: diff --git a/src/transformers/models/opt/modeling_opt.py b/src/transformers/models/opt/modeling_opt.py index 582853f042..f0e9770c6c 100644 --- a/src/transformers/models/opt/modeling_opt.py +++ b/src/transformers/models/opt/modeling_opt.py @@ -464,9 +464,9 @@ OPT_INPUTS_DOCSTRING = r""" If `past_key_values` is used, optionally only the last `decoder_input_ids` have to be input (see `past_key_values`). - If you want to change padding behavior, you should read [`modeling_opt._prepare_decoder_inputs`] and modify - to your needs. See diagram 1 in [the paper](https://arxiv.org/abs/1910.13461) for more information on the - default strategy. + If you want to change padding behavior, you should read [`modeling_opt._prepare_decoder_attention_mask`] + and modify to your needs. See diagram 1 in [the paper](https://arxiv.org/abs/1910.13461) for more + information on the default strategy. head_mask (`torch.Tensor` of shape `(encoder_layers, encoder_attention_heads)`, *optional*): Mask to nullify selected heads of the attention modules in the encoder. Mask values selected in `[0, 1]`: diff --git a/src/transformers/models/speech_to_text/modeling_speech_to_text.py b/src/transformers/models/speech_to_text/modeling_speech_to_text.py index 7c2e183537..8f3062e6c7 100755 --- a/src/transformers/models/speech_to_text/modeling_speech_to_text.py +++ b/src/transformers/models/speech_to_text/modeling_speech_to_text.py @@ -625,9 +625,9 @@ SPEECH_TO_TEXT_INPUTS_DOCSTRING = r""" Default behavior: generate a tensor that ignores pad tokens in `decoder_input_ids`. Causal mask will also be used by default. - If you want to change padding behavior, you should read [`modeling_speech_to_text._prepare_decoder_inputs`] - and modify to your needs. See diagram 1 in [the paper](https://arxiv.org/abs/1910.13461) for more - information on the default strategy. + If you want to change padding behavior, you should read + [`modeling_speech_to_text._prepare_decoder_attention_mask`] and modify to your needs. See diagram 1 in [the + paper](https://arxiv.org/abs/1910.13461) for more information on the default strategy. head_mask (`torch.Tensor` of shape `(encoder_layers, encoder_attention_heads)`, *optional*): Mask to nullify selected heads of the attention modules in the encoder. Mask values selected in `[0, 1]`: diff --git a/templates/adding_a_new_model/cookiecutter-template-{{cookiecutter.modelname}}/modeling_{{cookiecutter.lowercase_modelname}}.py b/templates/adding_a_new_model/cookiecutter-template-{{cookiecutter.modelname}}/modeling_{{cookiecutter.lowercase_modelname}}.py index 938cbea65c..7d09a77b70 100755 --- a/templates/adding_a_new_model/cookiecutter-template-{{cookiecutter.modelname}}/modeling_{{cookiecutter.lowercase_modelname}}.py +++ b/templates/adding_a_new_model/cookiecutter-template-{{cookiecutter.modelname}}/modeling_{{cookiecutter.lowercase_modelname}}.py @@ -2100,7 +2100,7 @@ class {{cookiecutter.camelcase_modelname}}PreTrainedModel(PreTrainedModel): Default behavior: generate a tensor that ignores pad tokens in `decoder_input_ids`. Causal mask will also be used by default. - If you want to change padding behavior, you should read [`modeling_{{cookiecutter.lowercase_modelname}}._prepare_decoder_inputs`] and + If you want to change padding behavior, you should read [`modeling_{{cookiecutter.lowercase_modelname}}._prepare_decoder_attention_mask`] and modify to your needs. See diagram 1 in [the paper](https://arxiv.org/abs/1910.13461) for more information on the default strategy. head_mask (`torch.Tensor` of shape `(encoder_layers, encoder_attention_heads)`, *optional*):