From 9931f817b75ecb2c8bb08b6e9d4cbec4b0933935 Mon Sep 17 00:00:00 2001 From: ZhuBaohe Date: Mon, 15 Jun 2020 03:36:14 +0800 Subject: [PATCH] fix (#4976) --- src/transformers/modeling_albert.py | 2 +- src/transformers/modeling_bart.py | 2 +- src/transformers/modeling_bert.py | 2 +- src/transformers/modeling_camembert.py | 2 +- src/transformers/modeling_ctrl.py | 2 +- src/transformers/modeling_distilbert.py | 2 +- src/transformers/modeling_electra.py | 2 +- src/transformers/modeling_flaubert.py | 2 +- src/transformers/modeling_gpt2.py | 2 +- src/transformers/modeling_longformer.py | 2 +- src/transformers/modeling_mmbt.py | 2 +- src/transformers/modeling_openai.py | 6 ++---- src/transformers/modeling_reformer.py | 2 +- src/transformers/modeling_roberta.py | 2 +- src/transformers/modeling_t5.py | 2 +- src/transformers/modeling_tf_albert.py | 2 +- src/transformers/modeling_tf_bert.py | 2 +- src/transformers/modeling_tf_camembert.py | 2 +- src/transformers/modeling_tf_ctrl.py | 2 +- src/transformers/modeling_tf_distilbert.py | 2 +- src/transformers/modeling_tf_electra.py | 2 +- src/transformers/modeling_tf_flaubert.py | 2 +- src/transformers/modeling_tf_gpt2.py | 2 +- src/transformers/modeling_tf_openai.py | 4 ++-- src/transformers/modeling_tf_roberta.py | 2 +- src/transformers/modeling_tf_t5.py | 2 +- src/transformers/modeling_tf_transfo_xl.py | 2 +- src/transformers/modeling_tf_xlm.py | 2 +- src/transformers/modeling_tf_xlm_roberta.py | 2 +- src/transformers/modeling_tf_xlnet.py | 2 +- src/transformers/modeling_transfo_xl.py | 2 +- src/transformers/modeling_xlm.py | 2 +- src/transformers/modeling_xlm_roberta.py | 2 +- src/transformers/modeling_xlnet.py | 2 +- 34 files changed, 36 insertions(+), 38 deletions(-) diff --git a/src/transformers/modeling_albert.py b/src/transformers/modeling_albert.py index cca9633db4..8aca23da9f 100644 --- a/src/transformers/modeling_albert.py +++ b/src/transformers/modeling_albert.py @@ -423,7 +423,7 @@ ALBERT_INPUTS_DOCSTRING = r""" Optionally, instead of passing :obj:`input_ids` you can choose to directly pass an embedded representation. This is useful if you want more control over how to convert `input_ids` indices into associated vectors than the model's internal embedding lookup matrix. - output_attentions (:obj:`bool`, `optional`, defaults to `:obj:`None`): + output_attentions (:obj:`bool`, `optional`, defaults to :obj:`None`): If set to ``True``, the attentions tensors of all attention layers are returned. See ``attentions`` under returned tensors for more detail. """ diff --git a/src/transformers/modeling_bart.py b/src/transformers/modeling_bart.py index 09a52eae9f..9eab989ab2 100644 --- a/src/transformers/modeling_bart.py +++ b/src/transformers/modeling_bart.py @@ -90,7 +90,7 @@ BART_INPUTS_DOCSTRING = r""" Default behavior: generate a tensor that ignores pad tokens in decoder_input_ids. Causal mask will also be used by default. If you want to change padding behavior, you should read :func:`~transformers.modeling_bart._prepare_decoder_inputs` and modify. See diagram 1 in the paper for more info on the default strategy - output_attentions (:obj:`bool`, `optional`, defaults to `:obj:`None`): + output_attentions (:obj:`bool`, `optional`, defaults to :obj:`None`): If set to ``True``, the attentions tensors of all attention layers are returned. See ``attentions`` under returned tensors for more detail. """ diff --git a/src/transformers/modeling_bert.py b/src/transformers/modeling_bert.py index a06417778f..18f9bfa088 100644 --- a/src/transformers/modeling_bert.py +++ b/src/transformers/modeling_bert.py @@ -594,7 +594,7 @@ BERT_INPUTS_DOCSTRING = r""" is used in the cross-attention if the model is configured as a decoder. Mask values selected in ``[0, 1]``: ``1`` for tokens that are NOT MASKED, ``0`` for MASKED tokens. - output_attentions (:obj:`bool`, `optional`, defaults to `:obj:`None`): + output_attentions (:obj:`bool`, `optional`, defaults to :obj:`None`): If set to ``True``, the attentions tensors of all attention layers are returned. See ``attentions`` under returned tensors for more detail. """ diff --git a/src/transformers/modeling_camembert.py b/src/transformers/modeling_camembert.py index f82d7f41c6..00dd5a74d0 100644 --- a/src/transformers/modeling_camembert.py +++ b/src/transformers/modeling_camembert.py @@ -49,7 +49,7 @@ CAMEMBERT_START_DOCSTRING = r""" model. Initializing with a config file does not load the weights associated with the model, only the configuration. Check out the :meth:`~transformers.PreTrainedModel.from_pretrained` method to load the model weights. - output_attentions (:obj:`bool`, `optional`, defaults to `:obj:`None`): + output_attentions (:obj:`bool`, `optional`, defaults to :obj:`None`): If set to ``True``, the attentions tensors of all attention layers are returned. See ``attentions`` under returned tensors for more detail. """ diff --git a/src/transformers/modeling_ctrl.py b/src/transformers/modeling_ctrl.py index 1e16c4c56d..cbcf733031 100644 --- a/src/transformers/modeling_ctrl.py +++ b/src/transformers/modeling_ctrl.py @@ -284,7 +284,7 @@ CTRL_INPUTS_DOCSTRING = r""" use_cache (:obj:`bool`): If `use_cache` is True, `past` key value states are returned and can be used to speed up decoding (see `past`). Defaults to `True`. - output_attentions (:obj:`bool`, `optional`, defaults to `:obj:`None`): + output_attentions (:obj:`bool`, `optional`, defaults to :obj:`None`): If set to ``True``, the attentions tensors of all attention layers are returned. See ``attentions`` under returned tensors for more detail. """ diff --git a/src/transformers/modeling_distilbert.py b/src/transformers/modeling_distilbert.py index 522b5bef45..f802573d13 100644 --- a/src/transformers/modeling_distilbert.py +++ b/src/transformers/modeling_distilbert.py @@ -377,7 +377,7 @@ DISTILBERT_INPUTS_DOCSTRING = r""" Optionally, instead of passing :obj:`input_ids` you can choose to directly pass an embedded representation. This is useful if you want more control over how to convert `input_ids` indices into associated vectors than the model's internal embedding lookup matrix. - output_attentions (:obj:`bool`, `optional`, defaults to `:obj:`None`): + output_attentions (:obj:`bool`, `optional`, defaults to :obj:`None`): If set to ``True``, the attentions tensors of all attention layers are returned. See ``attentions`` under returned tensors for more detail. """ diff --git a/src/transformers/modeling_electra.py b/src/transformers/modeling_electra.py index df8ef8c10e..d4aa2eb50f 100644 --- a/src/transformers/modeling_electra.py +++ b/src/transformers/modeling_electra.py @@ -220,7 +220,7 @@ ELECTRA_INPUTS_DOCSTRING = r""" is used in the cross-attention if the model is configured as a decoder. Mask values selected in ``[0, 1]``: ``1`` for tokens that are NOT MASKED, ``0`` for MASKED tokens. - output_attentions (:obj:`bool`, `optional`, defaults to `:obj:`None`): + output_attentions (:obj:`bool`, `optional`, defaults to :obj:`None`): If set to ``True``, the attentions tensors of all attention layers are returned. See ``attentions`` under returned tensors for more detail. """ diff --git a/src/transformers/modeling_flaubert.py b/src/transformers/modeling_flaubert.py index bf81199dc3..f7af51cc44 100644 --- a/src/transformers/modeling_flaubert.py +++ b/src/transformers/modeling_flaubert.py @@ -100,7 +100,7 @@ FLAUBERT_INPUTS_DOCSTRING = r""" Optionally, instead of passing :obj:`input_ids` you can choose to directly pass an embedded representation. This is useful if you want more control over how to convert `input_ids` indices into associated vectors than the model's internal embedding lookup matrix. - output_attentions (:obj:`bool`, `optional`, defaults to `:obj:`None`): + output_attentions (:obj:`bool`, `optional`, defaults to :obj:`None`): If set to ``True``, the attentions tensors of all attention layers are returned. See ``attentions`` under returned tensors for more detail. """ diff --git a/src/transformers/modeling_gpt2.py b/src/transformers/modeling_gpt2.py index a29de32f38..c14556e9ef 100644 --- a/src/transformers/modeling_gpt2.py +++ b/src/transformers/modeling_gpt2.py @@ -335,7 +335,7 @@ GPT2_INPUTS_DOCSTRING = r""" If `past` is used, optionally only the last `inputs_embeds` have to be input (see `past`). use_cache (:obj:`bool`): If `use_cache` is True, `past` key value states are returned and can be used to speed up decoding (see `past`). Defaults to `True`. - output_attentions (:obj:`bool`, `optional`, defaults to `:obj:`None`): + output_attentions (:obj:`bool`, `optional`, defaults to :obj:`None`): If set to ``True``, the attentions tensors of all attention layers are returned. See ``attentions`` under returned tensors for more detail. """ diff --git a/src/transformers/modeling_longformer.py b/src/transformers/modeling_longformer.py index a6ba9f739e..7d70c311c3 100644 --- a/src/transformers/modeling_longformer.py +++ b/src/transformers/modeling_longformer.py @@ -487,7 +487,7 @@ LONGFORMER_INPUTS_DOCSTRING = r""" Optionally, instead of passing :obj:`input_ids` you can choose to directly pass an embedded representation. This is useful if you want more control over how to convert `input_ids` indices into associated vectors than the model's internal embedding lookup matrix. - output_attentions (:obj:`bool`, `optional`, defaults to `:obj:`None`): + output_attentions (:obj:`bool`, `optional`, defaults to :obj:`None`): If set to ``True``, the attentions tensors of all attention layers are returned. See ``attentions`` under returned tensors for more detail. """ diff --git a/src/transformers/modeling_mmbt.py b/src/transformers/modeling_mmbt.py index a226eadbe3..8ee339df4a 100644 --- a/src/transformers/modeling_mmbt.py +++ b/src/transformers/modeling_mmbt.py @@ -141,7 +141,7 @@ MMBT_INPUTS_DOCSTRING = r""" Inputs: is used in the cross-attention if the model is configured as a decoder. Mask values selected in ``[0, 1]``: ``1`` for tokens that are NOT MASKED, ``0`` for MASKED tokens. - output_attentions (:obj:`bool`, `optional`, defaults to `:obj:`None`): + output_attentions (:obj:`bool`, `optional`, defaults to :obj:`None`): If set to ``True``, the attentions tensors of all attention layers are returned. See ``attentions`` under returned tensors for more detail. """ diff --git a/src/transformers/modeling_openai.py b/src/transformers/modeling_openai.py index d66f4a7727..3965c7050c 100644 --- a/src/transformers/modeling_openai.py +++ b/src/transformers/modeling_openai.py @@ -322,9 +322,7 @@ OPENAI_GPT_INPUTS_DOCSTRING = r""" Optionally, instead of passing :obj:`input_ids` you can choose to directly pass an embedded representation. This is useful if you want more control over how to convert `input_ids` indices into associated vectors than the model's internal embedding lookup matrix. - output_attentions (:obj:`bool`, `optional`, defaults to :obj:`False`): - Should the model returns attentions weights. - output_attentions (:obj:`bool`, `optional`, defaults to `:obj:`None`): + output_attentions (:obj:`bool`, `optional`, defaults to :obj:`None`): If set to ``True``, the attentions tensors of all attention layers are returned. See ``attentions`` under returned tensors for more detail. """ @@ -607,7 +605,7 @@ class OpenAIGPTDoubleHeadsModel(OpenAIGPTPreTrainedModel): r""" mc_token_ids (:obj:`torch.LongTensor` of shape :obj:`(batch_size, num_choices)`, `optional`, default to index of the last token of the input) Index of the classification token in each input sequence. - Selected in the range ``[0, input_ids.size(-1) - 1[``. + Selected in the range ``[0, input_ids.size(-1) - 1]``. labels (:obj:`torch.LongTensor` of shape :obj:`(batch_size, sequence_length)`, `optional`, defaults to :obj:`None`) Labels for language modeling. Note that the labels **are shifted** inside the model, i.e. you can set ``labels = input_ids`` diff --git a/src/transformers/modeling_reformer.py b/src/transformers/modeling_reformer.py index d003f8335d..b1e1df54e5 100644 --- a/src/transformers/modeling_reformer.py +++ b/src/transformers/modeling_reformer.py @@ -1505,7 +1505,7 @@ REFORMER_INPUTS_DOCSTRING = r""" bucketing. Setting `num_hashes` overwrites the default `num_hashes` defined in `config.num_hashes`. For more information, see `num_hashes` in :class:`transformers.ReformerConfig`. - output_attentions (:obj:`bool`, `optional`, defaults to `:obj:`None`): + output_attentions (:obj:`bool`, `optional`, defaults to :obj:`None`): If set to ``True``, the attentions tensors of all attention layers are returned. See ``attentions`` under returned tensors for more detail. """ diff --git a/src/transformers/modeling_roberta.py b/src/transformers/modeling_roberta.py index 579be366bd..6a7c8a86e5 100644 --- a/src/transformers/modeling_roberta.py +++ b/src/transformers/modeling_roberta.py @@ -130,7 +130,7 @@ ROBERTA_INPUTS_DOCSTRING = r""" Optionally, instead of passing :obj:`input_ids` you can choose to directly pass an embedded representation. This is useful if you want more control over how to convert `input_ids` indices into associated vectors than the model's internal embedding lookup matrix. - output_attentions (:obj:`bool`, `optional`, defaults to `:obj:`None`): + output_attentions (:obj:`bool`, `optional`, defaults to :obj:`None`): If set to ``True``, the attentions tensors of all attention layers are returned. See ``attentions`` under returned tensors for more detail. """ diff --git a/src/transformers/modeling_t5.py b/src/transformers/modeling_t5.py index f04f4ab4e6..76ce73d4af 100644 --- a/src/transformers/modeling_t5.py +++ b/src/transformers/modeling_t5.py @@ -841,7 +841,7 @@ T5_INPUTS_DOCSTRING = r""" Mask to nullify selected heads of the self-attention modules. Mask values selected in ``[0, 1]``: ``1`` indicates the head is **not masked**, ``0`` indicates the head is **masked**. - output_attentions (:obj:`bool`, `optional`, defaults to `:obj:`None`): + output_attentions (:obj:`bool`, `optional`, defaults to :obj:`None`): If set to ``True``, the attentions tensors of all attention layers are returned. See ``attentions`` under returned tensors for more detail. """ diff --git a/src/transformers/modeling_tf_albert.py b/src/transformers/modeling_tf_albert.py index 5e2ec2e593..621f4cef80 100644 --- a/src/transformers/modeling_tf_albert.py +++ b/src/transformers/modeling_tf_albert.py @@ -688,7 +688,7 @@ ALBERT_INPUTS_DOCSTRING = r""" training (:obj:`boolean`, `optional`, defaults to :obj:`False`): Whether to activate dropout modules (if set to :obj:`True`) during training or to de-activate them (if set to :obj:`False`) for evaluation. - output_attentions (:obj:`bool`, `optional`, defaults to `:obj:`None`): + output_attentions (:obj:`bool`, `optional`, defaults to :obj:`None`): If set to ``True``, the attentions tensors of all attention layers are returned. See ``attentions`` under returned tensors for more detail. """ diff --git a/src/transformers/modeling_tf_bert.py b/src/transformers/modeling_tf_bert.py index 2a1b7d652f..c4a5b9d5c6 100644 --- a/src/transformers/modeling_tf_bert.py +++ b/src/transformers/modeling_tf_bert.py @@ -682,7 +682,7 @@ BERT_INPUTS_DOCSTRING = r""" training (:obj:`boolean`, `optional`, defaults to :obj:`False`): Whether to activate dropout modules (if set to :obj:`True`) during training or to de-activate them (if set to :obj:`False`) for evaluation. - output_attentions (:obj:`bool`, `optional`, defaults to `:obj:`None`): + output_attentions (:obj:`bool`, `optional`, defaults to :obj:`None`): If set to ``True``, the attentions tensors of all attention layers are returned. See ``attentions`` under returned tensors for more detail. """ diff --git a/src/transformers/modeling_tf_camembert.py b/src/transformers/modeling_tf_camembert.py index b7a4ee55b3..1fefe7b3bb 100644 --- a/src/transformers/modeling_tf_camembert.py +++ b/src/transformers/modeling_tf_camembert.py @@ -62,7 +62,7 @@ CAMEMBERT_START_DOCSTRING = r""" config (:class:`~transformers.CamembertConfig`): Model configuration class with all the parameters of the model. Initializing with a config file does not load the weights associated with the model, only the configuration. Check out the :meth:`~transformers.PreTrainedModel.from_pretrained` method to load the model weights. - output_attentions (:obj:`bool`, `optional`, defaults to `:obj:`None`): + output_attentions (:obj:`bool`, `optional`, defaults to :obj:`None`): If set to ``True``, the attentions tensors of all attention layers are returned. See ``attentions`` under returned tensors for more detail. """ diff --git a/src/transformers/modeling_tf_ctrl.py b/src/transformers/modeling_tf_ctrl.py index 220614befe..3230750121 100644 --- a/src/transformers/modeling_tf_ctrl.py +++ b/src/transformers/modeling_tf_ctrl.py @@ -464,7 +464,7 @@ CTRL_INPUTS_DOCSTRING = r""" training (:obj:`boolean`, `optional`, defaults to :obj:`False`): Whether to activate dropout modules (if set to :obj:`True`) during training or to de-activate them (if set to :obj:`False`) for evaluation. - output_attentions (:obj:`bool`, `optional`, defaults to `:obj:`None`): + output_attentions (:obj:`bool`, `optional`, defaults to :obj:`None`): If set to ``True``, the attentions tensors of all attention layers are returned. See ``attentions`` under returned tensors for more detail. """ diff --git a/src/transformers/modeling_tf_distilbert.py b/src/transformers/modeling_tf_distilbert.py index 8a45bc1324..76e5f42b6f 100644 --- a/src/transformers/modeling_tf_distilbert.py +++ b/src/transformers/modeling_tf_distilbert.py @@ -549,7 +549,7 @@ DISTILBERT_INPUTS_DOCSTRING = r""" Whether to activate dropout modules (if set to :obj:`True`) during training or to de-activate them (if set to :obj:`False`) for evaluation. - output_attentions (:obj:`bool`, `optional`, defaults to `:obj:`None`): + output_attentions (:obj:`bool`, `optional`, defaults to :obj:`None`): If set to ``True``, the attentions tensors of all attention layers are returned. See ``attentions`` under returned tensors for more detail. """ diff --git a/src/transformers/modeling_tf_electra.py b/src/transformers/modeling_tf_electra.py index 9f90673f73..3a100eb616 100644 --- a/src/transformers/modeling_tf_electra.py +++ b/src/transformers/modeling_tf_electra.py @@ -348,7 +348,7 @@ ELECTRA_INPUTS_DOCSTRING = r""" Whether to activate dropout modules (if set to :obj:`True`) during training or to de-activate them (if set to :obj:`False`) for evaluation. - output_attentions (:obj:`bool`, `optional`, defaults to `:obj:`None`): + output_attentions (:obj:`bool`, `optional`, defaults to :obj:`None`): If set to ``True``, the attentions tensors of all attention layers are returned. See ``attentions`` under returned tensors for more detail. """ diff --git a/src/transformers/modeling_tf_flaubert.py b/src/transformers/modeling_tf_flaubert.py index 3736eb5d21..08f6601350 100644 --- a/src/transformers/modeling_tf_flaubert.py +++ b/src/transformers/modeling_tf_flaubert.py @@ -100,7 +100,7 @@ FLAUBERT_INPUTS_DOCSTRING = r""" Optionally, instead of passing :obj:`input_ids` you can choose to directly pass an embedded representation. This is useful if you want more control over how to convert `input_ids` indices into associated vectors than the model's internal embedding lookup matrix. - output_attentions (:obj:`bool`, `optional`, defaults to `:obj:`None`): + output_attentions (:obj:`bool`, `optional`, defaults to :obj:`None`): If set to ``True``, the attentions tensors of all attention layers are returned. See ``attentions`` under returned tensors for more detail. """ diff --git a/src/transformers/modeling_tf_gpt2.py b/src/transformers/modeling_tf_gpt2.py index 11dd6a18bb..9e715477e6 100644 --- a/src/transformers/modeling_tf_gpt2.py +++ b/src/transformers/modeling_tf_gpt2.py @@ -467,7 +467,7 @@ GPT2_INPUTS_DOCSTRING = r""" training (:obj:`boolean`, `optional`, defaults to :obj:`False`): Whether to activate dropout modules (if set to :obj:`True`) during training or to de-activate them (if set to :obj:`False`) for evaluation. - output_attentions (:obj:`bool`, `optional`, defaults to `:obj:`None`): + output_attentions (:obj:`bool`, `optional`, defaults to :obj:`None`): If set to ``True``, the attentions tensors of all attention layers are returned. See ``attentions`` under returned tensors for more detail. """ diff --git a/src/transformers/modeling_tf_openai.py b/src/transformers/modeling_tf_openai.py index 50567ebff8..28294136a2 100644 --- a/src/transformers/modeling_tf_openai.py +++ b/src/transformers/modeling_tf_openai.py @@ -429,7 +429,7 @@ OPENAI_GPT_INPUTS_DOCSTRING = r""" training (:obj:`boolean`, `optional`, defaults to :obj:`False`): Whether to activate dropout modules (if set to :obj:`True`) during training or to de-activate them (if set to :obj:`False`) for evaluation. - output_attentions (:obj:`bool`, `optional`, defaults to `:obj:`None`): + output_attentions (:obj:`bool`, `optional`, defaults to :obj:`None`): If set to ``True``, the attentions tensors of all attention layers are returned. See ``attentions`` under returned tensors for more detail. """ @@ -568,7 +568,7 @@ class TFOpenAIGPTDoubleHeadsModel(TFOpenAIGPTPreTrainedModel): r""" mc_token_ids (:obj:`tf.Tensor` or :obj:`Numpy array` of shape :obj:`(batch_size, num_choices)`, `optional`, default to index of the last token of the input) Index of the classification token in each input sequence. - Selected in the range ``[0, input_ids.size(-1) - 1[``. + Selected in the range ``[0, input_ids.size(-1) - 1]``. Return: :obj:`tuple(tf.Tensor)` comprising various elements depending on the configuration (:class:`~transformers.OpenAIGPTConfig`) and inputs: diff --git a/src/transformers/modeling_tf_roberta.py b/src/transformers/modeling_tf_roberta.py index f81dc39077..00cadc1145 100644 --- a/src/transformers/modeling_tf_roberta.py +++ b/src/transformers/modeling_tf_roberta.py @@ -182,7 +182,7 @@ ROBERTA_INPUTS_DOCSTRING = r""" training (:obj:`boolean`, `optional`, defaults to :obj:`False`): Whether to activate dropout modules (if set to :obj:`True`) during training or to de-activate them (if set to :obj:`False`) for evaluation. - output_attentions (:obj:`bool`, `optional`, defaults to `:obj:`None`): + output_attentions (:obj:`bool`, `optional`, defaults to :obj:`None`): If set to ``True``, the attentions tensors of all attention layers are returned. See ``attentions`` under returned tensors for more detail. """ diff --git a/src/transformers/modeling_tf_t5.py b/src/transformers/modeling_tf_t5.py index dd9f44d455..515ee74265 100644 --- a/src/transformers/modeling_tf_t5.py +++ b/src/transformers/modeling_tf_t5.py @@ -855,7 +855,7 @@ T5_INPUTS_DOCSTRING = r""" Mask to nullify selected heads of the self-attention modules. Mask values selected in ``[0, 1]``: ``1`` indicates the head is **not masked**, ``0`` indicates the head is **masked**. - output_attentions (:obj:`bool`, `optional`, defaults to `:obj:`None`): + output_attentions (:obj:`bool`, `optional`, defaults to :obj:`None`): If set to ``True``, the attentions tensors of all attention layers are returned. See ``attentions`` under returned tensors for more detail. """ diff --git a/src/transformers/modeling_tf_transfo_xl.py b/src/transformers/modeling_tf_transfo_xl.py index ab40fea3f8..b55effafdc 100644 --- a/src/transformers/modeling_tf_transfo_xl.py +++ b/src/transformers/modeling_tf_transfo_xl.py @@ -692,7 +692,7 @@ TRANSFO_XL_INPUTS_DOCSTRING = r""" Optionally, instead of passing :obj:`input_ids` you can choose to directly pass an embedded representation. This is useful if you want more control over how to convert `input_ids` indices into associated vectors than the model's internal embedding lookup matrix. - output_attentions (:obj:`bool`, `optional`, defaults to `:obj:`None`): + output_attentions (:obj:`bool`, `optional`, defaults to :obj:`None`): If set to ``True``, the attentions tensors of all attention layers are returned. See ``attentions`` under returned tensors for more detail. """ diff --git a/src/transformers/modeling_tf_xlm.py b/src/transformers/modeling_tf_xlm.py index 50e0b151cc..53f3e699da 100644 --- a/src/transformers/modeling_tf_xlm.py +++ b/src/transformers/modeling_tf_xlm.py @@ -585,7 +585,7 @@ XLM_INPUTS_DOCSTRING = r""" Optionally, instead of passing :obj:`input_ids` you can choose to directly pass an embedded representation. This is useful if you want more control over how to convert `input_ids` indices into associated vectors than the model's internal embedding lookup matrix. - output_attentions (:obj:`bool`, `optional`, defaults to `:obj:`None`): + output_attentions (:obj:`bool`, `optional`, defaults to :obj:`None`): If set to ``True``, the attentions tensors of all attention layers are returned. See ``attentions`` under returned tensors for more detail. """ diff --git a/src/transformers/modeling_tf_xlm_roberta.py b/src/transformers/modeling_tf_xlm_roberta.py index ea56f4a781..5448595a7b 100644 --- a/src/transformers/modeling_tf_xlm_roberta.py +++ b/src/transformers/modeling_tf_xlm_roberta.py @@ -62,7 +62,7 @@ XLM_ROBERTA_START_DOCSTRING = r""" config (:class:`~transformers.XLMRobertaConfig`): Model configuration class with all the parameters of the model. Initializing with a config file does not load the weights associated with the model, only the configuration. Check out the :meth:`~transformers.PreTrainedModel.from_pretrained` method to load the model weights. - output_attentions (:obj:`bool`, `optional`, defaults to `:obj:`None`): + output_attentions (:obj:`bool`, `optional`, defaults to :obj:`None`): If set to ``True``, the attentions tensors of all attention layers are returned. See ``attentions`` under returned tensors for more detail. """ diff --git a/src/transformers/modeling_tf_xlnet.py b/src/transformers/modeling_tf_xlnet.py index 89aacb18bc..5532bcb2da 100644 --- a/src/transformers/modeling_tf_xlnet.py +++ b/src/transformers/modeling_tf_xlnet.py @@ -809,7 +809,7 @@ XLNET_INPUTS_DOCSTRING = r""" than the model's internal embedding lookup matrix. use_cache (:obj:`bool`): If `use_cache` is True, `mems` are returned and can be used to speed up decoding (see `mems`). Defaults to `True`. - output_attentions (:obj:`bool`, `optional`, defaults to `:obj:`None`): + output_attentions (:obj:`bool`, `optional`, defaults to :obj:`None`): If set to ``True``, the attentions tensors of all attention layers are returned. See ``attentions`` under returned tensors for more detail. """ diff --git a/src/transformers/modeling_transfo_xl.py b/src/transformers/modeling_transfo_xl.py index 663fffc940..216ea12507 100644 --- a/src/transformers/modeling_transfo_xl.py +++ b/src/transformers/modeling_transfo_xl.py @@ -622,7 +622,7 @@ TRANSFO_XL_INPUTS_DOCSTRING = r""" Optionally, instead of passing :obj:`input_ids` you can choose to directly pass an embedded representation. This is useful if you want more control over how to convert `input_ids` indices into associated vectors than the model's internal embedding lookup matrix. - output_attentions (:obj:`bool`, `optional`, defaults to `:obj:`None`): + output_attentions (:obj:`bool`, `optional`, defaults to :obj:`None`): If set to ``True``, the attentions tensors of all attention layers are returned. See ``attentions`` under returned tensors for more detail. """ diff --git a/src/transformers/modeling_xlm.py b/src/transformers/modeling_xlm.py index 054e9a05dd..d6b91266ae 100644 --- a/src/transformers/modeling_xlm.py +++ b/src/transformers/modeling_xlm.py @@ -302,7 +302,7 @@ XLM_INPUTS_DOCSTRING = r""" Optionally, instead of passing :obj:`input_ids` you can choose to directly pass an embedded representation. This is useful if you want more control over how to convert `input_ids` indices into associated vectors than the model's internal embedding lookup matrix. - output_attentions (:obj:`bool`, `optional`, defaults to `:obj:`None`): + output_attentions (:obj:`bool`, `optional`, defaults to :obj:`None`): If set to ``True``, the attentions tensors of all attention layers are returned. See ``attentions`` under returned tensors for more detail. """ diff --git a/src/transformers/modeling_xlm_roberta.py b/src/transformers/modeling_xlm_roberta.py index 361b40e9aa..cc157ff6f0 100644 --- a/src/transformers/modeling_xlm_roberta.py +++ b/src/transformers/modeling_xlm_roberta.py @@ -53,7 +53,7 @@ XLM_ROBERTA_START_DOCSTRING = r""" config (:class:`~transformers.XLMRobertaConfig`): Model configuration class with all the parameters of the model. Initializing with a config file does not load the weights associated with the model, only the configuration. Check out the :meth:`~transformers.PreTrainedModel.from_pretrained` method to load the model weights. - output_attentions (:obj:`bool`, `optional`, defaults to `:obj:`None`): + output_attentions (:obj:`bool`, `optional`, defaults to :obj:`None`): If set to ``True``, the attentions tensors of all attention layers are returned. See ``attentions`` under returned tensors for more detail. """ diff --git a/src/transformers/modeling_xlnet.py b/src/transformers/modeling_xlnet.py index 9ddac6a51e..e5ce6b0e98 100644 --- a/src/transformers/modeling_xlnet.py +++ b/src/transformers/modeling_xlnet.py @@ -618,7 +618,7 @@ XLNET_INPUTS_DOCSTRING = r""" than the model's internal embedding lookup matrix. use_cache (:obj:`bool`): If `use_cache` is True, `mems` are returned and can be used to speed up decoding (see `mems`). Defaults to `True`. - output_attentions (:obj:`bool`, `optional`, defaults to `:obj:`None`): + output_attentions (:obj:`bool`, `optional`, defaults to :obj:`None`): If set to ``True``, the attentions tensors of all attention layers are returned. See ``attentions`` under returned tensors for more detail. """