Fix typo in all model docs (#7714)

2020-10-12 04:06:59 -04:00
parent 83086858f8
commit 13c1857718
43 changed files with 51 additions and 51 deletions
--- a/src/transformers/modeling_albert.py
+++ b/src/transformers/modeling_albert.py
@@ -539,7 +539,7 @@ ALBERT_INPUTS_DOCSTRING = r"""
            Mask values selected in ``[0, 1]``:

            - 1 for tokens that are **not masked**,
-            - 0 for tokens that are **maked**.
+            - 0 for tokens that are **masked**.

            `What are attention masks? <../glossary.html#attention-mask>`__
        token_type_ids (:obj:`torch.LongTensor` of shape :obj:`({0})`, `optional`):
--- a/src/transformers/modeling_bart.py
+++ b/src/transformers/modeling_bart.py
@@ -113,7 +113,7 @@ BART_INPUTS_DOCSTRING = r"""
            Mask values selected in ``[0, 1]``:

            - 1 for tokens that are **not masked**,
-            - 0 for tokens that are **maked**.
+            - 0 for tokens that are **masked**.

            `What are attention masks? <../glossary.html#attention-mask>`__
        decoder_input_ids (:obj:`torch.LongTensor` of shape :obj:`(batch_size, target_sequence_length)`, `optional`):
--- a/src/transformers/modeling_bert_generation.py
+++ b/src/transformers/modeling_bert_generation.py
@@ -218,7 +218,7 @@ BERT_GENERATION_INPUTS_DOCSTRING = r"""
            Mask values selected in ``[0, 1]``:

            - 1 for tokens that are **not masked**,
-            - 0 for tokens that are **maked**.
+            - 0 for tokens that are **masked**.

            `What are attention masks? <../glossary.html#attention-mask>`__
        position_ids (:obj:`torch.LongTensor` of shape :obj:`({0})`, `optional`):
@@ -450,7 +450,7 @@ class BertGenerationDecoder(BertGenerationPreTrainedModel):
            Mask values selected in ``[0, 1]``:

            - 1 for tokens that are **not masked**,
-            - 0 for tokens that are **maked**.
+            - 0 for tokens that are **masked**.
        labels (:obj:`torch.LongTensor` of shape :obj:`(batch_size, sequence_length)`, `optional`):
            Labels for computing the left-to-right language modeling loss (next word prediction).
            Indices should be in ``[-100, 0, ..., config.vocab_size]`` (see ``input_ids`` docstring)
--- a/src/transformers/modeling_ctrl.py
+++ b/src/transformers/modeling_ctrl.py
@@ -273,7 +273,7 @@ CTRL_INPUTS_DOCSTRING = r"""
            Mask values selected in ``[0, 1]``:

            - 1 for tokens that are **not masked**,
-            - 0 for tokens that are **maked**.
+            - 0 for tokens that are **masked**.

            `What are attention masks? <../glossary.html#attention-mask>`__
        token_type_ids (:obj:`torch.LongTensor` of shape :obj:`(batch_size, sequence_length)`, `optional`):
--- a/src/transformers/modeling_distilbert.py
+++ b/src/transformers/modeling_distilbert.py
@@ -401,7 +401,7 @@ DISTILBERT_INPUTS_DOCSTRING = r"""
            Mask values selected in ``[0, 1]``:

            - 1 for tokens that are **not masked**,
-            - 0 for tokens that are **maked**.
+            - 0 for tokens that are **masked**.

            `What are attention masks? <../glossary.html#attention-mask>`__
        head_mask (:obj:`torch.FloatTensor` of shape :obj:`(num_heads,)` or :obj:`(num_layers, num_heads)`, `optional`):
--- a/src/transformers/modeling_dpr.py
+++ b/src/transformers/modeling_dpr.py
@@ -358,7 +358,7 @@ DPR_ENCODERS_INPUTS_DOCSTRING = r"""
            Mask values selected in ``[0, 1]``:

            - 1 for tokens that are **not masked**,
-            - 0 for tokens that are **maked**.
+            - 0 for tokens that are **masked**.

            `What are attention masks? <../glossary.html#attention-mask>`__
        token_type_ids (:obj:`torch.LongTensor` of shape :obj:`(batch_size, sequence_length)`, `optional`):
@@ -403,7 +403,7 @@ DPR_READER_INPUTS_DOCSTRING = r"""
            Mask values selected in ``[0, 1]``:

            - 1 for tokens that are **not masked**,
-            - 0 for tokens that are **maked**.
+            - 0 for tokens that are **masked**.

            `What are attention masks? <../glossary.html#attention-mask>`__
        inputs_embeds (:obj:`torch.FloatTensor` of shape :obj:`(n_passages, sequence_length, hidden_size)`, `optional`):
--- a/src/transformers/modeling_electra.py
+++ b/src/transformers/modeling_electra.py
@@ -611,7 +611,7 @@ ELECTRA_INPUTS_DOCSTRING = r"""
            Mask values selected in ``[0, 1]``:

            - 1 for tokens that are **not masked**,
-            - 0 for tokens that are **maked**.
+            - 0 for tokens that are **masked**.

            `What are attention masks? <../glossary.html#attention-mask>`__
        token_type_ids (:obj:`torch.LongTensor` of shape :obj:`({0})`, `optional`):
--- a/src/transformers/modeling_encoder_decoder.py
+++ b/src/transformers/modeling_encoder_decoder.py
@@ -74,7 +74,7 @@ ENCODER_DECODER_INPUTS_DOCSTRING = r"""
            Mask values selected in ``[0, 1]``:

            - 1 for tokens that are **not masked**,
-            - 0 for tokens that are **maked**.
+            - 0 for tokens that are **masked**.

            `What are attention masks? <../glossary.html#attention-mask>`__
        decoder_input_ids (:obj:`torch.LongTensor` of shape :obj:`(batch_size, target_sequence_length)`, `optional`):
--- a/src/transformers/modeling_flaubert.py
+++ b/src/transformers/modeling_flaubert.py
@@ -81,7 +81,7 @@ FLAUBERT_INPUTS_DOCSTRING = r"""
            Mask values selected in ``[0, 1]``:

            - 1 for tokens that are **not masked**,
-            - 0 for tokens that are **maked**.
+            - 0 for tokens that are **masked**.

            `What are attention masks? <../glossary.html#attention-mask>`__
        token_type_ids (:obj:`torch.LongTensor` of shape :obj:`(batch_size, sequence_length)`, `optional`):
--- a/src/transformers/modeling_fsmt.py
+++ b/src/transformers/modeling_fsmt.py
@@ -224,7 +224,7 @@ FSMT_INPUTS_DOCSTRING = r"""
            Mask values selected in ``[0, 1]``:

            - 1 for tokens that are **not masked**,
-            - 0 for tokens that are **maked**.
+            - 0 for tokens that are **masked**.

            `What are attention masks? <../glossary.html#attention-mask>`__
        decoder_input_ids (:obj:`torch.LongTensor` of shape :obj:`(batch_size, target_sequence_length)`, `optional`):
--- a/src/transformers/modeling_funnel.py
+++ b/src/transformers/modeling_funnel.py
@@ -857,7 +857,7 @@ FUNNEL_INPUTS_DOCSTRING = r"""
            Mask values selected in ``[0, 1]``:

            - 1 for tokens that are **not masked**,
-            - 0 for tokens that are **maked**.
+            - 0 for tokens that are **masked**.

            `What are attention masks? <../glossary.html#attention-mask>`__
        token_type_ids (:obj:`torch.LongTensor` of shape :obj:`({0})`, `optional`):
--- a/src/transformers/modeling_gpt2.py
+++ b/src/transformers/modeling_gpt2.py
@@ -429,7 +429,7 @@ GPT2_INPUTS_DOCSTRING = r"""
            Mask values selected in ``[0, 1]``:

            - 1 for tokens that are **not masked**,
-            - 0 for tokens that are **maked**.
+            - 0 for tokens that are **masked**.

            `What are attention masks? <../glossary.html#attention-mask>`__
        token_type_ids (:obj:`torch.LongTensor` of shape :obj:`(batch_size, input_ids_length)`, `optional`):
--- a/src/transformers/modeling_longformer.py
+++ b/src/transformers/modeling_longformer.py
@@ -1018,7 +1018,7 @@ LONGFORMER_INPUTS_DOCSTRING = r"""
            Mask values selected in ``[0, 1]``:

            - 1 for tokens that are **not masked**,
-            - 0 for tokens that are **maked**.
+            - 0 for tokens that are **masked**.

            `What are attention masks? <../glossary.html#attention-mask>`__
        global_attention_mask (:obj:`torch.FloatTensor` of shape :obj:`({0})`, `optional`):
--- a/src/transformers/modeling_lxmert.py
+++ b/src/transformers/modeling_lxmert.py
@@ -848,7 +848,7 @@ LXMERT_INPUTS_DOCSTRING = r"""
            Mask values selected in ``[0, 1]``:

            - 1 for tokens that are **not masked**,
-            - 0 for tokens that are **maked**.
+            - 0 for tokens that are **masked**.

            `What are attention masks? <../glossary.html#attention-mask>`__
        visual_attention_mask (:obj:`torch.FloatTensor` of shape :obj:`({0})`, `optional`):
@@ -856,7 +856,7 @@ LXMERT_INPUTS_DOCSTRING = r"""
            Mask values selected in ``[0, 1]``:

            - 1 for tokens that are **not masked**,
-            - 0 for tokens that are **maked**.
+            - 0 for tokens that are **masked**.

            `What are attention masks? <../glossary.html#attention-mask>`__
        token_type_ids (:obj:`torch.LongTensor` of shape :obj:`({0})`, `optional`):
--- a/src/transformers/modeling_mmbt.py
+++ b/src/transformers/modeling_mmbt.py
@@ -123,7 +123,7 @@ MMBT_INPUTS_DOCSTRING = r"""
            Mask values selected in ``[0, 1]``:

            - 1 for tokens that are **not masked**,
-            - 0 for tokens that are **maked**.
+            - 0 for tokens that are **masked**.

            `What are attention masks? <../glossary.html#attention-mask>`__
        token_type_ids (`optional`) ``torch.LongTensor`` of shape ``(batch_size, sequence_length)``:
@@ -167,7 +167,7 @@ MMBT_INPUTS_DOCSTRING = r"""
            Mask values selected in ``[0, 1]``:

            - 1 for tokens that are **not masked**,
-            - 0 for tokens that are **maked**.
+            - 0 for tokens that are **masked**.

        output_attentions (:obj:`bool`, `optional`):
            Whether or not to return the attentions tensors of all attention layers. See ``attentions`` under returned
--- a/src/transformers/modeling_mobilebert.py
+++ b/src/transformers/modeling_mobilebert.py
@@ -756,7 +756,7 @@ MOBILEBERT_INPUTS_DOCSTRING = r"""
            Mask values selected in ``[0, 1]``:

            - 1 for tokens that are **not masked**,
-            - 0 for tokens that are **maked**.
+            - 0 for tokens that are **masked**.

            `What are attention masks? <../glossary.html#attention-mask>`__
        token_type_ids (:obj:`torch.LongTensor` of shape :obj:`({0})`, `optional`):
@@ -792,7 +792,7 @@ MOBILEBERT_INPUTS_DOCSTRING = r"""
            Mask values selected in ``[0, 1]``:

            - 1 for tokens that are **not masked**,
-            - 0 for tokens that are **maked**.
+            - 0 for tokens that are **masked**.

        output_attentions (:obj:`bool`, `optional`):
            Whether or not to return the attentions tensors of all attention layers. See ``attentions`` under returned
--- a/src/transformers/modeling_openai.py
+++ b/src/transformers/modeling_openai.py
@@ -360,7 +360,7 @@ OPENAI_GPT_INPUTS_DOCSTRING = r"""
            Mask values selected in ``[0, 1]``:

            - 1 for tokens that are **not masked**,
-            - 0 for tokens that are **maked**.
+            - 0 for tokens that are **masked**.

            `What are attention masks? <../glossary.html#attention-mask>`__
        token_type_ids (:obj:`torch.LongTensor` of shape :obj:`(batch_size, sequence_length)`, `optional`):
--- a/src/transformers/modeling_rag.py
+++ b/src/transformers/modeling_rag.py
@@ -406,7 +406,7 @@ RAG_FORWARD_INPUTS_DOCSTRING = r"""
            Mask values selected in ``[0, 1]``:

            - 1 for tokens that are **not masked**,
-            - 0 for tokens that are **maked**.
+            - 0 for tokens that are **masked**.

            `What are attention masks? <../glossary.html#attention-mask>`__
        encoder_outputs (:obj:`tuple(tuple(torch.FloatTensor)`, `optional`)
@@ -836,7 +836,7 @@ class RagSequenceForGeneration(RagPreTrainedModel):
                Mask values selected in ``[0, 1]``:

                - 1 for tokens that are **not masked**,
-                - 0 for tokens that are **maked**.
+                - 0 for tokens that are **masked**.

                `What are attention masks? <../glossary.html#attention-mask>`__
            context_input_ids (:obj:`torch.LongTensor` of shape :obj:`(batch_size * config.n_docs, config.max_combined_length)`, `optional`, returned when `output_retrieved=True`):
@@ -1221,7 +1221,7 @@ class RagTokenForGeneration(RagPreTrainedModel):
                Mask values selected in ``[0, 1]``:

                - 1 for tokens that are **not masked**,
-                - 0 for tokens that are **maked**.
+                - 0 for tokens that are **masked**.

                `What are attention masks? <../glossary.html#attention-mask>`__
            context_input_ids (:obj:`torch.LongTensor` of shape :obj:`(batch_size * config.n_docs, config.max_combined_length)`, `optional`, returned when `output_retrieved=True`):
--- a/src/transformers/modeling_reformer.py
+++ b/src/transformers/modeling_reformer.py
@@ -1926,7 +1926,7 @@ REFORMER_INPUTS_DOCSTRING = r"""
            Mask values selected in ``[0, 1]``:

            - 1 for tokens that are **not masked**,
-            - 0 for tokens that are **maked**.
+            - 0 for tokens that are **masked**.

            `What are attention masks? <../glossary.html#attention-mask>`__
        position_ids (:obj:`torch.LongTensor` of shape :obj:`(batch_size, sequence_length)`, `optional`):
--- a/src/transformers/modeling_retribert.py
+++ b/src/transformers/modeling_retribert.py
@@ -185,7 +185,7 @@ class RetriBertModel(RetriBertPreTrainedModel):
                Mask values selected in ``[0, 1]``:

                - 1 for tokens that are **not masked**,
-                - 0 for tokens that are **maked**.
+                - 0 for tokens that are **masked**.

                `What are attention masks? <../glossary.html#attention-mask>`__
            input_ids_doc (:obj:`torch.LongTensor` of shape :obj:`(batch_size, sequence_length)`):
--- a/src/transformers/modeling_roberta.py
+++ b/src/transformers/modeling_roberta.py
@@ -506,7 +506,7 @@ ROBERTA_INPUTS_DOCSTRING = r"""
            Mask values selected in ``[0, 1]``:

            - 1 for tokens that are **not masked**,
-            - 0 for tokens that are **maked**.
+            - 0 for tokens that are **masked**.

            `What are attention masks? <../glossary.html#attention-mask>`__
        token_type_ids (:obj:`torch.LongTensor` of shape :obj:`({0})`, `optional`):
--- a/src/transformers/modeling_squeezebert.py
+++ b/src/transformers/modeling_squeezebert.py
@@ -461,7 +461,7 @@ SQUEEZEBERT_INPUTS_DOCSTRING = r"""
            Mask values selected in ``[0, 1]``:

            - 1 for tokens that are **not masked**,
-            - 0 for tokens that are **maked**.
+            - 0 for tokens that are **masked**.

            `What are attention masks? <../glossary.html#attention-mask>`__
        token_type_ids (:obj:`torch.LongTensor` of shape :obj:`({0})`, `optional`):
--- a/src/transformers/modeling_t5.py
+++ b/src/transformers/modeling_t5.py
@@ -843,7 +843,7 @@ T5_INPUTS_DOCSTRING = r"""
            Mask values selected in ``[0, 1]``:

            - 1 for tokens that are **not masked**,
-            - 0 for tokens that are **maked**.
+            - 0 for tokens that are **masked**.

            `What are attention masks? <../glossary.html#attention-mask>`__
        decoder_input_ids (:obj:`torch.LongTensor` of shape :obj:`(batch_size, target_sequence_length)`, `optional`):
--- a/src/transformers/modeling_tf_albert.py
+++ b/src/transformers/modeling_tf_albert.py
@@ -690,7 +690,7 @@ ALBERT_INPUTS_DOCSTRING = r"""
            Mask values selected in ``[0, 1]``:

            - 1 for tokens that are **not masked**,
-            - 0 for tokens that are **maked**.
+            - 0 for tokens that are **masked**.

            `What are attention masks? <../glossary.html#attention-mask>`__
        token_type_ids (:obj:`Numpy array` or :obj:`tf.Tensor` of shape :obj:`({0})`, `optional`):
--- a/src/transformers/modeling_tf_bert.py
+++ b/src/transformers/modeling_tf_bert.py
@@ -735,7 +735,7 @@ BERT_INPUTS_DOCSTRING = r"""
            Mask values selected in ``[0, 1]``:

            - 1 for tokens that are **not masked**,
-            - 0 for tokens that are **maked**.
+            - 0 for tokens that are **masked**.

            `What are attention masks? <../glossary.html#attention-mask>`__
        token_type_ids (:obj:`Numpy array` or :obj:`tf.Tensor` of shape :obj:`({0})`, `optional`):
--- a/src/transformers/modeling_tf_ctrl.py
+++ b/src/transformers/modeling_tf_ctrl.py
@@ -495,7 +495,7 @@ CTRL_INPUTS_DOCSTRING = r"""
            Mask values selected in ``[0, 1]``:

            - 1 for tokens that are **not masked**,
-            - 0 for tokens that are **maked**.
+            - 0 for tokens that are **masked**.

            `What are attention masks? <../glossary.html#attention-mask>`__
        token_type_ids (:obj:`tf.Tensor` or :obj:`Numpy array` of shape :obj:`(batch_size, sequence_length)`, `optional`):
--- a/src/transformers/modeling_tf_distilbert.py
+++ b/src/transformers/modeling_tf_distilbert.py
@@ -550,7 +550,7 @@ DISTILBERT_INPUTS_DOCSTRING = r"""
            Mask values selected in ``[0, 1]``:

            - 1 for tokens that are **not masked**,
-            - 0 for tokens that are **maked**.
+            - 0 for tokens that are **masked**.

            `What are attention masks? <../glossary.html#attention-mask>`__
        head_mask (:obj:`Numpy array` or :obj:`tf.Tensor` of shape :obj:`(num_heads,)` or :obj:`(num_layers, num_heads)`, `optional`):
--- a/src/transformers/modeling_tf_electra.py
+++ b/src/transformers/modeling_tf_electra.py
@@ -665,7 +665,7 @@ ELECTRA_INPUTS_DOCSTRING = r"""
            Mask values selected in ``[0, 1]``:

            - 1 for tokens that are **not masked**,
-            - 0 for tokens that are **maked**.
+            - 0 for tokens that are **masked**.

            `What are attention masks? <../glossary.html#attention-mask>`__
        position_ids (:obj:`Numpy array` or :obj:`tf.Tensor` of shape :obj:`({0})`, `optional`):
--- a/src/transformers/modeling_tf_flaubert.py
+++ b/src/transformers/modeling_tf_flaubert.py
@@ -96,7 +96,7 @@ FLAUBERT_INPUTS_DOCSTRING = r"""
            Mask values selected in ``[0, 1]``:

            - ``1`` for tokens that are **not masked**,
-            - ``0`` for tokens that are **maked**.
+            - ``0`` for tokens that are **masked**.

            `What are attention masks? <../glossary.html#attention-mask>`__
        langs (:obj:`tf.Tensor` or :obj:`Numpy array` of shape :obj:`(batch_size, sequence_length)`, `optional`):
--- a/src/transformers/modeling_tf_funnel.py
+++ b/src/transformers/modeling_tf_funnel.py
@@ -1099,7 +1099,7 @@ FUNNEL_INPUTS_DOCSTRING = r"""
            Mask values selected in ``[0, 1]``:

            - 1 for tokens that are **not masked**,
-            - 0 for tokens that are **maked**.
+            - 0 for tokens that are **masked**.

            `What are attention masks? <../glossary.html#attention-mask>`__
        token_type_ids (:obj:`Numpy array` or :obj:`tf.Tensor` of shape :obj:`({0})`, `optional`):
--- a/src/transformers/modeling_tf_gpt2.py
+++ b/src/transformers/modeling_tf_gpt2.py
@@ -508,7 +508,7 @@ GPT2_INPUTS_DOCSTRING = r"""
            Mask values selected in ``[0, 1]``:

            - 1 for tokens that are **not masked**,
-            - 0 for tokens that are **maked**.
+            - 0 for tokens that are **masked**.

            `What are attention masks? <../glossary.html#attention-mask>`__
        token_type_ids (:obj:`tf.Tensor` or :obj:`Numpy array` of shape :obj:`(batch_size, sequence_length)`, `optional`):
--- a/src/transformers/modeling_tf_longformer.py
+++ b/src/transformers/modeling_tf_longformer.py
@@ -1534,7 +1534,7 @@ LONGFORMER_INPUTS_DOCSTRING = r"""
            Mask values selected in ``[0, 1]``:

            - 1 for tokens that are **not masked**,
-            - 0 for tokens that are **maked**.
+            - 0 for tokens that are **masked**.

            `What are attention masks? <../glossary.html#attention-mask>`__
        global_attention_mask (:obj:`tf.Tensor` of shape :obj:`({0})`, `optional`):
--- a/src/transformers/modeling_tf_lxmert.py
+++ b/src/transformers/modeling_tf_lxmert.py
@@ -921,7 +921,7 @@ LXMERT_INPUTS_DOCSTRING = r"""
            Mask values selected in ``[0, 1]``:

            - 1 for tokens that are **not masked**,
-            - 0 for tokens that are **maked**.
+            - 0 for tokens that are **masked**.

            `What are attention masks? <../glossary.html#attention-mask>`__
        visual_attention_mask (:obj:`tf.Tensor` of shape :obj:`(batch_size, sequence_length)`, `optional`):
@@ -929,7 +929,7 @@ LXMERT_INPUTS_DOCSTRING = r"""
            Mask values selected in ``[0, 1]``:

            - 1 for tokens that are **not masked**,
-            - 0 for tokens that are **maked**.
+            - 0 for tokens that are **masked**.

            `What are attention masks? <../glossary.html#attention-mask>`__
        token_type_ids (:obj:`tf.Tensor` of shape :obj:`(batch_size, sequence_length)`, `optional`):
--- a/src/transformers/modeling_tf_mobilebert.py
+++ b/src/transformers/modeling_tf_mobilebert.py
@@ -903,7 +903,7 @@ MOBILEBERT_INPUTS_DOCSTRING = r"""
            Mask values selected in ``[0, 1]``:

            - 1 for tokens that are **not masked**,
-            - 0 for tokens that are **maked**.
+            - 0 for tokens that are **masked**.

            `What are attention masks? <../glossary.html#attention-mask>`__
        token_type_ids (:obj:`Numpy array` or :obj:`tf.Tensor` of shape :obj:`({0})`, `optional`):
--- a/src/transformers/modeling_tf_openai.py
+++ b/src/transformers/modeling_tf_openai.py
@@ -444,7 +444,7 @@ OPENAI_GPT_INPUTS_DOCSTRING = r"""
            Mask values selected in ``[0, 1]``:

            - 1 for tokens that are **not masked**,
-            - 0 for tokens that are **maked**.
+            - 0 for tokens that are **masked**.

            `What are attention masks? <../glossary.html#attention-mask>`__
        token_type_ids (:obj:`tf.Tensor` or :obj:`Numpy array` of shape :obj:`(batch_size, sequence_length)`, `optional`):
--- a/src/transformers/modeling_tf_roberta.py
+++ b/src/transformers/modeling_tf_roberta.py
@@ -654,7 +654,7 @@ ROBERTA_INPUTS_DOCSTRING = r"""
            Mask values selected in ``[0, 1]``:

            - 1 for tokens that are **not masked**,
-            - 0 for tokens that are **maked**.
+            - 0 for tokens that are **masked**.

            `What are attention masks? <../glossary.html#attention-mask>`__
        token_type_ids (:obj:`Numpy array` or :obj:`tf.Tensor` of shape :obj:`({0})`, `optional`):
--- a/src/transformers/modeling_tf_t5.py
+++ b/src/transformers/modeling_tf_t5.py
@@ -913,7 +913,7 @@ T5_INPUTS_DOCSTRING = r"""
            Mask values selected in ``[0, 1]``:

            - 1 for tokens that are **not masked**,
-            - 0 for tokens that are **maked**.
+            - 0 for tokens that are **masked**.

            `What are attention masks? <../glossary.html#attention-mask>`__
        decoder_attention_mask (:obj:`tf.Tensor` of shape :obj:`(batch_size, tgt_seq_len)`, `optional`):
--- a/src/transformers/modeling_tf_xlm.py
+++ b/src/transformers/modeling_tf_xlm.py
@@ -626,7 +626,7 @@ XLM_INPUTS_DOCSTRING = r"""
            Mask values selected in ``[0, 1]``:

            - 1 for tokens that are **not masked**,
-            - 0 for tokens that are **maked**.
+            - 0 for tokens that are **masked**.

            `What are attention masks? <../glossary.html#attention-mask>`__
        langs (:obj:`tf.Tensor` or :obj:`Numpy array` of shape :obj:`({0})`, `optional`):
--- a/src/transformers/modeling_tf_xlnet.py
+++ b/src/transformers/modeling_tf_xlnet.py
@@ -1057,7 +1057,7 @@ XLNET_INPUTS_DOCSTRING = r"""
            Mask values selected in ``[0, 1]``:

            - 1 for tokens that are **not masked**,
-            - 0 for tokens that are **maked**.
+            - 0 for tokens that are **masked**.

            `What are attention masks? <../glossary.html#attention-mask>`__
        mems (:obj:`List[torch.FloatTensor]` of length :obj:`config.n_layers`):
--- a/src/transformers/modeling_xlm.py
+++ b/src/transformers/modeling_xlm.py
@@ -337,7 +337,7 @@ XLM_INPUTS_DOCSTRING = r"""
            Mask values selected in ``[0, 1]``:

            - 1 for tokens that are **not masked**,
-            - 0 for tokens that are **maked**.
+            - 0 for tokens that are **masked**.

            `What are attention masks? <../glossary.html#attention-mask>`__
        langs (:obj:`torch.LongTensor` of shape :obj:`({0})`, `optional`):
--- a/src/transformers/modeling_xlnet.py
+++ b/src/transformers/modeling_xlnet.py
@@ -866,7 +866,7 @@ XLNET_INPUTS_DOCSTRING = r"""
            Mask values selected in ``[0, 1]``:

            - 1 for tokens that are **not masked**,
-            - 0 for tokens that are **maked**.
+            - 0 for tokens that are **masked**.

            `What are attention masks? <../glossary.html#attention-mask>`__
        mems (:obj:`List[torch.FloatTensor]` of length :obj:`config.n_layers`):
--- a/templates/adding_a_new_model/modeling_tf_xxx.py
+++ b/templates/adding_a_new_model/modeling_tf_xxx.py
@@ -310,7 +310,7 @@ XXX_INPUTS_DOCSTRING = r"""
            Mask values selected in ``[0, 1]``:

            - 1 for tokens that are **not masked**,
-            - 0 for tokens that are **maked**.
+            - 0 for tokens that are **masked**.

            `What are attention masks? <../glossary.html#attention-mask>`__
        token_type_ids (:obj:`Numpy array` or :obj:`tf.Tensor` of shape :obj:`({0})`, `optional`):
--- a/templates/adding_a_new_model/modeling_xxx.py
+++ b/templates/adding_a_new_model/modeling_xxx.py
@@ -243,7 +243,7 @@ XXX_INPUTS_DOCSTRING = r"""
            Mask values selected in ``[0, 1]``:

            - 1 for tokens that are **not masked**,
-            - 0 for tokens that are **maked**.
+            - 0 for tokens that are **masked**.

            `What are attention masks? <../glossary.html#attention-mask>`__
        token_type_ids (:obj:`torch.LongTensor` of shape :obj:`({0})`, `optional`):