From c8933bb2d9f60885bb66c1a76de878bd5f7a8e9d Mon Sep 17 00:00:00 2001 From: Nikolay Korolev Date: Tue, 27 Aug 2019 12:10:36 +0300 Subject: [PATCH] Delete nonexistent parameter from documentation Changed documentation of GPT2Model, GPT2LMHeadModel and GPT2DoubleHeadsModel --- pytorch_transformers/modeling_gpt2.py | 8 -------- 1 file changed, 8 deletions(-) diff --git a/pytorch_transformers/modeling_gpt2.py b/pytorch_transformers/modeling_gpt2.py index 9022048d6d..35bb9112a6 100644 --- a/pytorch_transformers/modeling_gpt2.py +++ b/pytorch_transformers/modeling_gpt2.py @@ -408,10 +408,6 @@ GPT2_INPUTS_DOCSTRING = r""" Inputs: list of ``torch.FloatTensor`` (one for each layer): that contains pre-computed hidden-states (key and values in the attention blocks) as computed by the model (see `past` output below). Can be used to speed up sequential decoding. - **attention_mask**: (`optional`) ``torch.FloatTensor`` of shape ``(batch_size, sequence_length)``: - Mask to avoid performing attention on padding token indices. - Mask values selected in ``[0, 1]``: - ``1`` for tokens that are NOT MASKED, ``0`` for MASKED tokens. **head_mask**: (`optional`) ``torch.FloatTensor`` of shape ``(num_heads,)`` or ``(num_layers, num_heads)``: Mask to nullify selected heads of the self-attention modules. Mask values selected in ``[0, 1]``: @@ -642,10 +638,6 @@ class GPT2DoubleHeadsModel(GPT2PreTrainedModel): list of ``torch.FloatTensor`` (one for each layer): that contains pre-computed hidden-states (key and values in the attention blocks) as computed by the model (see `past` output below). Can be used to speed up sequential decoding. - **attention_mask**: (`optional`) ``torch.FloatTensor`` of shape ``(batch_size, num_choices, sequence_length)``: - Mask to avoid performing attention on padding token indices. - Mask values selected in ``[0, 1]``: - ``1`` for tokens that are NOT MASKED, ``0`` for MASKED tokens. **head_mask**: (`optional`) ``torch.FloatTensor`` of shape ``(num_heads,)`` or ``(num_layers, num_heads)``: Mask to nullify selected heads of the self-attention modules. Mask values selected in ``[0, 1]``: