From 62b8eb43c1b722f8c8a3c89fce5d788a08fc9653 Mon Sep 17 00:00:00 2001 From: thomwolf Date: Mon, 15 Jul 2019 09:49:02 +0200 Subject: [PATCH] fix add_start_docstrings on python 2 (removed) --- pytorch_transformers/modeling_bert.py | 16 +++++++-------- pytorch_transformers/modeling_gpt2.py | 9 +++++---- pytorch_transformers/modeling_utils.py | 28 +++++++++++++++++--------- 3 files changed, 32 insertions(+), 21 deletions(-) diff --git a/pytorch_transformers/modeling_bert.py b/pytorch_transformers/modeling_bert.py index ea908b1ca0..f49aca0ddf 100644 --- a/pytorch_transformers/modeling_bert.py +++ b/pytorch_transformers/modeling_bert.py @@ -646,7 +646,7 @@ BERT_INPUTS_DOCSTRING = r""" @add_start_docstrings("The bare Bert Model transformer outputing raw hidden-states without any specific head on top.", BERT_START_DOCSTRING, BERT_INPUTS_DOCSTRING) class BertModel(BertPreTrainedModel): - __doc__ = r""" + r""" Outputs: `Tuple` comprising various elements depending on the configuration (config) and inputs: **last_hidden_state**: ``torch.FloatTensor`` of shape ``(batch_size, sequence_length, hidden_size)`` Sequence of hidden-states at the last layer of the model. @@ -742,7 +742,7 @@ class BertModel(BertPreTrainedModel): a `masked language modeling` head and a `next sentence prediction (classification)` head. """, BERT_START_DOCSTRING, BERT_INPUTS_DOCSTRING) class BertForPreTraining(BertPreTrainedModel): - __doc__ = r""" + r""" **masked_lm_labels**: (`optional`) ``torch.LongTensor`` of shape ``(batch_size, sequence_length)``: Labels for computing the masked language modeling loss. Indices should be in ``[-1, 0, ..., config.vocab_size]`` (see ``input_ids`` docstring) @@ -818,7 +818,7 @@ class BertForPreTraining(BertPreTrainedModel): @add_start_docstrings("""Bert Model transformer BERT model with a `language modeling` head on top. """, BERT_START_DOCSTRING, BERT_INPUTS_DOCSTRING) class BertForMaskedLM(BertPreTrainedModel): - __doc__ = r""" + r""" **masked_lm_labels**: (`optional`) ``torch.LongTensor`` of shape ``(batch_size, sequence_length)``: Labels for computing the masked language modeling loss. Indices should be in ``[-1, 0, ..., config.vocab_size]`` (see ``input_ids`` docstring) @@ -883,7 +883,7 @@ class BertForMaskedLM(BertPreTrainedModel): @add_start_docstrings("""Bert Model transformer BERT model with a `next sentence prediction (classification)` head on top. """, BERT_START_DOCSTRING, BERT_INPUTS_DOCSTRING) class BertForNextSentencePrediction(BertPreTrainedModel): - __doc__ = r""" + r""" **next_sentence_label**: (`optional`) ``torch.LongTensor`` of shape ``(batch_size,)``: Labels for computing the next sequence prediction (classification) loss. Input should be a sequence pair (see ``input_ids`` docstring) Indices should be in ``[0, 1]``. @@ -941,7 +941,7 @@ class BertForNextSentencePrediction(BertPreTrainedModel): the pooled output) e.g. for GLUE tasks. """, BERT_START_DOCSTRING, BERT_INPUTS_DOCSTRING) class BertForSequenceClassification(BertPreTrainedModel): - __doc__ = r""" + r""" **labels**: (`optional`) ``torch.LongTensor`` of shape ``(batch_size,)``: Labels for computing the sequence classification/regression loss. Indices should be in ``[0, ..., config.num_labels]``. @@ -1009,7 +1009,7 @@ class BertForSequenceClassification(BertPreTrainedModel): the pooled output and a softmax) e.g. for RocStories/SWAG tasks. """, BERT_START_DOCSTRING) class BertForMultipleChoice(BertPreTrainedModel): - __doc__ = r""" + r""" Inputs: **input_ids**: ``torch.LongTensor`` of shape ``(batch_size, num_choices, sequence_length)``: Indices of input sequence tokens in the vocabulary. @@ -1115,7 +1115,7 @@ class BertForMultipleChoice(BertPreTrainedModel): the hidden-states output) e.g. for Named-Entity-Recognition (NER) tasks. """, BERT_START_DOCSTRING, BERT_INPUTS_DOCSTRING) class BertForTokenClassification(BertPreTrainedModel): - __doc__ = r""" + r""" **labels**: (`optional`) ``torch.LongTensor`` of shape ``(batch_size, sequence_length)``: Labels for computing the token classification loss. Indices should be in ``[0, ..., config.num_labels]``. @@ -1182,7 +1182,7 @@ class BertForTokenClassification(BertPreTrainedModel): the hidden-states output to compute `span start logits` and `span end logits`). """, BERT_START_DOCSTRING, BERT_INPUTS_DOCSTRING) class BertForQuestionAnswering(BertPreTrainedModel): - __doc__ = r""" + r""" **start_positions**: (`optional`) ``torch.LongTensor`` of shape ``(batch_size,)``: Position (index) of the start of the labelled span for computing the token classification loss. Positions are clamped to the length of the sequence (`sequence_length`). diff --git a/pytorch_transformers/modeling_gpt2.py b/pytorch_transformers/modeling_gpt2.py index 561228a47e..8aaf84a099 100644 --- a/pytorch_transformers/modeling_gpt2.py +++ b/pytorch_transformers/modeling_gpt2.py @@ -31,7 +31,8 @@ from torch.nn import CrossEntropyLoss from torch.nn.parameter import Parameter from .modeling_utils import (Conv1D, CONFIG_NAME, WEIGHTS_NAME, PretrainedConfig, - PreTrainedModel, prune_conv1d_layer, SequenceSummary) + PreTrainedModel, prune_conv1d_layer, SequenceSummary, + add_start_docstrings) from .modeling_bert import BertLayerNorm as LayerNorm logger = logging.getLogger(__name__) @@ -414,7 +415,7 @@ GPT2_INPUTS_DOCTRING = r""" Inputs: @add_start_docstrings("The bare GPT2 Model transformer outputing raw hidden-states without any specific head on top.", GPT2_START_DOCSTRING, GPT2_INPUTS_DOCTRING) class GPT2Model(GPT2PreTrainedModel): - __doc__ = r""" + r""" Outputs: `Tuple` comprising various elements depending on the configuration (config) and inputs: **last_hidden_state**: ``torch.FloatTensor`` of shape ``(batch_size, sequence_length, hidden_size)`` Sequence of hidden-states at the last layer of the model. @@ -539,7 +540,7 @@ class GPT2Model(GPT2PreTrainedModel): @add_start_docstrings("""The GPT2 Model transformer with a language modeling head on top (linear layer with weights tied to the input embeddings). """, GPT2_START_DOCSTRING, GPT2_INPUTS_DOCTRING) class GPT2LMHeadModel(GPT2PreTrainedModel): - __doc__ = r""" + r""" **lm_labels**: (`optional`) ``torch.LongTensor`` of shape ``(batch_size, sequence_length)``: Labels for language modeling. Note that the labels **are shifted** inside the model, i.e. you can set ``lm_labels = input_ids`` @@ -615,7 +616,7 @@ The language modeling head has its weights tied to the input embeddings, the classification head takes as input the input of a specified classification token index in the intput sequence). """, GPT2_START_DOCSTRING) class GPT2DoubleHeadsModel(GPT2PreTrainedModel): - __doc__ = r""" Inputs: + r""" Inputs: **input_ids**: ``torch.LongTensor`` of shape ``(batch_size, num_choices, sequence_length)``: Indices of input sequence tokens in the vocabulary. The second dimension of the input (`num_choices`) indicates the number of choices to score. diff --git a/pytorch_transformers/modeling_utils.py b/pytorch_transformers/modeling_utils.py index 8971af306e..71fa9e3747 100644 --- a/pytorch_transformers/modeling_utils.py +++ b/pytorch_transformers/modeling_utils.py @@ -15,17 +15,20 @@ # limitations under the License. """PyTorch BERT model.""" -from __future__ import absolute_import, division, print_function, unicode_literals +from __future__ import (absolute_import, division, print_function, + unicode_literals) +import copy +import json import logging import os -import json -import copy from io import open +import six import torch from torch import nn -from torch.nn import CrossEntropyLoss, functional as F +from torch.nn import CrossEntropyLoss +from torch.nn import functional as F from .file_utils import cached_path @@ -36,11 +39,18 @@ WEIGHTS_NAME = "pytorch_model.bin" TF_WEIGHTS_NAME = 'model.ckpt' -def add_start_docstrings(*docstr): - def docstring_decorator(fn): - fn.__doc__ = ''.join(docstr) + fn.__doc__ - return fn - return docstring_decorator +if not six.PY2: + def add_start_docstrings(*docstr): + def docstring_decorator(fn): + fn.__doc__ = ''.join(docstr) + fn.__doc__ + return fn + return docstring_decorator +else: + # Not possible to update class docstrings on python2 + def add_start_docstrings(*docstr): + def docstring_decorator(fn): + return fn + return docstring_decorator class PretrainedConfig(object):