From 378142afdff17adab48d41d567c7f0100cf7ae23 Mon Sep 17 00:00:00 2001 From: Sylvain Gugger <35901082+sgugger@users.noreply.github.com> Date: Wed, 28 Oct 2020 13:42:31 -0400 Subject: [PATCH] Rename add_start_docstrings_to_callable (#8120) --- .../pabee/modeling_pabee_albert.py | 6 +++--- .../pabee/modeling_pabee_bert.py | 6 +++--- examples/deebert/src/modeling_highway_bert.py | 6 +++--- .../deebert/src/modeling_highway_roberta.py | 4 ++-- .../emmental/modeling_bert_masked.py | 12 +++++------ src/transformers/file_utils.py | 2 +- src/transformers/modeling_albert.py | 16 +++++++-------- src/transformers/modeling_bart.py | 10 +++++----- src/transformers/modeling_bert.py | 20 +++++++++---------- src/transformers/modeling_bert_generation.py | 6 +++--- src/transformers/modeling_ctrl.py | 6 +++--- src/transformers/modeling_deberta.py | 6 +++--- src/transformers/modeling_distilbert.py | 16 ++++++++------- src/transformers/modeling_dpr.py | 13 ++++++++---- src/transformers/modeling_electra.py | 16 +++++++-------- src/transformers/modeling_encoder_decoder.py | 4 ++-- src/transformers/modeling_flaubert.py | 4 ++-- src/transformers/modeling_fsmt.py | 6 +++--- src/transformers/modeling_funnel.py | 18 ++++++++--------- src/transformers/modeling_gpt2.py | 10 +++++----- src/transformers/modeling_layoutlm.py | 8 ++++---- src/transformers/modeling_longformer.py | 16 ++++++++------- src/transformers/modeling_lxmert.py | 8 ++++---- src/transformers/modeling_mmbt.py | 4 ++-- src/transformers/modeling_mobilebert.py | 20 ++++++++++--------- src/transformers/modeling_openai.py | 10 +++++----- src/transformers/modeling_prophetnet.py | 17 ++++++++++------ src/transformers/modeling_rag.py | 14 ++++++------- src/transformers/modeling_reformer.py | 12 +++++------ src/transformers/modeling_roberta.py | 16 +++++++-------- src/transformers/modeling_squeezebert.py | 14 ++++++------- src/transformers/modeling_t5.py | 6 +++--- src/transformers/modeling_tf_albert.py | 16 +++++++-------- src/transformers/modeling_tf_bart.py | 6 +++--- src/transformers/modeling_tf_bert.py | 18 ++++++++--------- src/transformers/modeling_tf_ctrl.py | 6 +++--- src/transformers/modeling_tf_distilbert.py | 16 ++++++++------- src/transformers/modeling_tf_electra.py | 16 +++++++-------- src/transformers/modeling_tf_flaubert.py | 11 +++++++--- src/transformers/modeling_tf_funnel.py | 18 ++++++++--------- src/transformers/modeling_tf_gpt2.py | 8 ++++---- src/transformers/modeling_tf_longformer.py | 8 ++++---- src/transformers/modeling_tf_lxmert.py | 6 +++--- src/transformers/modeling_tf_mobilebert.py | 20 ++++++++++--------- src/transformers/modeling_tf_openai.py | 8 ++++---- src/transformers/modeling_tf_roberta.py | 14 ++++++------- src/transformers/modeling_tf_t5.py | 6 +++--- src/transformers/modeling_tf_transfo_xl.py | 11 +++++++--- src/transformers/modeling_tf_xlm.py | 14 ++++++------- src/transformers/modeling_tf_xlnet.py | 14 ++++++------- src/transformers/modeling_transfo_xl.py | 11 +++++++--- src/transformers/modeling_xlm.py | 16 +++++++-------- src/transformers/modeling_xlnet.py | 16 +++++++-------- .../adding_a_new_model/modeling_tf_xxx.py | 14 ++++++------- templates/adding_a_new_model/modeling_xxx.py | 14 ++++++------- 55 files changed, 327 insertions(+), 292 deletions(-) diff --git a/examples/bert-loses-patience/pabee/modeling_pabee_albert.py b/examples/bert-loses-patience/pabee/modeling_pabee_albert.py index 383b2c20eb..48abd8a73b 100644 --- a/examples/bert-loses-patience/pabee/modeling_pabee_albert.py +++ b/examples/bert-loses-patience/pabee/modeling_pabee_albert.py @@ -20,7 +20,7 @@ import torch import torch.nn as nn from torch.nn import CrossEntropyLoss, MSELoss -from transformers.file_utils import add_start_docstrings, add_start_docstrings_to_callable +from transformers.file_utils import add_start_docstrings, add_start_docstrings_to_model_forward from transformers.modeling_albert import ( ALBERT_INPUTS_DOCSTRING, ALBERT_START_DOCSTRING, @@ -87,7 +87,7 @@ class AlbertModelWithPabee(AlbertModel): message = f"*** Patience = {self.patience} Avg. Inference Layers = {avg_inf_layers:.2f} Speed Up = {1 - avg_inf_layers / self.config.num_hidden_layers:.2f} ***" print(message) - @add_start_docstrings_to_callable(ALBERT_INPUTS_DOCSTRING) + @add_start_docstrings_to_model_forward(ALBERT_INPUTS_DOCSTRING) def forward( self, input_ids=None, @@ -230,7 +230,7 @@ class AlbertForSequenceClassificationWithPabee(AlbertPreTrainedModel): self.init_weights() - @add_start_docstrings_to_callable(ALBERT_INPUTS_DOCSTRING) + @add_start_docstrings_to_model_forward(ALBERT_INPUTS_DOCSTRING) def forward( self, input_ids=None, diff --git a/examples/bert-loses-patience/pabee/modeling_pabee_bert.py b/examples/bert-loses-patience/pabee/modeling_pabee_bert.py index e44e367e9b..78de015b1d 100644 --- a/examples/bert-loses-patience/pabee/modeling_pabee_bert.py +++ b/examples/bert-loses-patience/pabee/modeling_pabee_bert.py @@ -22,7 +22,7 @@ import torch from torch import nn from torch.nn import CrossEntropyLoss, MSELoss -from transformers.file_utils import add_start_docstrings, add_start_docstrings_to_callable +from transformers.file_utils import add_start_docstrings, add_start_docstrings_to_model_forward from transformers.modeling_bert import ( BERT_INPUTS_DOCSTRING, BERT_START_DOCSTRING, @@ -92,7 +92,7 @@ class BertModelWithPabee(BertModel): message = f"*** Patience = {self.patience} Avg. Inference Layers = {avg_inf_layers:.2f} Speed Up = {1 - avg_inf_layers / self.config.num_hidden_layers:.2f} ***" print(message) - @add_start_docstrings_to_callable(BERT_INPUTS_DOCSTRING) + @add_start_docstrings_to_model_forward(BERT_INPUTS_DOCSTRING) def forward( self, input_ids=None, @@ -254,7 +254,7 @@ class BertForSequenceClassificationWithPabee(BertPreTrainedModel): self.init_weights() - @add_start_docstrings_to_callable(BERT_INPUTS_DOCSTRING) + @add_start_docstrings_to_model_forward(BERT_INPUTS_DOCSTRING) def forward( self, input_ids=None, diff --git a/examples/deebert/src/modeling_highway_bert.py b/examples/deebert/src/modeling_highway_bert.py index fb3393dca6..84fc10bd0e 100644 --- a/examples/deebert/src/modeling_highway_bert.py +++ b/examples/deebert/src/modeling_highway_bert.py @@ -2,7 +2,7 @@ import torch from torch import nn from torch.nn import CrossEntropyLoss, MSELoss -from transformers.file_utils import add_start_docstrings, add_start_docstrings_to_callable +from transformers.file_utils import add_start_docstrings, add_start_docstrings_to_model_forward from transformers.modeling_bert import ( BERT_INPUTS_DOCSTRING, BERT_START_DOCSTRING, @@ -134,7 +134,7 @@ class DeeBertModel(BertPreTrainedModel): for layer, heads in heads_to_prune.items(): self.encoder.layer[layer].attention.prune_heads(heads) - @add_start_docstrings_to_callable(BERT_INPUTS_DOCSTRING) + @add_start_docstrings_to_model_forward(BERT_INPUTS_DOCSTRING) def forward( self, input_ids=None, @@ -288,7 +288,7 @@ class DeeBertForSequenceClassification(BertPreTrainedModel): self.init_weights() - @add_start_docstrings_to_callable(BERT_INPUTS_DOCSTRING) + @add_start_docstrings_to_model_forward(BERT_INPUTS_DOCSTRING) def forward( self, input_ids=None, diff --git a/examples/deebert/src/modeling_highway_roberta.py b/examples/deebert/src/modeling_highway_roberta.py index 971dcbb6e1..643da941e2 100644 --- a/examples/deebert/src/modeling_highway_roberta.py +++ b/examples/deebert/src/modeling_highway_roberta.py @@ -4,7 +4,7 @@ import torch.nn as nn from torch.nn import CrossEntropyLoss, MSELoss from transformers.configuration_roberta import RobertaConfig -from transformers.file_utils import add_start_docstrings, add_start_docstrings_to_callable +from transformers.file_utils import add_start_docstrings, add_start_docstrings_to_model_forward from transformers.modeling_roberta import ROBERTA_INPUTS_DOCSTRING, ROBERTA_START_DOCSTRING, RobertaEmbeddings from .modeling_highway_bert import BertPreTrainedModel, DeeBertModel, HighwayException, entropy @@ -45,7 +45,7 @@ class DeeRobertaForSequenceClassification(BertPreTrainedModel): self.dropout = nn.Dropout(config.hidden_dropout_prob) self.classifier = nn.Linear(config.hidden_size, self.config.num_labels) - @add_start_docstrings_to_callable(ROBERTA_INPUTS_DOCSTRING) + @add_start_docstrings_to_model_forward(ROBERTA_INPUTS_DOCSTRING) def forward( self, input_ids=None, diff --git a/examples/movement-pruning/emmental/modeling_bert_masked.py b/examples/movement-pruning/emmental/modeling_bert_masked.py index bfa8d7b487..bcc8690c39 100644 --- a/examples/movement-pruning/emmental/modeling_bert_masked.py +++ b/examples/movement-pruning/emmental/modeling_bert_masked.py @@ -28,7 +28,7 @@ from torch.nn import CrossEntropyLoss, MSELoss from emmental import MaskedBertConfig from emmental.modules import MaskedLinear -from transformers.file_utils import add_start_docstrings, add_start_docstrings_to_callable +from transformers.file_utils import add_start_docstrings, add_start_docstrings_to_model_forward from transformers.modeling_bert import ACT2FN, BertLayerNorm, load_tf_weights_in_bert from transformers.modeling_utils import PreTrainedModel, prune_linear_layer @@ -498,7 +498,7 @@ class MaskedBertModel(MaskedBertPreTrainedModel): for layer, heads in heads_to_prune.items(): self.encoder.layer[layer].attention.prune_heads(heads) - @add_start_docstrings_to_callable(MASKED_BERT_INPUTS_DOCSTRING) + @add_start_docstrings_to_model_forward(MASKED_BERT_INPUTS_DOCSTRING) def forward( self, input_ids=None, @@ -671,7 +671,7 @@ class MaskedBertForSequenceClassification(MaskedBertPreTrainedModel): self.init_weights() - @add_start_docstrings_to_callable(MASKED_BERT_INPUTS_DOCSTRING) + @add_start_docstrings_to_model_forward(MASKED_BERT_INPUTS_DOCSTRING) def forward( self, input_ids=None, @@ -756,7 +756,7 @@ class MaskedBertForMultipleChoice(MaskedBertPreTrainedModel): self.init_weights() - @add_start_docstrings_to_callable(MASKED_BERT_INPUTS_DOCSTRING) + @add_start_docstrings_to_model_forward(MASKED_BERT_INPUTS_DOCSTRING) def forward( self, input_ids=None, @@ -846,7 +846,7 @@ class MaskedBertForTokenClassification(MaskedBertPreTrainedModel): self.init_weights() - @add_start_docstrings_to_callable(MASKED_BERT_INPUTS_DOCSTRING) + @add_start_docstrings_to_model_forward(MASKED_BERT_INPUTS_DOCSTRING) def forward( self, input_ids=None, @@ -932,7 +932,7 @@ class MaskedBertForQuestionAnswering(MaskedBertPreTrainedModel): self.init_weights() - @add_start_docstrings_to_callable(MASKED_BERT_INPUTS_DOCSTRING) + @add_start_docstrings_to_model_forward(MASKED_BERT_INPUTS_DOCSTRING) def forward( self, input_ids=None, diff --git a/src/transformers/file_utils.py b/src/transformers/file_utils.py index e51d9f827e..a422c628af 100644 --- a/src/transformers/file_utils.py +++ b/src/transformers/file_utils.py @@ -425,7 +425,7 @@ def add_start_docstrings(*docstr): return docstring_decorator -def add_start_docstrings_to_callable(*docstr): +def add_start_docstrings_to_model_forward(*docstr): def docstring_decorator(fn): class_name = ":class:`~transformers.{}`".format(fn.__qualname__.split(".")[0]) intro = " The {} forward method, overrides the :func:`__call__` special method.".format(class_name) diff --git a/src/transformers/modeling_albert.py b/src/transformers/modeling_albert.py index ff5744e1fa..2a9d51c04d 100755 --- a/src/transformers/modeling_albert.py +++ b/src/transformers/modeling_albert.py @@ -30,7 +30,7 @@ from .file_utils import ( ModelOutput, add_code_sample_docstrings, add_start_docstrings, - add_start_docstrings_to_callable, + add_start_docstrings_to_model_forward, replace_return_docstrings, ) from .modeling_outputs import ( @@ -631,7 +631,7 @@ class AlbertModel(AlbertPreTrainedModel): inner_group_idx = int(layer - group_idx * self.config.inner_group_num) self.encoder.albert_layer_groups[group_idx].albert_layers[inner_group_idx].attention.prune_heads(heads) - @add_start_docstrings_to_callable(ALBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length")) + @add_start_docstrings_to_model_forward(ALBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="albert-base-v2", @@ -727,7 +727,7 @@ class AlbertForPreTraining(AlbertPreTrainedModel): def get_input_embeddings(self): return self.albert.embeddings.word_embeddings - @add_start_docstrings_to_callable(ALBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length")) + @add_start_docstrings_to_model_forward(ALBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @replace_return_docstrings(output_type=AlbertForPreTrainingOutput, config_class=_CONFIG_FOR_DOC) def forward( self, @@ -879,7 +879,7 @@ class AlbertForMaskedLM(AlbertPreTrainedModel): def get_input_embeddings(self): return self.albert.embeddings.word_embeddings - @add_start_docstrings_to_callable(ALBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length")) + @add_start_docstrings_to_model_forward(ALBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="albert-base-v2", @@ -967,7 +967,7 @@ class AlbertForSequenceClassification(AlbertPreTrainedModel): self.init_weights() - @add_start_docstrings_to_callable(ALBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length")) + @add_start_docstrings_to_model_forward(ALBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="albert-base-v2", @@ -1055,7 +1055,7 @@ class AlbertForTokenClassification(AlbertPreTrainedModel): self.init_weights() - @add_start_docstrings_to_callable(ALBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length")) + @add_start_docstrings_to_model_forward(ALBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="albert-base-v2", @@ -1143,7 +1143,7 @@ class AlbertForQuestionAnswering(AlbertPreTrainedModel): self.init_weights() - @add_start_docstrings_to_callable(ALBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length")) + @add_start_docstrings_to_model_forward(ALBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="albert-base-v2", @@ -1242,7 +1242,7 @@ class AlbertForMultipleChoice(AlbertPreTrainedModel): self.init_weights() - @add_start_docstrings_to_callable(ALBERT_INPUTS_DOCSTRING.format("batch_size, num_choices, sequence_length")) + @add_start_docstrings_to_model_forward(ALBERT_INPUTS_DOCSTRING.format("batch_size, num_choices, sequence_length")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="albert-base-v2", diff --git a/src/transformers/modeling_bart.py b/src/transformers/modeling_bart.py index 78a840cea5..08c95fb6bf 100644 --- a/src/transformers/modeling_bart.py +++ b/src/transformers/modeling_bart.py @@ -30,7 +30,7 @@ from .file_utils import ( add_code_sample_docstrings, add_end_docstrings, add_start_docstrings, - add_start_docstrings_to_callable, + add_start_docstrings_to_model_forward, replace_return_docstrings, ) from .modeling_outputs import ( @@ -846,7 +846,7 @@ class BartModel(PretrainedBartModel): self.init_weights() - @add_start_docstrings_to_callable(BART_INPUTS_DOCSTRING) + @add_start_docstrings_to_model_forward(BART_INPUTS_DOCSTRING) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="facebook/bart-large", @@ -981,7 +981,7 @@ class BartForConditionalGeneration(PretrainedBartModel): new_bias = torch.cat([self.final_logits_bias, extra_bias], dim=1) self.register_buffer("final_logits_bias", new_bias) - @add_start_docstrings_to_callable(BART_INPUTS_DOCSTRING) + @add_start_docstrings_to_model_forward(BART_INPUTS_DOCSTRING) @replace_return_docstrings(output_type=Seq2SeqLMOutput, config_class=_CONFIG_FOR_DOC) @add_end_docstrings(BART_GENERATION_EXAMPLE) def forward( @@ -1147,7 +1147,7 @@ class BartForSequenceClassification(PretrainedBartModel): self.model._init_weights(self.classification_head.dense) self.model._init_weights(self.classification_head.out_proj) - @add_start_docstrings_to_callable(BART_INPUTS_DOCSTRING) + @add_start_docstrings_to_model_forward(BART_INPUTS_DOCSTRING) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="facebook/bart-large", @@ -1234,7 +1234,7 @@ class BartForQuestionAnswering(PretrainedBartModel): self.model._init_weights(self.qa_outputs) - @add_start_docstrings_to_callable(BART_INPUTS_DOCSTRING) + @add_start_docstrings_to_model_forward(BART_INPUTS_DOCSTRING) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="facebook/bart-large", diff --git a/src/transformers/modeling_bert.py b/src/transformers/modeling_bert.py index 7deaa5bc67..7a06e159db 100755 --- a/src/transformers/modeling_bert.py +++ b/src/transformers/modeling_bert.py @@ -33,7 +33,7 @@ from .file_utils import ( ModelOutput, add_code_sample_docstrings, add_start_docstrings, - add_start_docstrings_to_callable, + add_start_docstrings_to_model_forward, replace_return_docstrings, ) from .modeling_outputs import ( @@ -748,7 +748,7 @@ class BertModel(BertPreTrainedModel): for layer, heads in heads_to_prune.items(): self.encoder.layer[layer].attention.prune_heads(heads) - @add_start_docstrings_to_callable(BERT_INPUTS_DOCSTRING.format("batch_size, sequence_length")) + @add_start_docstrings_to_model_forward(BERT_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="bert-base-uncased", @@ -870,7 +870,7 @@ class BertForPreTraining(BertPreTrainedModel): def get_output_embeddings(self): return self.cls.predictions.decoder - @add_start_docstrings_to_callable(BERT_INPUTS_DOCSTRING.format("batch_size, sequence_length")) + @add_start_docstrings_to_model_forward(BERT_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @replace_return_docstrings(output_type=BertForPreTrainingOutput, config_class=_CONFIG_FOR_DOC) def forward( self, @@ -983,7 +983,7 @@ class BertLMHeadModel(BertPreTrainedModel): def get_output_embeddings(self): return self.cls.predictions.decoder - @add_start_docstrings_to_callable(BERT_INPUTS_DOCSTRING.format("batch_size, sequence_length")) + @add_start_docstrings_to_model_forward(BERT_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @replace_return_docstrings(output_type=CausalLMOutput, config_class=_CONFIG_FOR_DOC) def forward( self, @@ -1103,7 +1103,7 @@ class BertForMaskedLM(BertPreTrainedModel): def get_output_embeddings(self): return self.cls.predictions.decoder - @add_start_docstrings_to_callable(BERT_INPUTS_DOCSTRING.format("batch_size, sequence_length")) + @add_start_docstrings_to_model_forward(BERT_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="bert-base-uncased", @@ -1206,7 +1206,7 @@ class BertForNextSentencePrediction(BertPreTrainedModel): self.init_weights() - @add_start_docstrings_to_callable(BERT_INPUTS_DOCSTRING.format("batch_size, sequence_length")) + @add_start_docstrings_to_model_forward(BERT_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @replace_return_docstrings(output_type=NextSentencePredictorOutput, config_class=_CONFIG_FOR_DOC) def forward( self, @@ -1300,7 +1300,7 @@ class BertForSequenceClassification(BertPreTrainedModel): self.init_weights() - @add_start_docstrings_to_callable(BERT_INPUTS_DOCSTRING.format("batch_size, sequence_length")) + @add_start_docstrings_to_model_forward(BERT_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="bert-base-uncased", @@ -1384,7 +1384,7 @@ class BertForMultipleChoice(BertPreTrainedModel): self.init_weights() - @add_start_docstrings_to_callable(BERT_INPUTS_DOCSTRING.format("batch_size, num_choices, sequence_length")) + @add_start_docstrings_to_model_forward(BERT_INPUTS_DOCSTRING.format("batch_size, num_choices, sequence_length")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="bert-base-uncased", @@ -1479,7 +1479,7 @@ class BertForTokenClassification(BertPreTrainedModel): self.init_weights() - @add_start_docstrings_to_callable(BERT_INPUTS_DOCSTRING.format("batch_size, sequence_length")) + @add_start_docstrings_to_model_forward(BERT_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="bert-base-uncased", @@ -1569,7 +1569,7 @@ class BertForQuestionAnswering(BertPreTrainedModel): self.init_weights() - @add_start_docstrings_to_callable(BERT_INPUTS_DOCSTRING.format("batch_size, sequence_length")) + @add_start_docstrings_to_model_forward(BERT_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="bert-base-uncased", diff --git a/src/transformers/modeling_bert_generation.py b/src/transformers/modeling_bert_generation.py index b50f6491df..f201c1bd85 100755 --- a/src/transformers/modeling_bert_generation.py +++ b/src/transformers/modeling_bert_generation.py @@ -24,7 +24,7 @@ from .configuration_bert_generation import BertGenerationConfig from .file_utils import ( add_code_sample_docstrings, add_start_docstrings, - add_start_docstrings_to_callable, + add_start_docstrings_to_model_forward, replace_return_docstrings, ) from .modeling_bert import BertEncoder @@ -293,7 +293,7 @@ class BertGenerationEncoder(BertGenerationPreTrainedModel): for layer, heads in heads_to_prune.items(): self.encoder.layer[layer].attention.prune_heads(heads) - @add_start_docstrings_to_callable(BERT_GENERATION_INPUTS_DOCSTRING.format("batch_size, sequence_length")) + @add_start_docstrings_to_model_forward(BERT_GENERATION_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="google/bert_for_seq_generation_L-24_bbc_encoder", @@ -421,7 +421,7 @@ class BertGenerationDecoder(BertGenerationPreTrainedModel): def get_output_embeddings(self): return self.lm_head.decoder - @add_start_docstrings_to_callable(BERT_GENERATION_INPUTS_DOCSTRING.format("batch_size, sequence_length")) + @add_start_docstrings_to_model_forward(BERT_GENERATION_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @replace_return_docstrings(output_type=CausalLMOutput, config_class=_CONFIG_FOR_DOC) def forward( self, diff --git a/src/transformers/modeling_ctrl.py b/src/transformers/modeling_ctrl.py index 5597bb9296..c3ee2f4149 100644 --- a/src/transformers/modeling_ctrl.py +++ b/src/transformers/modeling_ctrl.py @@ -24,7 +24,7 @@ import torch.nn as nn from torch.nn import CrossEntropyLoss from .configuration_ctrl import CTRLConfig -from .file_utils import add_code_sample_docstrings, add_start_docstrings, add_start_docstrings_to_callable +from .file_utils import add_code_sample_docstrings, add_start_docstrings, add_start_docstrings_to_model_forward from .modeling_outputs import BaseModelOutputWithPast, CausalLMOutputWithPast from .modeling_utils import Conv1D, PreTrainedModel, find_pruneable_heads_and_indices, prune_linear_layer from .utils import logging @@ -349,7 +349,7 @@ class CTRLModel(CTRLPreTrainedModel): for layer, heads in heads_to_prune.items(): self.h[layer].multi_head_attention.prune_heads(heads) - @add_start_docstrings_to_callable(CTRL_INPUTS_DOCSTRING) + @add_start_docstrings_to_model_forward(CTRL_INPUTS_DOCSTRING) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="ctrl", @@ -521,7 +521,7 @@ class CTRLLMHeadModel(CTRLPreTrainedModel): return {"input_ids": input_ids, "past_key_values": past, "use_cache": kwargs["use_cache"]} - @add_start_docstrings_to_callable(CTRL_INPUTS_DOCSTRING) + @add_start_docstrings_to_model_forward(CTRL_INPUTS_DOCSTRING) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="ctrl", diff --git a/src/transformers/modeling_deberta.py b/src/transformers/modeling_deberta.py index a5558f31ea..c0da8073f8 100644 --- a/src/transformers/modeling_deberta.py +++ b/src/transformers/modeling_deberta.py @@ -24,7 +24,7 @@ from torch.nn import CrossEntropyLoss from .activations import ACT2FN from .configuration_deberta import DebertaConfig -from .file_utils import add_code_sample_docstrings, add_start_docstrings, add_start_docstrings_to_callable +from .file_utils import add_code_sample_docstrings, add_start_docstrings, add_start_docstrings_to_model_forward from .modeling_outputs import BaseModelOutput, SequenceClassifierOutput from .modeling_utils import PreTrainedModel from .utils import logging @@ -858,7 +858,7 @@ class DebertaModel(DebertaPreTrainedModel): """ raise NotImplementedError("The prune function is not implemented in DeBERTa model.") - @add_start_docstrings_to_callable(DEBERTA_INPUTS_DOCSTRING.format("batch_size, sequence_length")) + @add_start_docstrings_to_model_forward(DEBERTA_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="microsoft/deberta-base", @@ -976,7 +976,7 @@ class DebertaForSequenceClassification(DebertaPreTrainedModel): def set_input_embeddings(self, new_embeddings): self.deberta.set_input_embeddings(new_embeddings) - @add_start_docstrings_to_callable(DEBERTA_INPUTS_DOCSTRING.format("batch_size, sequence_length")) + @add_start_docstrings_to_model_forward(DEBERTA_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="microsoft/deberta-base", diff --git a/src/transformers/modeling_distilbert.py b/src/transformers/modeling_distilbert.py index e946761626..d8ee191a09 100755 --- a/src/transformers/modeling_distilbert.py +++ b/src/transformers/modeling_distilbert.py @@ -32,7 +32,7 @@ from .configuration_distilbert import DistilBertConfig from .file_utils import ( add_code_sample_docstrings, add_start_docstrings, - add_start_docstrings_to_callable, + add_start_docstrings_to_model_forward, replace_return_docstrings, ) from .modeling_outputs import ( @@ -436,7 +436,7 @@ class DistilBertModel(DistilBertPreTrainedModel): for layer, heads in heads_to_prune.items(): self.transformer.layer[layer].attention.prune_heads(heads) - @add_start_docstrings_to_callable(DISTILBERT_INPUTS_DOCSTRING.format("batch_size, num_choices")) + @add_start_docstrings_to_model_forward(DISTILBERT_INPUTS_DOCSTRING.format("batch_size, num_choices")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="distilbert-base-uncased", @@ -509,7 +509,7 @@ class DistilBertForMaskedLM(DistilBertPreTrainedModel): def get_output_embeddings(self): return self.vocab_projector - @add_start_docstrings_to_callable(DISTILBERT_INPUTS_DOCSTRING.format("batch_size, num_choices")) + @add_start_docstrings_to_model_forward(DISTILBERT_INPUTS_DOCSTRING.format("batch_size, num_choices")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="distilbert-base-uncased", @@ -595,7 +595,7 @@ class DistilBertForSequenceClassification(DistilBertPreTrainedModel): self.init_weights() - @add_start_docstrings_to_callable(DISTILBERT_INPUTS_DOCSTRING.format("batch_size, num_choices")) + @add_start_docstrings_to_model_forward(DISTILBERT_INPUTS_DOCSTRING.format("batch_size, num_choices")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="distilbert-base-uncased", @@ -676,7 +676,7 @@ class DistilBertForQuestionAnswering(DistilBertPreTrainedModel): self.init_weights() - @add_start_docstrings_to_callable(DISTILBERT_INPUTS_DOCSTRING.format("batch_size, num_choices")) + @add_start_docstrings_to_model_forward(DISTILBERT_INPUTS_DOCSTRING.format("batch_size, num_choices")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="distilbert-base-uncased", @@ -772,7 +772,7 @@ class DistilBertForTokenClassification(DistilBertPreTrainedModel): self.init_weights() - @add_start_docstrings_to_callable(DISTILBERT_INPUTS_DOCSTRING) + @add_start_docstrings_to_model_forward(DISTILBERT_INPUTS_DOCSTRING) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="distilbert-base-uncased", @@ -856,7 +856,9 @@ class DistilBertForMultipleChoice(DistilBertPreTrainedModel): self.init_weights() - @add_start_docstrings_to_callable(DISTILBERT_INPUTS_DOCSTRING.format("batch_size, num_choices, sequence_length")) + @add_start_docstrings_to_model_forward( + DISTILBERT_INPUTS_DOCSTRING.format("batch_size, num_choices, sequence_length") + ) @replace_return_docstrings(output_type=MultipleChoiceModelOutput, config_class=_CONFIG_FOR_DOC) def forward( self, diff --git a/src/transformers/modeling_dpr.py b/src/transformers/modeling_dpr.py index cadc728126..0de028d303 100644 --- a/src/transformers/modeling_dpr.py +++ b/src/transformers/modeling_dpr.py @@ -22,7 +22,12 @@ import torch from torch import Tensor, nn from .configuration_dpr import DPRConfig -from .file_utils import ModelOutput, add_start_docstrings, add_start_docstrings_to_callable, replace_return_docstrings +from .file_utils import ( + ModelOutput, + add_start_docstrings, + add_start_docstrings_to_model_forward, + replace_return_docstrings, +) from .modeling_bert import BertModel from .modeling_outputs import BaseModelOutputWithPooling from .modeling_utils import PreTrainedModel @@ -431,7 +436,7 @@ class DPRContextEncoder(DPRPretrainedContextEncoder): self.ctx_encoder = DPREncoder(config) self.init_weights() - @add_start_docstrings_to_callable(DPR_ENCODERS_INPUTS_DOCSTRING) + @add_start_docstrings_to_model_forward(DPR_ENCODERS_INPUTS_DOCSTRING) @replace_return_docstrings(output_type=DPRContextEncoderOutput, config_class=_CONFIG_FOR_DOC) def forward( self, @@ -509,7 +514,7 @@ class DPRQuestionEncoder(DPRPretrainedQuestionEncoder): self.question_encoder = DPREncoder(config) self.init_weights() - @add_start_docstrings_to_callable(DPR_ENCODERS_INPUTS_DOCSTRING) + @add_start_docstrings_to_model_forward(DPR_ENCODERS_INPUTS_DOCSTRING) @replace_return_docstrings(output_type=DPRQuestionEncoderOutput, config_class=_CONFIG_FOR_DOC) def forward( self, @@ -586,7 +591,7 @@ class DPRReader(DPRPretrainedReader): self.span_predictor = DPRSpanPredictor(config) self.init_weights() - @add_start_docstrings_to_callable(DPR_READER_INPUTS_DOCSTRING) + @add_start_docstrings_to_model_forward(DPR_READER_INPUTS_DOCSTRING) @replace_return_docstrings(output_type=DPRReaderOutput, config_class=_CONFIG_FOR_DOC) def forward( self, diff --git a/src/transformers/modeling_electra.py b/src/transformers/modeling_electra.py index c8f2122a3c..dac498a366 100644 --- a/src/transformers/modeling_electra.py +++ b/src/transformers/modeling_electra.py @@ -30,7 +30,7 @@ from .file_utils import ( ModelOutput, add_code_sample_docstrings, add_start_docstrings, - add_start_docstrings_to_callable, + add_start_docstrings_to_model_forward, replace_return_docstrings, ) from .modeling_outputs import ( @@ -693,7 +693,7 @@ class ElectraModel(ElectraPreTrainedModel): for layer, heads in heads_to_prune.items(): self.encoder.layer[layer].attention.prune_heads(heads) - @add_start_docstrings_to_callable(ELECTRA_INPUTS_DOCSTRING.format("batch_size, sequence_length")) + @add_start_docstrings_to_model_forward(ELECTRA_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="google/electra-small-discriminator", @@ -791,7 +791,7 @@ class ElectraForSequenceClassification(ElectraPreTrainedModel): self.init_weights() - @add_start_docstrings_to_callable(ELECTRA_INPUTS_DOCSTRING.format("batch_size, sequence_length")) + @add_start_docstrings_to_model_forward(ELECTRA_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="google/electra-small-discriminator", @@ -873,7 +873,7 @@ class ElectraForPreTraining(ElectraPreTrainedModel): self.discriminator_predictions = ElectraDiscriminatorPredictions(config) self.init_weights() - @add_start_docstrings_to_callable(ELECTRA_INPUTS_DOCSTRING.format("batch_size, sequence_length")) + @add_start_docstrings_to_model_forward(ELECTRA_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @replace_return_docstrings(output_type=ElectraForPreTrainingOutput, config_class=_CONFIG_FOR_DOC) def forward( self, @@ -971,7 +971,7 @@ class ElectraForMaskedLM(ElectraPreTrainedModel): def get_output_embeddings(self): return self.generator_lm_head - @add_start_docstrings_to_callable(ELECTRA_INPUTS_DOCSTRING.format("batch_size, sequence_length")) + @add_start_docstrings_to_model_forward(ELECTRA_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="google/electra-small-discriminator", @@ -1060,7 +1060,7 @@ class ElectraForTokenClassification(ElectraPreTrainedModel): self.classifier = nn.Linear(config.hidden_size, config.num_labels) self.init_weights() - @add_start_docstrings_to_callable(ELECTRA_INPUTS_DOCSTRING.format("batch_size, sequence_length")) + @add_start_docstrings_to_model_forward(ELECTRA_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="google/electra-small-discriminator", @@ -1147,7 +1147,7 @@ class ElectraForQuestionAnswering(ElectraPreTrainedModel): self.init_weights() - @add_start_docstrings_to_callable(ELECTRA_INPUTS_DOCSTRING.format("batch_size, sequence_length")) + @add_start_docstrings_to_model_forward(ELECTRA_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="google/electra-small-discriminator", @@ -1248,7 +1248,7 @@ class ElectraForMultipleChoice(ElectraPreTrainedModel): self.init_weights() - @add_start_docstrings_to_callable(ELECTRA_INPUTS_DOCSTRING.format("batch_size, num_choices, sequence_length")) + @add_start_docstrings_to_model_forward(ELECTRA_INPUTS_DOCSTRING.format("batch_size, num_choices, sequence_length")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="google/electra-small-discriminator", diff --git a/src/transformers/modeling_encoder_decoder.py b/src/transformers/modeling_encoder_decoder.py index 9fe504173d..6d6e3f788b 100644 --- a/src/transformers/modeling_encoder_decoder.py +++ b/src/transformers/modeling_encoder_decoder.py @@ -19,7 +19,7 @@ from typing import Optional from .configuration_encoder_decoder import EncoderDecoderConfig from .configuration_utils import PretrainedConfig -from .file_utils import add_start_docstrings, add_start_docstrings_to_callable, replace_return_docstrings +from .file_utils import add_start_docstrings, add_start_docstrings_to_model_forward, replace_return_docstrings from .modeling_outputs import Seq2SeqLMOutput from .modeling_utils import PreTrainedModel from .utils import logging @@ -335,7 +335,7 @@ class EncoderDecoderModel(PreTrainedModel): config = EncoderDecoderConfig.from_encoder_decoder_configs(encoder.config, decoder.config, **kwargs) return cls(encoder=encoder, decoder=decoder, config=config) - @add_start_docstrings_to_callable(ENCODER_DECODER_INPUTS_DOCSTRING) + @add_start_docstrings_to_model_forward(ENCODER_DECODER_INPUTS_DOCSTRING) @replace_return_docstrings(output_type=Seq2SeqLMOutput, config_class=_CONFIG_FOR_DOC) def forward( self, diff --git a/src/transformers/modeling_flaubert.py b/src/transformers/modeling_flaubert.py index bcbc0ac541..b8d23cab53 100644 --- a/src/transformers/modeling_flaubert.py +++ b/src/transformers/modeling_flaubert.py @@ -21,7 +21,7 @@ import torch from torch.nn import functional as F from .configuration_flaubert import FlaubertConfig -from .file_utils import add_code_sample_docstrings, add_start_docstrings, add_start_docstrings_to_callable +from .file_utils import add_code_sample_docstrings, add_start_docstrings, add_start_docstrings_to_model_forward from .modeling_outputs import BaseModelOutput from .modeling_xlm import ( XLMForMultipleChoice, @@ -140,7 +140,7 @@ class FlaubertModel(XLMModel): self.layerdrop = getattr(config, "layerdrop", 0.0) self.pre_norm = getattr(config, "pre_norm", False) - @add_start_docstrings_to_callable(FLAUBERT_INPUTS_DOCSTRING) + @add_start_docstrings_to_model_forward(FLAUBERT_INPUTS_DOCSTRING) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="flaubert/flaubert_base_cased", diff --git a/src/transformers/modeling_fsmt.py b/src/transformers/modeling_fsmt.py index 6174cef8fa..800eec9b7e 100644 --- a/src/transformers/modeling_fsmt.py +++ b/src/transformers/modeling_fsmt.py @@ -43,7 +43,7 @@ from .file_utils import ( add_code_sample_docstrings, add_end_docstrings, add_start_docstrings, - add_start_docstrings_to_callable, + add_start_docstrings_to_model_forward, replace_return_docstrings, ) from .modeling_outputs import BaseModelOutput, BaseModelOutputWithPast, Seq2SeqLMOutput, Seq2SeqModelOutput @@ -899,7 +899,7 @@ class FSMTModel(PretrainedFSMTModel): self.init_weights() - @add_start_docstrings_to_callable(FSMT_INPUTS_DOCSTRING) + @add_start_docstrings_to_model_forward(FSMT_INPUTS_DOCSTRING) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="facebook/wmt19-ru-en", @@ -1039,7 +1039,7 @@ class FSMTForConditionalGeneration(PretrainedFSMTModel): return new_embeddings - @add_start_docstrings_to_callable(FSMT_INPUTS_DOCSTRING) + @add_start_docstrings_to_model_forward(FSMT_INPUTS_DOCSTRING) @replace_return_docstrings(output_type=Seq2SeqLMOutput, config_class=_CONFIG_FOR_DOC) @add_end_docstrings(FSMT_GENERATION_EXAMPLE) def forward( diff --git a/src/transformers/modeling_funnel.py b/src/transformers/modeling_funnel.py index aff9fc0be2..a96b62ba04 100644 --- a/src/transformers/modeling_funnel.py +++ b/src/transformers/modeling_funnel.py @@ -30,7 +30,7 @@ from .file_utils import ( ModelOutput, add_code_sample_docstrings, add_start_docstrings, - add_start_docstrings_to_callable, + add_start_docstrings_to_model_forward, replace_return_docstrings, ) from .modeling_outputs import ( @@ -906,7 +906,7 @@ class FunnelBaseModel(FunnelPreTrainedModel): def set_input_embeddings(self, new_embeddings): self.embeddings.word_embeddings = new_embeddings - @add_start_docstrings_to_callable(FUNNEL_INPUTS_DOCSTRING.format("batch_size, sequence_length")) + @add_start_docstrings_to_model_forward(FUNNEL_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="funnel-transformer/small-base", @@ -983,7 +983,7 @@ class FunnelModel(FunnelPreTrainedModel): def set_input_embeddings(self, new_embeddings): self.embeddings.word_embeddings = new_embeddings - @add_start_docstrings_to_callable(FUNNEL_INPUTS_DOCSTRING.format("batch_size, sequence_length")) + @add_start_docstrings_to_model_forward(FUNNEL_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="funnel-transformer/small", @@ -1082,7 +1082,7 @@ class FunnelForPreTraining(FunnelPreTrainedModel): self.discriminator_predictions = FunnelDiscriminatorPredictions(config) self.init_weights() - @add_start_docstrings_to_callable(FUNNEL_INPUTS_DOCSTRING.format("batch_size, sequence_length")) + @add_start_docstrings_to_model_forward(FUNNEL_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @replace_return_docstrings(output_type=FunnelForPreTrainingOutput, config_class=_CONFIG_FOR_DOC) def forward( self, @@ -1167,7 +1167,7 @@ class FunnelForMaskedLM(FunnelPreTrainedModel): def get_output_embeddings(self): return self.lm_head - @add_start_docstrings_to_callable(FUNNEL_INPUTS_DOCSTRING.format("batch_size, sequence_length")) + @add_start_docstrings_to_model_forward(FUNNEL_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="funnel-transformer/small", @@ -1240,7 +1240,7 @@ class FunnelForSequenceClassification(FunnelPreTrainedModel): self.classifier = FunnelClassificationHead(config, config.num_labels) self.init_weights() - @add_start_docstrings_to_callable(FUNNEL_INPUTS_DOCSTRING.format("batch_size, sequence_length")) + @add_start_docstrings_to_model_forward(FUNNEL_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="funnel-transformer/small-base", @@ -1317,7 +1317,7 @@ class FunnelForMultipleChoice(FunnelPreTrainedModel): self.classifier = FunnelClassificationHead(config, 1) self.init_weights() - @add_start_docstrings_to_callable(FUNNEL_INPUTS_DOCSTRING.format("batch_size, num_choices, sequence_length")) + @add_start_docstrings_to_model_forward(FUNNEL_INPUTS_DOCSTRING.format("batch_size, num_choices, sequence_length")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="funnel-transformer/small-base", @@ -1403,7 +1403,7 @@ class FunnelForTokenClassification(FunnelPreTrainedModel): self.init_weights() - @add_start_docstrings_to_callable(FUNNEL_INPUTS_DOCSTRING.format("batch_size, sequence_length")) + @add_start_docstrings_to_model_forward(FUNNEL_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="funnel-transformer/small", @@ -1485,7 +1485,7 @@ class FunnelForQuestionAnswering(FunnelPreTrainedModel): self.init_weights() - @add_start_docstrings_to_callable(FUNNEL_INPUTS_DOCSTRING.format("batch_size, sequence_length")) + @add_start_docstrings_to_model_forward(FUNNEL_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="funnel-transformer/small", diff --git a/src/transformers/modeling_gpt2.py b/src/transformers/modeling_gpt2.py index 4f8ea4c7b3..22c7bd5919 100644 --- a/src/transformers/modeling_gpt2.py +++ b/src/transformers/modeling_gpt2.py @@ -30,7 +30,7 @@ from .file_utils import ( ModelOutput, add_code_sample_docstrings, add_start_docstrings, - add_start_docstrings_to_callable, + add_start_docstrings_to_model_forward, replace_return_docstrings, ) from .modeling_outputs import BaseModelOutputWithPast, CausalLMOutputWithPast, SequenceClassifierOutputWithPast @@ -502,7 +502,7 @@ class GPT2Model(GPT2PreTrainedModel): for layer, heads in heads_to_prune.items(): self.h[layer].attn.prune_heads(heads) - @add_start_docstrings_to_callable(GPT2_INPUTS_DOCSTRING) + @add_start_docstrings_to_model_forward(GPT2_INPUTS_DOCSTRING) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="gpt2", @@ -723,7 +723,7 @@ class GPT2LMHeadModel(GPT2PreTrainedModel): "attention_mask": attention_mask, } - @add_start_docstrings_to_callable(GPT2_INPUTS_DOCSTRING) + @add_start_docstrings_to_model_forward(GPT2_INPUTS_DOCSTRING) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="gpt2", @@ -837,7 +837,7 @@ class GPT2DoubleHeadsModel(GPT2PreTrainedModel): "use_cache": kwargs.get("use_cache"), } - @add_start_docstrings_to_callable(GPT2_INPUTS_DOCSTRING) + @add_start_docstrings_to_model_forward(GPT2_INPUTS_DOCSTRING) @replace_return_docstrings(output_type=GPT2DoubleHeadsModelOutput, config_class=_CONFIG_FOR_DOC) def forward( self, @@ -987,7 +987,7 @@ class GPT2ForSequenceClassification(GPT2PreTrainedModel): self.init_weights() - @add_start_docstrings_to_callable(GPT2_INPUTS_DOCSTRING) + @add_start_docstrings_to_model_forward(GPT2_INPUTS_DOCSTRING) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="microsoft/dialogrpt", diff --git a/src/transformers/modeling_layoutlm.py b/src/transformers/modeling_layoutlm.py index 4e2319cc91..29ff2ce77c 100644 --- a/src/transformers/modeling_layoutlm.py +++ b/src/transformers/modeling_layoutlm.py @@ -23,7 +23,7 @@ from torch.nn import CrossEntropyLoss from .activations import ACT2FN from .configuration_layoutlm import LayoutLMConfig -from .file_utils import add_code_sample_docstrings, add_start_docstrings, add_start_docstrings_to_callable +from .file_utils import add_code_sample_docstrings, add_start_docstrings, add_start_docstrings_to_model_forward from .modeling_outputs import BaseModelOutput, BaseModelOutputWithPooling, MaskedLMOutput, TokenClassifierOutput from .modeling_utils import ( PreTrainedModel, @@ -607,7 +607,7 @@ class LayoutLMModel(LayoutLMPreTrainedModel): for layer, heads in heads_to_prune.items(): self.encoder.layer[layer].attention.prune_heads(heads) - @add_start_docstrings_to_callable(LAYOUTLM_INPUTS_DOCSTRING.format("(batch_size, sequence_length)")) + @add_start_docstrings_to_model_forward(LAYOUTLM_INPUTS_DOCSTRING.format("(batch_size, sequence_length)")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="layoutlm-base-uncased", @@ -744,7 +744,7 @@ class LayoutLMForMaskedLM(LayoutLMPreTrainedModel): def get_output_embeddings(self): return self.cls.predictions.decoder - @add_start_docstrings_to_callable(LAYOUTLM_INPUTS_DOCSTRING.format("(batch_size, sequence_length)")) + @add_start_docstrings_to_model_forward(LAYOUTLM_INPUTS_DOCSTRING.format("(batch_size, sequence_length)")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="layoutlm-base-uncased", @@ -832,7 +832,7 @@ class LayoutLMForTokenClassification(LayoutLMPreTrainedModel): def get_input_embeddings(self): return self.layoutlm.embeddings.word_embeddings - @add_start_docstrings_to_callable(LAYOUTLM_INPUTS_DOCSTRING.format("(batch_size, sequence_length)")) + @add_start_docstrings_to_model_forward(LAYOUTLM_INPUTS_DOCSTRING.format("(batch_size, sequence_length)")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="layoutlm-base-uncased", diff --git a/src/transformers/modeling_longformer.py b/src/transformers/modeling_longformer.py index 754f312d1f..2a2f7e1634 100755 --- a/src/transformers/modeling_longformer.py +++ b/src/transformers/modeling_longformer.py @@ -27,7 +27,7 @@ from .configuration_longformer import LongformerConfig from .file_utils import ( add_code_sample_docstrings, add_start_docstrings, - add_start_docstrings_to_callable, + add_start_docstrings_to_model_forward, replace_return_docstrings, ) from .modeling_outputs import ( @@ -1181,7 +1181,7 @@ class LongformerModel(LongformerPreTrainedModel): attention_mask = global_attention_mask + 1 return attention_mask - @add_start_docstrings_to_callable(LONGFORMER_INPUTS_DOCSTRING.format("batch_size, sequence_length")) + @add_start_docstrings_to_model_forward(LONGFORMER_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @replace_return_docstrings(output_type=BaseModelOutputWithPooling, config_class=_CONFIG_FOR_DOC) def forward( self, @@ -1308,7 +1308,7 @@ class LongformerForMaskedLM(LongformerPreTrainedModel): def get_output_embeddings(self): return self.lm_head.decoder - @add_start_docstrings_to_callable(LONGFORMER_INPUTS_DOCSTRING.format("batch_size, sequence_length")) + @add_start_docstrings_to_model_forward(LONGFORMER_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @replace_return_docstrings(output_type=MaskedLMOutput, config_class=_CONFIG_FOR_DOC) def forward( self, @@ -1412,7 +1412,7 @@ class LongformerForSequenceClassification(LongformerPreTrainedModel): self.init_weights() - @add_start_docstrings_to_callable(LONGFORMER_INPUTS_DOCSTRING.format("batch_size, sequence_length")) + @add_start_docstrings_to_model_forward(LONGFORMER_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="allenai/longformer-base-4096", @@ -1521,7 +1521,7 @@ class LongformerForQuestionAnswering(LongformerPreTrainedModel): self.init_weights() - @add_start_docstrings_to_callable(LONGFORMER_INPUTS_DOCSTRING.format("batch_size, sequence_length")) + @add_start_docstrings_to_model_forward(LONGFORMER_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @replace_return_docstrings(output_type=QuestionAnsweringModelOutput, config_class=_CONFIG_FOR_DOC) def forward( self, @@ -1655,7 +1655,7 @@ class LongformerForTokenClassification(LongformerPreTrainedModel): self.init_weights() - @add_start_docstrings_to_callable(LONGFORMER_INPUTS_DOCSTRING.format("batch_size, sequence_length")) + @add_start_docstrings_to_model_forward(LONGFORMER_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="allenai/longformer-base-4096", @@ -1742,7 +1742,9 @@ class LongformerForMultipleChoice(LongformerPreTrainedModel): self.init_weights() - @add_start_docstrings_to_callable(LONGFORMER_INPUTS_DOCSTRING.format("batch_size, num_choices, sequence_length")) + @add_start_docstrings_to_model_forward( + LONGFORMER_INPUTS_DOCSTRING.format("batch_size, num_choices, sequence_length") + ) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="allenai/longformer-base-4096", diff --git a/src/transformers/modeling_lxmert.py b/src/transformers/modeling_lxmert.py index cacbd56fcb..e917c916f4 100644 --- a/src/transformers/modeling_lxmert.py +++ b/src/transformers/modeling_lxmert.py @@ -30,7 +30,7 @@ from .file_utils import ( ModelOutput, add_code_sample_docstrings, add_start_docstrings, - add_start_docstrings_to_callable, + add_start_docstrings_to_model_forward, replace_return_docstrings, ) from .modeling_utils import PreTrainedModel @@ -893,7 +893,7 @@ class LxmertModel(LxmertPreTrainedModel): def set_input_embeddings(self, new_embeddings): self.embeddings.word_embeddings = new_embeddings - @add_start_docstrings_to_callable(LXMERT_INPUTS_DOCSTRING.format("batch_size, sequence_length")) + @add_start_docstrings_to_model_forward(LXMERT_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="unc-nlp/lxmert-base-uncased", @@ -1145,7 +1145,7 @@ class LxmertForPreTraining(LxmertPreTrainedModel): return new_qa_logit_layer - @add_start_docstrings_to_callable(LXMERT_INPUTS_DOCSTRING.format("batch_size, sequence_length")) + @add_start_docstrings_to_model_forward(LXMERT_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @replace_return_docstrings(output_type=LxmertForPreTrainingOutput, config_class=_CONFIG_FOR_DOC) def forward( self, @@ -1368,7 +1368,7 @@ class LxmertForQuestionAnswering(LxmertPreTrainedModel): return new_qa_logit_layer - @add_start_docstrings_to_callable(LXMERT_INPUTS_DOCSTRING.format("batch_size, sequence_length")) + @add_start_docstrings_to_model_forward(LXMERT_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="unc-nlp/lxmert-base-uncased", diff --git a/src/transformers/modeling_mmbt.py b/src/transformers/modeling_mmbt.py index c2ef997eeb..53d9e7535f 100644 --- a/src/transformers/modeling_mmbt.py +++ b/src/transformers/modeling_mmbt.py @@ -20,7 +20,7 @@ import torch import torch.nn as nn from torch.nn import CrossEntropyLoss, MSELoss -from .file_utils import add_start_docstrings, add_start_docstrings_to_callable, replace_return_docstrings +from .file_utils import add_start_docstrings, add_start_docstrings_to_model_forward, replace_return_docstrings from .modeling_outputs import BaseModelOutputWithPooling, SequenceClassifierOutput from .modeling_utils import ModuleUtilsMixin from .utils import logging @@ -187,7 +187,7 @@ class MMBTModel(nn.Module, ModuleUtilsMixin): self.transformer = transformer self.modal_encoder = ModalEmbeddings(config, encoder, transformer.embeddings) - @add_start_docstrings_to_callable(MMBT_INPUTS_DOCSTRING) + @add_start_docstrings_to_model_forward(MMBT_INPUTS_DOCSTRING) @replace_return_docstrings(output_type=BaseModelOutputWithPooling, config_class=_CONFIG_FOR_DOC) def forward( self, diff --git a/src/transformers/modeling_mobilebert.py b/src/transformers/modeling_mobilebert.py index 4ed636251a..8d744b4258 100644 --- a/src/transformers/modeling_mobilebert.py +++ b/src/transformers/modeling_mobilebert.py @@ -37,7 +37,7 @@ from .file_utils import ( ModelOutput, add_code_sample_docstrings, add_start_docstrings, - add_start_docstrings_to_callable, + add_start_docstrings_to_model_forward, replace_return_docstrings, ) from .modeling_outputs import ( @@ -837,7 +837,7 @@ class MobileBertModel(MobileBertPreTrainedModel): for layer, heads in heads_to_prune.items(): self.encoder.layer[layer].attention.prune_heads(heads) - @add_start_docstrings_to_callable(MOBILEBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length")) + @add_start_docstrings_to_model_forward(MOBILEBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="google/mobilebert-uncased", @@ -970,7 +970,7 @@ class MobileBertForPreTraining(MobileBertPreTrainedModel): if output_embeddings is not None and self.config.tie_word_embeddings: self._tie_or_clone_weights(output_embeddings, self.get_input_embeddings()) - @add_start_docstrings_to_callable(MOBILEBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length")) + @add_start_docstrings_to_model_forward(MOBILEBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @replace_return_docstrings(output_type=MobileBertForPreTrainingOutput, config_class=_CONFIG_FOR_DOC) def forward( self, @@ -1088,7 +1088,7 @@ class MobileBertForMaskedLM(MobileBertPreTrainedModel): if output_embeddings is not None and self.config.tie_word_embeddings: self._tie_or_clone_weights(output_embeddings, self.get_input_embeddings()) - @add_start_docstrings_to_callable(MOBILEBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length")) + @add_start_docstrings_to_model_forward(MOBILEBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="google/mobilebert-uncased", @@ -1184,7 +1184,7 @@ class MobileBertForNextSentencePrediction(MobileBertPreTrainedModel): self.init_weights() - @add_start_docstrings_to_callable(MOBILEBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length")) + @add_start_docstrings_to_model_forward(MOBILEBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @replace_return_docstrings(output_type=NextSentencePredictorOutput, config_class=_CONFIG_FOR_DOC) def forward( self, @@ -1276,7 +1276,7 @@ class MobileBertForSequenceClassification(MobileBertPreTrainedModel): self.init_weights() - @add_start_docstrings_to_callable(MOBILEBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length")) + @add_start_docstrings_to_model_forward(MOBILEBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="google/mobilebert-uncased", @@ -1361,7 +1361,7 @@ class MobileBertForQuestionAnswering(MobileBertPreTrainedModel): self.init_weights() - @add_start_docstrings_to_callable(MOBILEBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length")) + @add_start_docstrings_to_model_forward(MOBILEBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="google/mobilebert-uncased", @@ -1460,7 +1460,9 @@ class MobileBertForMultipleChoice(MobileBertPreTrainedModel): self.init_weights() - @add_start_docstrings_to_callable(MOBILEBERT_INPUTS_DOCSTRING.format("batch_size, num_choices, sequence_length")) + @add_start_docstrings_to_model_forward( + MOBILEBERT_INPUTS_DOCSTRING.format("batch_size, num_choices, sequence_length") + ) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="google/mobilebert-uncased", @@ -1555,7 +1557,7 @@ class MobileBertForTokenClassification(MobileBertPreTrainedModel): self.init_weights() - @add_start_docstrings_to_callable(MOBILEBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length")) + @add_start_docstrings_to_model_forward(MOBILEBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="google/mobilebert-uncased", diff --git a/src/transformers/modeling_openai.py b/src/transformers/modeling_openai.py index 7ff6bfbd27..b69ef557b9 100644 --- a/src/transformers/modeling_openai.py +++ b/src/transformers/modeling_openai.py @@ -33,7 +33,7 @@ from .file_utils import ( ModelOutput, add_code_sample_docstrings, add_start_docstrings, - add_start_docstrings_to_callable, + add_start_docstrings_to_model_forward, replace_return_docstrings, ) from .modeling_outputs import BaseModelOutput, CausalLMOutput, SequenceClassifierOutput @@ -427,7 +427,7 @@ class OpenAIGPTModel(OpenAIGPTPreTrainedModel): for layer, heads in heads_to_prune.items(): self.h[layer].attn.prune_heads(heads) - @add_start_docstrings_to_callable(OPENAI_GPT_INPUTS_DOCSTRING) + @add_start_docstrings_to_model_forward(OPENAI_GPT_INPUTS_DOCSTRING) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="openai-gpt", @@ -543,7 +543,7 @@ class OpenAIGPTLMHeadModel(OpenAIGPTPreTrainedModel): def get_output_embeddings(self): return self.lm_head - @add_start_docstrings_to_callable(OPENAI_GPT_INPUTS_DOCSTRING) + @add_start_docstrings_to_model_forward(OPENAI_GPT_INPUTS_DOCSTRING) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="openai-gpt", @@ -629,7 +629,7 @@ class OpenAIGPTDoubleHeadsModel(OpenAIGPTPreTrainedModel): def get_output_embeddings(self): return self.lm_head - @add_start_docstrings_to_callable(OPENAI_GPT_INPUTS_DOCSTRING) + @add_start_docstrings_to_model_forward(OPENAI_GPT_INPUTS_DOCSTRING) @replace_return_docstrings(output_type=OpenAIGPTDoubleHeadsModelOutput, config_class=_CONFIG_FOR_DOC) def forward( self, @@ -754,7 +754,7 @@ class OpenAIGPTForSequenceClassification(OpenAIGPTPreTrainedModel): self.init_weights() - @add_start_docstrings_to_callable(OPENAI_GPT_INPUTS_DOCSTRING) + @add_start_docstrings_to_model_forward(OPENAI_GPT_INPUTS_DOCSTRING) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="openai-gpt", diff --git a/src/transformers/modeling_prophetnet.py b/src/transformers/modeling_prophetnet.py index 0e43fac220..57e4e4c6a3 100644 --- a/src/transformers/modeling_prophetnet.py +++ b/src/transformers/modeling_prophetnet.py @@ -25,7 +25,12 @@ from torch import Tensor, nn from .activations import ACT2FN from .configuration_prophetnet import ProphetNetConfig -from .file_utils import ModelOutput, add_start_docstrings, add_start_docstrings_to_callable, replace_return_docstrings +from .file_utils import ( + ModelOutput, + add_start_docstrings, + add_start_docstrings_to_model_forward, + replace_return_docstrings, +) from .modeling_outputs import BaseModelOutput from .modeling_utils import PreTrainedModel from .utils import logging @@ -1138,7 +1143,7 @@ class ProphetNetEncoder(ProphetNetPreTrainedModel): def set_input_embeddings(self, value): self.word_embeddings = value - @add_start_docstrings_to_callable(PROPHETNET_STANDALONE_INPUTS_DOCSTRING) + @add_start_docstrings_to_model_forward(PROPHETNET_STANDALONE_INPUTS_DOCSTRING) @replace_return_docstrings(output_type=BaseModelOutput, config_class=_CONFIG_FOR_DOC) def forward( self, @@ -1254,7 +1259,7 @@ class ProphetNetDecoder(ProphetNetPreTrainedModel): def set_input_embeddings(self, value): self.word_embeddings = value - @add_start_docstrings_to_callable(PROPHETNET_STANDALONE_INPUTS_DOCSTRING) + @add_start_docstrings_to_model_forward(PROPHETNET_STANDALONE_INPUTS_DOCSTRING) @replace_return_docstrings(output_type=ProphetNetDecoderModelOutput, config_class=_CONFIG_FOR_DOC) def forward( self, @@ -1570,7 +1575,7 @@ class ProphetNetModel(ProphetNetPreTrainedModel): def get_decoder(self): return self.decoder - @add_start_docstrings_to_callable(PROPHETNET_INPUTS_DOCSTRING) + @add_start_docstrings_to_model_forward(PROPHETNET_INPUTS_DOCSTRING) @replace_return_docstrings(output_type=ProphetNetSeq2SeqModelOutput, config_class=_CONFIG_FOR_DOC) def forward( self, @@ -1674,7 +1679,7 @@ class ProphetNetForConditionalGeneration(ProphetNetPreTrainedModel): def get_input_embeddings(self): return self.prophetnet.word_embeddings - @add_start_docstrings_to_callable(PROPHETNET_INPUTS_DOCSTRING) + @add_start_docstrings_to_model_forward(PROPHETNET_INPUTS_DOCSTRING) @replace_return_docstrings(output_type=ProphetNetSeq2SeqLMOutput, config_class=_CONFIG_FOR_DOC) def forward( self, @@ -1865,7 +1870,7 @@ class ProphetNetForCausalLM(ProphetNetPreTrainedModel): def get_output_embeddings(self): return self.lm_head - @add_start_docstrings_to_callable(PROPHETNET_STANDALONE_INPUTS_DOCSTRING) + @add_start_docstrings_to_model_forward(PROPHETNET_STANDALONE_INPUTS_DOCSTRING) @replace_return_docstrings(output_type=ProphetNetDecoderLMOutput, config_class=_CONFIG_FOR_DOC) def forward( self, diff --git a/src/transformers/modeling_rag.py b/src/transformers/modeling_rag.py index f8b6215f78..c5809e1436 100644 --- a/src/transformers/modeling_rag.py +++ b/src/transformers/modeling_rag.py @@ -21,7 +21,7 @@ import torch from .configuration_rag import RagConfig from .configuration_utils import PretrainedConfig -from .file_utils import add_start_docstrings_to_callable, replace_return_docstrings +from .file_utils import add_start_docstrings_to_model_forward, replace_return_docstrings from .modeling_outputs import ModelOutput from .modeling_utils import PreTrainedModel from .retrieval_rag import RagRetriever @@ -459,7 +459,7 @@ RAG_FORWARD_INPUTS_DOCSTRING = r""" """ -@add_start_docstrings_to_callable(RAG_START_DOCSTRING) +@add_start_docstrings_to_model_forward(RAG_START_DOCSTRING) class RagModel(RagPreTrainedModel): def __init__( self, @@ -502,7 +502,7 @@ class RagModel(RagPreTrainedModel): self.question_encoder = question_encoder self.generator = generator - @add_start_docstrings_to_callable(RAG_FORWARD_INPUTS_DOCSTRING) + @add_start_docstrings_to_model_forward(RAG_FORWARD_INPUTS_DOCSTRING) @replace_return_docstrings(output_type=RetrievAugLMOutput, config_class=_CONFIG_FOR_DOC) def forward( self, @@ -658,7 +658,7 @@ class RagModel(RagPreTrainedModel): ) -@add_start_docstrings_to_callable( +@add_start_docstrings_to_model_forward( """ A RAG-sequence model impementation. It performs RAG-sequence specific marginalization in the forward pass. """, @@ -687,7 +687,7 @@ class RagSequenceForGeneration(RagPreTrainedModel): def set_retriever(self, retriever: RagRetriever): self.rag.retriever = retriever - @add_start_docstrings_to_callable(RAG_FORWARD_INPUTS_DOCSTRING) + @add_start_docstrings_to_model_forward(RAG_FORWARD_INPUTS_DOCSTRING) @replace_return_docstrings(output_type=RetrievAugLMMarginOutput, config_class=_CONFIG_FOR_DOC) def forward( self, @@ -984,7 +984,7 @@ class RagSequenceForGeneration(RagPreTrainedModel): return output -@add_start_docstrings_to_callable( +@add_start_docstrings_to_model_forward( """ A RAG-token model impementation. It performs RAG-token specific marginalization in the forward pass. """, @@ -1080,7 +1080,7 @@ class RagTokenForGeneration(RagPreTrainedModel): log_prob_sum = seq_logprobs + doc_logprobs.unsqueeze(-1).unsqueeze(-1) return torch.logsumexp(log_prob_sum, dim=1) - @add_start_docstrings_to_callable(RAG_FORWARD_INPUTS_DOCSTRING) + @add_start_docstrings_to_model_forward(RAG_FORWARD_INPUTS_DOCSTRING) @replace_return_docstrings(output_type=RetrievAugLMMarginOutput, config_class=_CONFIG_FOR_DOC) def forward( self, diff --git a/src/transformers/modeling_reformer.py b/src/transformers/modeling_reformer.py index 986152bcbb..c31a92087d 100755 --- a/src/transformers/modeling_reformer.py +++ b/src/transformers/modeling_reformer.py @@ -36,7 +36,7 @@ from .file_utils import ( ModelOutput, add_code_sample_docstrings, add_start_docstrings, - add_start_docstrings_to_callable, + add_start_docstrings_to_model_forward, ) from .modeling_outputs import CausalLMOutput, MaskedLMOutput, QuestionAnsweringModelOutput, SequenceClassifierOutput from .modeling_utils import PreTrainedModel, apply_chunking_to_forward @@ -1991,7 +1991,7 @@ class ReformerModel(ReformerPreTrainedModel): for layer, heads in heads_to_prune.items(): self.encoder.layer[layer].attention.prune_heads(heads) - @add_start_docstrings_to_callable(REFORMER_INPUTS_DOCSTRING) + @add_start_docstrings_to_model_forward(REFORMER_INPUTS_DOCSTRING) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="google/reformer-crime-and-punishment", @@ -2195,7 +2195,7 @@ class ReformerModelWithLMHead(ReformerPreTrainedModel): def get_output_embeddings(self): return self.lm_head.decoder - @add_start_docstrings_to_callable(REFORMER_INPUTS_DOCSTRING) + @add_start_docstrings_to_model_forward(REFORMER_INPUTS_DOCSTRING) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="google/reformer-crime-and-punishment", @@ -2309,7 +2309,7 @@ class ReformerForMaskedLM(ReformerPreTrainedModel): def get_output_embeddings(self): return self.lm_head.decoder - @add_start_docstrings_to_callable(REFORMER_INPUTS_DOCSTRING) + @add_start_docstrings_to_model_forward(REFORMER_INPUTS_DOCSTRING) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="google/reformer-crime-and-punishment", @@ -2389,7 +2389,7 @@ class ReformerForSequenceClassification(ReformerPreTrainedModel): self.init_weights() - @add_start_docstrings_to_callable(REFORMER_INPUTS_DOCSTRING) + @add_start_docstrings_to_model_forward(REFORMER_INPUTS_DOCSTRING) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="google/reformer-crime-and-punishment", @@ -2491,7 +2491,7 @@ class ReformerForQuestionAnswering(ReformerPreTrainedModel): self.init_weights() - @add_start_docstrings_to_callable(REFORMER_INPUTS_DOCSTRING) + @add_start_docstrings_to_model_forward(REFORMER_INPUTS_DOCSTRING) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="google/reformer-crime-and-punishment", diff --git a/src/transformers/modeling_roberta.py b/src/transformers/modeling_roberta.py index 5ba435bcb1..1f676b9fef 100644 --- a/src/transformers/modeling_roberta.py +++ b/src/transformers/modeling_roberta.py @@ -27,7 +27,7 @@ from .configuration_roberta import RobertaConfig from .file_utils import ( add_code_sample_docstrings, add_start_docstrings, - add_start_docstrings_to_callable, + add_start_docstrings_to_model_forward, replace_return_docstrings, ) from .modeling_outputs import ( @@ -595,7 +595,7 @@ class RobertaModel(RobertaPreTrainedModel): for layer, heads in heads_to_prune.items(): self.encoder.layer[layer].attention.prune_heads(heads) - @add_start_docstrings_to_callable(ROBERTA_INPUTS_DOCSTRING.format("(batch_size, sequence_length)")) + @add_start_docstrings_to_model_forward(ROBERTA_INPUTS_DOCSTRING.format("(batch_size, sequence_length)")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="roberta-base", @@ -718,7 +718,7 @@ class RobertaForCausalLM(RobertaPreTrainedModel): def get_output_embeddings(self): return self.lm_head.decoder - @add_start_docstrings_to_callable(ROBERTA_INPUTS_DOCSTRING.format("batch_size, sequence_length")) + @add_start_docstrings_to_model_forward(ROBERTA_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @replace_return_docstrings(output_type=CausalLMOutput, config_class=_CONFIG_FOR_DOC) def forward( self, @@ -838,7 +838,7 @@ class RobertaForMaskedLM(RobertaPreTrainedModel): def get_output_embeddings(self): return self.lm_head.decoder - @add_start_docstrings_to_callable(ROBERTA_INPUTS_DOCSTRING.format("batch_size, sequence_length")) + @add_start_docstrings_to_model_forward(ROBERTA_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="roberta-base", @@ -956,7 +956,7 @@ class RobertaForSequenceClassification(RobertaPreTrainedModel): self.init_weights() - @add_start_docstrings_to_callable(ROBERTA_INPUTS_DOCSTRING.format("batch_size, sequence_length")) + @add_start_docstrings_to_model_forward(ROBERTA_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="roberta-base", @@ -1039,7 +1039,7 @@ class RobertaForMultipleChoice(RobertaPreTrainedModel): self.init_weights() - @add_start_docstrings_to_callable(ROBERTA_INPUTS_DOCSTRING.format("batch_size, num_choices, sequence_length")) + @add_start_docstrings_to_model_forward(ROBERTA_INPUTS_DOCSTRING.format("batch_size, num_choices, sequence_length")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="roberta-base", @@ -1133,7 +1133,7 @@ class RobertaForTokenClassification(RobertaPreTrainedModel): self.init_weights() - @add_start_docstrings_to_callable(ROBERTA_INPUTS_DOCSTRING.format("batch_size, sequence_length")) + @add_start_docstrings_to_model_forward(ROBERTA_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="roberta-base", @@ -1242,7 +1242,7 @@ class RobertaForQuestionAnswering(RobertaPreTrainedModel): self.init_weights() - @add_start_docstrings_to_callable(ROBERTA_INPUTS_DOCSTRING.format("batch_size, sequence_length")) + @add_start_docstrings_to_model_forward(ROBERTA_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="roberta-base", diff --git a/src/transformers/modeling_squeezebert.py b/src/transformers/modeling_squeezebert.py index 67e97deb0f..620280a316 100644 --- a/src/transformers/modeling_squeezebert.py +++ b/src/transformers/modeling_squeezebert.py @@ -23,7 +23,7 @@ from torch.nn import CrossEntropyLoss, MSELoss from .activations import ACT2FN from .configuration_squeezebert import SqueezeBertConfig -from .file_utils import add_code_sample_docstrings, add_start_docstrings, add_start_docstrings_to_callable +from .file_utils import add_code_sample_docstrings, add_start_docstrings, add_start_docstrings_to_model_forward from .modeling_outputs import ( BaseModelOutput, BaseModelOutputWithPooling, @@ -518,7 +518,7 @@ class SqueezeBertModel(SqueezeBertPreTrainedModel): for layer, heads in heads_to_prune.items(): self.encoder.layer[layer].attention.prune_heads(heads) - @add_start_docstrings_to_callable(SQUEEZEBERT_INPUTS_DOCSTRING.format("(batch_size, sequence_length)")) + @add_start_docstrings_to_model_forward(SQUEEZEBERT_INPUTS_DOCSTRING.format("(batch_size, sequence_length)")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="squeezebert/squeezebert-mnli-headless", @@ -605,7 +605,7 @@ class SqueezeBertForMaskedLM(SqueezeBertPreTrainedModel): def get_output_embeddings(self): return self.lm_head - @add_start_docstrings_to_callable(SQUEEZEBERT_INPUTS_DOCSTRING.format("(batch_size, sequence_length)")) + @add_start_docstrings_to_model_forward(SQUEEZEBERT_INPUTS_DOCSTRING.format("(batch_size, sequence_length)")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="squeezebert/squeezebert-uncased", @@ -683,7 +683,7 @@ class SqueezeBertForSequenceClassification(SqueezeBertPreTrainedModel): self.init_weights() - @add_start_docstrings_to_callable(SQUEEZEBERT_INPUTS_DOCSTRING.format("(batch_size, sequence_length)")) + @add_start_docstrings_to_model_forward(SQUEEZEBERT_INPUTS_DOCSTRING.format("(batch_size, sequence_length)")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="squeezebert/squeezebert-mnli-headless", @@ -767,7 +767,7 @@ class SqueezeBertForMultipleChoice(SqueezeBertPreTrainedModel): self.init_weights() - @add_start_docstrings_to_callable( + @add_start_docstrings_to_model_forward( SQUEEZEBERT_INPUTS_DOCSTRING.format("(batch_size, num_choices, sequence_length)") ) @add_code_sample_docstrings( @@ -861,7 +861,7 @@ class SqueezeBertForTokenClassification(SqueezeBertPreTrainedModel): self.init_weights() - @add_start_docstrings_to_callable(SQUEEZEBERT_INPUTS_DOCSTRING.format("(batch_size, sequence_length)")) + @add_start_docstrings_to_model_forward(SQUEEZEBERT_INPUTS_DOCSTRING.format("(batch_size, sequence_length)")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="squeezebert/squeezebert-mnli-headless", @@ -948,7 +948,7 @@ class SqueezeBertForQuestionAnswering(SqueezeBertPreTrainedModel): self.init_weights() - @add_start_docstrings_to_callable(SQUEEZEBERT_INPUTS_DOCSTRING.format("(batch_size, sequence_length)")) + @add_start_docstrings_to_model_forward(SQUEEZEBERT_INPUTS_DOCSTRING.format("(batch_size, sequence_length)")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="squeezebert/squeezebert-mnli-headless", diff --git a/src/transformers/modeling_t5.py b/src/transformers/modeling_t5.py index 8e50bd85a1..38712c43a8 100644 --- a/src/transformers/modeling_t5.py +++ b/src/transformers/modeling_t5.py @@ -30,7 +30,7 @@ from .file_utils import ( DUMMY_INPUTS, DUMMY_MASK, add_start_docstrings, - add_start_docstrings_to_callable, + add_start_docstrings_to_model_forward, replace_return_docstrings, ) from .modeling_outputs import BaseModelOutput, BaseModelOutputWithPast, Seq2SeqLMOutput, Seq2SeqModelOutput @@ -943,7 +943,7 @@ class T5Model(T5PreTrainedModel): for layer, heads in heads_to_prune.items(): self.encoder.layer[layer].attention.prune_heads(heads) - @add_start_docstrings_to_callable(T5_INPUTS_DOCSTRING) + @add_start_docstrings_to_model_forward(T5_INPUTS_DOCSTRING) @replace_return_docstrings(output_type=Seq2SeqModelOutput, config_class=_CONFIG_FOR_DOC) def forward( self, @@ -1086,7 +1086,7 @@ class T5ForConditionalGeneration(T5PreTrainedModel): def get_decoder(self): return self.decoder - @add_start_docstrings_to_callable(T5_INPUTS_DOCSTRING) + @add_start_docstrings_to_model_forward(T5_INPUTS_DOCSTRING) @replace_return_docstrings(output_type=Seq2SeqLMOutput, config_class=_CONFIG_FOR_DOC) def forward( self, diff --git a/src/transformers/modeling_tf_albert.py b/src/transformers/modeling_tf_albert.py index c30f876cd4..0159ed7139 100644 --- a/src/transformers/modeling_tf_albert.py +++ b/src/transformers/modeling_tf_albert.py @@ -28,7 +28,7 @@ from .file_utils import ( ModelOutput, add_code_sample_docstrings, add_start_docstrings, - add_start_docstrings_to_callable, + add_start_docstrings_to_model_forward, replace_return_docstrings, ) from .modeling_tf_outputs import ( @@ -747,7 +747,7 @@ class TFAlbertModel(TFAlbertPreTrainedModel): super().__init__(config, *inputs, **kwargs) self.albert = TFAlbertMainLayer(config, name="albert") - @add_start_docstrings_to_callable(ALBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length")) + @add_start_docstrings_to_model_forward(ALBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="albert-base-v2", @@ -778,7 +778,7 @@ class TFAlbertForPreTraining(TFAlbertPreTrainedModel): def get_output_embeddings(self): return self.albert.embeddings - @add_start_docstrings_to_callable(ALBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length")) + @add_start_docstrings_to_model_forward(ALBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @replace_return_docstrings(output_type=TFAlbertForPreTrainingOutput, config_class=_CONFIG_FOR_DOC) def call(self, inputs, **kwargs): r""" @@ -847,7 +847,7 @@ class TFAlbertForMaskedLM(TFAlbertPreTrainedModel, TFMaskedLanguageModelingLoss) def get_output_embeddings(self): return self.albert.embeddings - @add_start_docstrings_to_callable(ALBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length")) + @add_start_docstrings_to_model_forward(ALBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="albert-base-v2", @@ -930,7 +930,7 @@ class TFAlbertForSequenceClassification(TFAlbertPreTrainedModel, TFSequenceClass config.num_labels, kernel_initializer=get_initializer(config.initializer_range), name="classifier" ) - @add_start_docstrings_to_callable(ALBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length")) + @add_start_docstrings_to_model_forward(ALBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="albert-base-v2", @@ -1018,7 +1018,7 @@ class TFAlbertForTokenClassification(TFAlbertPreTrainedModel, TFTokenClassificat config.num_labels, kernel_initializer=get_initializer(config.initializer_range), name="classifier" ) - @add_start_docstrings_to_callable(ALBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length")) + @add_start_docstrings_to_model_forward(ALBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="albert-base-v2", @@ -1104,7 +1104,7 @@ class TFAlbertForQuestionAnswering(TFAlbertPreTrainedModel, TFQuestionAnsweringL config.num_labels, kernel_initializer=get_initializer(config.initializer_range), name="qa_outputs" ) - @add_start_docstrings_to_callable(ALBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length")) + @add_start_docstrings_to_model_forward(ALBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="albert-base-v2", @@ -1212,7 +1212,7 @@ class TFAlbertForMultipleChoice(TFAlbertPreTrainedModel, TFMultipleChoiceLoss): """ return {"input_ids": tf.constant(MULTIPLE_CHOICE_DUMMY_INPUTS)} - @add_start_docstrings_to_callable(ALBERT_INPUTS_DOCSTRING.format("batch_size, num_choices, sequence_length")) + @add_start_docstrings_to_model_forward(ALBERT_INPUTS_DOCSTRING.format("batch_size, num_choices, sequence_length")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="albert-base-v2", diff --git a/src/transformers/modeling_tf_bart.py b/src/transformers/modeling_tf_bart.py index a2e05cf971..64ce19f08a 100644 --- a/src/transformers/modeling_tf_bart.py +++ b/src/transformers/modeling_tf_bart.py @@ -25,7 +25,7 @@ from tensorflow.keras.layers import Dense, LayerNormalization from .activations_tf import ACT2FN from .configuration_bart import BartConfig -from .file_utils import add_start_docstrings, add_start_docstrings_to_callable, replace_return_docstrings +from .file_utils import add_start_docstrings, add_start_docstrings_to_model_forward, replace_return_docstrings from .modeling_tf_outputs import TFBaseModelOutput, TFBaseModelOutputWithPast, TFSeq2SeqLMOutput, TFSeq2SeqModelOutput # Public API @@ -827,7 +827,7 @@ class TFBartModel(TFPretrainedBartModel): causal_lm_mask = causal_attention_mask(tgt_len, tgt_len, mask_dtype) return decoder_input_ids, decoder_padding_mask, causal_lm_mask - @add_start_docstrings_to_callable(BART_INPUTS_DOCSTRING) + @add_start_docstrings_to_model_forward(BART_INPUTS_DOCSTRING) @replace_return_docstrings(output_type=TFSeq2SeqModelOutput, config_class=_CONFIG_FOR_DOC) def call( self, @@ -961,7 +961,7 @@ class TFBartForConditionalGeneration(TFPretrainedBartModel): self.model = TFBartModel(config, name="model") self.use_cache = config.use_cache - @add_start_docstrings_to_callable(BART_INPUTS_DOCSTRING) + @add_start_docstrings_to_model_forward(BART_INPUTS_DOCSTRING) @replace_return_docstrings(output_type=TFSeq2SeqLMOutput, config_class=_CONFIG_FOR_DOC) def call( self, diff --git a/src/transformers/modeling_tf_bert.py b/src/transformers/modeling_tf_bert.py index b17540eba4..f4e9a622b2 100644 --- a/src/transformers/modeling_tf_bert.py +++ b/src/transformers/modeling_tf_bert.py @@ -28,7 +28,7 @@ from .file_utils import ( ModelOutput, add_code_sample_docstrings, add_start_docstrings, - add_start_docstrings_to_callable, + add_start_docstrings_to_model_forward, replace_return_docstrings, ) from .modeling_tf_outputs import ( @@ -793,7 +793,7 @@ class TFBertModel(TFBertPreTrainedModel): self.bert = TFBertMainLayer(config, name="bert") - @add_start_docstrings_to_callable(BERT_INPUTS_DOCSTRING.format("batch_size, sequence_length")) + @add_start_docstrings_to_model_forward(BERT_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="bert-base-cased", @@ -824,7 +824,7 @@ class TFBertForPreTraining(TFBertPreTrainedModel): def get_output_embeddings(self): return self.bert.embeddings - @add_start_docstrings_to_callable(BERT_INPUTS_DOCSTRING.format("batch_size, sequence_length")) + @add_start_docstrings_to_model_forward(BERT_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @replace_return_docstrings(output_type=TFBertForPreTrainingOutput, config_class=_CONFIG_FOR_DOC) def call(self, inputs, **kwargs): r""" @@ -881,7 +881,7 @@ class TFBertForMaskedLM(TFBertPreTrainedModel, TFMaskedLanguageModelingLoss): def get_output_embeddings(self): return self.bert.embeddings - @add_start_docstrings_to_callable(BERT_INPUTS_DOCSTRING.format("batch_size, sequence_length")) + @add_start_docstrings_to_model_forward(BERT_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="bert-base-cased", @@ -1043,7 +1043,7 @@ class TFBertForNextSentencePrediction(TFBertPreTrainedModel): self.bert = TFBertMainLayer(config, name="bert") self.nsp = TFBertNSPHead(config, name="nsp___cls") - @add_start_docstrings_to_callable(BERT_INPUTS_DOCSTRING.format("batch_size, sequence_length")) + @add_start_docstrings_to_model_forward(BERT_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @replace_return_docstrings(output_type=TFNextSentencePredictorOutput, config_class=_CONFIG_FOR_DOC) def call(self, inputs, **kwargs): r""" @@ -1098,7 +1098,7 @@ class TFBertForSequenceClassification(TFBertPreTrainedModel, TFSequenceClassific config.num_labels, kernel_initializer=get_initializer(config.initializer_range), name="classifier" ) - @add_start_docstrings_to_callable(BERT_INPUTS_DOCSTRING.format("batch_size, sequence_length")) + @add_start_docstrings_to_model_forward(BERT_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="bert-base-cased", @@ -1191,7 +1191,7 @@ class TFBertForMultipleChoice(TFBertPreTrainedModel, TFMultipleChoiceLoss): """ return {"input_ids": tf.constant(MULTIPLE_CHOICE_DUMMY_INPUTS)} - @add_start_docstrings_to_callable(BERT_INPUTS_DOCSTRING.format("batch_size, num_choices, sequence_length")) + @add_start_docstrings_to_model_forward(BERT_INPUTS_DOCSTRING.format("batch_size, num_choices, sequence_length")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="bert-base-cased", @@ -1315,7 +1315,7 @@ class TFBertForTokenClassification(TFBertPreTrainedModel, TFTokenClassificationL config.num_labels, kernel_initializer=get_initializer(config.initializer_range), name="classifier" ) - @add_start_docstrings_to_callable(BERT_INPUTS_DOCSTRING.format("batch_size, sequence_length")) + @add_start_docstrings_to_model_forward(BERT_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="bert-base-cased", @@ -1400,7 +1400,7 @@ class TFBertForQuestionAnswering(TFBertPreTrainedModel, TFQuestionAnsweringLoss) config.num_labels, kernel_initializer=get_initializer(config.initializer_range), name="qa_outputs" ) - @add_start_docstrings_to_callable(BERT_INPUTS_DOCSTRING.format("batch_size, sequence_length")) + @add_start_docstrings_to_model_forward(BERT_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="bert-base-cased", diff --git a/src/transformers/modeling_tf_ctrl.py b/src/transformers/modeling_tf_ctrl.py index b277161cec..804614f16a 100644 --- a/src/transformers/modeling_tf_ctrl.py +++ b/src/transformers/modeling_tf_ctrl.py @@ -20,7 +20,7 @@ import numpy as np import tensorflow as tf from .configuration_ctrl import CTRLConfig -from .file_utils import add_code_sample_docstrings, add_start_docstrings, add_start_docstrings_to_callable +from .file_utils import add_code_sample_docstrings, add_start_docstrings, add_start_docstrings_to_model_forward from .modeling_tf_outputs import TFBaseModelOutputWithPast, TFCausalLMOutputWithPast from .modeling_tf_utils import ( TFCausalLanguageModelingLoss, @@ -547,7 +547,7 @@ class TFCTRLModel(TFCTRLPreTrainedModel): super().__init__(config, *inputs, **kwargs) self.transformer = TFCTRLMainLayer(config, name="transformer") - @add_start_docstrings_to_callable(CTRL_INPUTS_DOCSTRING) + @add_start_docstrings_to_model_forward(CTRL_INPUTS_DOCSTRING) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="ctrl", @@ -602,7 +602,7 @@ class TFCTRLLMHeadModel(TFCTRLPreTrainedModel, TFCausalLanguageModelingLoss): return {"inputs": inputs, "past": past, "use_cache": kwargs["use_cache"]} - @add_start_docstrings_to_callable(CTRL_INPUTS_DOCSTRING) + @add_start_docstrings_to_model_forward(CTRL_INPUTS_DOCSTRING) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="ctrl", diff --git a/src/transformers/modeling_tf_distilbert.py b/src/transformers/modeling_tf_distilbert.py index c5b0929d11..56e23a5ccd 100644 --- a/src/transformers/modeling_tf_distilbert.py +++ b/src/transformers/modeling_tf_distilbert.py @@ -25,7 +25,7 @@ from .file_utils import ( MULTIPLE_CHOICE_DUMMY_INPUTS, add_code_sample_docstrings, add_start_docstrings, - add_start_docstrings_to_callable, + add_start_docstrings_to_model_forward, ) from .modeling_tf_outputs import ( TFBaseModelOutput, @@ -579,7 +579,7 @@ class TFDistilBertModel(TFDistilBertPreTrainedModel): super().__init__(config, *inputs, **kwargs) self.distilbert = TFDistilBertMainLayer(config, name="distilbert") # Embeddings - @add_start_docstrings_to_callable(DISTILBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length")) + @add_start_docstrings_to_model_forward(DISTILBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="distilbert-base-uncased", @@ -630,7 +630,7 @@ class TFDistilBertForMaskedLM(TFDistilBertPreTrainedModel, TFMaskedLanguageModel def get_output_embeddings(self): return self.vocab_projector.input_embeddings - @add_start_docstrings_to_callable(DISTILBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length")) + @add_start_docstrings_to_model_forward(DISTILBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="distilbert-base-uncased", @@ -718,7 +718,7 @@ class TFDistilBertForSequenceClassification(TFDistilBertPreTrainedModel, TFSeque ) self.dropout = tf.keras.layers.Dropout(config.seq_classif_dropout) - @add_start_docstrings_to_callable(DISTILBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length")) + @add_start_docstrings_to_model_forward(DISTILBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="distilbert-base-uncased", @@ -800,7 +800,7 @@ class TFDistilBertForTokenClassification(TFDistilBertPreTrainedModel, TFTokenCla config.num_labels, kernel_initializer=get_initializer(config.initializer_range), name="classifier" ) - @add_start_docstrings_to_callable(DISTILBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length")) + @add_start_docstrings_to_model_forward(DISTILBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="distilbert-base-uncased", @@ -895,7 +895,9 @@ class TFDistilBertForMultipleChoice(TFDistilBertPreTrainedModel, TFMultipleChoic """ return {"input_ids": tf.constant(MULTIPLE_CHOICE_DUMMY_INPUTS)} - @add_start_docstrings_to_callable(DISTILBERT_INPUTS_DOCSTRING.format("batch_size, num_choices, sequence_length")) + @add_start_docstrings_to_model_forward( + DISTILBERT_INPUTS_DOCSTRING.format("batch_size, num_choices, sequence_length") + ) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="distilbert-base-uncased", @@ -1007,7 +1009,7 @@ class TFDistilBertForQuestionAnswering(TFDistilBertPreTrainedModel, TFQuestionAn assert config.num_labels == 2, f"Incorrect number of labels {config.num_labels} instead of 2" self.dropout = tf.keras.layers.Dropout(config.qa_dropout) - @add_start_docstrings_to_callable(DISTILBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length")) + @add_start_docstrings_to_model_forward(DISTILBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="distilbert-base-uncased", diff --git a/src/transformers/modeling_tf_electra.py b/src/transformers/modeling_tf_electra.py index 6744194e0f..d01776af97 100644 --- a/src/transformers/modeling_tf_electra.py +++ b/src/transformers/modeling_tf_electra.py @@ -11,7 +11,7 @@ from .file_utils import ( ModelOutput, add_code_sample_docstrings, add_start_docstrings, - add_start_docstrings_to_callable, + add_start_docstrings_to_model_forward, replace_return_docstrings, ) from .modeling_tf_outputs import ( @@ -719,7 +719,7 @@ class TFElectraModel(TFElectraPreTrainedModel): self.electra = TFElectraMainLayer(config, name="electra") - @add_start_docstrings_to_callable(ELECTRA_INPUTS_DOCSTRING.format("batch_size, sequence_length")) + @add_start_docstrings_to_model_forward(ELECTRA_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="google/electra-small-discriminator", @@ -749,7 +749,7 @@ class TFElectraForPreTraining(TFElectraPreTrainedModel): self.electra = TFElectraMainLayer(config, name="electra") self.discriminator_predictions = TFElectraDiscriminatorPredictions(config, name="discriminator_predictions") - @add_start_docstrings_to_callable(ELECTRA_INPUTS_DOCSTRING.format("batch_size, sequence_length")) + @add_start_docstrings_to_model_forward(ELECTRA_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @replace_return_docstrings(output_type=TFElectraForPreTrainingOutput, config_class=_CONFIG_FOR_DOC) def call( self, @@ -858,7 +858,7 @@ class TFElectraForMaskedLM(TFElectraPreTrainedModel, TFMaskedLanguageModelingLos def get_output_embeddings(self): return self.generator_lm_head - @add_start_docstrings_to_callable(ELECTRA_INPUTS_DOCSTRING.format("batch_size, sequence_length")) + @add_start_docstrings_to_model_forward(ELECTRA_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="google/electra-small-generator", @@ -971,7 +971,7 @@ class TFElectraForSequenceClassification(TFElectraPreTrainedModel, TFSequenceCla self.electra = TFElectraMainLayer(config, name="electra") self.classifier = TFElectraClassificationHead(config, name="classifier") - @add_start_docstrings_to_callable(ELECTRA_INPUTS_DOCSTRING.format("batch_size, sequence_length")) + @add_start_docstrings_to_model_forward(ELECTRA_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="google/electra-small-discriminator", @@ -1072,7 +1072,7 @@ class TFElectraForMultipleChoice(TFElectraPreTrainedModel, TFMultipleChoiceLoss) """ return {"input_ids": tf.constant(MULTIPLE_CHOICE_DUMMY_INPUTS)} - @add_start_docstrings_to_callable(ELECTRA_INPUTS_DOCSTRING.format("batch_size, num_choices, sequence_length")) + @add_start_docstrings_to_model_forward(ELECTRA_INPUTS_DOCSTRING.format("batch_size, num_choices, sequence_length")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="google/electra-small-discriminator", @@ -1192,7 +1192,7 @@ class TFElectraForTokenClassification(TFElectraPreTrainedModel, TFTokenClassific config.num_labels, kernel_initializer=get_initializer(config.initializer_range), name="classifier" ) - @add_start_docstrings_to_callable(ELECTRA_INPUTS_DOCSTRING.format("batch_size, sequence_length")) + @add_start_docstrings_to_model_forward(ELECTRA_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="google/electra-small-discriminator", @@ -1275,7 +1275,7 @@ class TFElectraForQuestionAnswering(TFElectraPreTrainedModel, TFQuestionAnswerin config.num_labels, kernel_initializer=get_initializer(config.initializer_range), name="qa_outputs" ) - @add_start_docstrings_to_callable(ELECTRA_INPUTS_DOCSTRING.format("batch_size, sequence_length")) + @add_start_docstrings_to_model_forward(ELECTRA_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="google/electra-small-discriminator", diff --git a/src/transformers/modeling_tf_flaubert.py b/src/transformers/modeling_tf_flaubert.py index 44bbebac63..be69b2d747 100644 --- a/src/transformers/modeling_tf_flaubert.py +++ b/src/transformers/modeling_tf_flaubert.py @@ -25,7 +25,12 @@ import tensorflow as tf from transformers.activations_tf import get_tf_activation from .configuration_flaubert import FlaubertConfig -from .file_utils import ModelOutput, add_code_sample_docstrings, add_start_docstrings, add_start_docstrings_to_callable +from .file_utils import ( + ModelOutput, + add_code_sample_docstrings, + add_start_docstrings, + add_start_docstrings_to_model_forward, +) from .modeling_tf_outputs import TFBaseModelOutput from .modeling_tf_utils import TFPreTrainedModel, TFSharedEmbeddings, get_initializer, keras_serializable, shape_list from .modeling_tf_xlm import ( @@ -217,7 +222,7 @@ class TFFlaubertModel(TFFlaubertPreTrainedModel): super().__init__(config, *inputs, **kwargs) self.transformer = TFFlaubertMainLayer(config, name="transformer") - @add_start_docstrings_to_callable(FLAUBERT_INPUTS_DOCSTRING) + @add_start_docstrings_to_model_forward(FLAUBERT_INPUTS_DOCSTRING) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="jplu/tf-flaubert-small-cased", @@ -721,7 +726,7 @@ class TFFlaubertWithLMHeadModel(TFFlaubertPreTrainedModel): langs = None return {"inputs": inputs, "langs": langs} - @add_start_docstrings_to_callable(FLAUBERT_INPUTS_DOCSTRING) + @add_start_docstrings_to_model_forward(FLAUBERT_INPUTS_DOCSTRING) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="jplu/tf-flaubert-small-cased", diff --git a/src/transformers/modeling_tf_funnel.py b/src/transformers/modeling_tf_funnel.py index 9cbde8190d..4b706ed856 100644 --- a/src/transformers/modeling_tf_funnel.py +++ b/src/transformers/modeling_tf_funnel.py @@ -27,7 +27,7 @@ from .file_utils import ( ModelOutput, add_code_sample_docstrings, add_start_docstrings, - add_start_docstrings_to_callable, + add_start_docstrings_to_model_forward, replace_return_docstrings, ) from .modeling_tf_outputs import ( @@ -1148,7 +1148,7 @@ class TFFunnelBaseModel(TFFunnelPreTrainedModel): super().__init__(config, *inputs, **kwargs) self.funnel = TFFunnelBaseLayer(config, name="funnel") - @add_start_docstrings_to_callable(FUNNEL_INPUTS_DOCSTRING.format("batch_size, sequence_length")) + @add_start_docstrings_to_model_forward(FUNNEL_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="funnel-transformer/small-base", @@ -1168,7 +1168,7 @@ class TFFunnelModel(TFFunnelPreTrainedModel): super().__init__(config, *inputs, **kwargs) self.funnel = TFFunnelMainLayer(config, name="funnel") - @add_start_docstrings_to_callable(FUNNEL_INPUTS_DOCSTRING.format("batch_size, sequence_length")) + @add_start_docstrings_to_model_forward(FUNNEL_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="funnel-transformer/small", @@ -1192,7 +1192,7 @@ class TFFunnelForPreTraining(TFFunnelPreTrainedModel): self.funnel = TFFunnelMainLayer(config, name="funnel") self.discriminator_predictions = TFFunnelDiscriminatorPredictions(config, name="discriminator_predictions") - @add_start_docstrings_to_callable(FUNNEL_INPUTS_DOCSTRING.format("batch_size, sequence_length")) + @add_start_docstrings_to_model_forward(FUNNEL_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @replace_return_docstrings(output_type=TFFunnelForPreTrainingOutput, config_class=_CONFIG_FOR_DOC) def call( self, @@ -1259,7 +1259,7 @@ class TFFunnelForMaskedLM(TFFunnelPreTrainedModel, TFMaskedLanguageModelingLoss) self.funnel = TFFunnelMainLayer(config, name="funnel") self.lm_head = TFFunnelMaskedLMHead(config, self.funnel.embeddings, name="lm_head") - @add_start_docstrings_to_callable(FUNNEL_INPUTS_DOCSTRING.format("batch_size, sequence_length")) + @add_start_docstrings_to_model_forward(FUNNEL_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="funnel-transformer/small", @@ -1335,7 +1335,7 @@ class TFFunnelForSequenceClassification(TFFunnelPreTrainedModel, TFSequenceClass self.funnel = TFFunnelBaseLayer(config, name="funnel") self.classifier = TFFunnelClassificationHead(config, config.num_labels, name="classifier") - @add_start_docstrings_to_callable(FUNNEL_INPUTS_DOCSTRING.format("batch_size, sequence_length")) + @add_start_docstrings_to_model_forward(FUNNEL_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="funnel-transformer/small-base", @@ -1421,7 +1421,7 @@ class TFFunnelForMultipleChoice(TFFunnelPreTrainedModel, TFMultipleChoiceLoss): """ return {"input_ids": tf.constant(MULTIPLE_CHOICE_DUMMY_INPUTS)} - @add_start_docstrings_to_callable(FUNNEL_INPUTS_DOCSTRING.format("batch_size, num_choices, sequence_length")) + @add_start_docstrings_to_model_forward(FUNNEL_INPUTS_DOCSTRING.format("batch_size, num_choices, sequence_length")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="funnel-transformer/small-base", @@ -1534,7 +1534,7 @@ class TFFunnelForTokenClassification(TFFunnelPreTrainedModel, TFTokenClassificat config.num_labels, kernel_initializer=get_initializer(config.initializer_range), name="classifier" ) - @add_start_docstrings_to_callable(FUNNEL_INPUTS_DOCSTRING.format("batch_size, sequence_length")) + @add_start_docstrings_to_model_forward(FUNNEL_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="funnel-transformer/small", @@ -1613,7 +1613,7 @@ class TFFunnelForQuestionAnswering(TFFunnelPreTrainedModel, TFQuestionAnsweringL config.num_labels, kernel_initializer=get_initializer(config.initializer_range), name="qa_outputs" ) - @add_start_docstrings_to_callable(FUNNEL_INPUTS_DOCSTRING.format("batch_size, sequence_length")) + @add_start_docstrings_to_model_forward(FUNNEL_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="funnel-transformer/small", diff --git a/src/transformers/modeling_tf_gpt2.py b/src/transformers/modeling_tf_gpt2.py index 80aac00f60..b12634b441 100644 --- a/src/transformers/modeling_tf_gpt2.py +++ b/src/transformers/modeling_tf_gpt2.py @@ -27,7 +27,7 @@ from .file_utils import ( ModelOutput, add_code_sample_docstrings, add_start_docstrings, - add_start_docstrings_to_callable, + add_start_docstrings_to_model_forward, replace_return_docstrings, ) from .modeling_tf_outputs import TFBaseModelOutputWithPast, TFCausalLMOutputWithPast @@ -557,7 +557,7 @@ class TFGPT2Model(TFGPT2PreTrainedModel): super().__init__(config, *inputs, **kwargs) self.transformer = TFGPT2MainLayer(config, name="transformer") - @add_start_docstrings_to_callable(GPT2_INPUTS_DOCSTRING) + @add_start_docstrings_to_model_forward(GPT2_INPUTS_DOCSTRING) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="gpt2", @@ -591,7 +591,7 @@ class TFGPT2LMHeadModel(TFGPT2PreTrainedModel, TFCausalLanguageModelingLoss): return {"inputs": inputs, "past": past, "use_cache": kwargs["use_cache"]} - @add_start_docstrings_to_callable(GPT2_INPUTS_DOCSTRING) + @add_start_docstrings_to_model_forward(GPT2_INPUTS_DOCSTRING) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="gpt2", @@ -687,7 +687,7 @@ class TFGPT2DoubleHeadsModel(TFGPT2PreTrainedModel): def get_output_embeddings(self): return self.transformer.wte - @add_start_docstrings_to_callable(GPT2_INPUTS_DOCSTRING) + @add_start_docstrings_to_model_forward(GPT2_INPUTS_DOCSTRING) @replace_return_docstrings(output_type=TFGPT2DoubleHeadsModelOutput, config_class=_CONFIG_FOR_DOC) def call( self, diff --git a/src/transformers/modeling_tf_longformer.py b/src/transformers/modeling_tf_longformer.py index fafc223140..ebdbcc2773 100644 --- a/src/transformers/modeling_tf_longformer.py +++ b/src/transformers/modeling_tf_longformer.py @@ -19,7 +19,7 @@ import tensorflow as tf from transformers.activations_tf import get_tf_activation from .configuration_longformer import LongformerConfig -from .file_utils import add_code_sample_docstrings, add_start_docstrings, add_start_docstrings_to_callable +from .file_utils import add_code_sample_docstrings, add_start_docstrings, add_start_docstrings_to_model_forward from .modeling_tf_outputs import ( TFBaseModelOutput, TFBaseModelOutputWithPooling, @@ -1624,7 +1624,7 @@ class TFLongformerModel(TFLongformerPreTrainedModel): self.longformer = TFLongformerMainLayer(config, name="longformer") - @add_start_docstrings_to_callable(LONGFORMER_INPUTS_DOCSTRING.format("batch_size, sequence_length")) + @add_start_docstrings_to_model_forward(LONGFORMER_INPUTS_DOCSTRING.format("batch_size, sequence_length")) def call(self, inputs, **kwargs): outputs = self.longformer(inputs, **kwargs) @@ -1648,7 +1648,7 @@ class TFLongformerForMaskedLM(TFLongformerPreTrainedModel, TFMaskedLanguageModel def get_output_embeddings(self): return self.lm_head.decoder - @add_start_docstrings_to_callable(LONGFORMER_INPUTS_DOCSTRING.format("batch_size, sequence_length")) + @add_start_docstrings_to_model_forward(LONGFORMER_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="allenai/longformer-base-4096", @@ -1736,7 +1736,7 @@ class TFLongformerForQuestionAnswering(TFLongformerPreTrainedModel, TFQuestionAn name="qa_outputs", ) - @add_start_docstrings_to_callable(LONGFORMER_INPUTS_DOCSTRING.format("batch_size, sequence_length")) + @add_start_docstrings_to_model_forward(LONGFORMER_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="allenai/longformer-large-4096-finetuned-triviaqa", diff --git a/src/transformers/modeling_tf_lxmert.py b/src/transformers/modeling_tf_lxmert.py index a187f28790..bd9d2e0ca4 100644 --- a/src/transformers/modeling_tf_lxmert.py +++ b/src/transformers/modeling_tf_lxmert.py @@ -28,7 +28,7 @@ from .file_utils import ( ModelOutput, add_code_sample_docstrings, add_start_docstrings, - add_start_docstrings_to_callable, + add_start_docstrings_to_model_forward, replace_return_docstrings, ) from .modeling_tf_utils import TFPreTrainedModel, get_initializer, keras_serializable, shape_list @@ -970,7 +970,7 @@ class TFLxmertModel(TFLxmertPreTrainedModel): super().__init__(config, *inputs, **kwargs) self.lxmert = TFLxmertMainLayer(config, name="lxmert") - @add_start_docstrings_to_callable(LXMERT_INPUTS_DOCSTRING) + @add_start_docstrings_to_model_forward(LXMERT_INPUTS_DOCSTRING) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="unc-nlp/lxmert-base-uncased", @@ -1224,7 +1224,7 @@ class TFLxmertForPreTraining(TFLxmertPreTrainedModel): **({"obj_labels": obj_labels} if self.config.task_obj_predict else {}), } - @add_start_docstrings_to_callable(LXMERT_INPUTS_DOCSTRING) + @add_start_docstrings_to_model_forward(LXMERT_INPUTS_DOCSTRING) @replace_return_docstrings(output_type=TFLxmertForPreTrainingOutput, config_class=_CONFIG_FOR_DOC) def call( self, diff --git a/src/transformers/modeling_tf_mobilebert.py b/src/transformers/modeling_tf_mobilebert.py index c7b122f8c1..2873b542b4 100644 --- a/src/transformers/modeling_tf_mobilebert.py +++ b/src/transformers/modeling_tf_mobilebert.py @@ -28,7 +28,7 @@ from .file_utils import ( ModelOutput, add_code_sample_docstrings, add_start_docstrings, - add_start_docstrings_to_callable, + add_start_docstrings_to_model_forward, replace_return_docstrings, ) from .modeling_tf_outputs import ( @@ -960,7 +960,7 @@ class TFMobileBertModel(TFMobileBertPreTrainedModel): super().__init__(config, *inputs, **kwargs) self.mobilebert = TFMobileBertMainLayer(config, name="mobilebert") - @add_start_docstrings_to_callable(MOBILEBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length")) + @add_start_docstrings_to_model_forward(MOBILEBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="google/mobilebert-uncased", @@ -989,7 +989,7 @@ class TFMobileBertForPreTraining(TFMobileBertPreTrainedModel): def get_output_embeddings(self): return self.mobilebert.embeddings - @add_start_docstrings_to_callable(MOBILEBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length")) + @add_start_docstrings_to_model_forward(MOBILEBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @replace_return_docstrings(output_type=TFMobileBertForPreTrainingOutput, config_class=_CONFIG_FOR_DOC) def call(self, inputs, **kwargs): r""" @@ -1040,7 +1040,7 @@ class TFMobileBertForMaskedLM(TFMobileBertPreTrainedModel, TFMaskedLanguageModel def get_output_embeddings(self): return self.mobilebert.embeddings - @add_start_docstrings_to_callable(MOBILEBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length")) + @add_start_docstrings_to_model_forward(MOBILEBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="google/mobilebert-uncased", @@ -1126,7 +1126,7 @@ class TFMobileBertForNextSentencePrediction(TFMobileBertPreTrainedModel): self.mobilebert = TFMobileBertMainLayer(config, name="mobilebert") self.cls = TFMobileBertOnlyNSPHead(config, name="seq_relationship___cls") - @add_start_docstrings_to_callable(MOBILEBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length")) + @add_start_docstrings_to_model_forward(MOBILEBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @replace_return_docstrings(output_type=TFNextSentencePredictorOutput, config_class=_CONFIG_FOR_DOC) def call(self, inputs, **kwargs): r""" @@ -1181,7 +1181,7 @@ class TFMobileBertForSequenceClassification(TFMobileBertPreTrainedModel, TFSeque config.num_labels, kernel_initializer=get_initializer(config.initializer_range), name="classifier" ) - @add_start_docstrings_to_callable(MOBILEBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length")) + @add_start_docstrings_to_model_forward(MOBILEBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="google/mobilebert-uncased", @@ -1268,7 +1268,7 @@ class TFMobileBertForQuestionAnswering(TFMobileBertPreTrainedModel, TFQuestionAn config.num_labels, kernel_initializer=get_initializer(config.initializer_range), name="qa_outputs" ) - @add_start_docstrings_to_callable(MOBILEBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length")) + @add_start_docstrings_to_model_forward(MOBILEBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="google/mobilebert-uncased", @@ -1376,7 +1376,9 @@ class TFMobileBertForMultipleChoice(TFMobileBertPreTrainedModel, TFMultipleChoic """ return {"input_ids": tf.constant(MULTIPLE_CHOICE_DUMMY_INPUTS)} - @add_start_docstrings_to_callable(MOBILEBERT_INPUTS_DOCSTRING.format("batch_size, num_choices, sequence_length")) + @add_start_docstrings_to_model_forward( + MOBILEBERT_INPUTS_DOCSTRING.format("batch_size, num_choices, sequence_length") + ) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="google/mobilebert-uncased", @@ -1499,7 +1501,7 @@ class TFMobileBertForTokenClassification(TFMobileBertPreTrainedModel, TFTokenCla config.num_labels, kernel_initializer=get_initializer(config.initializer_range), name="classifier" ) - @add_start_docstrings_to_callable(MOBILEBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length")) + @add_start_docstrings_to_model_forward(MOBILEBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="google/mobilebert-uncased", diff --git a/src/transformers/modeling_tf_openai.py b/src/transformers/modeling_tf_openai.py index c78cfa6108..218946b013 100644 --- a/src/transformers/modeling_tf_openai.py +++ b/src/transformers/modeling_tf_openai.py @@ -27,7 +27,7 @@ from .file_utils import ( ModelOutput, add_code_sample_docstrings, add_start_docstrings, - add_start_docstrings_to_callable, + add_start_docstrings_to_model_forward, replace_return_docstrings, ) from .modeling_tf_outputs import TFBaseModelOutput, TFCausalLMOutput @@ -495,7 +495,7 @@ class TFOpenAIGPTModel(TFOpenAIGPTPreTrainedModel): super().__init__(config, *inputs, **kwargs) self.transformer = TFOpenAIGPTMainLayer(config, name="transformer") - @add_start_docstrings_to_callable(OPENAI_GPT_INPUTS_DOCSTRING) + @add_start_docstrings_to_model_forward(OPENAI_GPT_INPUTS_DOCSTRING) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="openai-gpt", @@ -522,7 +522,7 @@ class TFOpenAIGPTLMHeadModel(TFOpenAIGPTPreTrainedModel, TFCausalLanguageModelin def get_output_embeddings(self): return self.transformer.tokens_embed - @add_start_docstrings_to_callable(OPENAI_GPT_INPUTS_DOCSTRING) + @add_start_docstrings_to_model_forward(OPENAI_GPT_INPUTS_DOCSTRING) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="openai-gpt", @@ -612,7 +612,7 @@ class TFOpenAIGPTDoubleHeadsModel(TFOpenAIGPTPreTrainedModel): def get_output_embeddings(self): return self.transformer.tokens_embed - @add_start_docstrings_to_callable(OPENAI_GPT_INPUTS_DOCSTRING) + @add_start_docstrings_to_model_forward(OPENAI_GPT_INPUTS_DOCSTRING) @replace_return_docstrings(output_type=TFOpenAIGPTDoubleHeadsModelOutput, config_class=_CONFIG_FOR_DOC) def call( self, diff --git a/src/transformers/modeling_tf_roberta.py b/src/transformers/modeling_tf_roberta.py index f3a805a30a..ce3dafed30 100644 --- a/src/transformers/modeling_tf_roberta.py +++ b/src/transformers/modeling_tf_roberta.py @@ -24,7 +24,7 @@ from .file_utils import ( MULTIPLE_CHOICE_DUMMY_INPUTS, add_code_sample_docstrings, add_start_docstrings, - add_start_docstrings_to_callable, + add_start_docstrings_to_model_forward, ) from .modeling_tf_outputs import ( TFBaseModelOutput, @@ -717,7 +717,7 @@ class TFRobertaModel(TFRobertaPreTrainedModel): super().__init__(config, *inputs, **kwargs) self.roberta = TFRobertaMainLayer(config, name="roberta") - @add_start_docstrings_to_callable(ROBERTA_INPUTS_DOCSTRING.format("batch_size, sequence_length")) + @add_start_docstrings_to_model_forward(ROBERTA_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="roberta-base", @@ -776,7 +776,7 @@ class TFRobertaForMaskedLM(TFRobertaPreTrainedModel, TFMaskedLanguageModelingLos def get_output_embeddings(self): return self.lm_head.decoder - @add_start_docstrings_to_callable(ROBERTA_INPUTS_DOCSTRING.format("batch_size, sequence_length")) + @add_start_docstrings_to_model_forward(ROBERTA_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="roberta-base", @@ -886,7 +886,7 @@ class TFRobertaForSequenceClassification(TFRobertaPreTrainedModel, TFSequenceCla self.roberta = TFRobertaMainLayer(config, name="roberta") self.classifier = TFRobertaClassificationHead(config, name="classifier") - @add_start_docstrings_to_callable(ROBERTA_INPUTS_DOCSTRING.format("batch_size, sequence_length")) + @add_start_docstrings_to_model_forward(ROBERTA_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="roberta-base", @@ -978,7 +978,7 @@ class TFRobertaForMultipleChoice(TFRobertaPreTrainedModel, TFMultipleChoiceLoss) """ return {"input_ids": tf.constant(MULTIPLE_CHOICE_DUMMY_INPUTS)} - @add_start_docstrings_to_callable(ROBERTA_INPUTS_DOCSTRING.format("batch_size, num_choices, sequence_length")) + @add_start_docstrings_to_model_forward(ROBERTA_INPUTS_DOCSTRING.format("batch_size, num_choices, sequence_length")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="roberta-base", @@ -1096,7 +1096,7 @@ class TFRobertaForTokenClassification(TFRobertaPreTrainedModel, TFTokenClassific config.num_labels, kernel_initializer=get_initializer(config.initializer_range), name="classifier" ) - @add_start_docstrings_to_callable(ROBERTA_INPUTS_DOCSTRING.format("batch_size, sequence_length")) + @add_start_docstrings_to_model_forward(ROBERTA_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="roberta-base", @@ -1182,7 +1182,7 @@ class TFRobertaForQuestionAnswering(TFRobertaPreTrainedModel, TFQuestionAnswerin config.num_labels, kernel_initializer=get_initializer(config.initializer_range), name="qa_outputs" ) - @add_start_docstrings_to_callable(ROBERTA_INPUTS_DOCSTRING.format("batch_size, sequence_length")) + @add_start_docstrings_to_model_forward(ROBERTA_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="roberta-base", diff --git a/src/transformers/modeling_tf_t5.py b/src/transformers/modeling_tf_t5.py index dcefd5531d..3dcb29ada2 100644 --- a/src/transformers/modeling_tf_t5.py +++ b/src/transformers/modeling_tf_t5.py @@ -31,7 +31,7 @@ from .file_utils import ( DUMMY_INPUTS, DUMMY_MASK, add_start_docstrings, - add_start_docstrings_to_callable, + add_start_docstrings_to_model_forward, replace_return_docstrings, ) from .modeling_tf_outputs import TFSeq2SeqLMOutput, TFSeq2SeqModelOutput @@ -980,7 +980,7 @@ class TFT5Model(TFT5PreTrainedModel): def get_decoder(self): return self.decoder - @add_start_docstrings_to_callable(T5_INPUTS_DOCSTRING) + @add_start_docstrings_to_model_forward(T5_INPUTS_DOCSTRING) @replace_return_docstrings(output_type=TFSeq2SeqModelOutput, config_class=_CONFIG_FOR_DOC) def call( self, @@ -1177,7 +1177,7 @@ class TFT5ForConditionalGeneration(TFT5PreTrainedModel, TFCausalLanguageModeling def get_decoder(self): return self.decoder - @add_start_docstrings_to_callable(T5_INPUTS_DOCSTRING) + @add_start_docstrings_to_model_forward(T5_INPUTS_DOCSTRING) @replace_return_docstrings(output_type=TFSeq2SeqLMOutput, config_class=_CONFIG_FOR_DOC) def call( self, diff --git a/src/transformers/modeling_tf_transfo_xl.py b/src/transformers/modeling_tf_transfo_xl.py index c6efd7c8bd..5a86c7d646 100644 --- a/src/transformers/modeling_tf_transfo_xl.py +++ b/src/transformers/modeling_tf_transfo_xl.py @@ -23,7 +23,12 @@ from typing import List, Optional, Tuple import tensorflow as tf from .configuration_transfo_xl import TransfoXLConfig -from .file_utils import ModelOutput, add_code_sample_docstrings, add_start_docstrings, add_start_docstrings_to_callable +from .file_utils import ( + ModelOutput, + add_code_sample_docstrings, + add_start_docstrings, + add_start_docstrings_to_model_forward, +) from .modeling_tf_transfo_xl_utilities import TFAdaptiveSoftmaxMask from .modeling_tf_utils import TFPreTrainedModel, get_initializer, keras_serializable, shape_list from .tokenization_utils import BatchEncoding @@ -803,7 +808,7 @@ class TFTransfoXLModel(TFTransfoXLPreTrainedModel): super().__init__(config, *inputs, **kwargs) self.transformer = TFTransfoXLMainLayer(config, name="transformer") - @add_start_docstrings_to_callable(TRANSFO_XL_INPUTS_DOCSTRING) + @add_start_docstrings_to_model_forward(TRANSFO_XL_INPUTS_DOCSTRING) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="transfo-xl-wt103", @@ -873,7 +878,7 @@ class TFTransfoXLLMHeadModel(TFTransfoXLPreTrainedModel): def init_mems(self, bsz): return self.transformer.init_mems(bsz) - @add_start_docstrings_to_callable(TRANSFO_XL_INPUTS_DOCSTRING) + @add_start_docstrings_to_model_forward(TRANSFO_XL_INPUTS_DOCSTRING) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="transfo-xl-wt103", diff --git a/src/transformers/modeling_tf_xlm.py b/src/transformers/modeling_tf_xlm.py index 907a9ead52..901ca14ecc 100644 --- a/src/transformers/modeling_tf_xlm.py +++ b/src/transformers/modeling_tf_xlm.py @@ -32,7 +32,7 @@ from .file_utils import ( ModelOutput, add_code_sample_docstrings, add_start_docstrings, - add_start_docstrings_to_callable, + add_start_docstrings_to_model_forward, ) from .modeling_tf_outputs import ( TFBaseModelOutput, @@ -696,7 +696,7 @@ class TFXLMModel(TFXLMPreTrainedModel): super().__init__(config, *inputs, **kwargs) self.transformer = TFXLMMainLayer(config, name="transformer") - @add_start_docstrings_to_callable(XLM_INPUTS_DOCSTRING.format("batch_size, sequence_length")) + @add_start_docstrings_to_model_forward(XLM_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="xlm-mlm-en-2048", @@ -775,7 +775,7 @@ class TFXLMWithLMHeadModel(TFXLMPreTrainedModel): langs = None return {"inputs": inputs, "langs": langs} - @add_start_docstrings_to_callable(XLM_INPUTS_DOCSTRING.format("batch_size, sequence_length")) + @add_start_docstrings_to_model_forward(XLM_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="xlm-mlm-en-2048", @@ -813,7 +813,7 @@ class TFXLMForSequenceClassification(TFXLMPreTrainedModel, TFSequenceClassificat self.transformer = TFXLMMainLayer(config, name="transformer") self.sequence_summary = TFSequenceSummary(config, initializer_range=config.init_std, name="sequence_summary") - @add_start_docstrings_to_callable(XLM_INPUTS_DOCSTRING.format("batch_size, sequence_length")) + @add_start_docstrings_to_model_forward(XLM_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="xlm-mlm-en-2048", @@ -914,7 +914,7 @@ class TFXLMForMultipleChoice(TFXLMPreTrainedModel, TFMultipleChoiceLoss): "langs": tf.constant(MULTIPLE_CHOICE_DUMMY_INPUTS), } - @add_start_docstrings_to_callable(XLM_INPUTS_DOCSTRING.format("batch_size, num_choices, sequence_length")) + @add_start_docstrings_to_model_forward(XLM_INPUTS_DOCSTRING.format("batch_size, num_choices, sequence_length")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="xlm-mlm-en-2048", @@ -1056,7 +1056,7 @@ class TFXLMForTokenClassification(TFXLMPreTrainedModel, TFTokenClassificationLos config.num_labels, kernel_initializer=get_initializer(config.init_std), name="classifier" ) - @add_start_docstrings_to_callable(XLM_INPUTS_DOCSTRING.format("batch_size, sequence_length")) + @add_start_docstrings_to_model_forward(XLM_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="xlm-mlm-en-2048", @@ -1143,7 +1143,7 @@ class TFXLMForQuestionAnsweringSimple(TFXLMPreTrainedModel, TFQuestionAnsweringL config.num_labels, kernel_initializer=get_initializer(config.init_std), name="qa_outputs" ) - @add_start_docstrings_to_callable(XLM_INPUTS_DOCSTRING.format("batch_size, sequence_length")) + @add_start_docstrings_to_model_forward(XLM_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="xlm-mlm-en-2048", diff --git a/src/transformers/modeling_tf_xlnet.py b/src/transformers/modeling_tf_xlnet.py index a48ae6b346..70a10de343 100644 --- a/src/transformers/modeling_tf_xlnet.py +++ b/src/transformers/modeling_tf_xlnet.py @@ -30,7 +30,7 @@ from .file_utils import ( ModelOutput, add_code_sample_docstrings, add_start_docstrings, - add_start_docstrings_to_callable, + add_start_docstrings_to_model_forward, replace_return_docstrings, ) from .modeling_tf_utils import ( @@ -1130,7 +1130,7 @@ class TFXLNetModel(TFXLNetPreTrainedModel): super().__init__(config, *inputs, **kwargs) self.transformer = TFXLNetMainLayer(config, name="transformer") - @add_start_docstrings_to_callable(XLNET_INPUTS_DOCSTRING.format("batch_size, sequence_length")) + @add_start_docstrings_to_model_forward(XLNET_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="xlnet-base-cased", @@ -1197,7 +1197,7 @@ class TFXLNetLMHeadModel(TFXLNetPreTrainedModel, TFCausalLanguageModelingLoss): return inputs - @add_start_docstrings_to_callable(XLNET_INPUTS_DOCSTRING.format("batch_size, sequence_length")) + @add_start_docstrings_to_model_forward(XLNET_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @replace_return_docstrings(output_type=TFXLNetLMHeadModelOutput, config_class=_CONFIG_FOR_DOC) def call( self, @@ -1314,7 +1314,7 @@ class TFXLNetForSequenceClassification(TFXLNetPreTrainedModel, TFSequenceClassif config.num_labels, kernel_initializer=get_initializer(config.initializer_range), name="logits_proj" ) - @add_start_docstrings_to_callable(XLNET_INPUTS_DOCSTRING.format("batch_size, sequence_length")) + @add_start_docstrings_to_model_forward(XLNET_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="xlnet-base-cased", @@ -1417,7 +1417,7 @@ class TFXLNetForMultipleChoice(TFXLNetPreTrainedModel, TFMultipleChoiceLoss): """ return {"input_ids": tf.constant(MULTIPLE_CHOICE_DUMMY_INPUTS)} - @add_start_docstrings_to_callable(XLNET_INPUTS_DOCSTRING.format("batch_size, num_choices, sequence_length")) + @add_start_docstrings_to_model_forward(XLNET_INPUTS_DOCSTRING.format("batch_size, num_choices, sequence_length")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="xlnet-base-cased", @@ -1552,7 +1552,7 @@ class TFXLNetForTokenClassification(TFXLNetPreTrainedModel, TFTokenClassificatio config.num_labels, kernel_initializer=get_initializer(config.initializer_range), name="classifier" ) - @add_start_docstrings_to_callable(XLNET_INPUTS_DOCSTRING.format("batch_size, sequence_length")) + @add_start_docstrings_to_model_forward(XLNET_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="xlnet-base-cased", @@ -1639,7 +1639,7 @@ class TFXLNetForQuestionAnsweringSimple(TFXLNetPreTrainedModel, TFQuestionAnswer config.num_labels, kernel_initializer=get_initializer(config.initializer_range), name="qa_outputs" ) - @add_start_docstrings_to_callable(XLNET_INPUTS_DOCSTRING.format("batch_size, sequence_length")) + @add_start_docstrings_to_model_forward(XLNET_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="xlnet-base-cased", diff --git a/src/transformers/modeling_transfo_xl.py b/src/transformers/modeling_transfo_xl.py index 895efe94c4..a33a0c1f27 100644 --- a/src/transformers/modeling_transfo_xl.py +++ b/src/transformers/modeling_transfo_xl.py @@ -26,7 +26,12 @@ import torch.nn as nn import torch.nn.functional as F from .configuration_transfo_xl import TransfoXLConfig -from .file_utils import ModelOutput, add_code_sample_docstrings, add_start_docstrings, add_start_docstrings_to_callable +from .file_utils import ( + ModelOutput, + add_code_sample_docstrings, + add_start_docstrings, + add_start_docstrings_to_model_forward, +) from .modeling_transfo_xl_utilities import ProjectedAdaptiveLogSoftmax from .modeling_utils import PreTrainedModel from .utils import logging @@ -830,7 +835,7 @@ class TransfoXLModel(TransfoXLPreTrainedModel): return new_mems - @add_start_docstrings_to_callable(TRANSFO_XL_INPUTS_DOCSTRING) + @add_start_docstrings_to_model_forward(TRANSFO_XL_INPUTS_DOCSTRING) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="transfo-xl-wt103", @@ -1018,7 +1023,7 @@ class TransfoXLLMHeadModel(TransfoXLPreTrainedModel): def init_mems(self, bsz): return self.transformer.init_mems(bsz) - @add_start_docstrings_to_callable(TRANSFO_XL_INPUTS_DOCSTRING) + @add_start_docstrings_to_model_forward(TRANSFO_XL_INPUTS_DOCSTRING) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="transfo-xl-wt103", diff --git a/src/transformers/modeling_xlm.py b/src/transformers/modeling_xlm.py index d41806f85b..e404891181 100755 --- a/src/transformers/modeling_xlm.py +++ b/src/transformers/modeling_xlm.py @@ -35,7 +35,7 @@ from .file_utils import ( ModelOutput, add_code_sample_docstrings, add_start_docstrings, - add_start_docstrings_to_callable, + add_start_docstrings_to_model_forward, replace_return_docstrings, ) from .modeling_outputs import ( @@ -486,7 +486,7 @@ class XLMModel(XLMPreTrainedModel): for layer, heads in heads_to_prune.items(): self.attentions[layer].prune_heads(heads) - @add_start_docstrings_to_callable(XLM_INPUTS_DOCSTRING.format("batch_size, sequence_length")) + @add_start_docstrings_to_model_forward(XLM_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="xlm-mlm-en-2048", @@ -703,7 +703,7 @@ class XLMWithLMHeadModel(XLMPreTrainedModel): langs = None return {"input_ids": input_ids, "langs": langs} - @add_start_docstrings_to_callable(XLM_INPUTS_DOCSTRING.format("batch_size, sequence_length")) + @add_start_docstrings_to_model_forward(XLM_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="xlm-mlm-en-2048", @@ -781,7 +781,7 @@ class XLMForSequenceClassification(XLMPreTrainedModel): self.init_weights() - @add_start_docstrings_to_callable(XLM_INPUTS_DOCSTRING.format("batch_size, sequence_length")) + @add_start_docstrings_to_model_forward(XLM_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="xlm-mlm-en-2048", @@ -868,7 +868,7 @@ class XLMForQuestionAnsweringSimple(XLMPreTrainedModel): self.init_weights() - @add_start_docstrings_to_callable(XLM_INPUTS_DOCSTRING.format("batch_size, sequence_length")) + @add_start_docstrings_to_model_forward(XLM_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="xlm-mlm-en-2048", @@ -972,7 +972,7 @@ class XLMForQuestionAnswering(XLMPreTrainedModel): self.init_weights() - @add_start_docstrings_to_callable(XLM_INPUTS_DOCSTRING.format("batch_size, sequence_length")) + @add_start_docstrings_to_model_forward(XLM_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @replace_return_docstrings(output_type=XLMForQuestionAnsweringOutput, config_class=_CONFIG_FOR_DOC) def forward( self, @@ -1091,7 +1091,7 @@ class XLMForTokenClassification(XLMPreTrainedModel): self.init_weights() - @add_start_docstrings_to_callable(XLM_INPUTS_DOCSTRING.format("batch_size, sequence_length")) + @add_start_docstrings_to_model_forward(XLM_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="xlm-mlm-en-2048", @@ -1184,7 +1184,7 @@ class XLMForMultipleChoice(XLMPreTrainedModel): self.init_weights() - @add_start_docstrings_to_callable(XLM_INPUTS_DOCSTRING.format("batch_size, num_choicec, sequence_length")) + @add_start_docstrings_to_model_forward(XLM_INPUTS_DOCSTRING.format("batch_size, num_choicec, sequence_length")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="xlm-mlm-en-2048", diff --git a/src/transformers/modeling_xlnet.py b/src/transformers/modeling_xlnet.py index 8c83a73c16..90a90715f9 100755 --- a/src/transformers/modeling_xlnet.py +++ b/src/transformers/modeling_xlnet.py @@ -32,7 +32,7 @@ from .file_utils import ( ModelOutput, add_code_sample_docstrings, add_start_docstrings, - add_start_docstrings_to_callable, + add_start_docstrings_to_model_forward, replace_return_docstrings, ) from .modeling_utils import ( @@ -1064,7 +1064,7 @@ class XLNetModel(XLNetPreTrainedModel): pos_emb = pos_emb.to(self.device) return pos_emb - @add_start_docstrings_to_callable(XLNET_INPUTS_DOCSTRING.format("batch_size, sequence_length")) + @add_start_docstrings_to_model_forward(XLNET_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="xlnet-base-cased", @@ -1342,7 +1342,7 @@ class XLNetLMHeadModel(XLNetPreTrainedModel): return inputs - @add_start_docstrings_to_callable(XLNET_INPUTS_DOCSTRING.format("batch_size, sequence_length")) + @add_start_docstrings_to_model_forward(XLNET_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @replace_return_docstrings(output_type=XLNetLMHeadModelOutput, config_class=_CONFIG_FOR_DOC) def forward( self, @@ -1465,7 +1465,7 @@ class XLNetForSequenceClassification(XLNetPreTrainedModel): self.init_weights() - @add_start_docstrings_to_callable(XLNET_INPUTS_DOCSTRING.format("batch_size, sequence_length")) + @add_start_docstrings_to_model_forward(XLNET_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="xlnet-base-cased", @@ -1558,7 +1558,7 @@ class XLNetForTokenClassification(XLNetPreTrainedModel): self.init_weights() - @add_start_docstrings_to_callable(XLNET_INPUTS_DOCSTRING.format("batch_size, sequence_length")) + @add_start_docstrings_to_model_forward(XLNET_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="xlnet-base-cased", @@ -1655,7 +1655,7 @@ class XLNetForMultipleChoice(XLNetPreTrainedModel): self.init_weights() - @add_start_docstrings_to_callable(XLNET_INPUTS_DOCSTRING.format("batch_size, num_choices, sequence_length")) + @add_start_docstrings_to_model_forward(XLNET_INPUTS_DOCSTRING.format("batch_size, num_choices, sequence_length")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="xlnet-base-cased", @@ -1756,7 +1756,7 @@ class XLNetForQuestionAnsweringSimple(XLNetPreTrainedModel): self.init_weights() - @add_start_docstrings_to_callable(XLNET_INPUTS_DOCSTRING.format("batch_size, sequence_length")) + @add_start_docstrings_to_model_forward(XLNET_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="xlnet-base-cased", @@ -1868,7 +1868,7 @@ class XLNetForQuestionAnswering(XLNetPreTrainedModel): self.init_weights() - @add_start_docstrings_to_callable(XLNET_INPUTS_DOCSTRING.format("batch_size, sequence_length")) + @add_start_docstrings_to_model_forward(XLNET_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @replace_return_docstrings(output_type=XLNetForQuestionAnsweringOutput, config_class=_CONFIG_FOR_DOC) def forward( self, diff --git a/templates/adding_a_new_model/modeling_tf_xxx.py b/templates/adding_a_new_model/modeling_tf_xxx.py index 7135b29c62..e4d2a0ac17 100644 --- a/templates/adding_a_new_model/modeling_tf_xxx.py +++ b/templates/adding_a_new_model/modeling_tf_xxx.py @@ -26,7 +26,7 @@ from .file_utils import ( MULTIPLE_CHOICE_DUMMY_INPUTS, add_code_sample_docstrings, add_start_docstrings, - add_start_docstrings_to_callable, + add_start_docstrings_to_model_forward, ) from .modeling_tf_outputs import ( TFBaseModelOutputWithPooling, @@ -360,7 +360,7 @@ class TFXxxModel(TFXxxPreTrainedModel): super().__init__(config, *inputs, **kwargs) self.transformer = TFXxxMainLayer(config, name="transformer") - @add_start_docstrings_to_callable(XXX_INPUTS_DOCSTRING.format("batch_size, sequence_length")) + @add_start_docstrings_to_model_forward(XXX_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="xxx-base-cased", @@ -383,7 +383,7 @@ class TFXxxForMaskedLM(TFXxxPreTrainedModel, TFMaskedLanguageModelingLoss): self.transformer = TFXxxMainLayer(config, name="transformer") self.mlm = TFXxxMLMHead(config, self.transformer.embeddings, name="mlm") - @add_start_docstrings_to_callable(XXX_INPUTS_DOCSTRING.format("batch_size, sequence_length")) + @add_start_docstrings_to_model_forward(XXX_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="xxx-base-cased", @@ -465,7 +465,7 @@ class TFXxxForSequenceClassification(TFXxxPreTrainedModel, TFSequenceClassificat config.num_labels, kernel_initializer=get_initializer(config.initializer_range), name="classifier" ) - @add_start_docstrings_to_callable(XXX_INPUTS_DOCSTRING.format("batch_size, sequence_length")) + @add_start_docstrings_to_model_forward(XXX_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="xxx-base-cased", @@ -557,7 +557,7 @@ class TFXxxForMultipleChoice(TFXxxPreTrainedModel, TFMultipleChoiceLoss): """ return {"input_ids": tf.constant(MULTIPLE_CHOICE_DUMMY_INPUTS)} - @add_start_docstrings_to_callable(XXX_INPUTS_DOCSTRING.format("batch_size, num_choices, sequence_length")) + @add_start_docstrings_to_model_forward(XXX_INPUTS_DOCSTRING.format("batch_size, num_choices, sequence_length")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="xxx-base-cased", @@ -680,7 +680,7 @@ class TFXxxForTokenClassification(TFXxxPreTrainedModel, TFTokenClassificationLos config.num_labels, kernel_initializer=get_initializer(config.initializer_range), name="classifier" ) - @add_start_docstrings_to_callable(XXX_INPUTS_DOCSTRING.format("batch_size, sequence_length")) + @add_start_docstrings_to_model_forward(XXX_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="xxx-base-cased", @@ -761,7 +761,7 @@ class TFXxxForQuestionAnswering(TFXxxPreTrainedModel, TFQuestionAnsweringLoss): config.num_labels, kernel_initializer=get_initializer(config.initializer_range), name="qa_outputs" ) - @add_start_docstrings_to_callable(XXX_INPUTS_DOCSTRING.format("batch_size, sequence_length")) + @add_start_docstrings_to_model_forward(XXX_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="xxx-base-cased", diff --git a/templates/adding_a_new_model/modeling_xxx.py b/templates/adding_a_new_model/modeling_xxx.py index b5634459c6..ccdf52a3cb 100644 --- a/templates/adding_a_new_model/modeling_xxx.py +++ b/templates/adding_a_new_model/modeling_xxx.py @@ -26,7 +26,7 @@ from torch import nn from torch.nn import CrossEntropyLoss, MSELoss from .configuration_xxx import XxxConfig -from .file_utils import add_code_sample_docstrings, add_start_docstrings, add_start_docstrings_to_callable +from .file_utils import add_code_sample_docstrings, add_start_docstrings, add_start_docstrings_to_model_forward from .modeling_outputs import ( BaseModelOutputWithPooling, MaskedLMOutput, @@ -309,7 +309,7 @@ class XxxModel(XxxPreTrainedModel): for layer, heads in heads_to_prune.items(): self.encoder.layer[layer].attention.prune_heads(heads) - @add_start_docstrings_to_callable(XXX_INPUTS_DOCSTRING.format("batch_size, sequence_length")) + @add_start_docstrings_to_model_forward(XXX_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="xxx-base-uncased", @@ -391,7 +391,7 @@ class XxxForMaskedLM(XxxPreTrainedModel): def get_output_embeddings(self): return self.lm_head - @add_start_docstrings_to_callable(XXX_INPUTS_DOCSTRING.format("batch_size, sequence_length")) + @add_start_docstrings_to_model_forward(XXX_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="xxx-base-uncased", @@ -468,7 +468,7 @@ class XxxForSequenceClassification(XxxPreTrainedModel): self.init_weights() - @add_start_docstrings_to_callable(XXX_INPUTS_DOCSTRING.format("batch_size, sequence_length")) + @add_start_docstrings_to_model_forward(XXX_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="xxx-base-uncased", @@ -551,7 +551,7 @@ class XxxForMultipleChoice(XxxPreTrainedModel): self.init_weights() - @add_start_docstrings_to_callable(XXX_INPUTS_DOCSTRING.format("batch_size, num_choices, sequence_length")) + @add_start_docstrings_to_model_forward(XXX_INPUTS_DOCSTRING.format("batch_size, num_choices, sequence_length")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="xxx-base-uncased", @@ -641,7 +641,7 @@ class XxxForTokenClassification(XxxPreTrainedModel): self.init_weights() - @add_start_docstrings_to_callable(XXX_INPUTS_DOCSTRING.format("batch_size, sequence_length")) + @add_start_docstrings_to_model_forward(XXX_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="xxx-base-uncased", @@ -726,7 +726,7 @@ class XxxForQuestionAnswering(XxxPreTrainedModel): self.init_weights() - @add_start_docstrings_to_callable(XXX_INPUTS_DOCSTRING.format("batch_size, sequence_length")) + @add_start_docstrings_to_model_forward(XXX_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="xxx-base-uncased",