From 90ecc29656ce37fdbe7279cf586511ed678c0cb7 Mon Sep 17 00:00:00 2001 From: Daniel Hug <38571110+danielpatrickhug@users.noreply.github.com> Date: Fri, 5 Mar 2021 18:06:55 -0500 Subject: [PATCH] Refactoring checkpoint names for multiple models (#10527) * Refactor checkpoint name in ALBERT and ALBERT_tf * Refactor checkpoint name in BART and BART_tf * Refactor checkpoint name in BERT generation * Refactor checkpoint name in Blenderbot_tf * Refactor checkpoint name in Blenderbot_small_tf * Refactor checkpoint name in ConvBERT AND CONVBERT_TF * Refactor checkpoint name in CTRL AND CTRL_TF * Refactor checkpoint name in DistilBERT AND DistilBERT_TF * Refactor checkpoint name in DistilBERT redo * Refactor checkpoint name in Electra and Electra_tf * Refactor checkpoint name in FlauBERT and FlauBERT_tf * Refactor checkpoint name in FSMT * Refactor checkpoint name in GPT2 and GPT2_tf * Refactor checkpoint name in IBERT * Refactor checkpoint name in LED and LED_tf * Refactor checkpoint name in Longformer and Longformer_tf * Refactor checkpoint name in Lxmert and Lxmert_tf * Refactor checkpoint name in Marian_tf * Refactor checkpoint name in MBART and MBART_tf * Refactor checkpoint name in MobileBERT and MobileBERT_tf * Refactor checkpoint name in mpnet and mpnet_tf * Refactor checkpoint name in openai and openai_tf * Refactor checkpoint name in pegasus_tf * Refactor checkpoint name in reformer * Refactor checkpoint name in Roberta and Roberta_tf * Refactor checkpoint name in SqueezeBert * Refactor checkpoint name in Transformer_xl and Transformer_xl_tf * Refactor checkpoint name in XLM and XLM_tf * Refactor checkpoint name in XLNET and XLNET_tf * Refactor checkpoint name in BERT_tf * run make tests, style, quality, fixup --- src/transformers/models/albert/modeling_albert.py | 13 +++++++------ .../models/albert/modeling_tf_albert.py | 13 +++++++------ src/transformers/models/bart/modeling_bart.py | 7 ++++--- src/transformers/models/bart/modeling_tf_bart.py | 3 ++- src/transformers/models/bert/modeling_tf_bert.py | 15 ++++++++------- .../bert_generation/modeling_bert_generation.py | 3 ++- .../models/blenderbot/modeling_tf_blenderbot.py | 3 ++- .../modeling_tf_blenderbot_small.py | 3 ++- .../models/convbert/modeling_convbert.py | 13 +++++++------ .../models/convbert/modeling_tf_convbert.py | 13 +++++++------ src/transformers/models/ctrl/modeling_ctrl.py | 7 ++++--- src/transformers/models/ctrl/modeling_tf_ctrl.py | 7 ++++--- .../models/distilbert/modeling_distilbert.py | 13 ++++++------- .../models/distilbert/modeling_tf_distilbert.py | 13 +++++++------ .../models/electra/modeling_electra.py | 13 +++++++------ .../models/electra/modeling_tf_electra.py | 13 +++++++------ .../models/flaubert/modeling_flaubert.py | 3 ++- .../models/flaubert/modeling_tf_flaubert.py | 5 +++-- src/transformers/models/fsmt/modeling_fsmt.py | 3 ++- src/transformers/models/gpt2/modeling_gpt2.py | 5 +++-- src/transformers/models/gpt2/modeling_tf_gpt2.py | 5 +++-- src/transformers/models/ibert/modeling_ibert.py | 13 +++++++------ src/transformers/models/led/modeling_led.py | 7 ++++--- src/transformers/models/led/modeling_tf_led.py | 3 ++- .../models/longformer/modeling_longformer.py | 7 ++++--- .../models/longformer/modeling_tf_longformer.py | 9 +++++---- src/transformers/models/lxmert/modeling_lxmert.py | 5 +++-- .../models/lxmert/modeling_tf_lxmert.py | 4 ++-- .../models/marian/modeling_tf_marian.py | 3 ++- src/transformers/models/mbart/modeling_mbart.py | 7 ++++--- .../models/mbart/modeling_tf_mbart.py | 3 ++- .../models/mobilebert/modeling_tf_mobilebert.py | 13 +++++++------ src/transformers/models/mpnet/modeling_mpnet.py | 13 +++++++------ .../models/mpnet/modeling_tf_mpnet.py | 13 +++++++------ src/transformers/models/openai/modeling_openai.py | 7 ++++--- .../models/openai/modeling_tf_openai.py | 7 ++++--- .../models/pegasus/modeling_tf_pegasus.py | 3 ++- .../models/reformer/modeling_reformer.py | 11 ++++++----- .../models/roberta/modeling_roberta.py | 13 +++++++------ .../models/roberta/modeling_tf_roberta.py | 13 +++++++------ .../models/squeezebert/modeling_squeezebert.py | 13 +++++++------ .../models/transfo_xl/modeling_tf_transfo_xl.py | 7 ++++--- .../models/transfo_xl/modeling_transfo_xl.py | 7 ++++--- src/transformers/models/xlm/modeling_tf_xlm.py | 13 +++++++------ src/transformers/models/xlm/modeling_xlm.py | 13 +++++++------ .../models/xlnet/modeling_tf_xlnet.py | 11 ++++++----- src/transformers/models/xlnet/modeling_xlnet.py | 11 ++++++----- 47 files changed, 223 insertions(+), 179 deletions(-) diff --git a/src/transformers/models/albert/modeling_albert.py b/src/transformers/models/albert/modeling_albert.py index baceeffcce..2e20923b7b 100755 --- a/src/transformers/models/albert/modeling_albert.py +++ b/src/transformers/models/albert/modeling_albert.py @@ -52,6 +52,7 @@ from .configuration_albert import AlbertConfig logger = logging.get_logger(__name__) +_CHECKPOINT_FOR_DOC = "albert-base-v2" _CONFIG_FOR_DOC = "AlbertConfig" _TOKENIZER_FOR_DOC = "AlbertTokenizer" @@ -658,7 +659,7 @@ class AlbertModel(AlbertPreTrainedModel): @add_start_docstrings_to_model_forward(ALBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, - checkpoint="albert-base-v2", + checkpoint=_CHECKPOINT_FOR_DOC, output_type=BaseModelOutputWithPooling, config_class=_CONFIG_FOR_DOC, ) @@ -901,7 +902,7 @@ class AlbertForMaskedLM(AlbertPreTrainedModel): @add_start_docstrings_to_model_forward(ALBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, - checkpoint="albert-base-v2", + checkpoint=_CHECKPOINT_FOR_DOC, output_type=MaskedLMOutput, config_class=_CONFIG_FOR_DOC, ) @@ -979,7 +980,7 @@ class AlbertForSequenceClassification(AlbertPreTrainedModel): @add_start_docstrings_to_model_forward(ALBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, - checkpoint="albert-base-v2", + checkpoint=_CHECKPOINT_FOR_DOC, output_type=SequenceClassifierOutput, config_class=_CONFIG_FOR_DOC, ) @@ -1067,7 +1068,7 @@ class AlbertForTokenClassification(AlbertPreTrainedModel): @add_start_docstrings_to_model_forward(ALBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, - checkpoint="albert-base-v2", + checkpoint=_CHECKPOINT_FOR_DOC, output_type=TokenClassifierOutput, config_class=_CONFIG_FOR_DOC, ) @@ -1155,7 +1156,7 @@ class AlbertForQuestionAnswering(AlbertPreTrainedModel): @add_start_docstrings_to_model_forward(ALBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, - checkpoint="albert-base-v2", + checkpoint=_CHECKPOINT_FOR_DOC, output_type=QuestionAnsweringModelOutput, config_class=_CONFIG_FOR_DOC, ) @@ -1254,7 +1255,7 @@ class AlbertForMultipleChoice(AlbertPreTrainedModel): @add_start_docstrings_to_model_forward(ALBERT_INPUTS_DOCSTRING.format("batch_size, num_choices, sequence_length")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, - checkpoint="albert-base-v2", + checkpoint=_CHECKPOINT_FOR_DOC, output_type=MultipleChoiceModelOutput, config_class=_CONFIG_FOR_DOC, ) diff --git a/src/transformers/models/albert/modeling_tf_albert.py b/src/transformers/models/albert/modeling_tf_albert.py index 6e8c9f8e4a..189867addc 100644 --- a/src/transformers/models/albert/modeling_tf_albert.py +++ b/src/transformers/models/albert/modeling_tf_albert.py @@ -59,6 +59,7 @@ from .configuration_albert import AlbertConfig logger = logging.get_logger(__name__) +_CHECKPOINT_FOR_DOC = "albert-base-v2" _CONFIG_FOR_DOC = "AlbertConfig" _TOKENIZER_FOR_DOC = "AlbertTokenizer" @@ -780,7 +781,7 @@ class TFAlbertModel(TFAlbertPreTrainedModel): @add_start_docstrings_to_model_forward(ALBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, - checkpoint="albert-base-v2", + checkpoint=_CHECKPOINT_FOR_DOC, output_type=TFBaseModelOutputWithPooling, config_class=_CONFIG_FOR_DOC, ) @@ -998,7 +999,7 @@ class TFAlbertForMaskedLM(TFAlbertPreTrainedModel, TFMaskedLanguageModelingLoss) @add_start_docstrings_to_model_forward(ALBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, - checkpoint="albert-base-v2", + checkpoint=_CHECKPOINT_FOR_DOC, output_type=TFMaskedLMOutput, config_class=_CONFIG_FOR_DOC, ) @@ -1103,7 +1104,7 @@ class TFAlbertForSequenceClassification(TFAlbertPreTrainedModel, TFSequenceClass @add_start_docstrings_to_model_forward(ALBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, - checkpoint="albert-base-v2", + checkpoint=_CHECKPOINT_FOR_DOC, output_type=TFSequenceClassifierOutput, config_class=_CONFIG_FOR_DOC, ) @@ -1207,7 +1208,7 @@ class TFAlbertForTokenClassification(TFAlbertPreTrainedModel, TFTokenClassificat @add_start_docstrings_to_model_forward(ALBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, - checkpoint="albert-base-v2", + checkpoint=_CHECKPOINT_FOR_DOC, output_type=TFTokenClassifierOutput, config_class=_CONFIG_FOR_DOC, ) @@ -1308,7 +1309,7 @@ class TFAlbertForQuestionAnswering(TFAlbertPreTrainedModel, TFQuestionAnsweringL @add_start_docstrings_to_model_forward(ALBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, - checkpoint="albert-base-v2", + checkpoint=_CHECKPOINT_FOR_DOC, output_type=TFQuestionAnsweringModelOutput, config_class=_CONFIG_FOR_DOC, ) @@ -1436,7 +1437,7 @@ class TFAlbertForMultipleChoice(TFAlbertPreTrainedModel, TFMultipleChoiceLoss): @add_start_docstrings_to_model_forward(ALBERT_INPUTS_DOCSTRING.format("batch_size, num_choices, sequence_length")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, - checkpoint="albert-base-v2", + checkpoint=_CHECKPOINT_FOR_DOC, output_type=TFMultipleChoiceModelOutput, config_class=_CONFIG_FOR_DOC, ) diff --git a/src/transformers/models/bart/modeling_bart.py b/src/transformers/models/bart/modeling_bart.py index 1097b66013..1d15fb735e 100755 --- a/src/transformers/models/bart/modeling_bart.py +++ b/src/transformers/models/bart/modeling_bart.py @@ -49,6 +49,7 @@ from .configuration_bart import BartConfig logger = logging.get_logger(__name__) +_CHECKPOINT_FOR_DOC = "facebook/bart-large" _CONFIG_FOR_DOC = "BartConfig" _TOKENIZER_FOR_DOC = "BartTokenizer" @@ -1109,7 +1110,7 @@ class BartModel(BartPretrainedModel): @add_start_docstrings_to_model_forward(BART_INPUTS_DOCSTRING) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, - checkpoint="facebook/bart-large", + checkpoint=_CHECKPOINT_FOR_DOC, output_type=Seq2SeqModelOutput, config_class=_CONFIG_FOR_DOC, ) @@ -1377,7 +1378,7 @@ class BartForSequenceClassification(BartPretrainedModel): @add_start_docstrings_to_model_forward(BART_INPUTS_DOCSTRING) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, - checkpoint="facebook/bart-large", + checkpoint=_CHECKPOINT_FOR_DOC, output_type=Seq2SeqSequenceClassifierOutput, config_class=_CONFIG_FOR_DOC, ) @@ -1482,7 +1483,7 @@ class BartForQuestionAnswering(BartPretrainedModel): @add_start_docstrings_to_model_forward(BART_INPUTS_DOCSTRING) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, - checkpoint="facebook/bart-large", + checkpoint=_CHECKPOINT_FOR_DOC, output_type=Seq2SeqQuestionAnsweringModelOutput, config_class=_CONFIG_FOR_DOC, ) diff --git a/src/transformers/models/bart/modeling_tf_bart.py b/src/transformers/models/bart/modeling_tf_bart.py index 1d47f06b19..ce67fc6541 100644 --- a/src/transformers/models/bart/modeling_tf_bart.py +++ b/src/transformers/models/bart/modeling_tf_bart.py @@ -52,6 +52,7 @@ from .configuration_bart import BartConfig logger = logging.get_logger(__name__) +_CHECKPOINT_FOR_DOC = "facebook/bart-large" _CONFIG_FOR_DOC = "BartConfig" _TOKENIZER_FOR_DOC = "BartTokenizer" @@ -1170,7 +1171,7 @@ class TFBartModel(TFBartPretrainedModel): @add_start_docstrings_to_model_forward(BART_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, - checkpoint="facebook/bart-large", + checkpoint=_CHECKPOINT_FOR_DOC, output_type=TFSeq2SeqModelOutput, config_class=_CONFIG_FOR_DOC, ) diff --git a/src/transformers/models/bert/modeling_tf_bert.py b/src/transformers/models/bert/modeling_tf_bert.py index 9a72b0f432..1f26322c1b 100644 --- a/src/transformers/models/bert/modeling_tf_bert.py +++ b/src/transformers/models/bert/modeling_tf_bert.py @@ -64,6 +64,7 @@ from .configuration_bert import BertConfig logger = logging.get_logger(__name__) +_CHECKPOINT_FOR_DOC = "bert-base-cased" _CONFIG_FOR_DOC = "BertConfig" _TOKENIZER_FOR_DOC = "BertTokenizer" @@ -850,7 +851,7 @@ class TFBertModel(TFBertPreTrainedModel): @add_start_docstrings_to_model_forward(BERT_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, - checkpoint="bert-base-cased", + checkpoint=_CHECKPOINT_FOR_DOC, output_type=TFBaseModelOutputWithPooling, config_class=_CONFIG_FOR_DOC, ) @@ -1067,7 +1068,7 @@ class TFBertForMaskedLM(TFBertPreTrainedModel, TFMaskedLanguageModelingLoss): @add_start_docstrings_to_model_forward(BERT_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, - checkpoint="bert-base-cased", + checkpoint=_CHECKPOINT_FOR_DOC, output_type=TFMaskedLMOutput, config_class=_CONFIG_FOR_DOC, ) @@ -1171,7 +1172,7 @@ class TFBertLMHeadModel(TFBertPreTrainedModel, TFCausalLanguageModelingLoss): @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, - checkpoint="bert-base-cased", + checkpoint=_CHECKPOINT_FOR_DOC, output_type=TFCausalLMOutput, config_class=_CONFIG_FOR_DOC, ) @@ -1382,7 +1383,7 @@ class TFBertForSequenceClassification(TFBertPreTrainedModel, TFSequenceClassific @add_start_docstrings_to_model_forward(BERT_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, - checkpoint="bert-base-cased", + checkpoint=_CHECKPOINT_FOR_DOC, output_type=TFSequenceClassifierOutput, config_class=_CONFIG_FOR_DOC, ) @@ -1492,7 +1493,7 @@ class TFBertForMultipleChoice(TFBertPreTrainedModel, TFMultipleChoiceLoss): @add_start_docstrings_to_model_forward(BERT_INPUTS_DOCSTRING.format("batch_size, num_choices, sequence_length")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, - checkpoint="bert-base-cased", + checkpoint=_CHECKPOINT_FOR_DOC, output_type=TFMultipleChoiceModelOutput, config_class=_CONFIG_FOR_DOC, ) @@ -1648,7 +1649,7 @@ class TFBertForTokenClassification(TFBertPreTrainedModel, TFTokenClassificationL @add_start_docstrings_to_model_forward(BERT_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, - checkpoint="bert-base-cased", + checkpoint=_CHECKPOINT_FOR_DOC, output_type=TFTokenClassifierOutput, config_class=_CONFIG_FOR_DOC, ) @@ -1755,7 +1756,7 @@ class TFBertForQuestionAnswering(TFBertPreTrainedModel, TFQuestionAnsweringLoss) @add_start_docstrings_to_model_forward(BERT_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, - checkpoint="bert-base-cased", + checkpoint=_CHECKPOINT_FOR_DOC, output_type=TFQuestionAnsweringModelOutput, config_class=_CONFIG_FOR_DOC, ) diff --git a/src/transformers/models/bert_generation/modeling_bert_generation.py b/src/transformers/models/bert_generation/modeling_bert_generation.py index 0febbdc88f..1954e21e38 100755 --- a/src/transformers/models/bert_generation/modeling_bert_generation.py +++ b/src/transformers/models/bert_generation/modeling_bert_generation.py @@ -35,6 +35,7 @@ from .configuration_bert_generation import BertGenerationConfig logger = logging.get_logger(__name__) +_CHECKPOINT_FOR_DOC = "google/bert_for_seq_generation_L-24_bbc_encoder" _CONFIG_FOR_DOC = "BertGenerationConfig" _TOKENIZER_FOR_DOC = "BertGenerationTokenizer" @@ -300,7 +301,7 @@ class BertGenerationEncoder(BertGenerationPreTrainedModel): @add_start_docstrings_to_model_forward(BERT_GENERATION_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, - checkpoint="google/bert_for_seq_generation_L-24_bbc_encoder", + checkpoint=_CHECKPOINT_FOR_DOC, output_type=BaseModelOutputWithPastAndCrossAttentions, config_class=_CONFIG_FOR_DOC, ) diff --git a/src/transformers/models/blenderbot/modeling_tf_blenderbot.py b/src/transformers/models/blenderbot/modeling_tf_blenderbot.py index 81e5e9f28a..42bcad5411 100644 --- a/src/transformers/models/blenderbot/modeling_tf_blenderbot.py +++ b/src/transformers/models/blenderbot/modeling_tf_blenderbot.py @@ -54,6 +54,7 @@ from .configuration_blenderbot import BlenderbotConfig logger = logging.get_logger(__name__) +_CHECKPOINT_FOR_DOC = "facebook/blenderbot-400M-distill" _CONFIG_FOR_DOC = "BlenderbotConfig" _TOKENIZER_FOR_DOC = "BlenderbotTokenizer" @@ -1186,7 +1187,7 @@ class TFBlenderbotModel(TFBlenderbotPreTrainedModel): @add_start_docstrings_to_model_forward(BLENDERBOT_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, - checkpoint="facebook/blenderbot-400M-distill", + checkpoint=_CHECKPOINT_FOR_DOC, output_type=TFSeq2SeqModelOutput, config_class=_CONFIG_FOR_DOC, ) diff --git a/src/transformers/models/blenderbot_small/modeling_tf_blenderbot_small.py b/src/transformers/models/blenderbot_small/modeling_tf_blenderbot_small.py index d0b18682d2..85ae9e9a4a 100644 --- a/src/transformers/models/blenderbot_small/modeling_tf_blenderbot_small.py +++ b/src/transformers/models/blenderbot_small/modeling_tf_blenderbot_small.py @@ -52,6 +52,7 @@ from .configuration_blenderbot_small import BlenderbotSmallConfig logger = logging.get_logger(__name__) +_CHECKPOINT_FOR_DOC = "facebook/blenderbot_small-90M" _CONFIG_FOR_DOC = "BlenderbotSmallConfig" _TOKENIZER_FOR_DOC = "BlenderbotSmallTokenizer" @@ -1174,7 +1175,7 @@ class TFBlenderbotSmallModel(TFBlenderbotSmallPreTrainedModel): @add_start_docstrings_to_model_forward(BLENDERBOT_SMALL_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, - checkpoint="facebook/blenderbot_small-90M", + checkpoint=_CHECKPOINT_FOR_DOC, output_type=TFSeq2SeqModelOutput, config_class=_CONFIG_FOR_DOC, ) diff --git a/src/transformers/models/convbert/modeling_convbert.py b/src/transformers/models/convbert/modeling_convbert.py index 7b38431185..c31d08a56e 100755 --- a/src/transformers/models/convbert/modeling_convbert.py +++ b/src/transformers/models/convbert/modeling_convbert.py @@ -47,6 +47,7 @@ from .configuration_convbert import ConvBertConfig logger = logging.get_logger(__name__) +_CHECKPOINT_FOR_DOC = "YituTech/conv-bert-base" _CONFIG_FOR_DOC = "ConvBertConfig" _TOKENIZER_FOR_DOC = "ConvBertTokenizer" @@ -773,7 +774,7 @@ class ConvBertModel(ConvBertPreTrainedModel): @add_start_docstrings_to_model_forward(CONVBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, - checkpoint="YituTech/conv-bert-base", + checkpoint=_CHECKPOINT_FOR_DOC, output_type=BaseModelOutputWithCrossAttentions, config_class=_CONFIG_FOR_DOC, ) @@ -870,7 +871,7 @@ class ConvBertForMaskedLM(ConvBertPreTrainedModel): @add_start_docstrings_to_model_forward(CONVBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, - checkpoint="YituTech/conv-bert-base", + checkpoint=_CHECKPOINT_FOR_DOC, output_type=MaskedLMOutput, config_class=_CONFIG_FOR_DOC, ) @@ -969,7 +970,7 @@ class ConvBertForSequenceClassification(ConvBertPreTrainedModel): @add_start_docstrings_to_model_forward(CONVBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, - checkpoint="YituTech/conv-bert-base", + checkpoint=_CHECKPOINT_FOR_DOC, output_type=SequenceClassifierOutput, config_class=_CONFIG_FOR_DOC, ) @@ -1053,7 +1054,7 @@ class ConvBertForMultipleChoice(ConvBertPreTrainedModel): ) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, - checkpoint="YituTech/conv-bert-base", + checkpoint=_CHECKPOINT_FOR_DOC, output_type=MultipleChoiceModelOutput, config_class=_CONFIG_FOR_DOC, ) @@ -1145,7 +1146,7 @@ class ConvBertForTokenClassification(ConvBertPreTrainedModel): @add_start_docstrings_to_model_forward(CONVBERT_INPUTS_DOCSTRING.format("(batch_size, sequence_length)")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, - checkpoint="YituTech/conv-bert-base", + checkpoint=_CHECKPOINT_FOR_DOC, output_type=TokenClassifierOutput, config_class=_CONFIG_FOR_DOC, ) @@ -1232,7 +1233,7 @@ class ConvBertForQuestionAnswering(ConvBertPreTrainedModel): @add_start_docstrings_to_model_forward(CONVBERT_INPUTS_DOCSTRING.format("(batch_size, sequence_length)")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, - checkpoint="YituTech/conv-bert-base", + checkpoint=_CHECKPOINT_FOR_DOC, output_type=QuestionAnsweringModelOutput, config_class=_CONFIG_FOR_DOC, ) diff --git a/src/transformers/models/convbert/modeling_tf_convbert.py b/src/transformers/models/convbert/modeling_tf_convbert.py index b3441e2931..d5afa6363e 100644 --- a/src/transformers/models/convbert/modeling_tf_convbert.py +++ b/src/transformers/models/convbert/modeling_tf_convbert.py @@ -51,6 +51,7 @@ from .configuration_convbert import ConvBertConfig logger = logging.get_logger(__name__) +_CHECKPOINT_FOR_DOC = "YituTech/conv-bert-base" _CONFIG_FOR_DOC = "ConvBertConfig" _TOKENIZER_FOR_DOC = "ConvBertTokenizer" @@ -750,7 +751,7 @@ class TFConvBertModel(TFConvBertPreTrainedModel): @add_start_docstrings_to_model_forward(CONVBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, - checkpoint="YituTech/conv-bert-base", + checkpoint=_CHECKPOINT_FOR_DOC, output_type=TFBaseModelOutput, config_class=_CONFIG_FOR_DOC, ) @@ -882,7 +883,7 @@ class TFConvBertForMaskedLM(TFConvBertPreTrainedModel, TFMaskedLanguageModelingL @add_start_docstrings_to_model_forward(CONVBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, - checkpoint="YituTech/conv-bert-base", + checkpoint=_CHECKPOINT_FOR_DOC, output_type=TFMaskedLMOutput, config_class=_CONFIG_FOR_DOC, ) @@ -1003,7 +1004,7 @@ class TFConvBertForSequenceClassification(TFConvBertPreTrainedModel, TFSequenceC @add_start_docstrings_to_model_forward(CONVBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, - checkpoint="YituTech/conv-bert-base", + checkpoint=_CHECKPOINT_FOR_DOC, output_type=TFSequenceClassifierOutput, config_class=_CONFIG_FOR_DOC, ) @@ -1112,7 +1113,7 @@ class TFConvBertForMultipleChoice(TFConvBertPreTrainedModel, TFMultipleChoiceLos ) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, - checkpoint="YituTech/conv-bert-base", + checkpoint=_CHECKPOINT_FOR_DOC, output_type=TFMultipleChoiceModelOutput, config_class=_CONFIG_FOR_DOC, ) @@ -1247,7 +1248,7 @@ class TFConvBertForTokenClassification(TFConvBertPreTrainedModel, TFTokenClassif @add_start_docstrings_to_model_forward(CONVBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, - checkpoint="YituTech/conv-bert-base", + checkpoint=_CHECKPOINT_FOR_DOC, output_type=TFTokenClassifierOutput, config_class=_CONFIG_FOR_DOC, ) @@ -1342,7 +1343,7 @@ class TFConvBertForQuestionAnswering(TFConvBertPreTrainedModel, TFQuestionAnswer @add_start_docstrings_to_model_forward(CONVBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, - checkpoint="YituTech/conv-bert-base", + checkpoint=_CHECKPOINT_FOR_DOC, output_type=TFQuestionAnsweringModelOutput, config_class=_CONFIG_FOR_DOC, ) diff --git a/src/transformers/models/ctrl/modeling_ctrl.py b/src/transformers/models/ctrl/modeling_ctrl.py index 144da74a8c..c883aa7bf7 100644 --- a/src/transformers/models/ctrl/modeling_ctrl.py +++ b/src/transformers/models/ctrl/modeling_ctrl.py @@ -31,6 +31,7 @@ from .configuration_ctrl import CTRLConfig logger = logging.get_logger(__name__) +_CHECKPOINT_FOR_DOC = "ctrl" _CONFIG_FOR_DOC = "CTRLConfig" _TOKENIZER_FOR_DOC = "CTRLTokenizer" @@ -355,7 +356,7 @@ class CTRLModel(CTRLPreTrainedModel): @add_start_docstrings_to_model_forward(CTRL_INPUTS_DOCSTRING) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, - checkpoint="ctrl", + checkpoint=_CHECKPOINT_FOR_DOC, output_type=BaseModelOutputWithPast, config_class=_CONFIG_FOR_DOC, ) @@ -515,7 +516,7 @@ class CTRLLMHeadModel(CTRLPreTrainedModel): @add_start_docstrings_to_model_forward(CTRL_INPUTS_DOCSTRING) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, - checkpoint="ctrl", + checkpoint=_CHECKPOINT_FOR_DOC, output_type=CausalLMOutputWithPast, config_class=_CONFIG_FOR_DOC, ) @@ -618,7 +619,7 @@ class CTRLForSequenceClassification(CTRLPreTrainedModel): @add_start_docstrings_to_model_forward(CTRL_INPUTS_DOCSTRING) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, - checkpoint="ctrl", + checkpoint=_CHECKPOINT_FOR_DOC, output_type=SequenceClassifierOutput, config_class=_CONFIG_FOR_DOC, ) diff --git a/src/transformers/models/ctrl/modeling_tf_ctrl.py b/src/transformers/models/ctrl/modeling_tf_ctrl.py index 73c76fb1a2..def747a46d 100644 --- a/src/transformers/models/ctrl/modeling_tf_ctrl.py +++ b/src/transformers/models/ctrl/modeling_tf_ctrl.py @@ -38,6 +38,7 @@ from .configuration_ctrl import CTRLConfig logger = logging.get_logger(__name__) +_CHECKPOINT_FOR_DOC = "ctrl" _CONFIG_FOR_DOC = "CTRLConfig" _TOKENIZER_FOR_DOC = "CTRLTokenizer" @@ -543,7 +544,7 @@ class TFCTRLModel(TFCTRLPreTrainedModel): @add_start_docstrings_to_model_forward(CTRL_INPUTS_DOCSTRING) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, - checkpoint="ctrl", + checkpoint=_CHECKPOINT_FOR_DOC, output_type=TFBaseModelOutputWithPast, config_class=_CONFIG_FOR_DOC, ) @@ -671,7 +672,7 @@ class TFCTRLLMHeadModel(TFCTRLPreTrainedModel, TFCausalLanguageModelingLoss): @add_start_docstrings_to_model_forward(CTRL_INPUTS_DOCSTRING) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, - checkpoint="ctrl", + checkpoint=_CHECKPOINT_FOR_DOC, output_type=TFCausalLMOutputWithPast, config_class=_CONFIG_FOR_DOC, ) @@ -795,7 +796,7 @@ class TFCTRLForSequenceClassification(TFCTRLPreTrainedModel, TFSequenceClassific @add_start_docstrings_to_model_forward(CTRL_INPUTS_DOCSTRING) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, - checkpoint="ctrl", + checkpoint=_CHECKPOINT_FOR_DOC, output_type=TFSequenceClassifierOutput, config_class=_CONFIG_FOR_DOC, ) diff --git a/src/transformers/models/distilbert/modeling_distilbert.py b/src/transformers/models/distilbert/modeling_distilbert.py index 5a8cb719e8..65c0def694 100755 --- a/src/transformers/models/distilbert/modeling_distilbert.py +++ b/src/transformers/models/distilbert/modeling_distilbert.py @@ -52,7 +52,7 @@ from .configuration_distilbert import DistilBertConfig logger = logging.get_logger(__name__) - +_CHECKPOINT_FOR_DOC = "distilbert-base-uncased" _CONFIG_FOR_DOC = "DistilBertConfig" _TOKENIZER_FOR_DOC = "DistilBertTokenizer" @@ -441,11 +441,10 @@ class DistilBertModel(DistilBertPreTrainedModel): @add_start_docstrings_to_model_forward(DISTILBERT_INPUTS_DOCSTRING.format("batch_size, num_choices")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, - checkpoint="distilbert-base-uncased", + checkpoint=_CHECKPOINT_FOR_DOC, output_type=BaseModelOutput, config_class=_CONFIG_FOR_DOC, ) - @add_code_sample_docstrings(tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="distilbert-base-uncased") def forward( self, input_ids=None, @@ -517,7 +516,7 @@ class DistilBertForMaskedLM(DistilBertPreTrainedModel): @add_start_docstrings_to_model_forward(DISTILBERT_INPUTS_DOCSTRING.format("batch_size, num_choices")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, - checkpoint="distilbert-base-uncased", + checkpoint=_CHECKPOINT_FOR_DOC, output_type=MaskedLMOutput, config_class=_CONFIG_FOR_DOC, ) @@ -593,7 +592,7 @@ class DistilBertForSequenceClassification(DistilBertPreTrainedModel): @add_start_docstrings_to_model_forward(DISTILBERT_INPUTS_DOCSTRING.format("batch_size, num_choices")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, - checkpoint="distilbert-base-uncased", + checkpoint=_CHECKPOINT_FOR_DOC, output_type=SequenceClassifierOutput, config_class=_CONFIG_FOR_DOC, ) @@ -674,7 +673,7 @@ class DistilBertForQuestionAnswering(DistilBertPreTrainedModel): @add_start_docstrings_to_model_forward(DISTILBERT_INPUTS_DOCSTRING.format("batch_size, num_choices")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, - checkpoint="distilbert-base-uncased", + checkpoint=_CHECKPOINT_FOR_DOC, output_type=QuestionAnsweringModelOutput, config_class=_CONFIG_FOR_DOC, ) @@ -770,7 +769,7 @@ class DistilBertForTokenClassification(DistilBertPreTrainedModel): @add_start_docstrings_to_model_forward(DISTILBERT_INPUTS_DOCSTRING) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, - checkpoint="distilbert-base-uncased", + checkpoint=_CHECKPOINT_FOR_DOC, output_type=TokenClassifierOutput, config_class=_CONFIG_FOR_DOC, ) diff --git a/src/transformers/models/distilbert/modeling_tf_distilbert.py b/src/transformers/models/distilbert/modeling_tf_distilbert.py index 690757146f..9299fdc752 100644 --- a/src/transformers/models/distilbert/modeling_tf_distilbert.py +++ b/src/transformers/models/distilbert/modeling_tf_distilbert.py @@ -53,6 +53,7 @@ from .configuration_distilbert import DistilBertConfig logger = logging.get_logger(__name__) +_CHECKPOINT_FOR_DOC = "distilbert-base-uncased" _CONFIG_FOR_DOC = "DistilBertConfig" _TOKENIZER_FOR_DOC = "DistilBertTokenizer" @@ -545,7 +546,7 @@ class TFDistilBertModel(TFDistilBertPreTrainedModel): @add_start_docstrings_to_model_forward(DISTILBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, - checkpoint="distilbert-base-uncased", + checkpoint=_CHECKPOINT_FOR_DOC, output_type=TFBaseModelOutput, config_class=_CONFIG_FOR_DOC, ) @@ -660,7 +661,7 @@ class TFDistilBertForMaskedLM(TFDistilBertPreTrainedModel, TFMaskedLanguageModel @add_start_docstrings_to_model_forward(DISTILBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, - checkpoint="distilbert-base-uncased", + checkpoint=_CHECKPOINT_FOR_DOC, output_type=TFMaskedLMOutput, config_class=_CONFIG_FOR_DOC, ) @@ -761,7 +762,7 @@ class TFDistilBertForSequenceClassification(TFDistilBertPreTrainedModel, TFSeque @add_start_docstrings_to_model_forward(DISTILBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, - checkpoint="distilbert-base-uncased", + checkpoint=_CHECKPOINT_FOR_DOC, output_type=TFSequenceClassifierOutput, config_class=_CONFIG_FOR_DOC, ) @@ -856,7 +857,7 @@ class TFDistilBertForTokenClassification(TFDistilBertPreTrainedModel, TFTokenCla @add_start_docstrings_to_model_forward(DISTILBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, - checkpoint="distilbert-base-uncased", + checkpoint=_CHECKPOINT_FOR_DOC, output_type=TFTokenClassifierOutput, config_class=_CONFIG_FOR_DOC, ) @@ -964,7 +965,7 @@ class TFDistilBertForMultipleChoice(TFDistilBertPreTrainedModel, TFMultipleChoic ) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, - checkpoint="distilbert-base-uncased", + checkpoint=_CHECKPOINT_FOR_DOC, output_type=TFMultipleChoiceModelOutput, config_class=_CONFIG_FOR_DOC, ) @@ -1090,7 +1091,7 @@ class TFDistilBertForQuestionAnswering(TFDistilBertPreTrainedModel, TFQuestionAn @add_start_docstrings_to_model_forward(DISTILBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, - checkpoint="distilbert-base-uncased", + checkpoint=_CHECKPOINT_FOR_DOC, output_type=TFQuestionAnsweringModelOutput, config_class=_CONFIG_FOR_DOC, ) diff --git a/src/transformers/models/electra/modeling_electra.py b/src/transformers/models/electra/modeling_electra.py index 50bec0f98e..59605bc428 100644 --- a/src/transformers/models/electra/modeling_electra.py +++ b/src/transformers/models/electra/modeling_electra.py @@ -54,6 +54,7 @@ from .configuration_electra import ElectraConfig logger = logging.get_logger(__name__) +_CHECKPOINT_FOR_DOC = "google/electra-small-discriminator" _CONFIG_FOR_DOC = "ElectraConfig" _TOKENIZER_FOR_DOC = "ElectraTokenizer" @@ -812,7 +813,7 @@ class ElectraModel(ElectraPreTrainedModel): @add_start_docstrings_to_model_forward(ELECTRA_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, - checkpoint="google/electra-small-discriminator", + checkpoint=_CHECKPOINT_FOR_DOC, output_type=BaseModelOutputWithCrossAttentions, config_class=_CONFIG_FOR_DOC, ) @@ -910,7 +911,7 @@ class ElectraForSequenceClassification(ElectraPreTrainedModel): @add_start_docstrings_to_model_forward(ELECTRA_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, - checkpoint="google/electra-small-discriminator", + checkpoint=_CHECKPOINT_FOR_DOC, output_type=SequenceClassifierOutput, config_class=_CONFIG_FOR_DOC, ) @@ -1092,7 +1093,7 @@ class ElectraForMaskedLM(ElectraPreTrainedModel): @add_start_docstrings_to_model_forward(ELECTRA_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, - checkpoint="google/electra-small-discriminator", + checkpoint=_CHECKPOINT_FOR_DOC, output_type=MaskedLMOutput, config_class=_CONFIG_FOR_DOC, ) @@ -1171,7 +1172,7 @@ class ElectraForTokenClassification(ElectraPreTrainedModel): @add_start_docstrings_to_model_forward(ELECTRA_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, - checkpoint="google/electra-small-discriminator", + checkpoint=_CHECKPOINT_FOR_DOC, output_type=TokenClassifierOutput, config_class=_CONFIG_FOR_DOC, ) @@ -1258,7 +1259,7 @@ class ElectraForQuestionAnswering(ElectraPreTrainedModel): @add_start_docstrings_to_model_forward(ELECTRA_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, - checkpoint="google/electra-small-discriminator", + checkpoint=_CHECKPOINT_FOR_DOC, output_type=QuestionAnsweringModelOutput, config_class=_CONFIG_FOR_DOC, ) @@ -1359,7 +1360,7 @@ class ElectraForMultipleChoice(ElectraPreTrainedModel): @add_start_docstrings_to_model_forward(ELECTRA_INPUTS_DOCSTRING.format("batch_size, num_choices, sequence_length")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, - checkpoint="google/electra-small-discriminator", + checkpoint=_CHECKPOINT_FOR_DOC, output_type=MultipleChoiceModelOutput, config_class=_CONFIG_FOR_DOC, ) diff --git a/src/transformers/models/electra/modeling_tf_electra.py b/src/transformers/models/electra/modeling_tf_electra.py index 734f6343b4..8a4a77db54 100644 --- a/src/transformers/models/electra/modeling_tf_electra.py +++ b/src/transformers/models/electra/modeling_tf_electra.py @@ -57,6 +57,7 @@ from .configuration_electra import ElectraConfig logger = logging.get_logger(__name__) +_CHECKPOINT_FOR_DOC = "google/electra-small-discriminator" _CONFIG_FOR_DOC = "ElectraConfig" _TOKENIZER_FOR_DOC = "ElectraTokenizer" @@ -732,7 +733,7 @@ class TFElectraModel(TFElectraPreTrainedModel): @add_start_docstrings_to_model_forward(ELECTRA_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, - checkpoint="google/electra-small-discriminator", + checkpoint=_CHECKPOINT_FOR_DOC, output_type=TFBaseModelOutput, config_class=_CONFIG_FOR_DOC, ) @@ -951,7 +952,7 @@ class TFElectraForMaskedLM(TFElectraPreTrainedModel, TFMaskedLanguageModelingLos @add_start_docstrings_to_model_forward(ELECTRA_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, - checkpoint="google/electra-small-generator", + checkpoint=_CHECKPOINT_FOR_DOC, output_type=TFMaskedLMOutput, config_class=_CONFIG_FOR_DOC, ) @@ -1071,7 +1072,7 @@ class TFElectraForSequenceClassification(TFElectraPreTrainedModel, TFSequenceCla @add_start_docstrings_to_model_forward(ELECTRA_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, - checkpoint="google/electra-small-discriminator", + checkpoint=_CHECKPOINT_FOR_DOC, output_type=TFSequenceClassifierOutput, config_class=_CONFIG_FOR_DOC, ) @@ -1179,7 +1180,7 @@ class TFElectraForMultipleChoice(TFElectraPreTrainedModel, TFMultipleChoiceLoss) @add_start_docstrings_to_model_forward(ELECTRA_INPUTS_DOCSTRING.format("batch_size, num_choices, sequence_length")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, - checkpoint="google/electra-small-discriminator", + checkpoint=_CHECKPOINT_FOR_DOC, output_type=TFMultipleChoiceModelOutput, config_class=_CONFIG_FOR_DOC, ) @@ -1316,7 +1317,7 @@ class TFElectraForTokenClassification(TFElectraPreTrainedModel, TFTokenClassific @add_start_docstrings_to_model_forward(ELECTRA_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, - checkpoint="google/electra-small-discriminator", + checkpoint=_CHECKPOINT_FOR_DOC, output_type=TFTokenClassifierOutput, config_class=_CONFIG_FOR_DOC, ) @@ -1413,7 +1414,7 @@ class TFElectraForQuestionAnswering(TFElectraPreTrainedModel, TFQuestionAnswerin @add_start_docstrings_to_model_forward(ELECTRA_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, - checkpoint="google/electra-small-discriminator", + checkpoint=_CHECKPOINT_FOR_DOC, output_type=TFQuestionAnsweringModelOutput, config_class=_CONFIG_FOR_DOC, ) diff --git a/src/transformers/models/flaubert/modeling_flaubert.py b/src/transformers/models/flaubert/modeling_flaubert.py index 6168d7d229..1603ce1f4b 100644 --- a/src/transformers/models/flaubert/modeling_flaubert.py +++ b/src/transformers/models/flaubert/modeling_flaubert.py @@ -38,6 +38,7 @@ from .configuration_flaubert import FlaubertConfig logger = logging.get_logger(__name__) +_CHECKPOINT_FOR_DOC = "flaubert/flaubert_base_cased" _CONFIG_FOR_DOC = "FlaubertConfig" _TOKENIZER_FOR_DOC = "FlaubertTokenizer" @@ -143,7 +144,7 @@ class FlaubertModel(XLMModel): @add_start_docstrings_to_model_forward(FLAUBERT_INPUTS_DOCSTRING) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, - checkpoint="flaubert/flaubert_base_cased", + checkpoint=_CHECKPOINT_FOR_DOC, output_type=BaseModelOutput, config_class=_CONFIG_FOR_DOC, ) diff --git a/src/transformers/models/flaubert/modeling_tf_flaubert.py b/src/transformers/models/flaubert/modeling_tf_flaubert.py index f3465c39f9..b5f8c7b199 100644 --- a/src/transformers/models/flaubert/modeling_tf_flaubert.py +++ b/src/transformers/models/flaubert/modeling_tf_flaubert.py @@ -52,6 +52,7 @@ from .configuration_flaubert import FlaubertConfig logger = logging.get_logger(__name__) +_CHECKPOINT_FOR_DOC = "flaubert/flaubert_base_cased" _CONFIG_FOR_DOC = "FlaubertConfig" _TOKENIZER_FOR_DOC = "FlaubertTokenizer" @@ -236,7 +237,7 @@ class TFFlaubertModel(TFFlaubertPreTrainedModel): @add_start_docstrings_to_model_forward(FLAUBERT_INPUTS_DOCSTRING) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, - checkpoint="jplu/tf-flaubert-small-cased", + checkpoint=_CHECKPOINT_FOR_DOC, output_type=TFBaseModelOutput, config_class=_CONFIG_FOR_DOC, ) @@ -822,7 +823,7 @@ class TFFlaubertWithLMHeadModel(TFFlaubertPreTrainedModel): @add_start_docstrings_to_model_forward(FLAUBERT_INPUTS_DOCSTRING) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, - checkpoint="jplu/tf-flaubert-small-cased", + checkpoint=_CHECKPOINT_FOR_DOC, output_type=TFFlaubertWithLMHeadModelOutput, config_class=_CONFIG_FOR_DOC, ) diff --git a/src/transformers/models/fsmt/modeling_fsmt.py b/src/transformers/models/fsmt/modeling_fsmt.py index 5ad1a0ca7b..f644c6b43d 100644 --- a/src/transformers/models/fsmt/modeling_fsmt.py +++ b/src/transformers/models/fsmt/modeling_fsmt.py @@ -57,6 +57,7 @@ from .configuration_fsmt import FSMTConfig logger = logging.get_logger(__name__) +_CHECKPOINT_FOR_DOC = "facebook/wmt19-ru-en" _CONFIG_FOR_DOC = "FSMTConfig" _TOKENIZER_FOR_DOC = "FSMTTokenizer" @@ -989,7 +990,7 @@ class FSMTModel(PretrainedFSMTModel): @add_start_docstrings_to_model_forward(FSMT_INPUTS_DOCSTRING) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, - checkpoint="facebook/wmt19-ru-en", + checkpoint=_CHECKPOINT_FOR_DOC, output_type=Seq2SeqModelOutput, config_class=_CONFIG_FOR_DOC, ) diff --git a/src/transformers/models/gpt2/modeling_gpt2.py b/src/transformers/models/gpt2/modeling_gpt2.py index 52937e0ead..4dd2c07509 100644 --- a/src/transformers/models/gpt2/modeling_gpt2.py +++ b/src/transformers/models/gpt2/modeling_gpt2.py @@ -51,6 +51,7 @@ from .configuration_gpt2 import GPT2Config logger = logging.get_logger(__name__) +_CHECKPOINT_FOR_DOC = "gpt2" _CONFIG_FOR_DOC = "GPT2Config" _TOKENIZER_FOR_DOC = "GPT2Tokenizer" @@ -599,7 +600,7 @@ class GPT2Model(GPT2PreTrainedModel): @add_start_docstrings_to_model_forward(GPT2_INPUTS_DOCSTRING) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, - checkpoint="gpt2", + checkpoint=_CHECKPOINT_FOR_DOC, output_type=BaseModelOutputWithPastAndCrossAttentions, config_class=_CONFIG_FOR_DOC, ) @@ -871,7 +872,7 @@ class GPT2LMHeadModel(GPT2PreTrainedModel): @add_start_docstrings_to_model_forward(GPT2_INPUTS_DOCSTRING) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, - checkpoint="gpt2", + checkpoint=_CHECKPOINT_FOR_DOC, output_type=CausalLMOutputWithCrossAttentions, config_class=_CONFIG_FOR_DOC, ) diff --git a/src/transformers/models/gpt2/modeling_tf_gpt2.py b/src/transformers/models/gpt2/modeling_tf_gpt2.py index dc233f5d00..c2ebb2ebd7 100644 --- a/src/transformers/models/gpt2/modeling_tf_gpt2.py +++ b/src/transformers/models/gpt2/modeling_tf_gpt2.py @@ -51,6 +51,7 @@ from .configuration_gpt2 import GPT2Config logger = logging.get_logger(__name__) +_CHECKPOINT_FOR_DOC = "gpt2" _CONFIG_FOR_DOC = "GPT2Config" _TOKENIZER_FOR_DOC = "GPT2Tokenizer" @@ -587,7 +588,7 @@ class TFGPT2Model(TFGPT2PreTrainedModel): @add_start_docstrings_to_model_forward(GPT2_INPUTS_DOCSTRING) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, - checkpoint="gpt2", + checkpoint=_CHECKPOINT_FOR_DOC, output_type=TFBaseModelOutputWithPast, config_class=_CONFIG_FOR_DOC, ) @@ -679,7 +680,7 @@ class TFGPT2LMHeadModel(TFGPT2PreTrainedModel, TFCausalLanguageModelingLoss): @add_start_docstrings_to_model_forward(GPT2_INPUTS_DOCSTRING) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, - checkpoint="gpt2", + checkpoint=_CHECKPOINT_FOR_DOC, output_type=TFCausalLMOutputWithPast, config_class=_CONFIG_FOR_DOC, ) diff --git a/src/transformers/models/ibert/modeling_ibert.py b/src/transformers/models/ibert/modeling_ibert.py index edc4747a5c..a064efe3bc 100644 --- a/src/transformers/models/ibert/modeling_ibert.py +++ b/src/transformers/models/ibert/modeling_ibert.py @@ -43,6 +43,7 @@ from .quant_modules import IntGELU, IntLayerNorm, IntSoftmax, QuantAct, QuantEmb logger = logging.get_logger(__name__) +_CHECKPOINT_FOR_DOC = "ibert-roberta-base" _CONFIG_FOR_DOC = "IBertConfig" _TOKENIZER_FOR_DOC = "RobertaTokenizer" @@ -776,7 +777,7 @@ class IBertModel(IBertPreTrainedModel): @add_start_docstrings_to_model_forward(IBERT_INPUTS_DOCSTRING.format("(batch_size, sequence_length)")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, - checkpoint="ibert-roberta-base", + checkpoint=_CHECKPOINT_FOR_DOC, output_type=BaseModelOutputWithPoolingAndCrossAttentions, config_class=_CONFIG_FOR_DOC, ) @@ -880,7 +881,7 @@ class IBertForMaskedLM(IBertPreTrainedModel): @add_start_docstrings_to_model_forward(IBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, - checkpoint="ibert-roberta-base", + checkpoint=_CHECKPOINT_FOR_DOC, output_type=MaskedLMOutput, config_class=_CONFIG_FOR_DOC, mask="", @@ -986,7 +987,7 @@ class IBertForSequenceClassification(IBertPreTrainedModel): @add_start_docstrings_to_model_forward(IBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, - checkpoint="ibert-roberta-base", + checkpoint=_CHECKPOINT_FOR_DOC, output_type=SequenceClassifierOutput, config_class=_CONFIG_FOR_DOC, ) @@ -1069,7 +1070,7 @@ class IBertForMultipleChoice(IBertPreTrainedModel): @add_start_docstrings_to_model_forward(IBERT_INPUTS_DOCSTRING.format("batch_size, num_choices, sequence_length")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, - checkpoint="ibert-roberta-base", + checkpoint=_CHECKPOINT_FOR_DOC, output_type=MultipleChoiceModelOutput, config_class=_CONFIG_FOR_DOC, ) @@ -1163,7 +1164,7 @@ class IBertForTokenClassification(IBertPreTrainedModel): @add_start_docstrings_to_model_forward(IBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, - checkpoint="ibert-roberta-base", + checkpoint=_CHECKPOINT_FOR_DOC, output_type=TokenClassifierOutput, config_class=_CONFIG_FOR_DOC, ) @@ -1272,7 +1273,7 @@ class IBertForQuestionAnswering(IBertPreTrainedModel): @add_start_docstrings_to_model_forward(IBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, - checkpoint="ibert-roberta-base", + checkpoint=_CHECKPOINT_FOR_DOC, output_type=QuestionAnsweringModelOutput, config_class=_CONFIG_FOR_DOC, ) diff --git a/src/transformers/models/led/modeling_led.py b/src/transformers/models/led/modeling_led.py index a750590bb3..c61a76c58a 100755 --- a/src/transformers/models/led/modeling_led.py +++ b/src/transformers/models/led/modeling_led.py @@ -49,6 +49,7 @@ from .configuration_led import LEDConfig logger = logging.get_logger(__name__) +_CHECKPOINT_FOR_DOC = "allenai/led-base-16384" _CONFIG_FOR_DOC = "LEDConfig" _TOKENIZER_FOR_DOC = "LEDTokenizer" @@ -2168,7 +2169,7 @@ class LEDModel(LEDPreTrainedModel): @add_start_docstrings_to_model_forward(LED_INPUTS_DOCSTRING) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, - checkpoint="allenai/led-base-16384", + checkpoint=_CHECKPOINT_FOR_DOC, output_type=Seq2SeqModelOutput, config_class=_CONFIG_FOR_DOC, ) @@ -2451,7 +2452,7 @@ class LEDForSequenceClassification(LEDPreTrainedModel): @add_start_docstrings_to_model_forward(LED_INPUTS_DOCSTRING) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, - checkpoint="allenai/led-base-16384", + checkpoint=_CHECKPOINT_FOR_DOC, output_type=Seq2SeqSequenceClassifierOutput, config_class=_CONFIG_FOR_DOC, ) @@ -2559,7 +2560,7 @@ class LEDForQuestionAnswering(LEDPreTrainedModel): @add_start_docstrings_to_model_forward(LED_INPUTS_DOCSTRING) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, - checkpoint="allenai/led-base-16384", + checkpoint=_CHECKPOINT_FOR_DOC, output_type=Seq2SeqQuestionAnsweringModelOutput, config_class=_CONFIG_FOR_DOC, ) diff --git a/src/transformers/models/led/modeling_tf_led.py b/src/transformers/models/led/modeling_tf_led.py index 6ed546ec53..3e7c49c9d7 100644 --- a/src/transformers/models/led/modeling_tf_led.py +++ b/src/transformers/models/led/modeling_tf_led.py @@ -47,6 +47,7 @@ from .configuration_led import LEDConfig logger = logging.get_logger(__name__) +_CHECKPOINT_FOR_DOC = "allenai/led-base-16384" _CONFIG_FOR_DOC = "LEDConfig" _TOKENIZER_FOR_DOC = "LEDTokenizer" @@ -2228,7 +2229,7 @@ class TFLEDModel(TFLEDPreTrainedModel): @add_start_docstrings_to_model_forward(LED_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, - checkpoint="allenai/led-base-16384", + checkpoint=_CHECKPOINT_FOR_DOC, output_type=TFLEDSeq2SeqModelOutput, config_class=_CONFIG_FOR_DOC, ) diff --git a/src/transformers/models/longformer/modeling_longformer.py b/src/transformers/models/longformer/modeling_longformer.py index b254e3a5fd..aafc079ca6 100755 --- a/src/transformers/models/longformer/modeling_longformer.py +++ b/src/transformers/models/longformer/modeling_longformer.py @@ -44,6 +44,7 @@ from .configuration_longformer import LongformerConfig logger = logging.get_logger(__name__) +_CHECKPOINT_FOR_DOC = "allenai/longformer-base-4096" _CONFIG_FOR_DOC = "LongformerConfig" _TOKENIZER_FOR_DOC = "LongformerTokenizer" @@ -1811,7 +1812,7 @@ class LongformerForSequenceClassification(LongformerPreTrainedModel): @add_start_docstrings_to_model_forward(LONGFORMER_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, - checkpoint="allenai/longformer-base-4096", + checkpoint=_CHECKPOINT_FOR_DOC, output_type=LongformerSequenceClassifierOutput, config_class=_CONFIG_FOR_DOC, ) @@ -2059,7 +2060,7 @@ class LongformerForTokenClassification(LongformerPreTrainedModel): @add_start_docstrings_to_model_forward(LONGFORMER_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, - checkpoint="allenai/longformer-base-4096", + checkpoint=_CHECKPOINT_FOR_DOC, output_type=LongformerTokenClassifierOutput, config_class=_CONFIG_FOR_DOC, ) @@ -2150,7 +2151,7 @@ class LongformerForMultipleChoice(LongformerPreTrainedModel): ) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, - checkpoint="allenai/longformer-base-4096", + checkpoint=_CHECKPOINT_FOR_DOC, output_type=LongformerMultipleChoiceModelOutput, config_class=_CONFIG_FOR_DOC, ) diff --git a/src/transformers/models/longformer/modeling_tf_longformer.py b/src/transformers/models/longformer/modeling_tf_longformer.py index 1992f13970..67bd7cd741 100644 --- a/src/transformers/models/longformer/modeling_tf_longformer.py +++ b/src/transformers/models/longformer/modeling_tf_longformer.py @@ -46,6 +46,7 @@ from .configuration_longformer import LongformerConfig logger = logging.get_logger(__name__) +_CHECKPOINT_FOR_DOC = "allenai/longformer-base-4096" _CONFIG_FOR_DOC = "LongformerConfig" _TOKENIZER_FOR_DOC = "LongformerTokenizer" @@ -2081,7 +2082,7 @@ class TFLongformerForMaskedLM(TFLongformerPreTrainedModel, TFMaskedLanguageModel @add_start_docstrings_to_model_forward(LONGFORMER_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, - checkpoint="allenai/longformer-base-4096", + checkpoint=_CHECKPOINT_FOR_DOC, output_type=TFLongformerMaskedLMOutput, config_class=_CONFIG_FOR_DOC, ) @@ -2358,7 +2359,7 @@ class TFLongformerForSequenceClassification(TFLongformerPreTrainedModel, TFSeque @add_start_docstrings_to_model_forward(LONGFORMER_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, - checkpoint="allenai/longformer-base-4096", + checkpoint=_CHECKPOINT_FOR_DOC, output_type=TFLongformerSequenceClassifierOutput, config_class=_CONFIG_FOR_DOC, ) @@ -2484,7 +2485,7 @@ class TFLongformerForMultipleChoice(TFLongformerPreTrainedModel, TFMultipleChoic ) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, - checkpoint="allenai/longformer-base-4096", + checkpoint=_CHECKPOINT_FOR_DOC, output_type=TFLongformerMultipleChoiceModelOutput, config_class=_CONFIG_FOR_DOC, ) @@ -2637,7 +2638,7 @@ class TFLongformerForTokenClassification(TFLongformerPreTrainedModel, TFTokenCla @add_start_docstrings_to_model_forward(LONGFORMER_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, - checkpoint="allenai/longformer-base-4096", + checkpoint=_CHECKPOINT_FOR_DOC, output_type=TFLongformerTokenClassifierOutput, config_class=_CONFIG_FOR_DOC, ) diff --git a/src/transformers/models/lxmert/modeling_lxmert.py b/src/transformers/models/lxmert/modeling_lxmert.py index 22cc0bc839..d2cf8602d1 100644 --- a/src/transformers/models/lxmert/modeling_lxmert.py +++ b/src/transformers/models/lxmert/modeling_lxmert.py @@ -40,6 +40,7 @@ from .configuration_lxmert import LxmertConfig logger = logging.get_logger(__name__) +_CHECKPOINT_FOR_DOC = "unc-nlp/lxmert-base-uncased" _CONFIG_FOR_DOC = "LxmertConfig" _TOKENIZER_FOR_DOC = "LxmertTokenizer" @@ -901,7 +902,7 @@ class LxmertModel(LxmertPreTrainedModel): @add_start_docstrings_to_model_forward(LXMERT_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, - checkpoint="unc-nlp/lxmert-base-uncased", + checkpoint=_CHECKPOINT_FOR_DOC, output_type=LxmertModelOutput, config_class=_CONFIG_FOR_DOC, ) @@ -1382,7 +1383,7 @@ class LxmertForQuestionAnswering(LxmertPreTrainedModel): @add_start_docstrings_to_model_forward(LXMERT_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, - checkpoint="unc-nlp/lxmert-base-uncased", + checkpoint=_CHECKPOINT_FOR_DOC, output_type=LxmertForQuestionAnsweringOutput, config_class=_CONFIG_FOR_DOC, ) diff --git a/src/transformers/models/lxmert/modeling_tf_lxmert.py b/src/transformers/models/lxmert/modeling_tf_lxmert.py index 8049da1cfd..e20ddc8f3c 100644 --- a/src/transformers/models/lxmert/modeling_tf_lxmert.py +++ b/src/transformers/models/lxmert/modeling_tf_lxmert.py @@ -37,7 +37,7 @@ from .configuration_lxmert import LxmertConfig logger = logging.get_logger(__name__) - +_CHECKPOINT_FOR_DOC = "unc-nlp/lxmert-base-uncased" _CONFIG_FOR_DOC = "LxmertConfig" _TOKENIZER_FOR_DOC = "LxmertTokenizer" @@ -951,7 +951,7 @@ class TFLxmertModel(TFLxmertPreTrainedModel): @add_start_docstrings_to_model_forward(LXMERT_INPUTS_DOCSTRING) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, - checkpoint="unc-nlp/lxmert-base-uncased", + checkpoint=_CHECKPOINT_FOR_DOC, output_type=TFLxmertModelOutput, config_class=_CONFIG_FOR_DOC, ) diff --git a/src/transformers/models/marian/modeling_tf_marian.py b/src/transformers/models/marian/modeling_tf_marian.py index 578493bace..15271f8b22 100644 --- a/src/transformers/models/marian/modeling_tf_marian.py +++ b/src/transformers/models/marian/modeling_tf_marian.py @@ -53,6 +53,7 @@ from .configuration_marian import MarianConfig logger = logging.get_logger(__name__) +_CHECKPOINT_FOR_DOC = "Helsinki-NLP/opus-mt-en-de" _CONFIG_FOR_DOC = "MarianConfig" _TOKENIZER_FOR_DOC = "MarianTokenizer" @@ -1203,7 +1204,7 @@ class TFMarianModel(TFMarianPreTrainedModel): @add_start_docstrings_to_model_forward(MARIAN_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, - checkpoint="Helsinki-NLP/opus-mt-en-de", + checkpoint=_CHECKPOINT_FOR_DOC, output_type=TFSeq2SeqModelOutput, config_class=_CONFIG_FOR_DOC, ) diff --git a/src/transformers/models/mbart/modeling_mbart.py b/src/transformers/models/mbart/modeling_mbart.py index a52fbe343b..61763cc38c 100755 --- a/src/transformers/models/mbart/modeling_mbart.py +++ b/src/transformers/models/mbart/modeling_mbart.py @@ -48,6 +48,7 @@ from .configuration_mbart import MBartConfig logger = logging.get_logger(__name__) +_CHECKPOINT_FOR_DOC = "facebook/mbart-large-cc25" _CONFIG_FOR_DOC = "MBartConfig" _TOKENIZER_FOR_DOC = "MBartTokenizer" @@ -1121,7 +1122,7 @@ class MBartModel(MBartPreTrainedModel): @add_start_docstrings_to_model_forward(MBART_INPUTS_DOCSTRING) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, - checkpoint="facebook/mbart-large-cc25", + checkpoint=_CHECKPOINT_FOR_DOC, output_type=Seq2SeqModelOutput, config_class=_CONFIG_FOR_DOC, ) @@ -1377,7 +1378,7 @@ class MBartForSequenceClassification(MBartPreTrainedModel): @add_start_docstrings_to_model_forward(MBART_INPUTS_DOCSTRING) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, - checkpoint="facebook/mbart-large-cc25", + checkpoint=_CHECKPOINT_FOR_DOC, output_type=Seq2SeqSequenceClassifierOutput, config_class=_CONFIG_FOR_DOC, ) @@ -1483,7 +1484,7 @@ class MBartForQuestionAnswering(MBartPreTrainedModel): @add_start_docstrings_to_model_forward(MBART_INPUTS_DOCSTRING) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, - checkpoint="facebook/mbart-large-cc25", + checkpoint=_CHECKPOINT_FOR_DOC, output_type=Seq2SeqQuestionAnsweringModelOutput, config_class=_CONFIG_FOR_DOC, ) diff --git a/src/transformers/models/mbart/modeling_tf_mbart.py b/src/transformers/models/mbart/modeling_tf_mbart.py index 6ca54b2c85..ea3294aa5a 100644 --- a/src/transformers/models/mbart/modeling_tf_mbart.py +++ b/src/transformers/models/mbart/modeling_tf_mbart.py @@ -52,6 +52,7 @@ from .configuration_mbart import MBartConfig logger = logging.get_logger(__name__) +_CHECKPOINT_FOR_DOC = "facebook/mbart-large-cc25" _CONFIG_FOR_DOC = "MBartConfig" _TOKENIZER_FOR_DOC = "MBartTokenizer" @@ -1189,7 +1190,7 @@ class TFMBartModel(TFMBartPreTrainedModel): @add_start_docstrings_to_model_forward(MBART_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, - checkpoint="facebook/mbart-large-cc25", + checkpoint=_CHECKPOINT_FOR_DOC, output_type=TFSeq2SeqModelOutput, config_class=_CONFIG_FOR_DOC, ) diff --git a/src/transformers/models/mobilebert/modeling_tf_mobilebert.py b/src/transformers/models/mobilebert/modeling_tf_mobilebert.py index 1c8bf9c6de..372549862e 100644 --- a/src/transformers/models/mobilebert/modeling_tf_mobilebert.py +++ b/src/transformers/models/mobilebert/modeling_tf_mobilebert.py @@ -59,6 +59,7 @@ from .configuration_mobilebert import MobileBertConfig logger = logging.get_logger(__name__) +_CHECKPOINT_FOR_DOC = "google/mobilebert-uncased" _CONFIG_FOR_DOC = "MobileBertConfig" _TOKENIZER_FOR_DOC = "MobileBertTokenizer" @@ -935,7 +936,7 @@ class TFMobileBertModel(TFMobileBertPreTrainedModel): @add_start_docstrings_to_model_forward(MOBILEBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, - checkpoint="google/mobilebert-uncased", + checkpoint=_CHECKPOINT_FOR_DOC, output_type=TFBaseModelOutputWithPooling, config_class=_CONFIG_FOR_DOC, ) @@ -1127,7 +1128,7 @@ class TFMobileBertForMaskedLM(TFMobileBertPreTrainedModel, TFMaskedLanguageModel @add_start_docstrings_to_model_forward(MOBILEBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, - checkpoint="google/mobilebert-uncased", + checkpoint=_CHECKPOINT_FOR_DOC, output_type=TFMaskedLMOutput, config_class=_CONFIG_FOR_DOC, ) @@ -1348,7 +1349,7 @@ class TFMobileBertForSequenceClassification(TFMobileBertPreTrainedModel, TFSeque @add_start_docstrings_to_model_forward(MOBILEBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, - checkpoint="google/mobilebert-uncased", + checkpoint=_CHECKPOINT_FOR_DOC, output_type=TFSequenceClassifierOutput, config_class=_CONFIG_FOR_DOC, ) @@ -1456,7 +1457,7 @@ class TFMobileBertForQuestionAnswering(TFMobileBertPreTrainedModel, TFQuestionAn @add_start_docstrings_to_model_forward(MOBILEBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, - checkpoint="google/mobilebert-uncased", + checkpoint=_CHECKPOINT_FOR_DOC, output_type=TFQuestionAnsweringModelOutput, config_class=_CONFIG_FOR_DOC, ) @@ -1591,7 +1592,7 @@ class TFMobileBertForMultipleChoice(TFMobileBertPreTrainedModel, TFMultipleChoic ) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, - checkpoint="google/mobilebert-uncased", + checkpoint=_CHECKPOINT_FOR_DOC, output_type=TFMultipleChoiceModelOutput, config_class=_CONFIG_FOR_DOC, ) @@ -1739,7 +1740,7 @@ class TFMobileBertForTokenClassification(TFMobileBertPreTrainedModel, TFTokenCla @add_start_docstrings_to_model_forward(MOBILEBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, - checkpoint="google/mobilebert-uncased", + checkpoint=_CHECKPOINT_FOR_DOC, output_type=TFTokenClassifierOutput, config_class=_CONFIG_FOR_DOC, ) diff --git a/src/transformers/models/mpnet/modeling_mpnet.py b/src/transformers/models/mpnet/modeling_mpnet.py index d8a6f311ad..8b9867caeb 100644 --- a/src/transformers/models/mpnet/modeling_mpnet.py +++ b/src/transformers/models/mpnet/modeling_mpnet.py @@ -40,6 +40,7 @@ from .configuration_mpnet import MPNetConfig logger = logging.get_logger(__name__) +_CHECKPOINT_FOR_DOC = "microsoft/mpnet-base" _CONFIG_FOR_DOC = "MPNetConfig" _TOKENIZER_FOR_DOC = "MPNetTokenizer" @@ -511,7 +512,7 @@ class MPNetModel(MPNetPreTrainedModel): @add_start_docstrings_to_model_forward(MPNET_INPUTS_DOCSTRING.format("(batch_size, sequence_length)")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, - checkpoint="microsoft/mpnet-base", + checkpoint=_CHECKPOINT_FOR_DOC, output_type=BaseModelOutputWithPooling, config_class=_CONFIG_FOR_DOC, ) @@ -593,7 +594,7 @@ class MPNetForMaskedLM(MPNetPreTrainedModel): @add_start_docstrings_to_model_forward(MPNET_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, - checkpoint="microsoft/mpnet-base", + checkpoint=_CHECKPOINT_FOR_DOC, output_type=MaskedLMOutput, config_class=_CONFIG_FOR_DOC, ) @@ -695,7 +696,7 @@ class MPNetForSequenceClassification(MPNetPreTrainedModel): @add_start_docstrings_to_model_forward(MPNET_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, - checkpoint="microsoft/mpnet-base", + checkpoint=_CHECKPOINT_FOR_DOC, output_type=SequenceClassifierOutput, config_class=_CONFIG_FOR_DOC, ) @@ -777,7 +778,7 @@ class MPNetForMultipleChoice(MPNetPreTrainedModel): @add_start_docstrings_to_model_forward(MPNET_INPUTS_DOCSTRING.format("batch_size, num_choices, sequence_length")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, - checkpoint="microsoft/mpnet-base", + checkpoint=_CHECKPOINT_FOR_DOC, output_type=MultipleChoiceModelOutput, config_class=_CONFIG_FOR_DOC, ) @@ -869,7 +870,7 @@ class MPNetForTokenClassification(MPNetPreTrainedModel): @add_start_docstrings_to_model_forward(MPNET_INPUTS_DOCSTRING.format("(batch_size, sequence_length)")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, - checkpoint="microsoft/mpnet-base", + checkpoint=_CHECKPOINT_FOR_DOC, output_type=TokenClassifierOutput, config_class=_CONFIG_FOR_DOC, ) @@ -977,7 +978,7 @@ class MPNetForQuestionAnswering(MPNetPreTrainedModel): @add_start_docstrings_to_model_forward(MPNET_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, - checkpoint="microsoft/mpnet-base", + checkpoint=_CHECKPOINT_FOR_DOC, output_type=QuestionAnsweringModelOutput, config_class=_CONFIG_FOR_DOC, ) diff --git a/src/transformers/models/mpnet/modeling_tf_mpnet.py b/src/transformers/models/mpnet/modeling_tf_mpnet.py index c3b95c6b77..dd02f2aa41 100644 --- a/src/transformers/models/mpnet/modeling_tf_mpnet.py +++ b/src/transformers/models/mpnet/modeling_tf_mpnet.py @@ -55,6 +55,7 @@ from .configuration_mpnet import MPNetConfig logger = logging.get_logger(__name__) +_CHECKPOINT_FOR_DOC = "microsoft/mpnet-base" _CONFIG_FOR_DOC = "MPNetConfig" _TOKENIZER_FOR_DOC = "MPNetTokenizer" @@ -684,7 +685,7 @@ class TFMPNetModel(TFMPNetPreTrainedModel): @add_start_docstrings_to_model_forward(MPNET_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, - checkpoint="microsoft/mpnet-base", + checkpoint=_CHECKPOINT_FOR_DOC, output_type=TFBaseModelOutput, config_class=_CONFIG_FOR_DOC, ) @@ -814,7 +815,7 @@ class TFMPNetForMaskedLM(TFMPNetPreTrainedModel, TFMaskedLanguageModelingLoss): @add_start_docstrings_to_model_forward(MPNET_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, - checkpoint="microsoft/mpnet-base", + checkpoint=_CHECKPOINT_FOR_DOC, output_type=TFMaskedLMOutput, config_class=_CONFIG_FOR_DOC, ) @@ -935,7 +936,7 @@ class TFMPNetForSequenceClassification(TFMPNetPreTrainedModel, TFSequenceClassif @add_start_docstrings_to_model_forward(MPNET_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, - checkpoint="microsoft/mpnet-base", + checkpoint=_CHECKPOINT_FOR_DOC, output_type=TFSequenceClassifierOutput, config_class=_CONFIG_FOR_DOC, ) @@ -1041,7 +1042,7 @@ class TFMPNetForMultipleChoice(TFMPNetPreTrainedModel, TFMultipleChoiceLoss): @add_start_docstrings_to_model_forward(MPNET_INPUTS_DOCSTRING.format("batch_size, num_choices, sequence_length")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, - checkpoint="microsoft/mpnet-base", + checkpoint=_CHECKPOINT_FOR_DOC, output_type=TFMultipleChoiceModelOutput, config_class=_CONFIG_FOR_DOC, ) @@ -1173,7 +1174,7 @@ class TFMPNetForTokenClassification(TFMPNetPreTrainedModel, TFTokenClassificatio @add_start_docstrings_to_model_forward(MPNET_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, - checkpoint="microsoft/mpnet-base", + checkpoint=_CHECKPOINT_FOR_DOC, output_type=TFTokenClassifierOutput, config_class=_CONFIG_FOR_DOC, ) @@ -1272,7 +1273,7 @@ class TFMPNetForQuestionAnswering(TFMPNetPreTrainedModel, TFQuestionAnsweringLos @add_start_docstrings_to_model_forward(MPNET_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, - checkpoint="microsoft/mpnet-base", + checkpoint=_CHECKPOINT_FOR_DOC, output_type=TFQuestionAnsweringModelOutput, config_class=_CONFIG_FOR_DOC, ) diff --git a/src/transformers/models/openai/modeling_openai.py b/src/transformers/models/openai/modeling_openai.py index 69ff6e676d..0864a8b328 100644 --- a/src/transformers/models/openai/modeling_openai.py +++ b/src/transformers/models/openai/modeling_openai.py @@ -48,6 +48,7 @@ from .configuration_openai import OpenAIGPTConfig logger = logging.get_logger(__name__) +_CHECKPOINT_FOR_DOC = "openai-gpt" _CONFIG_FOR_DOC = "OpenAIGPTConfig" _TOKENIZER_FOR_DOC = "OpenAIGPTTokenizer" @@ -433,7 +434,7 @@ class OpenAIGPTModel(OpenAIGPTPreTrainedModel): @add_start_docstrings_to_model_forward(OPENAI_GPT_INPUTS_DOCSTRING) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, - checkpoint="openai-gpt", + checkpoint=_CHECKPOINT_FOR_DOC, output_type=BaseModelOutput, config_class=_CONFIG_FOR_DOC, ) @@ -552,7 +553,7 @@ class OpenAIGPTLMHeadModel(OpenAIGPTPreTrainedModel): @add_start_docstrings_to_model_forward(OPENAI_GPT_INPUTS_DOCSTRING) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, - checkpoint="openai-gpt", + checkpoint=_CHECKPOINT_FOR_DOC, output_type=CausalLMOutput, config_class=_CONFIG_FOR_DOC, ) @@ -756,7 +757,7 @@ class OpenAIGPTForSequenceClassification(OpenAIGPTPreTrainedModel): @add_start_docstrings_to_model_forward(OPENAI_GPT_INPUTS_DOCSTRING) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, - checkpoint="openai-gpt", + checkpoint=_CHECKPOINT_FOR_DOC, output_type=SequenceClassifierOutput, config_class=_CONFIG_FOR_DOC, ) diff --git a/src/transformers/models/openai/modeling_tf_openai.py b/src/transformers/models/openai/modeling_tf_openai.py index 0ce16b670f..0c2c7e2a66 100644 --- a/src/transformers/models/openai/modeling_tf_openai.py +++ b/src/transformers/models/openai/modeling_tf_openai.py @@ -47,6 +47,7 @@ from .configuration_openai import OpenAIGPTConfig logger = logging.get_logger(__name__) +_CHECKPOINT_FOR_DOC = "openai-gpt" _CONFIG_FOR_DOC = "OpenAIGPTConfig" _TOKENIZER_FOR_DOC = "OpenAIGPTTokenizer" @@ -522,7 +523,7 @@ class TFOpenAIGPTModel(TFOpenAIGPTPreTrainedModel): @add_start_docstrings_to_model_forward(OPENAI_GPT_INPUTS_DOCSTRING) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, - checkpoint="openai-gpt", + checkpoint=_CHECKPOINT_FOR_DOC, output_type=TFBaseModelOutput, config_class=_CONFIG_FOR_DOC, ) @@ -598,7 +599,7 @@ class TFOpenAIGPTLMHeadModel(TFOpenAIGPTPreTrainedModel, TFCausalLanguageModelin @add_start_docstrings_to_model_forward(OPENAI_GPT_INPUTS_DOCSTRING) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, - checkpoint="openai-gpt", + checkpoint=_CHECKPOINT_FOR_DOC, output_type=TFCausalLMOutput, config_class=_CONFIG_FOR_DOC, ) @@ -857,7 +858,7 @@ class TFOpenAIGPTForSequenceClassification(TFOpenAIGPTPreTrainedModel, TFSequenc @add_start_docstrings_to_model_forward(OPENAI_GPT_INPUTS_DOCSTRING) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, - checkpoint="openai-gpt", + checkpoint=_CHECKPOINT_FOR_DOC, output_type=TFSequenceClassifierOutput, config_class=_CONFIG_FOR_DOC, ) diff --git a/src/transformers/models/pegasus/modeling_tf_pegasus.py b/src/transformers/models/pegasus/modeling_tf_pegasus.py index 536914688a..504c7d23af 100644 --- a/src/transformers/models/pegasus/modeling_tf_pegasus.py +++ b/src/transformers/models/pegasus/modeling_tf_pegasus.py @@ -53,6 +53,7 @@ from .configuration_pegasus import PegasusConfig logger = logging.get_logger(__name__) +_CHECKPOINT_FOR_DOC = "google/pegasus-large" _CONFIG_FOR_DOC = "PegasusConfig" _TOKENIZER_FOR_DOC = "PegasusTokenizer" @@ -1216,7 +1217,7 @@ class TFPegasusModel(TFPegasusPreTrainedModel): @add_start_docstrings_to_model_forward(PEGASUS_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, - checkpoint="google/pegasus-large", + checkpoint=_CHECKPOINT_FOR_DOC, output_type=TFSeq2SeqModelOutput, config_class=_CONFIG_FOR_DOC, ) diff --git a/src/transformers/models/reformer/modeling_reformer.py b/src/transformers/models/reformer/modeling_reformer.py index 0f8caa8bef..0ff34454ae 100755 --- a/src/transformers/models/reformer/modeling_reformer.py +++ b/src/transformers/models/reformer/modeling_reformer.py @@ -45,6 +45,7 @@ from .configuration_reformer import ReformerConfig logger = logging.get_logger(__name__) +_CHECKPOINT_FOR_DOC = "google/reformer-crime-and-punishment" _CONFIG_FOR_DOC = "ReformerConfig" _TOKENIZER_FOR_DOC = "ReformerTokenizer" @@ -1997,7 +1998,7 @@ class ReformerModel(ReformerPreTrainedModel): @add_start_docstrings_to_model_forward(REFORMER_INPUTS_DOCSTRING) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, - checkpoint="google/reformer-crime-and-punishment", + checkpoint=_CHECKPOINT_FOR_DOC, output_type=ReformerModelOutput, config_class=_CONFIG_FOR_DOC, ) @@ -2204,7 +2205,7 @@ class ReformerModelWithLMHead(ReformerPreTrainedModel): @add_start_docstrings_to_model_forward(REFORMER_INPUTS_DOCSTRING) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, - checkpoint="google/reformer-crime-and-punishment", + checkpoint=_CHECKPOINT_FOR_DOC, output_type=CausalLMOutput, config_class=_CONFIG_FOR_DOC, ) @@ -2319,7 +2320,7 @@ class ReformerForMaskedLM(ReformerPreTrainedModel): @add_start_docstrings_to_model_forward(REFORMER_INPUTS_DOCSTRING) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, - checkpoint="google/reformer-crime-and-punishment", + checkpoint=_CHECKPOINT_FOR_DOC, output_type=MaskedLMOutput, config_class=_CONFIG_FOR_DOC, ) @@ -2399,7 +2400,7 @@ class ReformerForSequenceClassification(ReformerPreTrainedModel): @add_start_docstrings_to_model_forward(REFORMER_INPUTS_DOCSTRING) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, - checkpoint="google/reformer-crime-and-punishment", + checkpoint=_CHECKPOINT_FOR_DOC, output_type=SequenceClassifierOutput, config_class=_CONFIG_FOR_DOC, ) @@ -2501,7 +2502,7 @@ class ReformerForQuestionAnswering(ReformerPreTrainedModel): @add_start_docstrings_to_model_forward(REFORMER_INPUTS_DOCSTRING) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, - checkpoint="google/reformer-crime-and-punishment", + checkpoint=_CHECKPOINT_FOR_DOC, output_type=QuestionAnsweringModelOutput, config_class=_CONFIG_FOR_DOC, ) diff --git a/src/transformers/models/roberta/modeling_roberta.py b/src/transformers/models/roberta/modeling_roberta.py index 8297a0a855..f7cc6b5555 100644 --- a/src/transformers/models/roberta/modeling_roberta.py +++ b/src/transformers/models/roberta/modeling_roberta.py @@ -51,6 +51,7 @@ from .configuration_roberta import RobertaConfig logger = logging.get_logger(__name__) +_CHECKPOINT_FOR_DOC = "roberta-base" _CONFIG_FOR_DOC = "RobertaConfig" _TOKENIZER_FOR_DOC = "RobertaTokenizer" @@ -709,7 +710,7 @@ class RobertaModel(RobertaPreTrainedModel): @add_start_docstrings_to_model_forward(ROBERTA_INPUTS_DOCSTRING.format("(batch_size, sequence_length)")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, - checkpoint="roberta-base", + checkpoint=_CHECKPOINT_FOR_DOC, output_type=BaseModelOutputWithPoolingAndCrossAttentions, config_class=_CONFIG_FOR_DOC, ) @@ -1012,7 +1013,7 @@ class RobertaForMaskedLM(RobertaPreTrainedModel): @add_start_docstrings_to_model_forward(ROBERTA_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, - checkpoint="roberta-base", + checkpoint=_CHECKPOINT_FOR_DOC, output_type=MaskedLMOutput, config_class=_CONFIG_FOR_DOC, mask="", @@ -1122,7 +1123,7 @@ class RobertaForSequenceClassification(RobertaPreTrainedModel): @add_start_docstrings_to_model_forward(ROBERTA_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, - checkpoint="roberta-base", + checkpoint=_CHECKPOINT_FOR_DOC, output_type=SequenceClassifierOutput, config_class=_CONFIG_FOR_DOC, ) @@ -1205,7 +1206,7 @@ class RobertaForMultipleChoice(RobertaPreTrainedModel): @add_start_docstrings_to_model_forward(ROBERTA_INPUTS_DOCSTRING.format("batch_size, num_choices, sequence_length")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, - checkpoint="roberta-base", + checkpoint=_CHECKPOINT_FOR_DOC, output_type=MultipleChoiceModelOutput, config_class=_CONFIG_FOR_DOC, ) @@ -1299,7 +1300,7 @@ class RobertaForTokenClassification(RobertaPreTrainedModel): @add_start_docstrings_to_model_forward(ROBERTA_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, - checkpoint="roberta-base", + checkpoint=_CHECKPOINT_FOR_DOC, output_type=TokenClassifierOutput, config_class=_CONFIG_FOR_DOC, ) @@ -1408,7 +1409,7 @@ class RobertaForQuestionAnswering(RobertaPreTrainedModel): @add_start_docstrings_to_model_forward(ROBERTA_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, - checkpoint="roberta-base", + checkpoint=_CHECKPOINT_FOR_DOC, output_type=QuestionAnsweringModelOutput, config_class=_CONFIG_FOR_DOC, ) diff --git a/src/transformers/models/roberta/modeling_tf_roberta.py b/src/transformers/models/roberta/modeling_tf_roberta.py index 24f47c36f5..07922d6b2e 100644 --- a/src/transformers/models/roberta/modeling_tf_roberta.py +++ b/src/transformers/models/roberta/modeling_tf_roberta.py @@ -57,6 +57,7 @@ from .configuration_roberta import RobertaConfig logger = logging.get_logger(__name__) +_CHECKPOINT_FOR_DOC = "roberta-base" _CONFIG_FOR_DOC = "RobertaConfig" _TOKENIZER_FOR_DOC = "RobertaTokenizer" @@ -707,7 +708,7 @@ class TFRobertaModel(TFRobertaPreTrainedModel): @add_start_docstrings_to_model_forward(ROBERTA_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, - checkpoint="roberta-base", + checkpoint=_CHECKPOINT_FOR_DOC, output_type=TFBaseModelOutputWithPooling, config_class=_CONFIG_FOR_DOC, ) @@ -841,7 +842,7 @@ class TFRobertaForMaskedLM(TFRobertaPreTrainedModel, TFMaskedLanguageModelingLos @add_start_docstrings_to_model_forward(ROBERTA_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, - checkpoint="roberta-base", + checkpoint=_CHECKPOINT_FOR_DOC, output_type=TFMaskedLMOutput, config_class=_CONFIG_FOR_DOC, ) @@ -965,7 +966,7 @@ class TFRobertaForSequenceClassification(TFRobertaPreTrainedModel, TFSequenceCla @add_start_docstrings_to_model_forward(ROBERTA_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, - checkpoint="roberta-base", + checkpoint=_CHECKPOINT_FOR_DOC, output_type=TFSequenceClassifierOutput, config_class=_CONFIG_FOR_DOC, ) @@ -1076,7 +1077,7 @@ class TFRobertaForMultipleChoice(TFRobertaPreTrainedModel, TFMultipleChoiceLoss) @add_start_docstrings_to_model_forward(ROBERTA_INPUTS_DOCSTRING.format("batch_size, num_choices, sequence_length")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, - checkpoint="roberta-base", + checkpoint=_CHECKPOINT_FOR_DOC, output_type=TFMultipleChoiceModelOutput, config_class=_CONFIG_FOR_DOC, ) @@ -1211,7 +1212,7 @@ class TFRobertaForTokenClassification(TFRobertaPreTrainedModel, TFTokenClassific @add_start_docstrings_to_model_forward(ROBERTA_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, - checkpoint="roberta-base", + checkpoint=_CHECKPOINT_FOR_DOC, output_type=TFTokenClassifierOutput, config_class=_CONFIG_FOR_DOC, ) @@ -1312,7 +1313,7 @@ class TFRobertaForQuestionAnswering(TFRobertaPreTrainedModel, TFQuestionAnswerin @add_start_docstrings_to_model_forward(ROBERTA_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, - checkpoint="roberta-base", + checkpoint=_CHECKPOINT_FOR_DOC, output_type=TFQuestionAnsweringModelOutput, config_class=_CONFIG_FOR_DOC, ) diff --git a/src/transformers/models/squeezebert/modeling_squeezebert.py b/src/transformers/models/squeezebert/modeling_squeezebert.py index d298a6d71a..455bc4881d 100644 --- a/src/transformers/models/squeezebert/modeling_squeezebert.py +++ b/src/transformers/models/squeezebert/modeling_squeezebert.py @@ -39,6 +39,7 @@ from .configuration_squeezebert import SqueezeBertConfig logger = logging.get_logger(__name__) +_CHECKPOINT_FOR_DOC = "squeezebert/squeezebert-uncased" _CONFIG_FOR_DOC = "SqueezeBertConfig" _TOKENIZER_FOR_DOC = "SqueezeBertTokenizer" @@ -572,7 +573,7 @@ class SqueezeBertModel(SqueezeBertPreTrainedModel): @add_start_docstrings_to_model_forward(SQUEEZEBERT_INPUTS_DOCSTRING.format("(batch_size, sequence_length)")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, - checkpoint="squeezebert/squeezebert-mnli-headless", + checkpoint=_CHECKPOINT_FOR_DOC, output_type=BaseModelOutputWithPooling, config_class=_CONFIG_FOR_DOC, ) @@ -665,7 +666,7 @@ class SqueezeBertForMaskedLM(SqueezeBertPreTrainedModel): @add_start_docstrings_to_model_forward(SQUEEZEBERT_INPUTS_DOCSTRING.format("(batch_size, sequence_length)")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, - checkpoint="squeezebert/squeezebert-uncased", + checkpoint=_CHECKPOINT_FOR_DOC, output_type=MaskedLMOutput, config_class=_CONFIG_FOR_DOC, ) @@ -743,7 +744,7 @@ class SqueezeBertForSequenceClassification(SqueezeBertPreTrainedModel): @add_start_docstrings_to_model_forward(SQUEEZEBERT_INPUTS_DOCSTRING.format("(batch_size, sequence_length)")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, - checkpoint="squeezebert/squeezebert-mnli-headless", + checkpoint=_CHECKPOINT_FOR_DOC, output_type=SequenceClassifierOutput, config_class=_CONFIG_FOR_DOC, ) @@ -829,7 +830,7 @@ class SqueezeBertForMultipleChoice(SqueezeBertPreTrainedModel): ) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, - checkpoint="squeezebert/squeezebert-mnli-headless", + checkpoint=_CHECKPOINT_FOR_DOC, output_type=MultipleChoiceModelOutput, config_class=_CONFIG_FOR_DOC, ) @@ -921,7 +922,7 @@ class SqueezeBertForTokenClassification(SqueezeBertPreTrainedModel): @add_start_docstrings_to_model_forward(SQUEEZEBERT_INPUTS_DOCSTRING.format("(batch_size, sequence_length)")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, - checkpoint="squeezebert/squeezebert-mnli-headless", + checkpoint=_CHECKPOINT_FOR_DOC, output_type=TokenClassifierOutput, config_class=_CONFIG_FOR_DOC, ) @@ -1008,7 +1009,7 @@ class SqueezeBertForQuestionAnswering(SqueezeBertPreTrainedModel): @add_start_docstrings_to_model_forward(SQUEEZEBERT_INPUTS_DOCSTRING.format("(batch_size, sequence_length)")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, - checkpoint="squeezebert/squeezebert-mnli-headless", + checkpoint=_CHECKPOINT_FOR_DOC, output_type=QuestionAnsweringModelOutput, config_class=_CONFIG_FOR_DOC, ) diff --git a/src/transformers/models/transfo_xl/modeling_tf_transfo_xl.py b/src/transformers/models/transfo_xl/modeling_tf_transfo_xl.py index 996eb7f42d..31d3aae482 100644 --- a/src/transformers/models/transfo_xl/modeling_tf_transfo_xl.py +++ b/src/transformers/models/transfo_xl/modeling_tf_transfo_xl.py @@ -43,6 +43,7 @@ from .modeling_tf_transfo_xl_utilities import TFAdaptiveSoftmaxMask logger = logging.get_logger(__name__) +_CHECKPOINT_FOR_DOC = "transfo-xl-wt103" _CONFIG_FOR_DOC = "TransfoXLConfig" _TOKENIZER_FOR_DOC = "TransfoXLTokenizer" @@ -883,7 +884,7 @@ class TFTransfoXLModel(TFTransfoXLPreTrainedModel): @add_start_docstrings_to_model_forward(TRANSFO_XL_INPUTS_DOCSTRING) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, - checkpoint="transfo-xl-wt103", + checkpoint=_CHECKPOINT_FOR_DOC, output_type=TFTransfoXLModelOutput, config_class=_CONFIG_FOR_DOC, ) @@ -975,7 +976,7 @@ class TFTransfoXLLMHeadModel(TFTransfoXLPreTrainedModel): @add_start_docstrings_to_model_forward(TRANSFO_XL_INPUTS_DOCSTRING) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, - checkpoint="transfo-xl-wt103", + checkpoint=_CHECKPOINT_FOR_DOC, output_type=TFTransfoXLLMHeadModelOutput, config_class=_CONFIG_FOR_DOC, ) @@ -1091,7 +1092,7 @@ class TFTransfoXLForSequenceClassification(TFTransfoXLPreTrainedModel, TFSequenc @add_start_docstrings_to_model_forward(TRANSFO_XL_INPUTS_DOCSTRING) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, - checkpoint="transfo-xl-wt103", + checkpoint=_CHECKPOINT_FOR_DOC, output_type=TFTransfoXLSequenceClassifierOutputWithPast, config_class=_CONFIG_FOR_DOC, ) diff --git a/src/transformers/models/transfo_xl/modeling_transfo_xl.py b/src/transformers/models/transfo_xl/modeling_transfo_xl.py index 01d5cf0454..bab4af8b3f 100644 --- a/src/transformers/models/transfo_xl/modeling_transfo_xl.py +++ b/src/transformers/models/transfo_xl/modeling_transfo_xl.py @@ -39,6 +39,7 @@ from .modeling_transfo_xl_utilities import ProjectedAdaptiveLogSoftmax logger = logging.get_logger(__name__) +_CHECKPOINT_FOR_DOC = "transfo-xl-wt103" _CONFIG_FOR_DOC = "TransfoXLConfig" _TOKENIZER_FOR_DOC = "TransfoXLTokenizer" @@ -872,7 +873,7 @@ class TransfoXLModel(TransfoXLPreTrainedModel): @add_start_docstrings_to_model_forward(TRANSFO_XL_INPUTS_DOCSTRING) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, - checkpoint="transfo-xl-wt103", + checkpoint=_CHECKPOINT_FOR_DOC, output_type=TransfoXLModelOutput, config_class=_CONFIG_FOR_DOC, ) @@ -1053,7 +1054,7 @@ class TransfoXLLMHeadModel(TransfoXLPreTrainedModel): @add_start_docstrings_to_model_forward(TRANSFO_XL_INPUTS_DOCSTRING) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, - checkpoint="transfo-xl-wt103", + checkpoint=_CHECKPOINT_FOR_DOC, output_type=TransfoXLLMHeadModelOutput, config_class=_CONFIG_FOR_DOC, ) @@ -1175,7 +1176,7 @@ class TransfoXLForSequenceClassification(TransfoXLPreTrainedModel): @add_start_docstrings_to_model_forward(TRANSFO_XL_INPUTS_DOCSTRING) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, - checkpoint="transfo-xl-wt103", + checkpoint=_CHECKPOINT_FOR_DOC, output_type=TransfoXLSequenceClassifierOutputWithPast, config_class=_CONFIG_FOR_DOC, ) diff --git a/src/transformers/models/xlm/modeling_tf_xlm.py b/src/transformers/models/xlm/modeling_tf_xlm.py index b94310a2b7..fb1ba012e7 100644 --- a/src/transformers/models/xlm/modeling_tf_xlm.py +++ b/src/transformers/models/xlm/modeling_tf_xlm.py @@ -58,6 +58,7 @@ from .configuration_xlm import XLMConfig logger = logging.get_logger(__name__) +_CHECKPOINT_FOR_DOC = "xlm-mlm-en-2048" _CONFIG_FOR_DOC = "XLMConfig" _TOKENIZER_FOR_DOC = "XLMTokenizer" @@ -703,7 +704,7 @@ class TFXLMModel(TFXLMPreTrainedModel): @add_start_docstrings_to_model_forward(XLM_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, - checkpoint="xlm-mlm-en-2048", + checkpoint=_CHECKPOINT_FOR_DOC, output_type=TFBaseModelOutput, config_class=_CONFIG_FOR_DOC, ) @@ -856,7 +857,7 @@ class TFXLMWithLMHeadModel(TFXLMPreTrainedModel): @add_start_docstrings_to_model_forward(XLM_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, - checkpoint="xlm-mlm-en-2048", + checkpoint=_CHECKPOINT_FOR_DOC, output_type=TFXLMWithLMHeadModelOutput, config_class=_CONFIG_FOR_DOC, ) @@ -946,7 +947,7 @@ class TFXLMForSequenceClassification(TFXLMPreTrainedModel, TFSequenceClassificat @add_start_docstrings_to_model_forward(XLM_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, - checkpoint="xlm-mlm-en-2048", + checkpoint=_CHECKPOINT_FOR_DOC, output_type=TFSequenceClassifierOutput, config_class=_CONFIG_FOR_DOC, ) @@ -1072,7 +1073,7 @@ class TFXLMForMultipleChoice(TFXLMPreTrainedModel, TFMultipleChoiceLoss): @add_start_docstrings_to_model_forward(XLM_INPUTS_DOCSTRING.format("batch_size, num_choices, sequence_length")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, - checkpoint="xlm-mlm-en-2048", + checkpoint=_CHECKPOINT_FOR_DOC, output_type=TFMultipleChoiceModelOutput, config_class=_CONFIG_FOR_DOC, ) @@ -1222,7 +1223,7 @@ class TFXLMForTokenClassification(TFXLMPreTrainedModel, TFTokenClassificationLos @add_start_docstrings_to_model_forward(XLM_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, - checkpoint="xlm-mlm-en-2048", + checkpoint=_CHECKPOINT_FOR_DOC, output_type=TFTokenClassifierOutput, config_class=_CONFIG_FOR_DOC, ) @@ -1327,7 +1328,7 @@ class TFXLMForQuestionAnsweringSimple(TFXLMPreTrainedModel, TFQuestionAnsweringL @add_start_docstrings_to_model_forward(XLM_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, - checkpoint="xlm-mlm-en-2048", + checkpoint=_CHECKPOINT_FOR_DOC, output_type=TFQuestionAnsweringModelOutput, config_class=_CONFIG_FOR_DOC, ) diff --git a/src/transformers/models/xlm/modeling_xlm.py b/src/transformers/models/xlm/modeling_xlm.py index bb7dddb287..8867d32547 100755 --- a/src/transformers/models/xlm/modeling_xlm.py +++ b/src/transformers/models/xlm/modeling_xlm.py @@ -57,6 +57,7 @@ from .configuration_xlm import XLMConfig logger = logging.get_logger(__name__) +_CHECKPOINT_FOR_DOC = "xlm-mlm-en-2048" _CONFIG_FOR_DOC = "XLMConfig" _TOKENIZER_FOR_DOC = "XLMTokenizer" @@ -489,7 +490,7 @@ class XLMModel(XLMPreTrainedModel): @add_start_docstrings_to_model_forward(XLM_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, - checkpoint="xlm-mlm-en-2048", + checkpoint=_CHECKPOINT_FOR_DOC, output_type=BaseModelOutput, config_class=_CONFIG_FOR_DOC, ) @@ -709,7 +710,7 @@ class XLMWithLMHeadModel(XLMPreTrainedModel): @add_start_docstrings_to_model_forward(XLM_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, - checkpoint="xlm-mlm-en-2048", + checkpoint=_CHECKPOINT_FOR_DOC, output_type=MaskedLMOutput, config_class=_CONFIG_FOR_DOC, mask="", @@ -787,7 +788,7 @@ class XLMForSequenceClassification(XLMPreTrainedModel): @add_start_docstrings_to_model_forward(XLM_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, - checkpoint="xlm-mlm-en-2048", + checkpoint=_CHECKPOINT_FOR_DOC, output_type=SequenceClassifierOutput, config_class=_CONFIG_FOR_DOC, ) @@ -874,7 +875,7 @@ class XLMForQuestionAnsweringSimple(XLMPreTrainedModel): @add_start_docstrings_to_model_forward(XLM_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, - checkpoint="xlm-mlm-en-2048", + checkpoint=_CHECKPOINT_FOR_DOC, output_type=QuestionAnsweringModelOutput, config_class=_CONFIG_FOR_DOC, ) @@ -1097,7 +1098,7 @@ class XLMForTokenClassification(XLMPreTrainedModel): @add_start_docstrings_to_model_forward(XLM_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, - checkpoint="xlm-mlm-en-2048", + checkpoint=_CHECKPOINT_FOR_DOC, output_type=TokenClassifierOutput, config_class=_CONFIG_FOR_DOC, ) @@ -1190,7 +1191,7 @@ class XLMForMultipleChoice(XLMPreTrainedModel): @add_start_docstrings_to_model_forward(XLM_INPUTS_DOCSTRING.format("batch_size, num_choicec, sequence_length")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, - checkpoint="xlm-mlm-en-2048", + checkpoint=_CHECKPOINT_FOR_DOC, output_type=MultipleChoiceModelOutput, config_class=_CONFIG_FOR_DOC, ) diff --git a/src/transformers/models/xlnet/modeling_tf_xlnet.py b/src/transformers/models/xlnet/modeling_tf_xlnet.py index 89e83995d8..21348d3be7 100644 --- a/src/transformers/models/xlnet/modeling_tf_xlnet.py +++ b/src/transformers/models/xlnet/modeling_tf_xlnet.py @@ -52,6 +52,7 @@ from .configuration_xlnet import XLNetConfig logger = logging.get_logger(__name__) +_CHECKPOINT_FOR_DOC = "xlnet-base-cased" _CONFIG_FOR_DOC = "XLNetConfig" _TOKENIZER_FOR_DOC = "XLNetTokenizer" @@ -1154,7 +1155,7 @@ class TFXLNetModel(TFXLNetPreTrainedModel): @add_start_docstrings_to_model_forward(XLNET_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, - checkpoint="xlnet-base-cased", + checkpoint=_CHECKPOINT_FOR_DOC, output_type=TFXLNetModelOutput, config_class=_CONFIG_FOR_DOC, ) @@ -1423,7 +1424,7 @@ class TFXLNetForSequenceClassification(TFXLNetPreTrainedModel, TFSequenceClassif @add_start_docstrings_to_model_forward(XLNET_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, - checkpoint="xlnet-base-cased", + checkpoint=_CHECKPOINT_FOR_DOC, output_type=TFXLNetForSequenceClassificationOutput, config_class=_CONFIG_FOR_DOC, ) @@ -1549,7 +1550,7 @@ class TFXLNetForMultipleChoice(TFXLNetPreTrainedModel, TFMultipleChoiceLoss): @add_start_docstrings_to_model_forward(XLNET_INPUTS_DOCSTRING.format("batch_size, num_choices, sequence_length")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, - checkpoint="xlnet-base-cased", + checkpoint=_CHECKPOINT_FOR_DOC, output_type=TFXLNetForMultipleChoiceOutput, config_class=_CONFIG_FOR_DOC, ) @@ -1698,7 +1699,7 @@ class TFXLNetForTokenClassification(TFXLNetPreTrainedModel, TFTokenClassificatio @add_start_docstrings_to_model_forward(XLNET_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, - checkpoint="xlnet-base-cased", + checkpoint=_CHECKPOINT_FOR_DOC, output_type=TFXLNetForTokenClassificationOutput, config_class=_CONFIG_FOR_DOC, ) @@ -1805,7 +1806,7 @@ class TFXLNetForQuestionAnsweringSimple(TFXLNetPreTrainedModel, TFQuestionAnswer @add_start_docstrings_to_model_forward(XLNET_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, - checkpoint="xlnet-base-cased", + checkpoint=_CHECKPOINT_FOR_DOC, output_type=TFXLNetForQuestionAnsweringSimpleOutput, config_class=_CONFIG_FOR_DOC, ) diff --git a/src/transformers/models/xlnet/modeling_xlnet.py b/src/transformers/models/xlnet/modeling_xlnet.py index 74874a9f05..d60462ad0f 100755 --- a/src/transformers/models/xlnet/modeling_xlnet.py +++ b/src/transformers/models/xlnet/modeling_xlnet.py @@ -47,6 +47,7 @@ from .configuration_xlnet import XLNetConfig logger = logging.get_logger(__name__) +_CHECKPOINT_FOR_DOC = "xlnet-base-cased" _CONFIG_FOR_DOC = "XLNetConfig" _TOKENIZER_FOR_DOC = "XLNetTokenizer" @@ -1070,7 +1071,7 @@ class XLNetModel(XLNetPreTrainedModel): @add_start_docstrings_to_model_forward(XLNET_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, - checkpoint="xlnet-base-cased", + checkpoint=_CHECKPOINT_FOR_DOC, output_type=XLNetModelOutput, config_class=_CONFIG_FOR_DOC, ) @@ -1497,7 +1498,7 @@ class XLNetForSequenceClassification(XLNetPreTrainedModel): @add_start_docstrings_to_model_forward(XLNET_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, - checkpoint="xlnet-base-cased", + checkpoint=_CHECKPOINT_FOR_DOC, output_type=XLNetForSequenceClassificationOutput, config_class=_CONFIG_FOR_DOC, ) @@ -1591,7 +1592,7 @@ class XLNetForTokenClassification(XLNetPreTrainedModel): @add_start_docstrings_to_model_forward(XLNET_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, - checkpoint="xlnet-base-cased", + checkpoint=_CHECKPOINT_FOR_DOC, output_type=XLNetForTokenClassificationOutput, config_class=_CONFIG_FOR_DOC, ) @@ -1688,7 +1689,7 @@ class XLNetForMultipleChoice(XLNetPreTrainedModel): @add_start_docstrings_to_model_forward(XLNET_INPUTS_DOCSTRING.format("batch_size, num_choices, sequence_length")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, - checkpoint="xlnet-base-cased", + checkpoint=_CHECKPOINT_FOR_DOC, output_type=XLNetForMultipleChoiceOutput, config_class=_CONFIG_FOR_DOC, ) @@ -1791,7 +1792,7 @@ class XLNetForQuestionAnsweringSimple(XLNetPreTrainedModel): @add_start_docstrings_to_model_forward(XLNET_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_code_sample_docstrings( tokenizer_class=_TOKENIZER_FOR_DOC, - checkpoint="xlnet-base-cased", + checkpoint=_CHECKPOINT_FOR_DOC, output_type=XLNetForQuestionAnsweringSimpleOutput, config_class=_CONFIG_FOR_DOC, )