Refactoring checkpoint names for multiple models (#10527)

* Refactor checkpoint name in ALBERT and ALBERT_tf * Refactor checkpoint name in BART and BART_tf * Refactor checkpoint name in BERT generation * Refactor checkpoint name in Blenderbot_tf * Refactor checkpoint name in Blenderbot_small_tf * Refactor checkpoint name in ConvBERT AND CONVBERT_TF * Refactor checkpoint name in CTRL AND CTRL_TF * Refactor checkpoint name in DistilBERT AND DistilBERT_TF * Refactor checkpoint name in DistilBERT redo * Refactor checkpoint name in Electra and Electra_tf * Refactor checkpoint name in FlauBERT and FlauBERT_tf * Refactor checkpoint name in FSMT * Refactor checkpoint name in GPT2 and GPT2_tf * Refactor checkpoint name in IBERT * Refactor checkpoint name in LED and LED_tf * Refactor checkpoint name in Longformer and Longformer_tf * Refactor checkpoint name in Lxmert and Lxmert_tf * Refactor checkpoint name in Marian_tf * Refactor checkpoint name in MBART and MBART_tf * Refactor checkpoint name in MobileBERT and MobileBERT_tf * Refactor checkpoint name in mpnet and mpnet_tf * Refactor checkpoint name in openai and openai_tf * Refactor checkpoint name in pegasus_tf * Refactor checkpoint name in reformer * Refactor checkpoint name in Roberta and Roberta_tf * Refactor checkpoint name in SqueezeBert * Refactor checkpoint name in Transformer_xl and Transformer_xl_tf * Refactor checkpoint name in XLM and XLM_tf * Refactor checkpoint name in XLNET and XLNET_tf * Refactor checkpoint name in BERT_tf * run make tests, style, quality, fixup
2021-03-05 18:06:55 -05:00
parent defe9e20fe
commit 90ecc29656
47 changed files with 223 additions and 179 deletions
--- a/src/transformers/models/bert/modeling_tf_bert.py
+++ b/src/transformers/models/bert/modeling_tf_bert.py
@@ -64,6 +64,7 @@ from .configuration_bert import BertConfig

 logger = logging.get_logger(__name__)

+_CHECKPOINT_FOR_DOC = "bert-base-cased"
 _CONFIG_FOR_DOC = "BertConfig"
 _TOKENIZER_FOR_DOC = "BertTokenizer"

@@ -850,7 +851,7 @@ class TFBertModel(TFBertPreTrainedModel):
    @add_start_docstrings_to_model_forward(BERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
    @add_code_sample_docstrings(
        tokenizer_class=_TOKENIZER_FOR_DOC,
-        checkpoint="bert-base-cased",
+        checkpoint=_CHECKPOINT_FOR_DOC,
        output_type=TFBaseModelOutputWithPooling,
        config_class=_CONFIG_FOR_DOC,
    )
@@ -1067,7 +1068,7 @@ class TFBertForMaskedLM(TFBertPreTrainedModel, TFMaskedLanguageModelingLoss):
    @add_start_docstrings_to_model_forward(BERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
    @add_code_sample_docstrings(
        tokenizer_class=_TOKENIZER_FOR_DOC,
-        checkpoint="bert-base-cased",
+        checkpoint=_CHECKPOINT_FOR_DOC,
        output_type=TFMaskedLMOutput,
        config_class=_CONFIG_FOR_DOC,
    )
@@ -1171,7 +1172,7 @@ class TFBertLMHeadModel(TFBertPreTrainedModel, TFCausalLanguageModelingLoss):

    @add_code_sample_docstrings(
        tokenizer_class=_TOKENIZER_FOR_DOC,
-        checkpoint="bert-base-cased",
+        checkpoint=_CHECKPOINT_FOR_DOC,
        output_type=TFCausalLMOutput,
        config_class=_CONFIG_FOR_DOC,
    )
@@ -1382,7 +1383,7 @@ class TFBertForSequenceClassification(TFBertPreTrainedModel, TFSequenceClassific
    @add_start_docstrings_to_model_forward(BERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
    @add_code_sample_docstrings(
        tokenizer_class=_TOKENIZER_FOR_DOC,
-        checkpoint="bert-base-cased",
+        checkpoint=_CHECKPOINT_FOR_DOC,
        output_type=TFSequenceClassifierOutput,
        config_class=_CONFIG_FOR_DOC,
    )
@@ -1492,7 +1493,7 @@ class TFBertForMultipleChoice(TFBertPreTrainedModel, TFMultipleChoiceLoss):
    @add_start_docstrings_to_model_forward(BERT_INPUTS_DOCSTRING.format("batch_size, num_choices, sequence_length"))
    @add_code_sample_docstrings(
        tokenizer_class=_TOKENIZER_FOR_DOC,
-        checkpoint="bert-base-cased",
+        checkpoint=_CHECKPOINT_FOR_DOC,
        output_type=TFMultipleChoiceModelOutput,
        config_class=_CONFIG_FOR_DOC,
    )
@@ -1648,7 +1649,7 @@ class TFBertForTokenClassification(TFBertPreTrainedModel, TFTokenClassificationL
    @add_start_docstrings_to_model_forward(BERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
    @add_code_sample_docstrings(
        tokenizer_class=_TOKENIZER_FOR_DOC,
-        checkpoint="bert-base-cased",
+        checkpoint=_CHECKPOINT_FOR_DOC,
        output_type=TFTokenClassifierOutput,
        config_class=_CONFIG_FOR_DOC,
    )
@@ -1755,7 +1756,7 @@ class TFBertForQuestionAnswering(TFBertPreTrainedModel, TFQuestionAnsweringLoss)
    @add_start_docstrings_to_model_forward(BERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
    @add_code_sample_docstrings(
        tokenizer_class=_TOKENIZER_FOR_DOC,
-        checkpoint="bert-base-cased",
+        checkpoint=_CHECKPOINT_FOR_DOC,
        output_type=TFQuestionAnsweringModelOutput,
        config_class=_CONFIG_FOR_DOC,
    )