From 2831826bc60ee11b86179252ffc8401cb03c5904 Mon Sep 17 00:00:00 2001 From: Minh Chien Vu <31467068+vumichien@users.noreply.github.com> Date: Mon, 11 Apr 2022 22:51:28 +0900 Subject: [PATCH] Add Doc Test for BERT (#16523) * Add doctest BERT * make fixup * fix typo * change checkpoints * make fixup * define doctest output value, update doctest for mobilebert * solve fix-copies * update QA target start index and end index * change checkpoint for docs and reuse defined variable * Update src/transformers/models/bert/modeling_tf_bert.py Co-authored-by: Yih-Dar <2521628+ydshieh@users.noreply.github.com> * Apply suggestions from code review Co-authored-by: Yih-Dar <2521628+ydshieh@users.noreply.github.com> * Apply suggestions from code review Co-authored-by: Yih-Dar <2521628+ydshieh@users.noreply.github.com> * make fixup Co-authored-by: Yih-Dar <2521628+ydshieh@users.noreply.github.com> --- src/transformers/models/bert/modeling_bert.py | 104 ++++++++++-------- .../models/bert/modeling_tf_bert.py | 43 ++++++-- .../models/mobilebert/modeling_mobilebert.py | 38 ++++++- .../mobilebert/modeling_tf_mobilebert.py | 33 +++++- utils/documentation_tests.txt | 4 + 5 files changed, 160 insertions(+), 62 deletions(-) diff --git a/src/transformers/models/bert/modeling_bert.py b/src/transformers/models/bert/modeling_bert.py index 6e1a41a6b3..20b65d0c06 100755 --- a/src/transformers/models/bert/modeling_bert.py +++ b/src/transformers/models/bert/modeling_bert.py @@ -63,6 +63,26 @@ _CHECKPOINT_FOR_DOC = "bert-base-uncased" _CONFIG_FOR_DOC = "BertConfig" _TOKENIZER_FOR_DOC = "BertTokenizer" +# TokenClassification docstring +_CHECKPOINT_FOR_TOKEN_CLASSIFICATION = "dbmdz/bert-large-cased-finetuned-conll03-english" +_TOKEN_CLASS_EXPECTED_OUTPUT = ( + "['O', 'I-ORG', 'I-ORG', 'I-ORG', 'O', 'O', 'O', 'O', 'O', 'I-LOC', 'O', 'I-LOC', " "'I-LOC'] " +) +_TOKEN_CLASS_EXPECTED_LOSS = 0.01 + +# QuestionAnswering docstring +_CHECKPOINT_FOR_QA = "deepset/bert-base-cased-squad2" +_QA_EXPECTED_OUTPUT = "'a nice puppet'" +_QA_EXPECTED_LOSS = 7.41 +_QA_TARGET_START_INDEX = 14 +_QA_TARGET_END_INDEX = 15 + +# SequenceClassification docstring +_CHECKPOINT_FOR_SEQUENCE_CLASSIFICATION = "textattack/bert-base-uncased-yelp-polarity" +_SEQ_CLASS_EXPECTED_OUTPUT = "'LABEL_1'" +_SEQ_CLASS_EXPECTED_LOSS = 0.01 + + BERT_PRETRAINED_MODEL_ARCHIVE_LIST = [ "bert-base-uncased", "bert-large-uncased", @@ -1156,7 +1176,12 @@ class BertLMHeadModel(BertPreTrainedModel): self.cls.predictions.decoder = new_embeddings @add_start_docstrings_to_model_forward(BERT_INPUTS_DOCSTRING.format("batch_size, sequence_length")) - @replace_return_docstrings(output_type=CausalLMOutputWithCrossAttentions, config_class=_CONFIG_FOR_DOC) + @add_code_sample_docstrings( + processor_class=_TOKENIZER_FOR_DOC, + checkpoint=_CHECKPOINT_FOR_DOC, + output_type=CausalLMOutputWithCrossAttentions, + config_class=_CONFIG_FOR_DOC, + ) def forward( self, input_ids: Optional[torch.Tensor] = None, @@ -1175,49 +1200,28 @@ class BertLMHeadModel(BertPreTrainedModel): return_dict: Optional[bool] = None, ) -> Union[Tuple[torch.Tensor], CausalLMOutputWithCrossAttentions]: r""" - encoder_hidden_states (`torch.FloatTensor` of shape `(batch_size, sequence_length, hidden_size)`, *optional*): - Sequence of hidden-states at the output of the last layer of the encoder. Used in the cross-attention - if the model is configured as a decoder. - encoder_attention_mask (`torch.FloatTensor` of shape `(batch_size, sequence_length)`, *optional*): - Mask to avoid performing attention on the padding token indices of the encoder input. This mask is used - in the cross-attention if the model is configured as a decoder. Mask values selected in `[0, 1]`: + encoder_hidden_states (`torch.FloatTensor` of shape `(batch_size, sequence_length, hidden_size)`, *optional*): + Sequence of hidden-states at the output of the last layer of the encoder. Used in the cross-attention if + the model is configured as a decoder. + encoder_attention_mask (`torch.FloatTensor` of shape `(batch_size, sequence_length)`, *optional*): + Mask to avoid performing attention on the padding token indices of the encoder input. This mask is used in + the cross-attention if the model is configured as a decoder. Mask values selected in `[0, 1]`: - - 1 for tokens that are **not masked**, - - 0 for tokens that are **masked**. - labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*): - Labels for computing the left-to-right language modeling loss (next word prediction). Indices should be - in `[-100, 0, ..., config.vocab_size]` (see `input_ids` docstring) Tokens with indices set to `-100` - are ignored (masked), the loss is only computed for the tokens with labels n `[0, ..., - config.vocab_size]` - past_key_values (`tuple(tuple(torch.FloatTensor))` of length `config.n_layers` with each tuple having 4 tensors of shape `(batch_size, num_heads, sequence_length - 1, embed_size_per_head)`): - Contains precomputed key and value hidden states of the attention blocks. Can be used to speed up - decoding. + - 1 for tokens that are **not masked**, + - 0 for tokens that are **masked**. + labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*): + Labels for computing the left-to-right language modeling loss (next word prediction). Indices should be in + `[-100, 0, ..., config.vocab_size]` (see `input_ids` docstring) Tokens with indices set to `-100` are + ignored (masked), the loss is only computed for the tokens with labels n `[0, ..., config.vocab_size]` + past_key_values (`tuple(tuple(torch.FloatTensor))` of length `config.n_layers` with each tuple having 4 tensors of shape `(batch_size, num_heads, sequence_length - 1, embed_size_per_head)`): + Contains precomputed key and value hidden states of the attention blocks. Can be used to speed up decoding. - If `past_key_values` are used, the user can optionally input only the last `decoder_input_ids` (those - that don't have their past key value states given to this model) of shape `(batch_size, 1)` instead of - all `decoder_input_ids` of shape `(batch_size, sequence_length)`. - use_cache (`bool`, *optional*): - If set to `True`, `past_key_values` key value states are returned and can be used to speed up decoding - (see `past_key_values`). - - Returns: - - Example: - - ```python - >>> from transformers import BertTokenizer, BertLMHeadModel, BertConfig - >>> import torch - - >>> tokenizer = BertTokenizer.from_pretrained("bert-base-cased") - >>> config = BertConfig.from_pretrained("bert-base-cased") - >>> config.is_decoder = True - >>> model = BertLMHeadModel.from_pretrained("bert-base-cased", config=config) - - >>> inputs = tokenizer("Hello, my dog is cute", return_tensors="pt") - >>> outputs = model(**inputs) - - >>> prediction_logits = outputs.logits - ``` + If `past_key_values` are used, the user can optionally input only the last `decoder_input_ids` (those that + don't have their past key value states given to this model) of shape `(batch_size, 1)` instead of all + `decoder_input_ids` of shape `(batch_size, sequence_length)`. + use_cache (`bool`, *optional*): + If set to `True`, `past_key_values` key value states are returned and can be used to speed up decoding (see + `past_key_values`). """ return_dict = return_dict if return_dict is not None else self.config.use_return_dict if labels is not None: @@ -1315,6 +1319,8 @@ class BertForMaskedLM(BertPreTrainedModel): checkpoint=_CHECKPOINT_FOR_DOC, output_type=MaskedLMOutput, config_class=_CONFIG_FOR_DOC, + expected_output="'paris'", + expected_loss=0.88, ) def forward( self, @@ -1517,9 +1523,11 @@ class BertForSequenceClassification(BertPreTrainedModel): @add_start_docstrings_to_model_forward(BERT_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_code_sample_docstrings( processor_class=_TOKENIZER_FOR_DOC, - checkpoint=_CHECKPOINT_FOR_DOC, + checkpoint=_CHECKPOINT_FOR_SEQUENCE_CLASSIFICATION, output_type=SequenceClassifierOutput, config_class=_CONFIG_FOR_DOC, + expected_output=_SEQ_CLASS_EXPECTED_OUTPUT, + expected_loss=_SEQ_CLASS_EXPECTED_LOSS, ) def forward( self, @@ -1716,9 +1724,11 @@ class BertForTokenClassification(BertPreTrainedModel): @add_start_docstrings_to_model_forward(BERT_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_code_sample_docstrings( processor_class=_TOKENIZER_FOR_DOC, - checkpoint=_CHECKPOINT_FOR_DOC, + checkpoint=_CHECKPOINT_FOR_TOKEN_CLASSIFICATION, output_type=TokenClassifierOutput, config_class=_CONFIG_FOR_DOC, + expected_output=_TOKEN_CLASS_EXPECTED_OUTPUT, + expected_loss=_TOKEN_CLASS_EXPECTED_LOSS, ) def forward( self, @@ -1797,9 +1807,13 @@ class BertForQuestionAnswering(BertPreTrainedModel): @add_start_docstrings_to_model_forward(BERT_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_code_sample_docstrings( processor_class=_TOKENIZER_FOR_DOC, - checkpoint=_CHECKPOINT_FOR_DOC, + checkpoint=_CHECKPOINT_FOR_QA, output_type=QuestionAnsweringModelOutput, config_class=_CONFIG_FOR_DOC, + qa_target_start_index=_QA_TARGET_START_INDEX, + qa_target_end_index=_QA_TARGET_END_INDEX, + expected_output=_QA_EXPECTED_OUTPUT, + expected_loss=_QA_EXPECTED_LOSS, ) def forward( self, diff --git a/src/transformers/models/bert/modeling_tf_bert.py b/src/transformers/models/bert/modeling_tf_bert.py index 5e8775ab0d..b64e203253 100644 --- a/src/transformers/models/bert/modeling_tf_bert.py +++ b/src/transformers/models/bert/modeling_tf_bert.py @@ -65,10 +65,29 @@ from .configuration_bert import BertConfig logger = logging.get_logger(__name__) -_CHECKPOINT_FOR_DOC = "bert-base-cased" +_CHECKPOINT_FOR_DOC = "bert-base-uncased" _CONFIG_FOR_DOC = "BertConfig" _TOKENIZER_FOR_DOC = "BertTokenizer" +# TokenClassification docstring +_CHECKPOINT_FOR_TOKEN_CLASSIFICATION = "dbmdz/bert-large-cased-finetuned-conll03-english" +_TOKEN_CLASS_EXPECTED_OUTPUT = ( + "['O', 'I-ORG', 'I-ORG', 'I-ORG', 'O', 'O', 'O', 'O', 'O', 'I-LOC', 'O', 'I-LOC', " "'I-LOC'] " +) +_TOKEN_CLASS_EXPECTED_LOSS = 0.01 + +# QuestionAnswering docstring +_CHECKPOINT_FOR_QA = "ydshieh/bert-base-cased-squad2" +_QA_EXPECTED_OUTPUT = "'a nice puppet'" +_QA_EXPECTED_LOSS = 7.41 +_QA_TARGET_START_INDEX = 14 +_QA_TARGET_END_INDEX = 15 + +# SequenceClassification docstring +_CHECKPOINT_FOR_SEQUENCE_CLASSIFICATION = "ydshieh/bert-base-uncased-yelp-polarity" +_SEQ_CLASS_EXPECTED_OUTPUT = "'LABEL_1'" +_SEQ_CLASS_EXPECTED_LOSS = 0.01 + TF_BERT_PRETRAINED_MODEL_ARCHIVE_LIST = [ "bert-base-uncased", "bert-large-uncased", @@ -1197,11 +1216,11 @@ class TFBertForPreTraining(TFBertPreTrainedModel, TFBertPreTrainingLoss): >>> tokenizer = BertTokenizer.from_pretrained("bert-base-uncased") >>> model = TFBertForPreTraining.from_pretrained("bert-base-uncased") - >>> input_ids = tf.constant(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True))[ - ... None, : - >>> ] # Batch size 1 + >>> input_ids = tokenizer("Hello, my dog is cute", add_special_tokens=True, return_tensors="tf") + >>> # Batch size 1 + >>> outputs = model(input_ids) - >>> prediction_scores, seq_relationship_scores = outputs[:2] + >>> prediction_logits, seq_relationship_logits = outputs[:2] ```""" outputs = self.bert( input_ids=input_ids, @@ -1285,6 +1304,8 @@ class TFBertForMaskedLM(TFBertPreTrainedModel, TFMaskedLanguageModelingLoss): checkpoint=_CHECKPOINT_FOR_DOC, output_type=TFMaskedLMOutput, config_class=_CONFIG_FOR_DOC, + expected_output="'paris'", + expected_loss=0.88, ) def call( self, @@ -1606,9 +1627,11 @@ class TFBertForSequenceClassification(TFBertPreTrainedModel, TFSequenceClassific @add_start_docstrings_to_model_forward(BERT_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_code_sample_docstrings( processor_class=_TOKENIZER_FOR_DOC, - checkpoint=_CHECKPOINT_FOR_DOC, + checkpoint=_CHECKPOINT_FOR_SEQUENCE_CLASSIFICATION, output_type=TFSequenceClassifierOutput, config_class=_CONFIG_FOR_DOC, + expected_output=_SEQ_CLASS_EXPECTED_OUTPUT, + expected_loss=_SEQ_CLASS_EXPECTED_LOSS, ) def call( self, @@ -1833,9 +1856,11 @@ class TFBertForTokenClassification(TFBertPreTrainedModel, TFTokenClassificationL @add_start_docstrings_to_model_forward(BERT_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_code_sample_docstrings( processor_class=_TOKENIZER_FOR_DOC, - checkpoint=_CHECKPOINT_FOR_DOC, + checkpoint=_CHECKPOINT_FOR_TOKEN_CLASSIFICATION, output_type=TFTokenClassifierOutput, config_class=_CONFIG_FOR_DOC, + expected_output=_TOKEN_CLASS_EXPECTED_OUTPUT, + expected_loss=_TOKEN_CLASS_EXPECTED_LOSS, ) def call( self, @@ -1923,9 +1948,11 @@ class TFBertForQuestionAnswering(TFBertPreTrainedModel, TFQuestionAnsweringLoss) @add_start_docstrings_to_model_forward(BERT_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_code_sample_docstrings( processor_class=_TOKENIZER_FOR_DOC, - checkpoint=_CHECKPOINT_FOR_DOC, + checkpoint=_CHECKPOINT_FOR_QA, output_type=TFQuestionAnsweringModelOutput, config_class=_CONFIG_FOR_DOC, + expected_output=_QA_EXPECTED_OUTPUT, + expected_loss=_QA_EXPECTED_LOSS, ) def call( self, diff --git a/src/transformers/models/mobilebert/modeling_mobilebert.py b/src/transformers/models/mobilebert/modeling_mobilebert.py index d57b835700..4eb1fbf69d 100644 --- a/src/transformers/models/mobilebert/modeling_mobilebert.py +++ b/src/transformers/models/mobilebert/modeling_mobilebert.py @@ -59,6 +59,23 @@ _CHECKPOINT_FOR_DOC = "google/mobilebert-uncased" _CONFIG_FOR_DOC = "MobileBertConfig" _TOKENIZER_FOR_DOC = "MobileBertTokenizer" +# TokenClassification docstring +_CHECKPOINT_FOR_TOKEN_CLASSIFICATION = "mrm8488/mobilebert-finetuned-ner" +_TOKEN_CLASS_EXPECTED_OUTPUT = "['I-ORG', 'I-ORG', 'O', 'O', 'O', 'O', 'O', 'I-LOC', 'O', 'I-LOC', 'I-LOC']" +_TOKEN_CLASS_EXPECTED_LOSS = 0.03 + +# QuestionAnswering docstring +_CHECKPOINT_FOR_QA = "csarron/mobilebert-uncased-squad-v2" +_QA_EXPECTED_OUTPUT = "'a nice puppet'" +_QA_EXPECTED_LOSS = 3.98 +_QA_TARGET_START_INDEX = 12 +_QA_TARGET_END_INDEX = 13 + +# SequenceClassification docstring +_CHECKPOINT_FOR_SEQUENCE_CLASSIFICATION = "lordtt13/emo-mobilebert" +_SEQ_CLASS_EXPECTED_OUTPUT = "'others'" +_SEQ_CLASS_EXPECTED_LOSS = "4.72" + MOBILEBERT_PRETRAINED_MODEL_ARCHIVE_LIST = ["google/mobilebert-uncased"] @@ -962,9 +979,8 @@ class MobileBertForPreTraining(MobileBertPreTrainedModel): >>> tokenizer = MobileBertTokenizer.from_pretrained("google/mobilebert-uncased") >>> model = MobileBertForPreTraining.from_pretrained("google/mobilebert-uncased") - >>> input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze( - ... 0 - >>> ) # Batch size 1 + >>> input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze(0) + >>> # Batch size 1 >>> outputs = model(input_ids) >>> prediction_logits = outputs.prediction_logits @@ -1039,6 +1055,8 @@ class MobileBertForMaskedLM(MobileBertPreTrainedModel): checkpoint=_CHECKPOINT_FOR_DOC, output_type=MaskedLMOutput, config_class=_CONFIG_FOR_DOC, + expected_output="'paris'", + expected_loss=0.57, ) def forward( self, @@ -1229,9 +1247,11 @@ class MobileBertForSequenceClassification(MobileBertPreTrainedModel): @add_start_docstrings_to_model_forward(MOBILEBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_code_sample_docstrings( processor_class=_TOKENIZER_FOR_DOC, - checkpoint=_CHECKPOINT_FOR_DOC, + checkpoint=_CHECKPOINT_FOR_SEQUENCE_CLASSIFICATION, output_type=SequenceClassifierOutput, config_class=_CONFIG_FOR_DOC, + expected_output=_SEQ_CLASS_EXPECTED_OUTPUT, + expected_loss=_SEQ_CLASS_EXPECTED_LOSS, ) def forward( self, @@ -1330,9 +1350,13 @@ class MobileBertForQuestionAnswering(MobileBertPreTrainedModel): @add_start_docstrings_to_model_forward(MOBILEBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_code_sample_docstrings( processor_class=_TOKENIZER_FOR_DOC, - checkpoint=_CHECKPOINT_FOR_DOC, + checkpoint=_CHECKPOINT_FOR_QA, output_type=QuestionAnsweringModelOutput, config_class=_CONFIG_FOR_DOC, + qa_target_start_index=_QA_TARGET_START_INDEX, + qa_target_end_index=_QA_TARGET_END_INDEX, + expected_output=_QA_EXPECTED_OUTPUT, + expected_loss=_QA_EXPECTED_LOSS, ) def forward( self, @@ -1536,9 +1560,11 @@ class MobileBertForTokenClassification(MobileBertPreTrainedModel): @add_start_docstrings_to_model_forward(MOBILEBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_code_sample_docstrings( processor_class=_TOKENIZER_FOR_DOC, - checkpoint=_CHECKPOINT_FOR_DOC, + checkpoint=_CHECKPOINT_FOR_TOKEN_CLASSIFICATION, output_type=TokenClassifierOutput, config_class=_CONFIG_FOR_DOC, + expected_output=_TOKEN_CLASS_EXPECTED_OUTPUT, + expected_loss=_TOKEN_CLASS_EXPECTED_LOSS, ) def forward( self, diff --git a/src/transformers/models/mobilebert/modeling_tf_mobilebert.py b/src/transformers/models/mobilebert/modeling_tf_mobilebert.py index 5d1c74252e..2e2282cce6 100644 --- a/src/transformers/models/mobilebert/modeling_tf_mobilebert.py +++ b/src/transformers/models/mobilebert/modeling_tf_mobilebert.py @@ -63,6 +63,23 @@ _CHECKPOINT_FOR_DOC = "google/mobilebert-uncased" _CONFIG_FOR_DOC = "MobileBertConfig" _TOKENIZER_FOR_DOC = "MobileBertTokenizer" +# TokenClassification docstring +_CHECKPOINT_FOR_TOKEN_CLASSIFICATION = "vumichien/mobilebert-finetuned-ner" +_TOKEN_CLASS_EXPECTED_OUTPUT = "['I-ORG', 'I-ORG', 'O', 'O', 'O', 'O', 'O', 'I-LOC', 'O', 'I-LOC', 'I-LOC']" +_TOKEN_CLASS_EXPECTED_LOSS = 0.03 + +# QuestionAnswering docstring +_CHECKPOINT_FOR_QA = "vumichien/mobilebert-uncased-squad-v2" +_QA_EXPECTED_OUTPUT = "'a nice puppet'" +_QA_EXPECTED_LOSS = 3.98 +_QA_TARGET_START_INDEX = 12 +_QA_TARGET_END_INDEX = 13 + +# SequenceClassification docstring +_CHECKPOINT_FOR_SEQUENCE_CLASSIFICATION = "vumichien/emo-mobilebert" +_SEQ_CLASS_EXPECTED_OUTPUT = "'others'" +_SEQ_CLASS_EXPECTED_LOSS = "4.72" + TF_MOBILEBERT_PRETRAINED_MODEL_ARCHIVE_LIST = [ "google/mobilebert-uncased", # See all MobileBERT models at https://huggingface.co/models?filter=mobilebert @@ -1075,6 +1092,8 @@ class TFMobileBertForMaskedLM(TFMobileBertPreTrainedModel, TFMaskedLanguageModel checkpoint=_CHECKPOINT_FOR_DOC, output_type=TFMaskedLMOutput, config_class=_CONFIG_FOR_DOC, + expected_output="'paris'", + expected_loss=0.57, ) def call( self, @@ -1265,9 +1284,11 @@ class TFMobileBertForSequenceClassification(TFMobileBertPreTrainedModel, TFSeque @add_start_docstrings_to_model_forward(MOBILEBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_code_sample_docstrings( processor_class=_TOKENIZER_FOR_DOC, - checkpoint=_CHECKPOINT_FOR_DOC, + checkpoint=_CHECKPOINT_FOR_SEQUENCE_CLASSIFICATION, output_type=TFSequenceClassifierOutput, config_class=_CONFIG_FOR_DOC, + expected_output=_SEQ_CLASS_EXPECTED_OUTPUT, + expected_loss=_SEQ_CLASS_EXPECTED_LOSS, ) def call( self, @@ -1357,9 +1378,13 @@ class TFMobileBertForQuestionAnswering(TFMobileBertPreTrainedModel, TFQuestionAn @add_start_docstrings_to_model_forward(MOBILEBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_code_sample_docstrings( processor_class=_TOKENIZER_FOR_DOC, - checkpoint=_CHECKPOINT_FOR_DOC, + checkpoint=_CHECKPOINT_FOR_QA, output_type=TFQuestionAnsweringModelOutput, config_class=_CONFIG_FOR_DOC, + qa_target_start_index=_QA_TARGET_START_INDEX, + qa_target_end_index=_QA_TARGET_END_INDEX, + expected_output=_QA_EXPECTED_OUTPUT, + expected_loss=_QA_EXPECTED_LOSS, ) def call( self, @@ -1601,9 +1626,11 @@ class TFMobileBertForTokenClassification(TFMobileBertPreTrainedModel, TFTokenCla @add_start_docstrings_to_model_forward(MOBILEBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_code_sample_docstrings( processor_class=_TOKENIZER_FOR_DOC, - checkpoint=_CHECKPOINT_FOR_DOC, + checkpoint=_CHECKPOINT_FOR_TOKEN_CLASSIFICATION, output_type=TFTokenClassifierOutput, config_class=_CONFIG_FOR_DOC, + expected_output=_TOKEN_CLASS_EXPECTED_OUTPUT, + expected_loss=_TOKEN_CLASS_EXPECTED_LOSS, ) def call( self, diff --git a/utils/documentation_tests.txt b/utils/documentation_tests.txt index 8fe33b240e..5c76137dde 100644 --- a/utils/documentation_tests.txt +++ b/utils/documentation_tests.txt @@ -5,6 +5,8 @@ docs/source/en/model_doc/tapex.mdx src/transformers/generation_utils.py src/transformers/models/bart/modeling_bart.py src/transformers/models/beit/modeling_beit.py +src/transformers/models/bert/modeling_bert.py +src/transformers/models/bert/modeling_tf_bert.py src/transformers/models/bigbird_pegasus/modeling_bigbird_pegasus.py src/transformers/models/blenderbot/modeling_blenderbot.py src/transformers/models/blenderbot_small/modeling_blenderbot_small.py @@ -16,6 +18,8 @@ src/transformers/models/glpn/modeling_glpn.py src/transformers/models/hubert/modeling_hubert.py src/transformers/models/marian/modeling_marian.py src/transformers/models/mbart/modeling_mbart.py +src/transformers/models/mobilebert/modeling_mobilebert.py +src/transformers/models/mobilebert/modeling_tf_mobilebert.py src/transformers/models/pegasus/modeling_pegasus.py src/transformers/models/plbart/modeling_plbart.py src/transformers/models/poolformer/modeling_poolformer.py