Add Doc Test for BERT (#16523)

* Add doctest BERT

* make fixup

* fix typo

* change checkpoints

* make fixup

* define doctest output value, update doctest for mobilebert

* solve fix-copies

* update QA target start index and end index

* change checkpoint for docs and reuse defined variable

* Update src/transformers/models/bert/modeling_tf_bert.py

Co-authored-by: Yih-Dar <2521628+ydshieh@users.noreply.github.com>

* Apply suggestions from code review

Co-authored-by: Yih-Dar <2521628+ydshieh@users.noreply.github.com>

* Apply suggestions from code review

Co-authored-by: Yih-Dar <2521628+ydshieh@users.noreply.github.com>

* make fixup

Co-authored-by: Yih-Dar <2521628+ydshieh@users.noreply.github.com>
This commit is contained in:
Minh Chien Vu
2022-04-11 22:51:28 +09:00
committed by GitHub
parent 098b002644
commit 2831826bc6
5 changed files with 160 additions and 62 deletions

View File

@@ -63,6 +63,26 @@ _CHECKPOINT_FOR_DOC = "bert-base-uncased"
_CONFIG_FOR_DOC = "BertConfig"
_TOKENIZER_FOR_DOC = "BertTokenizer"
# TokenClassification docstring
_CHECKPOINT_FOR_TOKEN_CLASSIFICATION = "dbmdz/bert-large-cased-finetuned-conll03-english"
_TOKEN_CLASS_EXPECTED_OUTPUT = (
"['O', 'I-ORG', 'I-ORG', 'I-ORG', 'O', 'O', 'O', 'O', 'O', 'I-LOC', 'O', 'I-LOC', " "'I-LOC'] "
)
_TOKEN_CLASS_EXPECTED_LOSS = 0.01
# QuestionAnswering docstring
_CHECKPOINT_FOR_QA = "deepset/bert-base-cased-squad2"
_QA_EXPECTED_OUTPUT = "'a nice puppet'"
_QA_EXPECTED_LOSS = 7.41
_QA_TARGET_START_INDEX = 14
_QA_TARGET_END_INDEX = 15
# SequenceClassification docstring
_CHECKPOINT_FOR_SEQUENCE_CLASSIFICATION = "textattack/bert-base-uncased-yelp-polarity"
_SEQ_CLASS_EXPECTED_OUTPUT = "'LABEL_1'"
_SEQ_CLASS_EXPECTED_LOSS = 0.01
BERT_PRETRAINED_MODEL_ARCHIVE_LIST = [
"bert-base-uncased",
"bert-large-uncased",
@@ -1156,7 +1176,12 @@ class BertLMHeadModel(BertPreTrainedModel):
self.cls.predictions.decoder = new_embeddings
@add_start_docstrings_to_model_forward(BERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
@replace_return_docstrings(output_type=CausalLMOutputWithCrossAttentions, config_class=_CONFIG_FOR_DOC)
@add_code_sample_docstrings(
processor_class=_TOKENIZER_FOR_DOC,
checkpoint=_CHECKPOINT_FOR_DOC,
output_type=CausalLMOutputWithCrossAttentions,
config_class=_CONFIG_FOR_DOC,
)
def forward(
self,
input_ids: Optional[torch.Tensor] = None,
@@ -1176,48 +1201,27 @@ class BertLMHeadModel(BertPreTrainedModel):
) -> Union[Tuple[torch.Tensor], CausalLMOutputWithCrossAttentions]:
r"""
encoder_hidden_states (`torch.FloatTensor` of shape `(batch_size, sequence_length, hidden_size)`, *optional*):
Sequence of hidden-states at the output of the last layer of the encoder. Used in the cross-attention
if the model is configured as a decoder.
Sequence of hidden-states at the output of the last layer of the encoder. Used in the cross-attention if
the model is configured as a decoder.
encoder_attention_mask (`torch.FloatTensor` of shape `(batch_size, sequence_length)`, *optional*):
Mask to avoid performing attention on the padding token indices of the encoder input. This mask is used
in the cross-attention if the model is configured as a decoder. Mask values selected in `[0, 1]`:
Mask to avoid performing attention on the padding token indices of the encoder input. This mask is used in
the cross-attention if the model is configured as a decoder. Mask values selected in `[0, 1]`:
- 1 for tokens that are **not masked**,
- 0 for tokens that are **masked**.
labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
Labels for computing the left-to-right language modeling loss (next word prediction). Indices should be
in `[-100, 0, ..., config.vocab_size]` (see `input_ids` docstring) Tokens with indices set to `-100`
are ignored (masked), the loss is only computed for the tokens with labels n `[0, ...,
config.vocab_size]`
Labels for computing the left-to-right language modeling loss (next word prediction). Indices should be in
`[-100, 0, ..., config.vocab_size]` (see `input_ids` docstring) Tokens with indices set to `-100` are
ignored (masked), the loss is only computed for the tokens with labels n `[0, ..., config.vocab_size]`
past_key_values (`tuple(tuple(torch.FloatTensor))` of length `config.n_layers` with each tuple having 4 tensors of shape `(batch_size, num_heads, sequence_length - 1, embed_size_per_head)`):
Contains precomputed key and value hidden states of the attention blocks. Can be used to speed up
decoding.
Contains precomputed key and value hidden states of the attention blocks. Can be used to speed up decoding.
If `past_key_values` are used, the user can optionally input only the last `decoder_input_ids` (those
that don't have their past key value states given to this model) of shape `(batch_size, 1)` instead of
all `decoder_input_ids` of shape `(batch_size, sequence_length)`.
If `past_key_values` are used, the user can optionally input only the last `decoder_input_ids` (those that
don't have their past key value states given to this model) of shape `(batch_size, 1)` instead of all
`decoder_input_ids` of shape `(batch_size, sequence_length)`.
use_cache (`bool`, *optional*):
If set to `True`, `past_key_values` key value states are returned and can be used to speed up decoding
(see `past_key_values`).
Returns:
Example:
```python
>>> from transformers import BertTokenizer, BertLMHeadModel, BertConfig
>>> import torch
>>> tokenizer = BertTokenizer.from_pretrained("bert-base-cased")
>>> config = BertConfig.from_pretrained("bert-base-cased")
>>> config.is_decoder = True
>>> model = BertLMHeadModel.from_pretrained("bert-base-cased", config=config)
>>> inputs = tokenizer("Hello, my dog is cute", return_tensors="pt")
>>> outputs = model(**inputs)
>>> prediction_logits = outputs.logits
```
If set to `True`, `past_key_values` key value states are returned and can be used to speed up decoding (see
`past_key_values`).
"""
return_dict = return_dict if return_dict is not None else self.config.use_return_dict
if labels is not None:
@@ -1315,6 +1319,8 @@ class BertForMaskedLM(BertPreTrainedModel):
checkpoint=_CHECKPOINT_FOR_DOC,
output_type=MaskedLMOutput,
config_class=_CONFIG_FOR_DOC,
expected_output="'paris'",
expected_loss=0.88,
)
def forward(
self,
@@ -1517,9 +1523,11 @@ class BertForSequenceClassification(BertPreTrainedModel):
@add_start_docstrings_to_model_forward(BERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
@add_code_sample_docstrings(
processor_class=_TOKENIZER_FOR_DOC,
checkpoint=_CHECKPOINT_FOR_DOC,
checkpoint=_CHECKPOINT_FOR_SEQUENCE_CLASSIFICATION,
output_type=SequenceClassifierOutput,
config_class=_CONFIG_FOR_DOC,
expected_output=_SEQ_CLASS_EXPECTED_OUTPUT,
expected_loss=_SEQ_CLASS_EXPECTED_LOSS,
)
def forward(
self,
@@ -1716,9 +1724,11 @@ class BertForTokenClassification(BertPreTrainedModel):
@add_start_docstrings_to_model_forward(BERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
@add_code_sample_docstrings(
processor_class=_TOKENIZER_FOR_DOC,
checkpoint=_CHECKPOINT_FOR_DOC,
checkpoint=_CHECKPOINT_FOR_TOKEN_CLASSIFICATION,
output_type=TokenClassifierOutput,
config_class=_CONFIG_FOR_DOC,
expected_output=_TOKEN_CLASS_EXPECTED_OUTPUT,
expected_loss=_TOKEN_CLASS_EXPECTED_LOSS,
)
def forward(
self,
@@ -1797,9 +1807,13 @@ class BertForQuestionAnswering(BertPreTrainedModel):
@add_start_docstrings_to_model_forward(BERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
@add_code_sample_docstrings(
processor_class=_TOKENIZER_FOR_DOC,
checkpoint=_CHECKPOINT_FOR_DOC,
checkpoint=_CHECKPOINT_FOR_QA,
output_type=QuestionAnsweringModelOutput,
config_class=_CONFIG_FOR_DOC,
qa_target_start_index=_QA_TARGET_START_INDEX,
qa_target_end_index=_QA_TARGET_END_INDEX,
expected_output=_QA_EXPECTED_OUTPUT,
expected_loss=_QA_EXPECTED_LOSS,
)
def forward(
self,

View File

@@ -65,10 +65,29 @@ from .configuration_bert import BertConfig
logger = logging.get_logger(__name__)
_CHECKPOINT_FOR_DOC = "bert-base-cased"
_CHECKPOINT_FOR_DOC = "bert-base-uncased"
_CONFIG_FOR_DOC = "BertConfig"
_TOKENIZER_FOR_DOC = "BertTokenizer"
# TokenClassification docstring
_CHECKPOINT_FOR_TOKEN_CLASSIFICATION = "dbmdz/bert-large-cased-finetuned-conll03-english"
_TOKEN_CLASS_EXPECTED_OUTPUT = (
"['O', 'I-ORG', 'I-ORG', 'I-ORG', 'O', 'O', 'O', 'O', 'O', 'I-LOC', 'O', 'I-LOC', " "'I-LOC'] "
)
_TOKEN_CLASS_EXPECTED_LOSS = 0.01
# QuestionAnswering docstring
_CHECKPOINT_FOR_QA = "ydshieh/bert-base-cased-squad2"
_QA_EXPECTED_OUTPUT = "'a nice puppet'"
_QA_EXPECTED_LOSS = 7.41
_QA_TARGET_START_INDEX = 14
_QA_TARGET_END_INDEX = 15
# SequenceClassification docstring
_CHECKPOINT_FOR_SEQUENCE_CLASSIFICATION = "ydshieh/bert-base-uncased-yelp-polarity"
_SEQ_CLASS_EXPECTED_OUTPUT = "'LABEL_1'"
_SEQ_CLASS_EXPECTED_LOSS = 0.01
TF_BERT_PRETRAINED_MODEL_ARCHIVE_LIST = [
"bert-base-uncased",
"bert-large-uncased",
@@ -1197,11 +1216,11 @@ class TFBertForPreTraining(TFBertPreTrainedModel, TFBertPreTrainingLoss):
>>> tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
>>> model = TFBertForPreTraining.from_pretrained("bert-base-uncased")
>>> input_ids = tf.constant(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True))[
... None, :
>>> ] # Batch size 1
>>> input_ids = tokenizer("Hello, my dog is cute", add_special_tokens=True, return_tensors="tf")
>>> # Batch size 1
>>> outputs = model(input_ids)
>>> prediction_scores, seq_relationship_scores = outputs[:2]
>>> prediction_logits, seq_relationship_logits = outputs[:2]
```"""
outputs = self.bert(
input_ids=input_ids,
@@ -1285,6 +1304,8 @@ class TFBertForMaskedLM(TFBertPreTrainedModel, TFMaskedLanguageModelingLoss):
checkpoint=_CHECKPOINT_FOR_DOC,
output_type=TFMaskedLMOutput,
config_class=_CONFIG_FOR_DOC,
expected_output="'paris'",
expected_loss=0.88,
)
def call(
self,
@@ -1606,9 +1627,11 @@ class TFBertForSequenceClassification(TFBertPreTrainedModel, TFSequenceClassific
@add_start_docstrings_to_model_forward(BERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
@add_code_sample_docstrings(
processor_class=_TOKENIZER_FOR_DOC,
checkpoint=_CHECKPOINT_FOR_DOC,
checkpoint=_CHECKPOINT_FOR_SEQUENCE_CLASSIFICATION,
output_type=TFSequenceClassifierOutput,
config_class=_CONFIG_FOR_DOC,
expected_output=_SEQ_CLASS_EXPECTED_OUTPUT,
expected_loss=_SEQ_CLASS_EXPECTED_LOSS,
)
def call(
self,
@@ -1833,9 +1856,11 @@ class TFBertForTokenClassification(TFBertPreTrainedModel, TFTokenClassificationL
@add_start_docstrings_to_model_forward(BERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
@add_code_sample_docstrings(
processor_class=_TOKENIZER_FOR_DOC,
checkpoint=_CHECKPOINT_FOR_DOC,
checkpoint=_CHECKPOINT_FOR_TOKEN_CLASSIFICATION,
output_type=TFTokenClassifierOutput,
config_class=_CONFIG_FOR_DOC,
expected_output=_TOKEN_CLASS_EXPECTED_OUTPUT,
expected_loss=_TOKEN_CLASS_EXPECTED_LOSS,
)
def call(
self,
@@ -1923,9 +1948,11 @@ class TFBertForQuestionAnswering(TFBertPreTrainedModel, TFQuestionAnsweringLoss)
@add_start_docstrings_to_model_forward(BERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
@add_code_sample_docstrings(
processor_class=_TOKENIZER_FOR_DOC,
checkpoint=_CHECKPOINT_FOR_DOC,
checkpoint=_CHECKPOINT_FOR_QA,
output_type=TFQuestionAnsweringModelOutput,
config_class=_CONFIG_FOR_DOC,
expected_output=_QA_EXPECTED_OUTPUT,
expected_loss=_QA_EXPECTED_LOSS,
)
def call(
self,

View File

@@ -59,6 +59,23 @@ _CHECKPOINT_FOR_DOC = "google/mobilebert-uncased"
_CONFIG_FOR_DOC = "MobileBertConfig"
_TOKENIZER_FOR_DOC = "MobileBertTokenizer"
# TokenClassification docstring
_CHECKPOINT_FOR_TOKEN_CLASSIFICATION = "mrm8488/mobilebert-finetuned-ner"
_TOKEN_CLASS_EXPECTED_OUTPUT = "['I-ORG', 'I-ORG', 'O', 'O', 'O', 'O', 'O', 'I-LOC', 'O', 'I-LOC', 'I-LOC']"
_TOKEN_CLASS_EXPECTED_LOSS = 0.03
# QuestionAnswering docstring
_CHECKPOINT_FOR_QA = "csarron/mobilebert-uncased-squad-v2"
_QA_EXPECTED_OUTPUT = "'a nice puppet'"
_QA_EXPECTED_LOSS = 3.98
_QA_TARGET_START_INDEX = 12
_QA_TARGET_END_INDEX = 13
# SequenceClassification docstring
_CHECKPOINT_FOR_SEQUENCE_CLASSIFICATION = "lordtt13/emo-mobilebert"
_SEQ_CLASS_EXPECTED_OUTPUT = "'others'"
_SEQ_CLASS_EXPECTED_LOSS = "4.72"
MOBILEBERT_PRETRAINED_MODEL_ARCHIVE_LIST = ["google/mobilebert-uncased"]
@@ -962,9 +979,8 @@ class MobileBertForPreTraining(MobileBertPreTrainedModel):
>>> tokenizer = MobileBertTokenizer.from_pretrained("google/mobilebert-uncased")
>>> model = MobileBertForPreTraining.from_pretrained("google/mobilebert-uncased")
>>> input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze(
... 0
>>> ) # Batch size 1
>>> input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze(0)
>>> # Batch size 1
>>> outputs = model(input_ids)
>>> prediction_logits = outputs.prediction_logits
@@ -1039,6 +1055,8 @@ class MobileBertForMaskedLM(MobileBertPreTrainedModel):
checkpoint=_CHECKPOINT_FOR_DOC,
output_type=MaskedLMOutput,
config_class=_CONFIG_FOR_DOC,
expected_output="'paris'",
expected_loss=0.57,
)
def forward(
self,
@@ -1229,9 +1247,11 @@ class MobileBertForSequenceClassification(MobileBertPreTrainedModel):
@add_start_docstrings_to_model_forward(MOBILEBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
@add_code_sample_docstrings(
processor_class=_TOKENIZER_FOR_DOC,
checkpoint=_CHECKPOINT_FOR_DOC,
checkpoint=_CHECKPOINT_FOR_SEQUENCE_CLASSIFICATION,
output_type=SequenceClassifierOutput,
config_class=_CONFIG_FOR_DOC,
expected_output=_SEQ_CLASS_EXPECTED_OUTPUT,
expected_loss=_SEQ_CLASS_EXPECTED_LOSS,
)
def forward(
self,
@@ -1330,9 +1350,13 @@ class MobileBertForQuestionAnswering(MobileBertPreTrainedModel):
@add_start_docstrings_to_model_forward(MOBILEBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
@add_code_sample_docstrings(
processor_class=_TOKENIZER_FOR_DOC,
checkpoint=_CHECKPOINT_FOR_DOC,
checkpoint=_CHECKPOINT_FOR_QA,
output_type=QuestionAnsweringModelOutput,
config_class=_CONFIG_FOR_DOC,
qa_target_start_index=_QA_TARGET_START_INDEX,
qa_target_end_index=_QA_TARGET_END_INDEX,
expected_output=_QA_EXPECTED_OUTPUT,
expected_loss=_QA_EXPECTED_LOSS,
)
def forward(
self,
@@ -1536,9 +1560,11 @@ class MobileBertForTokenClassification(MobileBertPreTrainedModel):
@add_start_docstrings_to_model_forward(MOBILEBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
@add_code_sample_docstrings(
processor_class=_TOKENIZER_FOR_DOC,
checkpoint=_CHECKPOINT_FOR_DOC,
checkpoint=_CHECKPOINT_FOR_TOKEN_CLASSIFICATION,
output_type=TokenClassifierOutput,
config_class=_CONFIG_FOR_DOC,
expected_output=_TOKEN_CLASS_EXPECTED_OUTPUT,
expected_loss=_TOKEN_CLASS_EXPECTED_LOSS,
)
def forward(
self,

View File

@@ -63,6 +63,23 @@ _CHECKPOINT_FOR_DOC = "google/mobilebert-uncased"
_CONFIG_FOR_DOC = "MobileBertConfig"
_TOKENIZER_FOR_DOC = "MobileBertTokenizer"
# TokenClassification docstring
_CHECKPOINT_FOR_TOKEN_CLASSIFICATION = "vumichien/mobilebert-finetuned-ner"
_TOKEN_CLASS_EXPECTED_OUTPUT = "['I-ORG', 'I-ORG', 'O', 'O', 'O', 'O', 'O', 'I-LOC', 'O', 'I-LOC', 'I-LOC']"
_TOKEN_CLASS_EXPECTED_LOSS = 0.03
# QuestionAnswering docstring
_CHECKPOINT_FOR_QA = "vumichien/mobilebert-uncased-squad-v2"
_QA_EXPECTED_OUTPUT = "'a nice puppet'"
_QA_EXPECTED_LOSS = 3.98
_QA_TARGET_START_INDEX = 12
_QA_TARGET_END_INDEX = 13
# SequenceClassification docstring
_CHECKPOINT_FOR_SEQUENCE_CLASSIFICATION = "vumichien/emo-mobilebert"
_SEQ_CLASS_EXPECTED_OUTPUT = "'others'"
_SEQ_CLASS_EXPECTED_LOSS = "4.72"
TF_MOBILEBERT_PRETRAINED_MODEL_ARCHIVE_LIST = [
"google/mobilebert-uncased",
# See all MobileBERT models at https://huggingface.co/models?filter=mobilebert
@@ -1075,6 +1092,8 @@ class TFMobileBertForMaskedLM(TFMobileBertPreTrainedModel, TFMaskedLanguageModel
checkpoint=_CHECKPOINT_FOR_DOC,
output_type=TFMaskedLMOutput,
config_class=_CONFIG_FOR_DOC,
expected_output="'paris'",
expected_loss=0.57,
)
def call(
self,
@@ -1265,9 +1284,11 @@ class TFMobileBertForSequenceClassification(TFMobileBertPreTrainedModel, TFSeque
@add_start_docstrings_to_model_forward(MOBILEBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
@add_code_sample_docstrings(
processor_class=_TOKENIZER_FOR_DOC,
checkpoint=_CHECKPOINT_FOR_DOC,
checkpoint=_CHECKPOINT_FOR_SEQUENCE_CLASSIFICATION,
output_type=TFSequenceClassifierOutput,
config_class=_CONFIG_FOR_DOC,
expected_output=_SEQ_CLASS_EXPECTED_OUTPUT,
expected_loss=_SEQ_CLASS_EXPECTED_LOSS,
)
def call(
self,
@@ -1357,9 +1378,13 @@ class TFMobileBertForQuestionAnswering(TFMobileBertPreTrainedModel, TFQuestionAn
@add_start_docstrings_to_model_forward(MOBILEBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
@add_code_sample_docstrings(
processor_class=_TOKENIZER_FOR_DOC,
checkpoint=_CHECKPOINT_FOR_DOC,
checkpoint=_CHECKPOINT_FOR_QA,
output_type=TFQuestionAnsweringModelOutput,
config_class=_CONFIG_FOR_DOC,
qa_target_start_index=_QA_TARGET_START_INDEX,
qa_target_end_index=_QA_TARGET_END_INDEX,
expected_output=_QA_EXPECTED_OUTPUT,
expected_loss=_QA_EXPECTED_LOSS,
)
def call(
self,
@@ -1601,9 +1626,11 @@ class TFMobileBertForTokenClassification(TFMobileBertPreTrainedModel, TFTokenCla
@add_start_docstrings_to_model_forward(MOBILEBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
@add_code_sample_docstrings(
processor_class=_TOKENIZER_FOR_DOC,
checkpoint=_CHECKPOINT_FOR_DOC,
checkpoint=_CHECKPOINT_FOR_TOKEN_CLASSIFICATION,
output_type=TFTokenClassifierOutput,
config_class=_CONFIG_FOR_DOC,
expected_output=_TOKEN_CLASS_EXPECTED_OUTPUT,
expected_loss=_TOKEN_CLASS_EXPECTED_LOSS,
)
def call(
self,

View File

@@ -5,6 +5,8 @@ docs/source/en/model_doc/tapex.mdx
src/transformers/generation_utils.py
src/transformers/models/bart/modeling_bart.py
src/transformers/models/beit/modeling_beit.py
src/transformers/models/bert/modeling_bert.py
src/transformers/models/bert/modeling_tf_bert.py
src/transformers/models/bigbird_pegasus/modeling_bigbird_pegasus.py
src/transformers/models/blenderbot/modeling_blenderbot.py
src/transformers/models/blenderbot_small/modeling_blenderbot_small.py
@@ -16,6 +18,8 @@ src/transformers/models/glpn/modeling_glpn.py
src/transformers/models/hubert/modeling_hubert.py
src/transformers/models/marian/modeling_marian.py
src/transformers/models/mbart/modeling_mbart.py
src/transformers/models/mobilebert/modeling_mobilebert.py
src/transformers/models/mobilebert/modeling_tf_mobilebert.py
src/transformers/models/pegasus/modeling_pegasus.py
src/transformers/models/plbart/modeling_plbart.py
src/transformers/models/poolformer/modeling_poolformer.py