Add Doc Test for BERT (#16523)
* Add doctest BERT
* make fixup
* fix typo
* change checkpoints
* make fixup
* define doctest output value, update doctest for mobilebert
* solve fix-copies
* update QA target start index and end index
* change checkpoint for docs and reuse defined variable
* Update src/transformers/models/bert/modeling_tf_bert.py

Co-authored-by: Yih-Dar <2521628+ydshieh@users.noreply.github.com>

* Apply suggestions from code review

Co-authored-by: Yih-Dar <2521628+ydshieh@users.noreply.github.com>

* Apply suggestions from code review

Co-authored-by: Yih-Dar <2521628+ydshieh@users.noreply.github.com>

* make fixup

Co-authored-by: Yih-Dar <2521628+ydshieh@users.noreply.github.com>
This commit is contained in:
@@ -63,6 +63,26 @@ _CHECKPOINT_FOR_DOC = "bert-base-uncased"
|
|||||||
_CONFIG_FOR_DOC = "BertConfig"
|
_CONFIG_FOR_DOC = "BertConfig"
|
||||||
_TOKENIZER_FOR_DOC = "BertTokenizer"
|
_TOKENIZER_FOR_DOC = "BertTokenizer"
|
||||||
|
|
||||||
|
# TokenClassification docstring
|
||||||
|
_CHECKPOINT_FOR_TOKEN_CLASSIFICATION = "dbmdz/bert-large-cased-finetuned-conll03-english"
|
||||||
|
_TOKEN_CLASS_EXPECTED_OUTPUT = (
|
||||||
|
"['O', 'I-ORG', 'I-ORG', 'I-ORG', 'O', 'O', 'O', 'O', 'O', 'I-LOC', 'O', 'I-LOC', " "'I-LOC'] "
|
||||||
|
)
|
||||||
|
_TOKEN_CLASS_EXPECTED_LOSS = 0.01
|
||||||
|
|
||||||
|
# QuestionAnswering docstring
|
||||||
|
_CHECKPOINT_FOR_QA = "deepset/bert-base-cased-squad2"
|
||||||
|
_QA_EXPECTED_OUTPUT = "'a nice puppet'"
|
||||||
|
_QA_EXPECTED_LOSS = 7.41
|
||||||
|
_QA_TARGET_START_INDEX = 14
|
||||||
|
_QA_TARGET_END_INDEX = 15
|
||||||
|
|
||||||
|
# SequenceClassification docstring
|
||||||
|
_CHECKPOINT_FOR_SEQUENCE_CLASSIFICATION = "textattack/bert-base-uncased-yelp-polarity"
|
||||||
|
_SEQ_CLASS_EXPECTED_OUTPUT = "'LABEL_1'"
|
||||||
|
_SEQ_CLASS_EXPECTED_LOSS = 0.01
|
||||||
|
|
||||||
|
|
||||||
BERT_PRETRAINED_MODEL_ARCHIVE_LIST = [
|
BERT_PRETRAINED_MODEL_ARCHIVE_LIST = [
|
||||||
"bert-base-uncased",
|
"bert-base-uncased",
|
||||||
"bert-large-uncased",
|
"bert-large-uncased",
|
||||||
@@ -1156,7 +1176,12 @@ class BertLMHeadModel(BertPreTrainedModel):
|
|||||||
self.cls.predictions.decoder = new_embeddings
|
self.cls.predictions.decoder = new_embeddings
|
||||||
|
|
||||||
@add_start_docstrings_to_model_forward(BERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
|
@add_start_docstrings_to_model_forward(BERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
|
||||||
@replace_return_docstrings(output_type=CausalLMOutputWithCrossAttentions, config_class=_CONFIG_FOR_DOC)
|
@add_code_sample_docstrings(
|
||||||
|
processor_class=_TOKENIZER_FOR_DOC,
|
||||||
|
checkpoint=_CHECKPOINT_FOR_DOC,
|
||||||
|
output_type=CausalLMOutputWithCrossAttentions,
|
||||||
|
config_class=_CONFIG_FOR_DOC,
|
||||||
|
)
|
||||||
def forward(
|
def forward(
|
||||||
self,
|
self,
|
||||||
input_ids: Optional[torch.Tensor] = None,
|
input_ids: Optional[torch.Tensor] = None,
|
||||||
@@ -1176,48 +1201,27 @@ class BertLMHeadModel(BertPreTrainedModel):
|
|||||||
) -> Union[Tuple[torch.Tensor], CausalLMOutputWithCrossAttentions]:
|
) -> Union[Tuple[torch.Tensor], CausalLMOutputWithCrossAttentions]:
|
||||||
r"""
|
r"""
|
||||||
encoder_hidden_states (`torch.FloatTensor` of shape `(batch_size, sequence_length, hidden_size)`, *optional*):
|
encoder_hidden_states (`torch.FloatTensor` of shape `(batch_size, sequence_length, hidden_size)`, *optional*):
|
||||||
Sequence of hidden-states at the output of the last layer of the encoder. Used in the cross-attention
|
Sequence of hidden-states at the output of the last layer of the encoder. Used in the cross-attention if
|
||||||
if the model is configured as a decoder.
|
the model is configured as a decoder.
|
||||||
encoder_attention_mask (`torch.FloatTensor` of shape `(batch_size, sequence_length)`, *optional*):
|
encoder_attention_mask (`torch.FloatTensor` of shape `(batch_size, sequence_length)`, *optional*):
|
||||||
Mask to avoid performing attention on the padding token indices of the encoder input. This mask is used
|
Mask to avoid performing attention on the padding token indices of the encoder input. This mask is used in
|
||||||
in the cross-attention if the model is configured as a decoder. Mask values selected in `[0, 1]`:
|
the cross-attention if the model is configured as a decoder. Mask values selected in `[0, 1]`:
|
||||||
|
|
||||||
- 1 for tokens that are **not masked**,
|
- 1 for tokens that are **not masked**,
|
||||||
- 0 for tokens that are **masked**.
|
- 0 for tokens that are **masked**.
|
||||||
labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
|
labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
|
||||||
Labels for computing the left-to-right language modeling loss (next word prediction). Indices should be
|
Labels for computing the left-to-right language modeling loss (next word prediction). Indices should be in
|
||||||
in `[-100, 0, ..., config.vocab_size]` (see `input_ids` docstring) Tokens with indices set to `-100`
|
`[-100, 0, ..., config.vocab_size]` (see `input_ids` docstring). Tokens with indices set to `-100` are
|
||||||
are ignored (masked), the loss is only computed for the tokens with labels n `[0, ...,
|
ignored (masked), the loss is only computed for the tokens with labels in `[0, ..., config.vocab_size]`
|
||||||
config.vocab_size]`
|
|
||||||
past_key_values (`tuple(tuple(torch.FloatTensor))` of length `config.n_layers` with each tuple having 4 tensors of shape `(batch_size, num_heads, sequence_length - 1, embed_size_per_head)`):
|
past_key_values (`tuple(tuple(torch.FloatTensor))` of length `config.n_layers` with each tuple having 4 tensors of shape `(batch_size, num_heads, sequence_length - 1, embed_size_per_head)`):
|
||||||
Contains precomputed key and value hidden states of the attention blocks. Can be used to speed up
|
Contains precomputed key and value hidden states of the attention blocks. Can be used to speed up decoding.
|
||||||
decoding.
|
|
||||||
|
|
||||||
If `past_key_values` are used, the user can optionally input only the last `decoder_input_ids` (those
|
If `past_key_values` are used, the user can optionally input only the last `decoder_input_ids` (those that
|
||||||
that don't have their past key value states given to this model) of shape `(batch_size, 1)` instead of
|
don't have their past key value states given to this model) of shape `(batch_size, 1)` instead of all
|
||||||
all `decoder_input_ids` of shape `(batch_size, sequence_length)`.
|
`decoder_input_ids` of shape `(batch_size, sequence_length)`.
|
||||||
use_cache (`bool`, *optional*):
|
use_cache (`bool`, *optional*):
|
||||||
If set to `True`, `past_key_values` key value states are returned and can be used to speed up decoding
|
If set to `True`, `past_key_values` key value states are returned and can be used to speed up decoding (see
|
||||||
(see `past_key_values`).
|
`past_key_values`).
|
||||||
|
|
||||||
Returns:
|
|
||||||
|
|
||||||
Example:
|
|
||||||
|
|
||||||
```python
|
|
||||||
>>> from transformers import BertTokenizer, BertLMHeadModel, BertConfig
|
|
||||||
>>> import torch
|
|
||||||
|
|
||||||
>>> tokenizer = BertTokenizer.from_pretrained("bert-base-cased")
|
|
||||||
>>> config = BertConfig.from_pretrained("bert-base-cased")
|
|
||||||
>>> config.is_decoder = True
|
|
||||||
>>> model = BertLMHeadModel.from_pretrained("bert-base-cased", config=config)
|
|
||||||
|
|
||||||
>>> inputs = tokenizer("Hello, my dog is cute", return_tensors="pt")
|
|
||||||
>>> outputs = model(**inputs)
|
|
||||||
|
|
||||||
>>> prediction_logits = outputs.logits
|
|
||||||
```
|
|
||||||
"""
|
"""
|
||||||
return_dict = return_dict if return_dict is not None else self.config.use_return_dict
|
return_dict = return_dict if return_dict is not None else self.config.use_return_dict
|
||||||
if labels is not None:
|
if labels is not None:
|
||||||
@@ -1315,6 +1319,8 @@ class BertForMaskedLM(BertPreTrainedModel):
|
|||||||
checkpoint=_CHECKPOINT_FOR_DOC,
|
checkpoint=_CHECKPOINT_FOR_DOC,
|
||||||
output_type=MaskedLMOutput,
|
output_type=MaskedLMOutput,
|
||||||
config_class=_CONFIG_FOR_DOC,
|
config_class=_CONFIG_FOR_DOC,
|
||||||
|
expected_output="'paris'",
|
||||||
|
expected_loss=0.88,
|
||||||
)
|
)
|
||||||
def forward(
|
def forward(
|
||||||
self,
|
self,
|
||||||
@@ -1517,9 +1523,11 @@ class BertForSequenceClassification(BertPreTrainedModel):
|
|||||||
@add_start_docstrings_to_model_forward(BERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
|
@add_start_docstrings_to_model_forward(BERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
|
||||||
@add_code_sample_docstrings(
|
@add_code_sample_docstrings(
|
||||||
processor_class=_TOKENIZER_FOR_DOC,
|
processor_class=_TOKENIZER_FOR_DOC,
|
||||||
checkpoint=_CHECKPOINT_FOR_DOC,
|
checkpoint=_CHECKPOINT_FOR_SEQUENCE_CLASSIFICATION,
|
||||||
output_type=SequenceClassifierOutput,
|
output_type=SequenceClassifierOutput,
|
||||||
config_class=_CONFIG_FOR_DOC,
|
config_class=_CONFIG_FOR_DOC,
|
||||||
|
expected_output=_SEQ_CLASS_EXPECTED_OUTPUT,
|
||||||
|
expected_loss=_SEQ_CLASS_EXPECTED_LOSS,
|
||||||
)
|
)
|
||||||
def forward(
|
def forward(
|
||||||
self,
|
self,
|
||||||
@@ -1716,9 +1724,11 @@ class BertForTokenClassification(BertPreTrainedModel):
|
|||||||
@add_start_docstrings_to_model_forward(BERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
|
@add_start_docstrings_to_model_forward(BERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
|
||||||
@add_code_sample_docstrings(
|
@add_code_sample_docstrings(
|
||||||
processor_class=_TOKENIZER_FOR_DOC,
|
processor_class=_TOKENIZER_FOR_DOC,
|
||||||
checkpoint=_CHECKPOINT_FOR_DOC,
|
checkpoint=_CHECKPOINT_FOR_TOKEN_CLASSIFICATION,
|
||||||
output_type=TokenClassifierOutput,
|
output_type=TokenClassifierOutput,
|
||||||
config_class=_CONFIG_FOR_DOC,
|
config_class=_CONFIG_FOR_DOC,
|
||||||
|
expected_output=_TOKEN_CLASS_EXPECTED_OUTPUT,
|
||||||
|
expected_loss=_TOKEN_CLASS_EXPECTED_LOSS,
|
||||||
)
|
)
|
||||||
def forward(
|
def forward(
|
||||||
self,
|
self,
|
||||||
@@ -1797,9 +1807,13 @@ class BertForQuestionAnswering(BertPreTrainedModel):
|
|||||||
@add_start_docstrings_to_model_forward(BERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
|
@add_start_docstrings_to_model_forward(BERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
|
||||||
@add_code_sample_docstrings(
|
@add_code_sample_docstrings(
|
||||||
processor_class=_TOKENIZER_FOR_DOC,
|
processor_class=_TOKENIZER_FOR_DOC,
|
||||||
checkpoint=_CHECKPOINT_FOR_DOC,
|
checkpoint=_CHECKPOINT_FOR_QA,
|
||||||
output_type=QuestionAnsweringModelOutput,
|
output_type=QuestionAnsweringModelOutput,
|
||||||
config_class=_CONFIG_FOR_DOC,
|
config_class=_CONFIG_FOR_DOC,
|
||||||
|
qa_target_start_index=_QA_TARGET_START_INDEX,
|
||||||
|
qa_target_end_index=_QA_TARGET_END_INDEX,
|
||||||
|
expected_output=_QA_EXPECTED_OUTPUT,
|
||||||
|
expected_loss=_QA_EXPECTED_LOSS,
|
||||||
)
|
)
|
||||||
def forward(
|
def forward(
|
||||||
self,
|
self,
|
||||||
|
|||||||
@@ -65,10 +65,29 @@ from .configuration_bert import BertConfig
|
|||||||
|
|
||||||
logger = logging.get_logger(__name__)
|
logger = logging.get_logger(__name__)
|
||||||
|
|
||||||
_CHECKPOINT_FOR_DOC = "bert-base-cased"
|
_CHECKPOINT_FOR_DOC = "bert-base-uncased"
|
||||||
_CONFIG_FOR_DOC = "BertConfig"
|
_CONFIG_FOR_DOC = "BertConfig"
|
||||||
_TOKENIZER_FOR_DOC = "BertTokenizer"
|
_TOKENIZER_FOR_DOC = "BertTokenizer"
|
||||||
|
|
||||||
|
# TokenClassification docstring
|
||||||
|
_CHECKPOINT_FOR_TOKEN_CLASSIFICATION = "dbmdz/bert-large-cased-finetuned-conll03-english"
|
||||||
|
_TOKEN_CLASS_EXPECTED_OUTPUT = (
|
||||||
|
"['O', 'I-ORG', 'I-ORG', 'I-ORG', 'O', 'O', 'O', 'O', 'O', 'I-LOC', 'O', 'I-LOC', " "'I-LOC'] "
|
||||||
|
)
|
||||||
|
_TOKEN_CLASS_EXPECTED_LOSS = 0.01
|
||||||
|
|
||||||
|
# QuestionAnswering docstring
|
||||||
|
_CHECKPOINT_FOR_QA = "ydshieh/bert-base-cased-squad2"
|
||||||
|
_QA_EXPECTED_OUTPUT = "'a nice puppet'"
|
||||||
|
_QA_EXPECTED_LOSS = 7.41
|
||||||
|
_QA_TARGET_START_INDEX = 14
|
||||||
|
_QA_TARGET_END_INDEX = 15
|
||||||
|
|
||||||
|
# SequenceClassification docstring
|
||||||
|
_CHECKPOINT_FOR_SEQUENCE_CLASSIFICATION = "ydshieh/bert-base-uncased-yelp-polarity"
|
||||||
|
_SEQ_CLASS_EXPECTED_OUTPUT = "'LABEL_1'"
|
||||||
|
_SEQ_CLASS_EXPECTED_LOSS = 0.01
|
||||||
|
|
||||||
TF_BERT_PRETRAINED_MODEL_ARCHIVE_LIST = [
|
TF_BERT_PRETRAINED_MODEL_ARCHIVE_LIST = [
|
||||||
"bert-base-uncased",
|
"bert-base-uncased",
|
||||||
"bert-large-uncased",
|
"bert-large-uncased",
|
||||||
@@ -1197,11 +1216,11 @@ class TFBertForPreTraining(TFBertPreTrainedModel, TFBertPreTrainingLoss):
|
|||||||
|
|
||||||
>>> tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
|
>>> tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
|
||||||
>>> model = TFBertForPreTraining.from_pretrained("bert-base-uncased")
|
>>> model = TFBertForPreTraining.from_pretrained("bert-base-uncased")
|
||||||
>>> input_ids = tf.constant(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True))[
|
>>> input_ids = tokenizer("Hello, my dog is cute", add_special_tokens=True, return_tensors="tf")
|
||||||
... None, :
|
>>> # Batch size 1
|
||||||
>>> ] # Batch size 1
|
|
||||||
>>> outputs = model(input_ids)
|
>>> outputs = model(input_ids)
|
||||||
>>> prediction_scores, seq_relationship_scores = outputs[:2]
|
>>> prediction_logits, seq_relationship_logits = outputs[:2]
|
||||||
```"""
|
```"""
|
||||||
outputs = self.bert(
|
outputs = self.bert(
|
||||||
input_ids=input_ids,
|
input_ids=input_ids,
|
||||||
@@ -1285,6 +1304,8 @@ class TFBertForMaskedLM(TFBertPreTrainedModel, TFMaskedLanguageModelingLoss):
|
|||||||
checkpoint=_CHECKPOINT_FOR_DOC,
|
checkpoint=_CHECKPOINT_FOR_DOC,
|
||||||
output_type=TFMaskedLMOutput,
|
output_type=TFMaskedLMOutput,
|
||||||
config_class=_CONFIG_FOR_DOC,
|
config_class=_CONFIG_FOR_DOC,
|
||||||
|
expected_output="'paris'",
|
||||||
|
expected_loss=0.88,
|
||||||
)
|
)
|
||||||
def call(
|
def call(
|
||||||
self,
|
self,
|
||||||
@@ -1606,9 +1627,11 @@ class TFBertForSequenceClassification(TFBertPreTrainedModel, TFSequenceClassific
|
|||||||
@add_start_docstrings_to_model_forward(BERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
|
@add_start_docstrings_to_model_forward(BERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
|
||||||
@add_code_sample_docstrings(
|
@add_code_sample_docstrings(
|
||||||
processor_class=_TOKENIZER_FOR_DOC,
|
processor_class=_TOKENIZER_FOR_DOC,
|
||||||
checkpoint=_CHECKPOINT_FOR_DOC,
|
checkpoint=_CHECKPOINT_FOR_SEQUENCE_CLASSIFICATION,
|
||||||
output_type=TFSequenceClassifierOutput,
|
output_type=TFSequenceClassifierOutput,
|
||||||
config_class=_CONFIG_FOR_DOC,
|
config_class=_CONFIG_FOR_DOC,
|
||||||
|
expected_output=_SEQ_CLASS_EXPECTED_OUTPUT,
|
||||||
|
expected_loss=_SEQ_CLASS_EXPECTED_LOSS,
|
||||||
)
|
)
|
||||||
def call(
|
def call(
|
||||||
self,
|
self,
|
||||||
@@ -1833,9 +1856,11 @@ class TFBertForTokenClassification(TFBertPreTrainedModel, TFTokenClassificationL
|
|||||||
@add_start_docstrings_to_model_forward(BERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
|
@add_start_docstrings_to_model_forward(BERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
|
||||||
@add_code_sample_docstrings(
|
@add_code_sample_docstrings(
|
||||||
processor_class=_TOKENIZER_FOR_DOC,
|
processor_class=_TOKENIZER_FOR_DOC,
|
||||||
checkpoint=_CHECKPOINT_FOR_DOC,
|
checkpoint=_CHECKPOINT_FOR_TOKEN_CLASSIFICATION,
|
||||||
output_type=TFTokenClassifierOutput,
|
output_type=TFTokenClassifierOutput,
|
||||||
config_class=_CONFIG_FOR_DOC,
|
config_class=_CONFIG_FOR_DOC,
|
||||||
|
expected_output=_TOKEN_CLASS_EXPECTED_OUTPUT,
|
||||||
|
expected_loss=_TOKEN_CLASS_EXPECTED_LOSS,
|
||||||
)
|
)
|
||||||
def call(
|
def call(
|
||||||
self,
|
self,
|
||||||
@@ -1923,9 +1948,11 @@ class TFBertForQuestionAnswering(TFBertPreTrainedModel, TFQuestionAnsweringLoss)
|
|||||||
@add_start_docstrings_to_model_forward(BERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
|
@add_start_docstrings_to_model_forward(BERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
|
||||||
@add_code_sample_docstrings(
|
@add_code_sample_docstrings(
|
||||||
processor_class=_TOKENIZER_FOR_DOC,
|
processor_class=_TOKENIZER_FOR_DOC,
|
||||||
checkpoint=_CHECKPOINT_FOR_DOC,
|
checkpoint=_CHECKPOINT_FOR_QA,
|
||||||
output_type=TFQuestionAnsweringModelOutput,
|
output_type=TFQuestionAnsweringModelOutput,
|
||||||
config_class=_CONFIG_FOR_DOC,
|
config_class=_CONFIG_FOR_DOC,
|
||||||
|
expected_output=_QA_EXPECTED_OUTPUT,
|
||||||
|
expected_loss=_QA_EXPECTED_LOSS,
|
||||||
)
|
)
|
||||||
def call(
|
def call(
|
||||||
self,
|
self,
|
||||||
|
|||||||
@@ -59,6 +59,23 @@ _CHECKPOINT_FOR_DOC = "google/mobilebert-uncased"
|
|||||||
_CONFIG_FOR_DOC = "MobileBertConfig"
|
_CONFIG_FOR_DOC = "MobileBertConfig"
|
||||||
_TOKENIZER_FOR_DOC = "MobileBertTokenizer"
|
_TOKENIZER_FOR_DOC = "MobileBertTokenizer"
|
||||||
|
|
||||||
|
# TokenClassification docstring
|
||||||
|
_CHECKPOINT_FOR_TOKEN_CLASSIFICATION = "mrm8488/mobilebert-finetuned-ner"
|
||||||
|
_TOKEN_CLASS_EXPECTED_OUTPUT = "['I-ORG', 'I-ORG', 'O', 'O', 'O', 'O', 'O', 'I-LOC', 'O', 'I-LOC', 'I-LOC']"
|
||||||
|
_TOKEN_CLASS_EXPECTED_LOSS = 0.03
|
||||||
|
|
||||||
|
# QuestionAnswering docstring
|
||||||
|
_CHECKPOINT_FOR_QA = "csarron/mobilebert-uncased-squad-v2"
|
||||||
|
_QA_EXPECTED_OUTPUT = "'a nice puppet'"
|
||||||
|
_QA_EXPECTED_LOSS = 3.98
|
||||||
|
_QA_TARGET_START_INDEX = 12
|
||||||
|
_QA_TARGET_END_INDEX = 13
|
||||||
|
|
||||||
|
# SequenceClassification docstring
|
||||||
|
_CHECKPOINT_FOR_SEQUENCE_CLASSIFICATION = "lordtt13/emo-mobilebert"
|
||||||
|
_SEQ_CLASS_EXPECTED_OUTPUT = "'others'"
|
||||||
|
_SEQ_CLASS_EXPECTED_LOSS = "4.72"
|
||||||
|
|
||||||
MOBILEBERT_PRETRAINED_MODEL_ARCHIVE_LIST = ["google/mobilebert-uncased"]
|
MOBILEBERT_PRETRAINED_MODEL_ARCHIVE_LIST = ["google/mobilebert-uncased"]
|
||||||
|
|
||||||
|
|
||||||
@@ -962,9 +979,8 @@ class MobileBertForPreTraining(MobileBertPreTrainedModel):
|
|||||||
>>> tokenizer = MobileBertTokenizer.from_pretrained("google/mobilebert-uncased")
|
>>> tokenizer = MobileBertTokenizer.from_pretrained("google/mobilebert-uncased")
|
||||||
>>> model = MobileBertForPreTraining.from_pretrained("google/mobilebert-uncased")
|
>>> model = MobileBertForPreTraining.from_pretrained("google/mobilebert-uncased")
|
||||||
|
|
||||||
>>> input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze(
|
>>> input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze(0)
|
||||||
... 0
|
>>> # Batch size 1
|
||||||
>>> ) # Batch size 1
|
|
||||||
>>> outputs = model(input_ids)
|
>>> outputs = model(input_ids)
|
||||||
|
|
||||||
>>> prediction_logits = outputs.prediction_logits
|
>>> prediction_logits = outputs.prediction_logits
|
||||||
@@ -1039,6 +1055,8 @@ class MobileBertForMaskedLM(MobileBertPreTrainedModel):
|
|||||||
checkpoint=_CHECKPOINT_FOR_DOC,
|
checkpoint=_CHECKPOINT_FOR_DOC,
|
||||||
output_type=MaskedLMOutput,
|
output_type=MaskedLMOutput,
|
||||||
config_class=_CONFIG_FOR_DOC,
|
config_class=_CONFIG_FOR_DOC,
|
||||||
|
expected_output="'paris'",
|
||||||
|
expected_loss=0.57,
|
||||||
)
|
)
|
||||||
def forward(
|
def forward(
|
||||||
self,
|
self,
|
||||||
@@ -1229,9 +1247,11 @@ class MobileBertForSequenceClassification(MobileBertPreTrainedModel):
|
|||||||
@add_start_docstrings_to_model_forward(MOBILEBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
|
@add_start_docstrings_to_model_forward(MOBILEBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
|
||||||
@add_code_sample_docstrings(
|
@add_code_sample_docstrings(
|
||||||
processor_class=_TOKENIZER_FOR_DOC,
|
processor_class=_TOKENIZER_FOR_DOC,
|
||||||
checkpoint=_CHECKPOINT_FOR_DOC,
|
checkpoint=_CHECKPOINT_FOR_SEQUENCE_CLASSIFICATION,
|
||||||
output_type=SequenceClassifierOutput,
|
output_type=SequenceClassifierOutput,
|
||||||
config_class=_CONFIG_FOR_DOC,
|
config_class=_CONFIG_FOR_DOC,
|
||||||
|
expected_output=_SEQ_CLASS_EXPECTED_OUTPUT,
|
||||||
|
expected_loss=_SEQ_CLASS_EXPECTED_LOSS,
|
||||||
)
|
)
|
||||||
def forward(
|
def forward(
|
||||||
self,
|
self,
|
||||||
@@ -1330,9 +1350,13 @@ class MobileBertForQuestionAnswering(MobileBertPreTrainedModel):
|
|||||||
@add_start_docstrings_to_model_forward(MOBILEBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
|
@add_start_docstrings_to_model_forward(MOBILEBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
|
||||||
@add_code_sample_docstrings(
|
@add_code_sample_docstrings(
|
||||||
processor_class=_TOKENIZER_FOR_DOC,
|
processor_class=_TOKENIZER_FOR_DOC,
|
||||||
checkpoint=_CHECKPOINT_FOR_DOC,
|
checkpoint=_CHECKPOINT_FOR_QA,
|
||||||
output_type=QuestionAnsweringModelOutput,
|
output_type=QuestionAnsweringModelOutput,
|
||||||
config_class=_CONFIG_FOR_DOC,
|
config_class=_CONFIG_FOR_DOC,
|
||||||
|
qa_target_start_index=_QA_TARGET_START_INDEX,
|
||||||
|
qa_target_end_index=_QA_TARGET_END_INDEX,
|
||||||
|
expected_output=_QA_EXPECTED_OUTPUT,
|
||||||
|
expected_loss=_QA_EXPECTED_LOSS,
|
||||||
)
|
)
|
||||||
def forward(
|
def forward(
|
||||||
self,
|
self,
|
||||||
@@ -1536,9 +1560,11 @@ class MobileBertForTokenClassification(MobileBertPreTrainedModel):
|
|||||||
@add_start_docstrings_to_model_forward(MOBILEBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
|
@add_start_docstrings_to_model_forward(MOBILEBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
|
||||||
@add_code_sample_docstrings(
|
@add_code_sample_docstrings(
|
||||||
processor_class=_TOKENIZER_FOR_DOC,
|
processor_class=_TOKENIZER_FOR_DOC,
|
||||||
checkpoint=_CHECKPOINT_FOR_DOC,
|
checkpoint=_CHECKPOINT_FOR_TOKEN_CLASSIFICATION,
|
||||||
output_type=TokenClassifierOutput,
|
output_type=TokenClassifierOutput,
|
||||||
config_class=_CONFIG_FOR_DOC,
|
config_class=_CONFIG_FOR_DOC,
|
||||||
|
expected_output=_TOKEN_CLASS_EXPECTED_OUTPUT,
|
||||||
|
expected_loss=_TOKEN_CLASS_EXPECTED_LOSS,
|
||||||
)
|
)
|
||||||
def forward(
|
def forward(
|
||||||
self,
|
self,
|
||||||
|
|||||||
@@ -63,6 +63,23 @@ _CHECKPOINT_FOR_DOC = "google/mobilebert-uncased"
|
|||||||
_CONFIG_FOR_DOC = "MobileBertConfig"
|
_CONFIG_FOR_DOC = "MobileBertConfig"
|
||||||
_TOKENIZER_FOR_DOC = "MobileBertTokenizer"
|
_TOKENIZER_FOR_DOC = "MobileBertTokenizer"
|
||||||
|
|
||||||
|
# TokenClassification docstring
|
||||||
|
_CHECKPOINT_FOR_TOKEN_CLASSIFICATION = "vumichien/mobilebert-finetuned-ner"
|
||||||
|
_TOKEN_CLASS_EXPECTED_OUTPUT = "['I-ORG', 'I-ORG', 'O', 'O', 'O', 'O', 'O', 'I-LOC', 'O', 'I-LOC', 'I-LOC']"
|
||||||
|
_TOKEN_CLASS_EXPECTED_LOSS = 0.03
|
||||||
|
|
||||||
|
# QuestionAnswering docstring
|
||||||
|
_CHECKPOINT_FOR_QA = "vumichien/mobilebert-uncased-squad-v2"
|
||||||
|
_QA_EXPECTED_OUTPUT = "'a nice puppet'"
|
||||||
|
_QA_EXPECTED_LOSS = 3.98
|
||||||
|
_QA_TARGET_START_INDEX = 12
|
||||||
|
_QA_TARGET_END_INDEX = 13
|
||||||
|
|
||||||
|
# SequenceClassification docstring
|
||||||
|
_CHECKPOINT_FOR_SEQUENCE_CLASSIFICATION = "vumichien/emo-mobilebert"
|
||||||
|
_SEQ_CLASS_EXPECTED_OUTPUT = "'others'"
|
||||||
|
_SEQ_CLASS_EXPECTED_LOSS = "4.72"
|
||||||
|
|
||||||
TF_MOBILEBERT_PRETRAINED_MODEL_ARCHIVE_LIST = [
|
TF_MOBILEBERT_PRETRAINED_MODEL_ARCHIVE_LIST = [
|
||||||
"google/mobilebert-uncased",
|
"google/mobilebert-uncased",
|
||||||
# See all MobileBERT models at https://huggingface.co/models?filter=mobilebert
|
# See all MobileBERT models at https://huggingface.co/models?filter=mobilebert
|
||||||
@@ -1075,6 +1092,8 @@ class TFMobileBertForMaskedLM(TFMobileBertPreTrainedModel, TFMaskedLanguageModel
|
|||||||
checkpoint=_CHECKPOINT_FOR_DOC,
|
checkpoint=_CHECKPOINT_FOR_DOC,
|
||||||
output_type=TFMaskedLMOutput,
|
output_type=TFMaskedLMOutput,
|
||||||
config_class=_CONFIG_FOR_DOC,
|
config_class=_CONFIG_FOR_DOC,
|
||||||
|
expected_output="'paris'",
|
||||||
|
expected_loss=0.57,
|
||||||
)
|
)
|
||||||
def call(
|
def call(
|
||||||
self,
|
self,
|
||||||
@@ -1265,9 +1284,11 @@ class TFMobileBertForSequenceClassification(TFMobileBertPreTrainedModel, TFSeque
|
|||||||
@add_start_docstrings_to_model_forward(MOBILEBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
|
@add_start_docstrings_to_model_forward(MOBILEBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
|
||||||
@add_code_sample_docstrings(
|
@add_code_sample_docstrings(
|
||||||
processor_class=_TOKENIZER_FOR_DOC,
|
processor_class=_TOKENIZER_FOR_DOC,
|
||||||
checkpoint=_CHECKPOINT_FOR_DOC,
|
checkpoint=_CHECKPOINT_FOR_SEQUENCE_CLASSIFICATION,
|
||||||
output_type=TFSequenceClassifierOutput,
|
output_type=TFSequenceClassifierOutput,
|
||||||
config_class=_CONFIG_FOR_DOC,
|
config_class=_CONFIG_FOR_DOC,
|
||||||
|
expected_output=_SEQ_CLASS_EXPECTED_OUTPUT,
|
||||||
|
expected_loss=_SEQ_CLASS_EXPECTED_LOSS,
|
||||||
)
|
)
|
||||||
def call(
|
def call(
|
||||||
self,
|
self,
|
||||||
@@ -1357,9 +1378,13 @@ class TFMobileBertForQuestionAnswering(TFMobileBertPreTrainedModel, TFQuestionAn
|
|||||||
@add_start_docstrings_to_model_forward(MOBILEBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
|
@add_start_docstrings_to_model_forward(MOBILEBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
|
||||||
@add_code_sample_docstrings(
|
@add_code_sample_docstrings(
|
||||||
processor_class=_TOKENIZER_FOR_DOC,
|
processor_class=_TOKENIZER_FOR_DOC,
|
||||||
checkpoint=_CHECKPOINT_FOR_DOC,
|
checkpoint=_CHECKPOINT_FOR_QA,
|
||||||
output_type=TFQuestionAnsweringModelOutput,
|
output_type=TFQuestionAnsweringModelOutput,
|
||||||
config_class=_CONFIG_FOR_DOC,
|
config_class=_CONFIG_FOR_DOC,
|
||||||
|
qa_target_start_index=_QA_TARGET_START_INDEX,
|
||||||
|
qa_target_end_index=_QA_TARGET_END_INDEX,
|
||||||
|
expected_output=_QA_EXPECTED_OUTPUT,
|
||||||
|
expected_loss=_QA_EXPECTED_LOSS,
|
||||||
)
|
)
|
||||||
def call(
|
def call(
|
||||||
self,
|
self,
|
||||||
@@ -1601,9 +1626,11 @@ class TFMobileBertForTokenClassification(TFMobileBertPreTrainedModel, TFTokenCla
|
|||||||
@add_start_docstrings_to_model_forward(MOBILEBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
|
@add_start_docstrings_to_model_forward(MOBILEBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
|
||||||
@add_code_sample_docstrings(
|
@add_code_sample_docstrings(
|
||||||
processor_class=_TOKENIZER_FOR_DOC,
|
processor_class=_TOKENIZER_FOR_DOC,
|
||||||
checkpoint=_CHECKPOINT_FOR_DOC,
|
checkpoint=_CHECKPOINT_FOR_TOKEN_CLASSIFICATION,
|
||||||
output_type=TFTokenClassifierOutput,
|
output_type=TFTokenClassifierOutput,
|
||||||
config_class=_CONFIG_FOR_DOC,
|
config_class=_CONFIG_FOR_DOC,
|
||||||
|
expected_output=_TOKEN_CLASS_EXPECTED_OUTPUT,
|
||||||
|
expected_loss=_TOKEN_CLASS_EXPECTED_LOSS,
|
||||||
)
|
)
|
||||||
def call(
|
def call(
|
||||||
self,
|
self,
|
||||||
|
|||||||
@@ -5,6 +5,8 @@ docs/source/en/model_doc/tapex.mdx
|
|||||||
src/transformers/generation_utils.py
|
src/transformers/generation_utils.py
|
||||||
src/transformers/models/bart/modeling_bart.py
|
src/transformers/models/bart/modeling_bart.py
|
||||||
src/transformers/models/beit/modeling_beit.py
|
src/transformers/models/beit/modeling_beit.py
|
||||||
|
src/transformers/models/bert/modeling_bert.py
|
||||||
|
src/transformers/models/bert/modeling_tf_bert.py
|
||||||
src/transformers/models/bigbird_pegasus/modeling_bigbird_pegasus.py
|
src/transformers/models/bigbird_pegasus/modeling_bigbird_pegasus.py
|
||||||
src/transformers/models/blenderbot/modeling_blenderbot.py
|
src/transformers/models/blenderbot/modeling_blenderbot.py
|
||||||
src/transformers/models/blenderbot_small/modeling_blenderbot_small.py
|
src/transformers/models/blenderbot_small/modeling_blenderbot_small.py
|
||||||
@@ -16,6 +18,8 @@ src/transformers/models/glpn/modeling_glpn.py
|
|||||||
src/transformers/models/hubert/modeling_hubert.py
|
src/transformers/models/hubert/modeling_hubert.py
|
||||||
src/transformers/models/marian/modeling_marian.py
|
src/transformers/models/marian/modeling_marian.py
|
||||||
src/transformers/models/mbart/modeling_mbart.py
|
src/transformers/models/mbart/modeling_mbart.py
|
||||||
|
src/transformers/models/mobilebert/modeling_mobilebert.py
|
||||||
|
src/transformers/models/mobilebert/modeling_tf_mobilebert.py
|
||||||
src/transformers/models/pegasus/modeling_pegasus.py
|
src/transformers/models/pegasus/modeling_pegasus.py
|
||||||
src/transformers/models/plbart/modeling_plbart.py
|
src/transformers/models/plbart/modeling_plbart.py
|
||||||
src/transformers/models/poolformer/modeling_poolformer.py
|
src/transformers/models/poolformer/modeling_poolformer.py
|
||||||
|
|||||||
Reference in New Issue
Block a user