Add Doc Test for BERT (#16523)
* Add doctest BERT * make fixup * fix typo * change checkpoints * make fixup * define doctest output value, update doctest for mobilebert * solve fix-copies * update QA target start index and end index * change checkpoint for docs and reuse defined variable * Update src/transformers/models/bert/modeling_tf_bert.py Co-authored-by: Yih-Dar <2521628+ydshieh@users.noreply.github.com> * Apply suggestions from code review Co-authored-by: Yih-Dar <2521628+ydshieh@users.noreply.github.com> * Apply suggestions from code review Co-authored-by: Yih-Dar <2521628+ydshieh@users.noreply.github.com> * make fixup Co-authored-by: Yih-Dar <2521628+ydshieh@users.noreply.github.com>
This commit is contained in:
@@ -63,6 +63,26 @@ _CHECKPOINT_FOR_DOC = "bert-base-uncased"
|
||||
_CONFIG_FOR_DOC = "BertConfig"
|
||||
_TOKENIZER_FOR_DOC = "BertTokenizer"
|
||||
|
||||
# TokenClassification docstring
|
||||
_CHECKPOINT_FOR_TOKEN_CLASSIFICATION = "dbmdz/bert-large-cased-finetuned-conll03-english"
|
||||
_TOKEN_CLASS_EXPECTED_OUTPUT = (
|
||||
"['O', 'I-ORG', 'I-ORG', 'I-ORG', 'O', 'O', 'O', 'O', 'O', 'I-LOC', 'O', 'I-LOC', " "'I-LOC'] "
|
||||
)
|
||||
_TOKEN_CLASS_EXPECTED_LOSS = 0.01
|
||||
|
||||
# QuestionAnswering docstring
|
||||
_CHECKPOINT_FOR_QA = "deepset/bert-base-cased-squad2"
|
||||
_QA_EXPECTED_OUTPUT = "'a nice puppet'"
|
||||
_QA_EXPECTED_LOSS = 7.41
|
||||
_QA_TARGET_START_INDEX = 14
|
||||
_QA_TARGET_END_INDEX = 15
|
||||
|
||||
# SequenceClassification docstring
|
||||
_CHECKPOINT_FOR_SEQUENCE_CLASSIFICATION = "textattack/bert-base-uncased-yelp-polarity"
|
||||
_SEQ_CLASS_EXPECTED_OUTPUT = "'LABEL_1'"
|
||||
_SEQ_CLASS_EXPECTED_LOSS = 0.01
|
||||
|
||||
|
||||
BERT_PRETRAINED_MODEL_ARCHIVE_LIST = [
|
||||
"bert-base-uncased",
|
||||
"bert-large-uncased",
|
||||
@@ -1156,7 +1176,12 @@ class BertLMHeadModel(BertPreTrainedModel):
|
||||
self.cls.predictions.decoder = new_embeddings
|
||||
|
||||
@add_start_docstrings_to_model_forward(BERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
|
||||
@replace_return_docstrings(output_type=CausalLMOutputWithCrossAttentions, config_class=_CONFIG_FOR_DOC)
|
||||
@add_code_sample_docstrings(
|
||||
processor_class=_TOKENIZER_FOR_DOC,
|
||||
checkpoint=_CHECKPOINT_FOR_DOC,
|
||||
output_type=CausalLMOutputWithCrossAttentions,
|
||||
config_class=_CONFIG_FOR_DOC,
|
||||
)
|
||||
def forward(
|
||||
self,
|
||||
input_ids: Optional[torch.Tensor] = None,
|
||||
@@ -1176,48 +1201,27 @@ class BertLMHeadModel(BertPreTrainedModel):
|
||||
) -> Union[Tuple[torch.Tensor], CausalLMOutputWithCrossAttentions]:
|
||||
r"""
|
||||
encoder_hidden_states (`torch.FloatTensor` of shape `(batch_size, sequence_length, hidden_size)`, *optional*):
|
||||
Sequence of hidden-states at the output of the last layer of the encoder. Used in the cross-attention
|
||||
if the model is configured as a decoder.
|
||||
Sequence of hidden-states at the output of the last layer of the encoder. Used in the cross-attention if
|
||||
the model is configured as a decoder.
|
||||
encoder_attention_mask (`torch.FloatTensor` of shape `(batch_size, sequence_length)`, *optional*):
|
||||
Mask to avoid performing attention on the padding token indices of the encoder input. This mask is used
|
||||
in the cross-attention if the model is configured as a decoder. Mask values selected in `[0, 1]`:
|
||||
Mask to avoid performing attention on the padding token indices of the encoder input. This mask is used in
|
||||
the cross-attention if the model is configured as a decoder. Mask values selected in `[0, 1]`:
|
||||
|
||||
- 1 for tokens that are **not masked**,
|
||||
- 0 for tokens that are **masked**.
|
||||
labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
|
||||
Labels for computing the left-to-right language modeling loss (next word prediction). Indices should be
|
||||
in `[-100, 0, ..., config.vocab_size]` (see `input_ids` docstring). Tokens with indices set to `-100`
|
||||
are ignored (masked), the loss is only computed for the tokens with labels in `[0, ...,
|
||||
config.vocab_size]`
|
||||
Labels for computing the left-to-right language modeling loss (next word prediction). Indices should be in
|
||||
`[-100, 0, ..., config.vocab_size]` (see `input_ids` docstring). Tokens with indices set to `-100` are
|
||||
ignored (masked), the loss is only computed for the tokens with labels in `[0, ..., config.vocab_size]`
|
||||
past_key_values (`tuple(tuple(torch.FloatTensor))` of length `config.n_layers` with each tuple having 4 tensors of shape `(batch_size, num_heads, sequence_length - 1, embed_size_per_head)`):
|
||||
Contains precomputed key and value hidden states of the attention blocks. Can be used to speed up
|
||||
decoding.
|
||||
Contains precomputed key and value hidden states of the attention blocks. Can be used to speed up decoding.
|
||||
|
||||
If `past_key_values` are used, the user can optionally input only the last `decoder_input_ids` (those
|
||||
that don't have their past key value states given to this model) of shape `(batch_size, 1)` instead of
|
||||
all `decoder_input_ids` of shape `(batch_size, sequence_length)`.
|
||||
If `past_key_values` are used, the user can optionally input only the last `decoder_input_ids` (those that
|
||||
don't have their past key value states given to this model) of shape `(batch_size, 1)` instead of all
|
||||
`decoder_input_ids` of shape `(batch_size, sequence_length)`.
|
||||
use_cache (`bool`, *optional*):
|
||||
If set to `True`, `past_key_values` key value states are returned and can be used to speed up decoding
|
||||
(see `past_key_values`).
|
||||
|
||||
Returns:
|
||||
|
||||
Example:
|
||||
|
||||
```python
|
||||
>>> from transformers import BertTokenizer, BertLMHeadModel, BertConfig
|
||||
>>> import torch
|
||||
|
||||
>>> tokenizer = BertTokenizer.from_pretrained("bert-base-cased")
|
||||
>>> config = BertConfig.from_pretrained("bert-base-cased")
|
||||
>>> config.is_decoder = True
|
||||
>>> model = BertLMHeadModel.from_pretrained("bert-base-cased", config=config)
|
||||
|
||||
>>> inputs = tokenizer("Hello, my dog is cute", return_tensors="pt")
|
||||
>>> outputs = model(**inputs)
|
||||
|
||||
>>> prediction_logits = outputs.logits
|
||||
```
|
||||
If set to `True`, `past_key_values` key value states are returned and can be used to speed up decoding (see
|
||||
`past_key_values`).
|
||||
"""
|
||||
return_dict = return_dict if return_dict is not None else self.config.use_return_dict
|
||||
if labels is not None:
|
||||
@@ -1315,6 +1319,8 @@ class BertForMaskedLM(BertPreTrainedModel):
|
||||
checkpoint=_CHECKPOINT_FOR_DOC,
|
||||
output_type=MaskedLMOutput,
|
||||
config_class=_CONFIG_FOR_DOC,
|
||||
expected_output="'paris'",
|
||||
expected_loss=0.88,
|
||||
)
|
||||
def forward(
|
||||
self,
|
||||
@@ -1517,9 +1523,11 @@ class BertForSequenceClassification(BertPreTrainedModel):
|
||||
@add_start_docstrings_to_model_forward(BERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
|
||||
@add_code_sample_docstrings(
|
||||
processor_class=_TOKENIZER_FOR_DOC,
|
||||
checkpoint=_CHECKPOINT_FOR_DOC,
|
||||
checkpoint=_CHECKPOINT_FOR_SEQUENCE_CLASSIFICATION,
|
||||
output_type=SequenceClassifierOutput,
|
||||
config_class=_CONFIG_FOR_DOC,
|
||||
expected_output=_SEQ_CLASS_EXPECTED_OUTPUT,
|
||||
expected_loss=_SEQ_CLASS_EXPECTED_LOSS,
|
||||
)
|
||||
def forward(
|
||||
self,
|
||||
@@ -1716,9 +1724,11 @@ class BertForTokenClassification(BertPreTrainedModel):
|
||||
@add_start_docstrings_to_model_forward(BERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
|
||||
@add_code_sample_docstrings(
|
||||
processor_class=_TOKENIZER_FOR_DOC,
|
||||
checkpoint=_CHECKPOINT_FOR_DOC,
|
||||
checkpoint=_CHECKPOINT_FOR_TOKEN_CLASSIFICATION,
|
||||
output_type=TokenClassifierOutput,
|
||||
config_class=_CONFIG_FOR_DOC,
|
||||
expected_output=_TOKEN_CLASS_EXPECTED_OUTPUT,
|
||||
expected_loss=_TOKEN_CLASS_EXPECTED_LOSS,
|
||||
)
|
||||
def forward(
|
||||
self,
|
||||
@@ -1797,9 +1807,13 @@ class BertForQuestionAnswering(BertPreTrainedModel):
|
||||
@add_start_docstrings_to_model_forward(BERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
|
||||
@add_code_sample_docstrings(
|
||||
processor_class=_TOKENIZER_FOR_DOC,
|
||||
checkpoint=_CHECKPOINT_FOR_DOC,
|
||||
checkpoint=_CHECKPOINT_FOR_QA,
|
||||
output_type=QuestionAnsweringModelOutput,
|
||||
config_class=_CONFIG_FOR_DOC,
|
||||
qa_target_start_index=_QA_TARGET_START_INDEX,
|
||||
qa_target_end_index=_QA_TARGET_END_INDEX,
|
||||
expected_output=_QA_EXPECTED_OUTPUT,
|
||||
expected_loss=_QA_EXPECTED_LOSS,
|
||||
)
|
||||
def forward(
|
||||
self,
|
||||
|
||||
@@ -65,10 +65,29 @@ from .configuration_bert import BertConfig
|
||||
|
||||
logger = logging.get_logger(__name__)
|
||||
|
||||
_CHECKPOINT_FOR_DOC = "bert-base-cased"
|
||||
_CHECKPOINT_FOR_DOC = "bert-base-uncased"
|
||||
_CONFIG_FOR_DOC = "BertConfig"
|
||||
_TOKENIZER_FOR_DOC = "BertTokenizer"
|
||||
|
||||
# TokenClassification docstring
|
||||
_CHECKPOINT_FOR_TOKEN_CLASSIFICATION = "dbmdz/bert-large-cased-finetuned-conll03-english"
|
||||
_TOKEN_CLASS_EXPECTED_OUTPUT = (
|
||||
"['O', 'I-ORG', 'I-ORG', 'I-ORG', 'O', 'O', 'O', 'O', 'O', 'I-LOC', 'O', 'I-LOC', " "'I-LOC'] "
|
||||
)
|
||||
_TOKEN_CLASS_EXPECTED_LOSS = 0.01
|
||||
|
||||
# QuestionAnswering docstring
|
||||
_CHECKPOINT_FOR_QA = "ydshieh/bert-base-cased-squad2"
|
||||
_QA_EXPECTED_OUTPUT = "'a nice puppet'"
|
||||
_QA_EXPECTED_LOSS = 7.41
|
||||
_QA_TARGET_START_INDEX = 14
|
||||
_QA_TARGET_END_INDEX = 15
|
||||
|
||||
# SequenceClassification docstring
|
||||
_CHECKPOINT_FOR_SEQUENCE_CLASSIFICATION = "ydshieh/bert-base-uncased-yelp-polarity"
|
||||
_SEQ_CLASS_EXPECTED_OUTPUT = "'LABEL_1'"
|
||||
_SEQ_CLASS_EXPECTED_LOSS = 0.01
|
||||
|
||||
TF_BERT_PRETRAINED_MODEL_ARCHIVE_LIST = [
|
||||
"bert-base-uncased",
|
||||
"bert-large-uncased",
|
||||
@@ -1197,11 +1216,11 @@ class TFBertForPreTraining(TFBertPreTrainedModel, TFBertPreTrainingLoss):
|
||||
|
||||
>>> tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
|
||||
>>> model = TFBertForPreTraining.from_pretrained("bert-base-uncased")
|
||||
>>> input_ids = tf.constant(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True))[
|
||||
... None, :
|
||||
>>> ] # Batch size 1
|
||||
>>> input_ids = tokenizer("Hello, my dog is cute", add_special_tokens=True, return_tensors="tf")
|
||||
>>> # Batch size 1
|
||||
|
||||
>>> outputs = model(input_ids)
|
||||
>>> prediction_scores, seq_relationship_scores = outputs[:2]
|
||||
>>> prediction_logits, seq_relationship_logits = outputs[:2]
|
||||
```"""
|
||||
outputs = self.bert(
|
||||
input_ids=input_ids,
|
||||
@@ -1285,6 +1304,8 @@ class TFBertForMaskedLM(TFBertPreTrainedModel, TFMaskedLanguageModelingLoss):
|
||||
checkpoint=_CHECKPOINT_FOR_DOC,
|
||||
output_type=TFMaskedLMOutput,
|
||||
config_class=_CONFIG_FOR_DOC,
|
||||
expected_output="'paris'",
|
||||
expected_loss=0.88,
|
||||
)
|
||||
def call(
|
||||
self,
|
||||
@@ -1606,9 +1627,11 @@ class TFBertForSequenceClassification(TFBertPreTrainedModel, TFSequenceClassific
|
||||
@add_start_docstrings_to_model_forward(BERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
|
||||
@add_code_sample_docstrings(
|
||||
processor_class=_TOKENIZER_FOR_DOC,
|
||||
checkpoint=_CHECKPOINT_FOR_DOC,
|
||||
checkpoint=_CHECKPOINT_FOR_SEQUENCE_CLASSIFICATION,
|
||||
output_type=TFSequenceClassifierOutput,
|
||||
config_class=_CONFIG_FOR_DOC,
|
||||
expected_output=_SEQ_CLASS_EXPECTED_OUTPUT,
|
||||
expected_loss=_SEQ_CLASS_EXPECTED_LOSS,
|
||||
)
|
||||
def call(
|
||||
self,
|
||||
@@ -1833,9 +1856,11 @@ class TFBertForTokenClassification(TFBertPreTrainedModel, TFTokenClassificationL
|
||||
@add_start_docstrings_to_model_forward(BERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
|
||||
@add_code_sample_docstrings(
|
||||
processor_class=_TOKENIZER_FOR_DOC,
|
||||
checkpoint=_CHECKPOINT_FOR_DOC,
|
||||
checkpoint=_CHECKPOINT_FOR_TOKEN_CLASSIFICATION,
|
||||
output_type=TFTokenClassifierOutput,
|
||||
config_class=_CONFIG_FOR_DOC,
|
||||
expected_output=_TOKEN_CLASS_EXPECTED_OUTPUT,
|
||||
expected_loss=_TOKEN_CLASS_EXPECTED_LOSS,
|
||||
)
|
||||
def call(
|
||||
self,
|
||||
@@ -1923,9 +1948,11 @@ class TFBertForQuestionAnswering(TFBertPreTrainedModel, TFQuestionAnsweringLoss)
|
||||
@add_start_docstrings_to_model_forward(BERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
|
||||
@add_code_sample_docstrings(
|
||||
processor_class=_TOKENIZER_FOR_DOC,
|
||||
checkpoint=_CHECKPOINT_FOR_DOC,
|
||||
checkpoint=_CHECKPOINT_FOR_QA,
|
||||
output_type=TFQuestionAnsweringModelOutput,
|
||||
config_class=_CONFIG_FOR_DOC,
|
||||
expected_output=_QA_EXPECTED_OUTPUT,
|
||||
expected_loss=_QA_EXPECTED_LOSS,
|
||||
)
|
||||
def call(
|
||||
self,
|
||||
|
||||
@@ -59,6 +59,23 @@ _CHECKPOINT_FOR_DOC = "google/mobilebert-uncased"
|
||||
_CONFIG_FOR_DOC = "MobileBertConfig"
|
||||
_TOKENIZER_FOR_DOC = "MobileBertTokenizer"
|
||||
|
||||
# TokenClassification docstring
|
||||
_CHECKPOINT_FOR_TOKEN_CLASSIFICATION = "mrm8488/mobilebert-finetuned-ner"
|
||||
_TOKEN_CLASS_EXPECTED_OUTPUT = "['I-ORG', 'I-ORG', 'O', 'O', 'O', 'O', 'O', 'I-LOC', 'O', 'I-LOC', 'I-LOC']"
|
||||
_TOKEN_CLASS_EXPECTED_LOSS = 0.03
|
||||
|
||||
# QuestionAnswering docstring
|
||||
_CHECKPOINT_FOR_QA = "csarron/mobilebert-uncased-squad-v2"
|
||||
_QA_EXPECTED_OUTPUT = "'a nice puppet'"
|
||||
_QA_EXPECTED_LOSS = 3.98
|
||||
_QA_TARGET_START_INDEX = 12
|
||||
_QA_TARGET_END_INDEX = 13
|
||||
|
||||
# SequenceClassification docstring
|
||||
_CHECKPOINT_FOR_SEQUENCE_CLASSIFICATION = "lordtt13/emo-mobilebert"
|
||||
_SEQ_CLASS_EXPECTED_OUTPUT = "'others'"
|
||||
_SEQ_CLASS_EXPECTED_LOSS = "4.72"
|
||||
|
||||
MOBILEBERT_PRETRAINED_MODEL_ARCHIVE_LIST = ["google/mobilebert-uncased"]
|
||||
|
||||
|
||||
@@ -962,9 +979,8 @@ class MobileBertForPreTraining(MobileBertPreTrainedModel):
|
||||
>>> tokenizer = MobileBertTokenizer.from_pretrained("google/mobilebert-uncased")
|
||||
>>> model = MobileBertForPreTraining.from_pretrained("google/mobilebert-uncased")
|
||||
|
||||
>>> input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze(
|
||||
... 0
|
||||
>>> ) # Batch size 1
|
||||
>>> input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze(0)
|
||||
>>> # Batch size 1
|
||||
>>> outputs = model(input_ids)
|
||||
|
||||
>>> prediction_logits = outputs.prediction_logits
|
||||
@@ -1039,6 +1055,8 @@ class MobileBertForMaskedLM(MobileBertPreTrainedModel):
|
||||
checkpoint=_CHECKPOINT_FOR_DOC,
|
||||
output_type=MaskedLMOutput,
|
||||
config_class=_CONFIG_FOR_DOC,
|
||||
expected_output="'paris'",
|
||||
expected_loss=0.57,
|
||||
)
|
||||
def forward(
|
||||
self,
|
||||
@@ -1229,9 +1247,11 @@ class MobileBertForSequenceClassification(MobileBertPreTrainedModel):
|
||||
@add_start_docstrings_to_model_forward(MOBILEBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
|
||||
@add_code_sample_docstrings(
|
||||
processor_class=_TOKENIZER_FOR_DOC,
|
||||
checkpoint=_CHECKPOINT_FOR_DOC,
|
||||
checkpoint=_CHECKPOINT_FOR_SEQUENCE_CLASSIFICATION,
|
||||
output_type=SequenceClassifierOutput,
|
||||
config_class=_CONFIG_FOR_DOC,
|
||||
expected_output=_SEQ_CLASS_EXPECTED_OUTPUT,
|
||||
expected_loss=_SEQ_CLASS_EXPECTED_LOSS,
|
||||
)
|
||||
def forward(
|
||||
self,
|
||||
@@ -1330,9 +1350,13 @@ class MobileBertForQuestionAnswering(MobileBertPreTrainedModel):
|
||||
@add_start_docstrings_to_model_forward(MOBILEBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
|
||||
@add_code_sample_docstrings(
|
||||
processor_class=_TOKENIZER_FOR_DOC,
|
||||
checkpoint=_CHECKPOINT_FOR_DOC,
|
||||
checkpoint=_CHECKPOINT_FOR_QA,
|
||||
output_type=QuestionAnsweringModelOutput,
|
||||
config_class=_CONFIG_FOR_DOC,
|
||||
qa_target_start_index=_QA_TARGET_START_INDEX,
|
||||
qa_target_end_index=_QA_TARGET_END_INDEX,
|
||||
expected_output=_QA_EXPECTED_OUTPUT,
|
||||
expected_loss=_QA_EXPECTED_LOSS,
|
||||
)
|
||||
def forward(
|
||||
self,
|
||||
@@ -1536,9 +1560,11 @@ class MobileBertForTokenClassification(MobileBertPreTrainedModel):
|
||||
@add_start_docstrings_to_model_forward(MOBILEBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
|
||||
@add_code_sample_docstrings(
|
||||
processor_class=_TOKENIZER_FOR_DOC,
|
||||
checkpoint=_CHECKPOINT_FOR_DOC,
|
||||
checkpoint=_CHECKPOINT_FOR_TOKEN_CLASSIFICATION,
|
||||
output_type=TokenClassifierOutput,
|
||||
config_class=_CONFIG_FOR_DOC,
|
||||
expected_output=_TOKEN_CLASS_EXPECTED_OUTPUT,
|
||||
expected_loss=_TOKEN_CLASS_EXPECTED_LOSS,
|
||||
)
|
||||
def forward(
|
||||
self,
|
||||
|
||||
@@ -63,6 +63,23 @@ _CHECKPOINT_FOR_DOC = "google/mobilebert-uncased"
|
||||
_CONFIG_FOR_DOC = "MobileBertConfig"
|
||||
_TOKENIZER_FOR_DOC = "MobileBertTokenizer"
|
||||
|
||||
# TokenClassification docstring
|
||||
_CHECKPOINT_FOR_TOKEN_CLASSIFICATION = "vumichien/mobilebert-finetuned-ner"
|
||||
_TOKEN_CLASS_EXPECTED_OUTPUT = "['I-ORG', 'I-ORG', 'O', 'O', 'O', 'O', 'O', 'I-LOC', 'O', 'I-LOC', 'I-LOC']"
|
||||
_TOKEN_CLASS_EXPECTED_LOSS = 0.03
|
||||
|
||||
# QuestionAnswering docstring
|
||||
_CHECKPOINT_FOR_QA = "vumichien/mobilebert-uncased-squad-v2"
|
||||
_QA_EXPECTED_OUTPUT = "'a nice puppet'"
|
||||
_QA_EXPECTED_LOSS = 3.98
|
||||
_QA_TARGET_START_INDEX = 12
|
||||
_QA_TARGET_END_INDEX = 13
|
||||
|
||||
# SequenceClassification docstring
|
||||
_CHECKPOINT_FOR_SEQUENCE_CLASSIFICATION = "vumichien/emo-mobilebert"
|
||||
_SEQ_CLASS_EXPECTED_OUTPUT = "'others'"
|
||||
_SEQ_CLASS_EXPECTED_LOSS = "4.72"
|
||||
|
||||
TF_MOBILEBERT_PRETRAINED_MODEL_ARCHIVE_LIST = [
|
||||
"google/mobilebert-uncased",
|
||||
# See all MobileBERT models at https://huggingface.co/models?filter=mobilebert
|
||||
@@ -1075,6 +1092,8 @@ class TFMobileBertForMaskedLM(TFMobileBertPreTrainedModel, TFMaskedLanguageModel
|
||||
checkpoint=_CHECKPOINT_FOR_DOC,
|
||||
output_type=TFMaskedLMOutput,
|
||||
config_class=_CONFIG_FOR_DOC,
|
||||
expected_output="'paris'",
|
||||
expected_loss=0.57,
|
||||
)
|
||||
def call(
|
||||
self,
|
||||
@@ -1265,9 +1284,11 @@ class TFMobileBertForSequenceClassification(TFMobileBertPreTrainedModel, TFSeque
|
||||
@add_start_docstrings_to_model_forward(MOBILEBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
|
||||
@add_code_sample_docstrings(
|
||||
processor_class=_TOKENIZER_FOR_DOC,
|
||||
checkpoint=_CHECKPOINT_FOR_DOC,
|
||||
checkpoint=_CHECKPOINT_FOR_SEQUENCE_CLASSIFICATION,
|
||||
output_type=TFSequenceClassifierOutput,
|
||||
config_class=_CONFIG_FOR_DOC,
|
||||
expected_output=_SEQ_CLASS_EXPECTED_OUTPUT,
|
||||
expected_loss=_SEQ_CLASS_EXPECTED_LOSS,
|
||||
)
|
||||
def call(
|
||||
self,
|
||||
@@ -1357,9 +1378,13 @@ class TFMobileBertForQuestionAnswering(TFMobileBertPreTrainedModel, TFQuestionAn
|
||||
@add_start_docstrings_to_model_forward(MOBILEBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
|
||||
@add_code_sample_docstrings(
|
||||
processor_class=_TOKENIZER_FOR_DOC,
|
||||
checkpoint=_CHECKPOINT_FOR_DOC,
|
||||
checkpoint=_CHECKPOINT_FOR_QA,
|
||||
output_type=TFQuestionAnsweringModelOutput,
|
||||
config_class=_CONFIG_FOR_DOC,
|
||||
qa_target_start_index=_QA_TARGET_START_INDEX,
|
||||
qa_target_end_index=_QA_TARGET_END_INDEX,
|
||||
expected_output=_QA_EXPECTED_OUTPUT,
|
||||
expected_loss=_QA_EXPECTED_LOSS,
|
||||
)
|
||||
def call(
|
||||
self,
|
||||
@@ -1601,9 +1626,11 @@ class TFMobileBertForTokenClassification(TFMobileBertPreTrainedModel, TFTokenCla
|
||||
@add_start_docstrings_to_model_forward(MOBILEBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
|
||||
@add_code_sample_docstrings(
|
||||
processor_class=_TOKENIZER_FOR_DOC,
|
||||
checkpoint=_CHECKPOINT_FOR_DOC,
|
||||
checkpoint=_CHECKPOINT_FOR_TOKEN_CLASSIFICATION,
|
||||
output_type=TFTokenClassifierOutput,
|
||||
config_class=_CONFIG_FOR_DOC,
|
||||
expected_output=_TOKEN_CLASS_EXPECTED_OUTPUT,
|
||||
expected_loss=_TOKEN_CLASS_EXPECTED_LOSS,
|
||||
)
|
||||
def call(
|
||||
self,
|
||||
|
||||
@@ -5,6 +5,8 @@ docs/source/en/model_doc/tapex.mdx
|
||||
src/transformers/generation_utils.py
|
||||
src/transformers/models/bart/modeling_bart.py
|
||||
src/transformers/models/beit/modeling_beit.py
|
||||
src/transformers/models/bert/modeling_bert.py
|
||||
src/transformers/models/bert/modeling_tf_bert.py
|
||||
src/transformers/models/bigbird_pegasus/modeling_bigbird_pegasus.py
|
||||
src/transformers/models/blenderbot/modeling_blenderbot.py
|
||||
src/transformers/models/blenderbot_small/modeling_blenderbot_small.py
|
||||
@@ -16,6 +18,8 @@ src/transformers/models/glpn/modeling_glpn.py
|
||||
src/transformers/models/hubert/modeling_hubert.py
|
||||
src/transformers/models/marian/modeling_marian.py
|
||||
src/transformers/models/mbart/modeling_mbart.py
|
||||
src/transformers/models/mobilebert/modeling_mobilebert.py
|
||||
src/transformers/models/mobilebert/modeling_tf_mobilebert.py
|
||||
src/transformers/models/pegasus/modeling_pegasus.py
|
||||
src/transformers/models/plbart/modeling_plbart.py
|
||||
src/transformers/models/poolformer/modeling_poolformer.py
|
||||
|
||||
Reference in New Issue
Block a user