From 5727dfcebe63b0bcdfe06f1d4e2670cb329037e3 Mon Sep 17 00:00:00 2001 From: Dan Tegzes <48134725+Tegzes@users.noreply.github.com> Date: Tue, 30 Aug 2022 15:46:21 +0300 Subject: [PATCH] Added Docstrings for Deberta and DebertaV2 [PyTorch] (#18610) * Added Doctest for Deberta Pytorch * Added path in documentation test file * Added docstrings for DebertaV2 * Revert "Added docstrings for DebertaV2" This reverts commit 307185e62a21b3bd0923444cc8a8af1747fd2600. * Added DebertaV2 Docstrings --- .../models/deberta/modeling_deberta.py | 45 +++++++++++++++++-- .../models/deberta_v2/modeling_deberta_v2.py | 44 ++++++++++++++++-- src/transformers/utils/doc.py | 2 +- utils/documentation_tests.txt | 2 + 4 files changed, 84 insertions(+), 9 deletions(-) diff --git a/src/transformers/models/deberta/modeling_deberta.py b/src/transformers/models/deberta/modeling_deberta.py index 0fbb66ba8f..581d9d1f4f 100644 --- a/src/transformers/models/deberta/modeling_deberta.py +++ b/src/transformers/models/deberta/modeling_deberta.py @@ -41,6 +41,32 @@ _CONFIG_FOR_DOC = "DebertaConfig" _TOKENIZER_FOR_DOC = "DebertaTokenizer" _CHECKPOINT_FOR_DOC = "microsoft/deberta-base" +# Masked LM docstring +_CHECKPOINT_FOR_MASKED_LM = "lsanochkin/deberta-large-feedback" +_MASKED_LM_EXPECTED_OUTPUT = "' Paris'" +_MASKED_LM_EXPECTED_LOSS = "0.54" + +# TokenClassification docstring +_CHECKPOINT_FOR_TOKEN_CLASSIFICATION = "dbsamu/deberta-base-finetuned-ner" +_TOKEN_CLASS_EXPECTED_OUTPUT = ( + "['LABEL_0', 'LABEL_0', 'LABEL_0', 'LABEL_0', 'LABEL_0', 'LABEL_0', 'LABEL_0', 'LABEL_0', 'LABEL_0', 'LABEL_0'," + " 'LABEL_0', 'LABEL_0']" +) +_TOKEN_CLASS_EXPECTED_LOSS = 0.04 + +# QuestionAnswering docstring +_CHECKPOINT_FOR_QA = "Palak/microsoft_deberta-large_squad" +_QA_EXPECTED_OUTPUT = "' a nice puppet'" +_QA_EXPECTED_LOSS = 0.14 +_QA_TARGET_START_INDEX = 12 +_QA_TARGET_END_INDEX = 14 + +# SequenceClassification docstring +_CHECKPOINT_FOR_SEQUENCE_CLASSIFICATION = "hf-internal-testing/tiny-random-deberta" +_SEQ_CLASS_EXPECTED_OUTPUT = "'LABEL_0'" +_SEQ_CLASS_EXPECTED_LOSS = "0.69" + + DEBERTA_PRETRAINED_MODEL_ARCHIVE_LIST = [ "microsoft/deberta-base", "microsoft/deberta-large", @@ -1032,9 +1058,12 @@ class DebertaForMaskedLM(DebertaPreTrainedModel): @add_start_docstrings_to_model_forward(DEBERTA_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_code_sample_docstrings( processor_class=_TOKENIZER_FOR_DOC, - checkpoint=_CHECKPOINT_FOR_DOC, + checkpoint=_CHECKPOINT_FOR_MASKED_LM, output_type=MaskedLMOutput, config_class=_CONFIG_FOR_DOC, + mask="[MASK]", + expected_output=_MASKED_LM_EXPECTED_OUTPUT, + expected_loss=_MASKED_LM_EXPECTED_LOSS, ) def forward( self, @@ -1173,9 +1202,11 @@ class DebertaForSequenceClassification(DebertaPreTrainedModel): @add_start_docstrings_to_model_forward(DEBERTA_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_code_sample_docstrings( processor_class=_TOKENIZER_FOR_DOC, - checkpoint=_CHECKPOINT_FOR_DOC, + checkpoint=_CHECKPOINT_FOR_SEQUENCE_CLASSIFICATION, output_type=SequenceClassifierOutput, config_class=_CONFIG_FOR_DOC, + expected_output=_SEQ_CLASS_EXPECTED_OUTPUT, + expected_loss=_SEQ_CLASS_EXPECTED_LOSS, ) def forward( self, @@ -1281,9 +1312,11 @@ class DebertaForTokenClassification(DebertaPreTrainedModel): @add_start_docstrings_to_model_forward(DEBERTA_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_code_sample_docstrings( processor_class=_TOKENIZER_FOR_DOC, - checkpoint=_CHECKPOINT_FOR_DOC, + checkpoint=_CHECKPOINT_FOR_TOKEN_CLASSIFICATION, output_type=TokenClassifierOutput, config_class=_CONFIG_FOR_DOC, + expected_output=_TOKEN_CLASS_EXPECTED_OUTPUT, + expected_loss=_TOKEN_CLASS_EXPECTED_LOSS, ) def forward( self, @@ -1356,9 +1389,13 @@ class DebertaForQuestionAnswering(DebertaPreTrainedModel): @add_start_docstrings_to_model_forward(DEBERTA_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_code_sample_docstrings( processor_class=_TOKENIZER_FOR_DOC, - checkpoint=_CHECKPOINT_FOR_DOC, + checkpoint=_CHECKPOINT_FOR_QA, output_type=QuestionAnsweringModelOutput, config_class=_CONFIG_FOR_DOC, + expected_output=_QA_EXPECTED_OUTPUT, + expected_loss=_QA_EXPECTED_LOSS, + qa_target_start_index=_QA_TARGET_START_INDEX, + qa_target_end_index=_QA_TARGET_END_INDEX, ) def forward( self, diff --git a/src/transformers/models/deberta_v2/modeling_deberta_v2.py b/src/transformers/models/deberta_v2/modeling_deberta_v2.py index 1a9252a7d3..46f7e00b96 100644 --- a/src/transformers/models/deberta_v2/modeling_deberta_v2.py +++ b/src/transformers/models/deberta_v2/modeling_deberta_v2.py @@ -43,6 +43,31 @@ _CONFIG_FOR_DOC = "DebertaV2Config" _TOKENIZER_FOR_DOC = "DebertaV2Tokenizer" _CHECKPOINT_FOR_DOC = "microsoft/deberta-v2-xlarge" +# Masked LM docstring +_CHECKPOINT_FOR_MASKED_LM = "hf-internal-testing/tiny-random-deberta-v2" +_MASKED_LM_EXPECTED_OUTPUT = "'enberry'" +_MASKED_LM_EXPECTED_LOSS = "11.85" + +# TokenClassification docstring +_CHECKPOINT_FOR_TOKEN_CLASSIFICATION = "hf-internal-testing/tiny-random-deberta-v2" +_TOKEN_CLASS_EXPECTED_OUTPUT = ( + "['LABEL_0', 'LABEL_0', 'LABEL_1', 'LABEL_0', 'LABEL_0', 'LABEL_1', 'LABEL_0', 'LABEL_0', 'LABEL_0', 'LABEL_0'," + " 'LABEL_0', 'LABEL_0']" +) +_TOKEN_CLASS_EXPECTED_LOSS = 0.61 + +# QuestionAnswering docstring +_CHECKPOINT_FOR_QA = "hf-internal-testing/tiny-random-deberta-v2" +_QA_EXPECTED_OUTPUT = "'was Jim Henson? Jim Henson was'" +_QA_EXPECTED_LOSS = 2.47 +_QA_TARGET_START_INDEX = 2 +_QA_TARGET_END_INDEX = 9 + +# SequenceClassification docstring +_CHECKPOINT_FOR_SEQUENCE_CLASSIFICATION = "hf-internal-testing/tiny-random-deberta-v2" +_SEQ_CLASS_EXPECTED_OUTPUT = "'LABEL_1'" +_SEQ_CLASS_EXPECTED_LOSS = "0.69" + DEBERTA_V2_PRETRAINED_MODEL_ARCHIVE_LIST = [ "microsoft/deberta-v2-xlarge", "microsoft/deberta-v2-xxlarge", @@ -1136,9 +1161,12 @@ class DebertaV2ForMaskedLM(DebertaV2PreTrainedModel): @add_start_docstrings_to_model_forward(DEBERTA_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_code_sample_docstrings( processor_class=_TOKENIZER_FOR_DOC, - checkpoint=_CHECKPOINT_FOR_DOC, + checkpoint=_CHECKPOINT_FOR_MASKED_LM, output_type=MaskedLMOutput, config_class=_CONFIG_FOR_DOC, + mask="[MASK]", + expected_output=_MASKED_LM_EXPECTED_OUTPUT, + expected_loss=_MASKED_LM_EXPECTED_LOSS, ) def forward( self, @@ -1278,9 +1306,11 @@ class DebertaV2ForSequenceClassification(DebertaV2PreTrainedModel): @add_start_docstrings_to_model_forward(DEBERTA_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_code_sample_docstrings( processor_class=_TOKENIZER_FOR_DOC, - checkpoint=_CHECKPOINT_FOR_DOC, + checkpoint=_CHECKPOINT_FOR_SEQUENCE_CLASSIFICATION, output_type=SequenceClassifierOutput, config_class=_CONFIG_FOR_DOC, + expected_output=_SEQ_CLASS_EXPECTED_OUTPUT, + expected_loss=_SEQ_CLASS_EXPECTED_LOSS, ) def forward( self, @@ -1387,9 +1417,11 @@ class DebertaV2ForTokenClassification(DebertaV2PreTrainedModel): @add_start_docstrings_to_model_forward(DEBERTA_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_code_sample_docstrings( processor_class=_TOKENIZER_FOR_DOC, - checkpoint=_CHECKPOINT_FOR_DOC, + checkpoint=_CHECKPOINT_FOR_TOKEN_CLASSIFICATION, output_type=TokenClassifierOutput, config_class=_CONFIG_FOR_DOC, + expected_output=_TOKEN_CLASS_EXPECTED_OUTPUT, + expected_loss=_TOKEN_CLASS_EXPECTED_LOSS, ) def forward( self, @@ -1463,9 +1495,13 @@ class DebertaV2ForQuestionAnswering(DebertaV2PreTrainedModel): @add_start_docstrings_to_model_forward(DEBERTA_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_code_sample_docstrings( processor_class=_TOKENIZER_FOR_DOC, - checkpoint=_CHECKPOINT_FOR_DOC, + checkpoint=_CHECKPOINT_FOR_QA, output_type=QuestionAnsweringModelOutput, config_class=_CONFIG_FOR_DOC, + expected_output=_QA_EXPECTED_OUTPUT, + expected_loss=_QA_EXPECTED_LOSS, + qa_target_start_index=_QA_TARGET_START_INDEX, + qa_target_end_index=_QA_TARGET_END_INDEX, ) def forward( self, diff --git a/src/transformers/utils/doc.py b/src/transformers/utils/doc.py index 6761dec9c9..9e3c7fce70 100644 --- a/src/transformers/utils/doc.py +++ b/src/transformers/utils/doc.py @@ -242,7 +242,7 @@ PT_SEQUENCE_CLASSIFICATION_SAMPLE = r""" >>> num_labels = len(model.config.id2label) >>> model = {model_class}.from_pretrained("{checkpoint}", num_labels=num_labels) - >>> labels = torch.tensor(1) + >>> labels = torch.tensor([1]) >>> loss = model(**inputs, labels=labels).loss >>> round(loss.item(), 2) {expected_loss} diff --git a/utils/documentation_tests.txt b/utils/documentation_tests.txt index b03dcf5117..7545b49860 100644 --- a/utils/documentation_tests.txt +++ b/utils/documentation_tests.txt @@ -26,6 +26,8 @@ src/transformers/models/ctrl/modeling_ctrl.py src/transformers/models/cvt/modeling_cvt.py src/transformers/models/data2vec/modeling_data2vec_audio.py src/transformers/models/data2vec/modeling_data2vec_vision.py +src/transformers/models/deberta/modeling_deberta.py +src/transformers/models/deberta_v2/modeling_deberta_v2.py src/transformers/models/deit/modeling_deit.py src/transformers/models/deit/modeling_tf_deit.py src/transformers/models/detr/modeling_detr.py