From 5727dfcebe63b0bcdfe06f1d4e2670cb329037e3 Mon Sep 17 00:00:00 2001
From: Dan Tegzes <48134725+Tegzes@users.noreply.github.com>
Date: Tue, 30 Aug 2022 15:46:21 +0300
Subject: [PATCH] Added Docstrings for Deberta and DebertaV2 [PyTorch] (#18610)

* Added Doctest for Deberta Pytorch

* Added path in documentation test file

* Added docstrings for DebertaV2

* Revert "Added docstrings for DebertaV2"

This reverts commit 307185e62a21b3bd0923444cc8a8af1747fd2600.

* Added DebertaV2 Docstrings
---
 .../models/deberta/modeling_deberta.py        | 45 +++++++++++++++++--
 .../models/deberta_v2/modeling_deberta_v2.py  | 44 ++++++++++++++++--
 src/transformers/utils/doc.py                 |  2 +-
 utils/documentation_tests.txt                 |  2 +
 4 files changed, 84 insertions(+), 9 deletions(-)

diff --git a/src/transformers/models/deberta/modeling_deberta.py b/src/transformers/models/deberta/modeling_deberta.py
index 0fbb66ba8f..581d9d1f4f 100644
--- a/src/transformers/models/deberta/modeling_deberta.py
+++ b/src/transformers/models/deberta/modeling_deberta.py
@@ -41,6 +41,32 @@ _CONFIG_FOR_DOC = "DebertaConfig"
 _TOKENIZER_FOR_DOC = "DebertaTokenizer"
 _CHECKPOINT_FOR_DOC = "microsoft/deberta-base"
 
+# Masked LM docstring
+_CHECKPOINT_FOR_MASKED_LM = "lsanochkin/deberta-large-feedback"
+_MASKED_LM_EXPECTED_OUTPUT = "' Paris'"
+_MASKED_LM_EXPECTED_LOSS = "0.54"
+
+# TokenClassification docstring
+_CHECKPOINT_FOR_TOKEN_CLASSIFICATION = "dbsamu/deberta-base-finetuned-ner"
+_TOKEN_CLASS_EXPECTED_OUTPUT = (
+    "['LABEL_0', 'LABEL_0', 'LABEL_0', 'LABEL_0', 'LABEL_0', 'LABEL_0', 'LABEL_0', 'LABEL_0', 'LABEL_0', 'LABEL_0',"
+    " 'LABEL_0', 'LABEL_0']"
+)
+_TOKEN_CLASS_EXPECTED_LOSS = 0.04
+
+# QuestionAnswering docstring
+_CHECKPOINT_FOR_QA = "Palak/microsoft_deberta-large_squad"
+_QA_EXPECTED_OUTPUT = "' a nice puppet'"
+_QA_EXPECTED_LOSS = 0.14
+_QA_TARGET_START_INDEX = 12
+_QA_TARGET_END_INDEX = 14
+
+# SequenceClassification docstring
+_CHECKPOINT_FOR_SEQUENCE_CLASSIFICATION = "hf-internal-testing/tiny-random-deberta"
+_SEQ_CLASS_EXPECTED_OUTPUT = "'LABEL_0'"
+_SEQ_CLASS_EXPECTED_LOSS = "0.69"
+
+
 DEBERTA_PRETRAINED_MODEL_ARCHIVE_LIST = [
     "microsoft/deberta-base",
     "microsoft/deberta-large",
@@ -1032,9 +1058,12 @@ class DebertaForMaskedLM(DebertaPreTrainedModel):
     @add_start_docstrings_to_model_forward(DEBERTA_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
     @add_code_sample_docstrings(
         processor_class=_TOKENIZER_FOR_DOC,
-        checkpoint=_CHECKPOINT_FOR_DOC,
+        checkpoint=_CHECKPOINT_FOR_MASKED_LM,
         output_type=MaskedLMOutput,
         config_class=_CONFIG_FOR_DOC,
+        mask="[MASK]",
+        expected_output=_MASKED_LM_EXPECTED_OUTPUT,
+        expected_loss=_MASKED_LM_EXPECTED_LOSS,
     )
     def forward(
         self,
@@ -1173,9 +1202,11 @@ class DebertaForSequenceClassification(DebertaPreTrainedModel):
     @add_start_docstrings_to_model_forward(DEBERTA_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
     @add_code_sample_docstrings(
         processor_class=_TOKENIZER_FOR_DOC,
-        checkpoint=_CHECKPOINT_FOR_DOC,
+        checkpoint=_CHECKPOINT_FOR_SEQUENCE_CLASSIFICATION,
         output_type=SequenceClassifierOutput,
         config_class=_CONFIG_FOR_DOC,
+        expected_output=_SEQ_CLASS_EXPECTED_OUTPUT,
+        expected_loss=_SEQ_CLASS_EXPECTED_LOSS,
     )
     def forward(
         self,
@@ -1281,9 +1312,11 @@ class DebertaForTokenClassification(DebertaPreTrainedModel):
     @add_start_docstrings_to_model_forward(DEBERTA_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
     @add_code_sample_docstrings(
         processor_class=_TOKENIZER_FOR_DOC,
-        checkpoint=_CHECKPOINT_FOR_DOC,
+        checkpoint=_CHECKPOINT_FOR_TOKEN_CLASSIFICATION,
         output_type=TokenClassifierOutput,
         config_class=_CONFIG_FOR_DOC,
+        expected_output=_TOKEN_CLASS_EXPECTED_OUTPUT,
+        expected_loss=_TOKEN_CLASS_EXPECTED_LOSS,
     )
     def forward(
         self,
@@ -1356,9 +1389,13 @@ class DebertaForQuestionAnswering(DebertaPreTrainedModel):
     @add_start_docstrings_to_model_forward(DEBERTA_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
     @add_code_sample_docstrings(
         processor_class=_TOKENIZER_FOR_DOC,
-        checkpoint=_CHECKPOINT_FOR_DOC,
+        checkpoint=_CHECKPOINT_FOR_QA,
         output_type=QuestionAnsweringModelOutput,
         config_class=_CONFIG_FOR_DOC,
+        expected_output=_QA_EXPECTED_OUTPUT,
+        expected_loss=_QA_EXPECTED_LOSS,
+        qa_target_start_index=_QA_TARGET_START_INDEX,
+        qa_target_end_index=_QA_TARGET_END_INDEX,
     )
     def forward(
         self,
diff --git a/src/transformers/models/deberta_v2/modeling_deberta_v2.py b/src/transformers/models/deberta_v2/modeling_deberta_v2.py
index 1a9252a7d3..46f7e00b96 100644
--- a/src/transformers/models/deberta_v2/modeling_deberta_v2.py
+++ b/src/transformers/models/deberta_v2/modeling_deberta_v2.py
@@ -43,6 +43,31 @@ _CONFIG_FOR_DOC = "DebertaV2Config"
 _TOKENIZER_FOR_DOC = "DebertaV2Tokenizer"
 _CHECKPOINT_FOR_DOC = "microsoft/deberta-v2-xlarge"
 
+# Masked LM docstring
+_CHECKPOINT_FOR_MASKED_LM = "hf-internal-testing/tiny-random-deberta-v2"
+_MASKED_LM_EXPECTED_OUTPUT = "'enberry'"
+_MASKED_LM_EXPECTED_LOSS = "11.85"
+
+# TokenClassification docstring
+_CHECKPOINT_FOR_TOKEN_CLASSIFICATION = "hf-internal-testing/tiny-random-deberta-v2"
+_TOKEN_CLASS_EXPECTED_OUTPUT = (
+    "['LABEL_0', 'LABEL_0', 'LABEL_1', 'LABEL_0', 'LABEL_0', 'LABEL_1', 'LABEL_0', 'LABEL_0', 'LABEL_0', 'LABEL_0',"
+    " 'LABEL_0', 'LABEL_0']"
+)
+_TOKEN_CLASS_EXPECTED_LOSS = 0.61
+
+# QuestionAnswering docstring
+_CHECKPOINT_FOR_QA = "hf-internal-testing/tiny-random-deberta-v2"
+_QA_EXPECTED_OUTPUT = "'was Jim Henson? Jim Henson was'"
+_QA_EXPECTED_LOSS = 2.47
+_QA_TARGET_START_INDEX = 2
+_QA_TARGET_END_INDEX = 9
+
+# SequenceClassification docstring
+_CHECKPOINT_FOR_SEQUENCE_CLASSIFICATION = "hf-internal-testing/tiny-random-deberta-v2"
+_SEQ_CLASS_EXPECTED_OUTPUT = "'LABEL_1'"
+_SEQ_CLASS_EXPECTED_LOSS = "0.69"
+
 DEBERTA_V2_PRETRAINED_MODEL_ARCHIVE_LIST = [
     "microsoft/deberta-v2-xlarge",
     "microsoft/deberta-v2-xxlarge",
@@ -1136,9 +1161,12 @@ class DebertaV2ForMaskedLM(DebertaV2PreTrainedModel):
     @add_start_docstrings_to_model_forward(DEBERTA_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
     @add_code_sample_docstrings(
         processor_class=_TOKENIZER_FOR_DOC,
-        checkpoint=_CHECKPOINT_FOR_DOC,
+        checkpoint=_CHECKPOINT_FOR_MASKED_LM,
         output_type=MaskedLMOutput,
         config_class=_CONFIG_FOR_DOC,
+        mask="[MASK]",
+        expected_output=_MASKED_LM_EXPECTED_OUTPUT,
+        expected_loss=_MASKED_LM_EXPECTED_LOSS,
     )
     def forward(
         self,
@@ -1278,9 +1306,11 @@ class DebertaV2ForSequenceClassification(DebertaV2PreTrainedModel):
     @add_start_docstrings_to_model_forward(DEBERTA_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
     @add_code_sample_docstrings(
         processor_class=_TOKENIZER_FOR_DOC,
-        checkpoint=_CHECKPOINT_FOR_DOC,
+        checkpoint=_CHECKPOINT_FOR_SEQUENCE_CLASSIFICATION,
         output_type=SequenceClassifierOutput,
         config_class=_CONFIG_FOR_DOC,
+        expected_output=_SEQ_CLASS_EXPECTED_OUTPUT,
+        expected_loss=_SEQ_CLASS_EXPECTED_LOSS,
     )
     def forward(
         self,
@@ -1387,9 +1417,11 @@ class DebertaV2ForTokenClassification(DebertaV2PreTrainedModel):
     @add_start_docstrings_to_model_forward(DEBERTA_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
     @add_code_sample_docstrings(
         processor_class=_TOKENIZER_FOR_DOC,
-        checkpoint=_CHECKPOINT_FOR_DOC,
+        checkpoint=_CHECKPOINT_FOR_TOKEN_CLASSIFICATION,
         output_type=TokenClassifierOutput,
         config_class=_CONFIG_FOR_DOC,
+        expected_output=_TOKEN_CLASS_EXPECTED_OUTPUT,
+        expected_loss=_TOKEN_CLASS_EXPECTED_LOSS,
     )
     def forward(
         self,
@@ -1463,9 +1495,13 @@ class DebertaV2ForQuestionAnswering(DebertaV2PreTrainedModel):
     @add_start_docstrings_to_model_forward(DEBERTA_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
     @add_code_sample_docstrings(
         processor_class=_TOKENIZER_FOR_DOC,
-        checkpoint=_CHECKPOINT_FOR_DOC,
+        checkpoint=_CHECKPOINT_FOR_QA,
         output_type=QuestionAnsweringModelOutput,
         config_class=_CONFIG_FOR_DOC,
+        expected_output=_QA_EXPECTED_OUTPUT,
+        expected_loss=_QA_EXPECTED_LOSS,
+        qa_target_start_index=_QA_TARGET_START_INDEX,
+        qa_target_end_index=_QA_TARGET_END_INDEX,
     )
     def forward(
         self,
diff --git a/src/transformers/utils/doc.py b/src/transformers/utils/doc.py
index 6761dec9c9..9e3c7fce70 100644
--- a/src/transformers/utils/doc.py
+++ b/src/transformers/utils/doc.py
@@ -242,7 +242,7 @@ PT_SEQUENCE_CLASSIFICATION_SAMPLE = r"""
     >>> num_labels = len(model.config.id2label)
     >>> model = {model_class}.from_pretrained("{checkpoint}", num_labels=num_labels)
 
-    >>> labels = torch.tensor(1)
+    >>> labels = torch.tensor([1])
     >>> loss = model(**inputs, labels=labels).loss
     >>> round(loss.item(), 2)
     {expected_loss}
diff --git a/utils/documentation_tests.txt b/utils/documentation_tests.txt
index b03dcf5117..7545b49860 100644
--- a/utils/documentation_tests.txt
+++ b/utils/documentation_tests.txt
@@ -26,6 +26,8 @@ src/transformers/models/ctrl/modeling_ctrl.py
 src/transformers/models/cvt/modeling_cvt.py
 src/transformers/models/data2vec/modeling_data2vec_audio.py
 src/transformers/models/data2vec/modeling_data2vec_vision.py
+src/transformers/models/deberta/modeling_deberta.py
+src/transformers/models/deberta_v2/modeling_deberta_v2.py
 src/transformers/models/deit/modeling_deit.py
 src/transformers/models/deit/modeling_tf_deit.py
 src/transformers/models/detr/modeling_detr.py