From 9badcecf694f174da929bcfe668ca57851960ad8 Mon Sep 17 00:00:00 2001
From: Yih-Dar <2521628+ydshieh@users.noreply.github.com>
Date: Thu, 24 Mar 2022 10:26:27 +0100
Subject: [PATCH] [Doctests] Make TFRoberta-like meaningful (#16370)

* update doc examples for TFRoberta

* fix style

* fix style

* use TF ckpt

* apply suggestion

* add the code file to test here

* fix style

Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
---
 .../models/roberta/modeling_tf_roberta.py     | 15 ++-
 src/transformers/utils/doc.py                 | 93 ++++++++++++++-----
 utils/documentation_tests.txt                 |  1 +
 3 files changed, 84 insertions(+), 25 deletions(-)
diff --git a/src/transformers/models/roberta/modeling_tf_roberta.py b/src/transformers/models/roberta/modeling_tf_roberta.py
index 4ae381451c..bbdf7ebf33 100644
--- a/src/transformers/models/roberta/modeling_tf_roberta.py
+++ b/src/transformers/models/roberta/modeling_tf_roberta.py
@@ -1076,6 +1076,9 @@ class TFRobertaForMaskedLM(TFRobertaPreTrainedModel, TFMaskedLanguageModelingLos
         checkpoint=_CHECKPOINT_FOR_DOC,
         output_type=TFMaskedLMOutput,
         config_class=_CONFIG_FOR_DOC,
+        mask="<mask>",
+        expected_output="' Paris'",
+        expected_loss=0.1,
     )
     def call(
         self,
@@ -1331,9 +1334,11 @@ class TFRobertaForSequenceClassification(TFRobertaPreTrainedModel, TFSequenceCla
     @add_start_docstrings_to_model_forward(ROBERTA_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
     @add_code_sample_docstrings(
         processor_class=_TOKENIZER_FOR_DOC,
-        checkpoint=_CHECKPOINT_FOR_DOC,
+        checkpoint="cardiffnlp/twitter-roberta-base-emotion",
         output_type=TFSequenceClassifierOutput,
         config_class=_CONFIG_FOR_DOC,
+        expected_output="'optimism'",
+        expected_loss=0.08,
     )
     def call(
         self,
@@ -1543,9 +1548,11 @@ class TFRobertaForTokenClassification(TFRobertaPreTrainedModel, TFTokenClassific
     @add_start_docstrings_to_model_forward(ROBERTA_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
     @add_code_sample_docstrings(
         processor_class=_TOKENIZER_FOR_DOC,
-        checkpoint=_CHECKPOINT_FOR_DOC,
+        checkpoint="ydshieh/roberta-large-ner-english",
         output_type=TFTokenClassifierOutput,
         config_class=_CONFIG_FOR_DOC,
+        expected_output="['O', 'ORG', 'ORG', 'O', 'O', 'O', 'O', 'O', 'LOC', 'O', 'LOC', 'LOC']",
+        expected_loss=0.01,
     )
     def call(
         self,
@@ -1628,9 +1635,11 @@ class TFRobertaForQuestionAnswering(TFRobertaPreTrainedModel, TFQuestionAnswerin
     @add_start_docstrings_to_model_forward(ROBERTA_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
     @add_code_sample_docstrings(
         processor_class=_TOKENIZER_FOR_DOC,
-        checkpoint=_CHECKPOINT_FOR_DOC,
+        checkpoint="ydshieh/roberta-base-squad2",
         output_type=TFQuestionAnsweringModelOutput,
         config_class=_CONFIG_FOR_DOC,
+        expected_output="' puppet'",
+        expected_loss=0.86,
     )
     def call(
         self,
diff --git a/src/transformers/utils/doc.py b/src/transformers/utils/doc.py
index 17f8adeb26..394d2aaa2f 100644
--- a/src/transformers/utils/doc.py
+++ b/src/transformers/utils/doc.py
@@ -618,15 +618,26 @@ TF_TOKEN_CLASSIFICATION_SAMPLE = r"""
     >>> tokenizer = {processor_class}.from_pretrained("{checkpoint}")
     >>> model = {model_class}.from_pretrained("{checkpoint}")
 
-    >>> inputs = tokenizer("Hello, my dog is cute", return_tensors="tf")
-    >>> input_ids = inputs["input_ids"]
-    >>> inputs["labels"] = tf.reshape(
-    ...     tf.constant([1] * tf.size(input_ids).numpy()), (-1, tf.size(input_ids))
-    >>> )  # Batch size 1
+    >>> inputs = tokenizer(
+    ...     "HuggingFace is a company based in Paris and New York", add_special_tokens=False, return_tensors="tf"
+    ... )
 
-    >>> outputs = model(inputs)
-    >>> loss = outputs.loss
-    >>> logits = outputs.logits
+    >>> logits = model(**inputs).logits
+    >>> predicted_token_class_ids = tf.math.argmax(logits, axis=-1)
+
+    >>> # Note that tokens are classified rather than input words which means that
+    >>> # there might be more predicted token classes than words.
+    >>> # Multiple token classes might account for the same word
+    >>> predicted_tokens_classes = [model.config.id2label[t] for t in predicted_token_class_ids[0].numpy().tolist()]
+    >>> predicted_tokens_classes
+    {expected_output}
+    ```
+
+    ```python
+    >>> labels = predicted_token_class_ids
+    >>> loss = tf.math.reduce_mean(model(**inputs, labels=labels).loss)
+    >>> round(float(loss), 2)
+    {expected_loss}
     ```
 """
 
@@ -641,13 +652,26 @@ TF_QUESTION_ANSWERING_SAMPLE = r"""
     >>> model = {model_class}.from_pretrained("{checkpoint}")
 
     >>> question, text = "Who was Jim Henson?", "Jim Henson was a nice puppet"
-    >>> input_dict = tokenizer(question, text, return_tensors="tf")
-    >>> outputs = model(input_dict)
-    >>> start_logits = outputs.start_logits
-    >>> end_logits = outputs.end_logits
 
-    >>> all_tokens = tokenizer.convert_ids_to_tokens(input_dict["input_ids"].numpy()[0])
-    >>> answer = " ".join(all_tokens[tf.math.argmax(start_logits, 1)[0] : tf.math.argmax(end_logits, 1)[0] + 1])
+    >>> inputs = tokenizer(question, text, return_tensors="tf")
+    >>> outputs = model(**inputs)
+
+    >>> answer_start_index = int(tf.math.argmax(outputs.start_logits, axis=-1)[0])
+    >>> answer_end_index = int(tf.math.argmax(outputs.end_logits, axis=-1)[0])
+
+    >>> predict_answer_tokens = inputs.input_ids[0, answer_start_index : answer_end_index + 1]
+    >>> tokenizer.decode(predict_answer_tokens)
+    {expected_output}
+    ```
+
+    ```python
+    >>> # target is "nice puppet"
+    >>> target_start_index, target_end_index = tf.constant([14]), tf.constant([15])
+
+    >>> outputs = model(**inputs, start_positions=target_start_index, end_positions=target_end_index)
+    >>> loss = tf.math.reduce_mean(outputs.loss)
+    >>> round(float(loss), 2)
+    {expected_loss}
     ```
 """
 
@@ -662,11 +686,23 @@ TF_SEQUENCE_CLASSIFICATION_SAMPLE = r"""
     >>> model = {model_class}.from_pretrained("{checkpoint}")
 
     >>> inputs = tokenizer("Hello, my dog is cute", return_tensors="tf")
-    >>> inputs["labels"] = tf.reshape(tf.constant(1), (-1, 1))  # Batch size 1
 
-    >>> outputs = model(inputs)
-    >>> loss = outputs.loss
-    >>> logits = outputs.logits
+    >>> logits = model(**inputs).logits
+
+    >>> predicted_class_id = int(tf.math.argmax(logits, axis=-1)[0])
+    >>> model.config.id2label[predicted_class_id]
+    {expected_output}
+    ```
+
+    ```python
+    >>> # To train a model on `num_labels` classes, you can pass `num_labels=num_labels` to `.from_pretrained(...)`
+    >>> num_labels = len(model.config.id2label)
+    >>> model = {model_class}.from_pretrained("{checkpoint}", num_labels=num_labels)
+
+    >>> labels = tf.constant(1)
+    >>> loss = model(**inputs, labels=labels).loss
+    >>> round(float(loss), 2)
+    {expected_loss}
     ```
 """
 
@@ -681,11 +717,24 @@ TF_MASKED_LM_SAMPLE = r"""
     >>> model = {model_class}.from_pretrained("{checkpoint}")
 
     >>> inputs = tokenizer("The capital of France is {mask}.", return_tensors="tf")
-    >>> inputs["labels"] = tokenizer("The capital of France is Paris.", return_tensors="tf")["input_ids"]
+    >>> logits = model(**inputs).logits
 
-    >>> outputs = model(inputs)
-    >>> loss = outputs.loss
-    >>> logits = outputs.logits
+    >>> # retrieve index of {mask}
+    >>> mask_token_index = tf.where(inputs.input_ids == tokenizer.mask_token_id)[0][1]
+
+    >>> predicted_token_id = tf.math.argmax(logits[0, mask_token_index], axis=-1)
+    >>> tokenizer.decode(predicted_token_id)
+    {expected_output}
+    ```
+
+    ```python
+    >>> labels = tokenizer("The capital of France is Paris.", return_tensors="tf")["input_ids"]
+    >>> # mask labels of non-{mask} tokens
+    >>> labels = tf.where(inputs.input_ids == tokenizer.mask_token_id, labels, -100)
+
+    >>> outputs = model(**inputs, labels=labels)
+    >>> round(float(outputs.loss), 2)
+    {expected_loss}
     ```
 """
 
diff --git a/utils/documentation_tests.txt b/utils/documentation_tests.txt
index 7d31045184..b5d9f8570c 100644
--- a/utils/documentation_tests.txt
+++ b/utils/documentation_tests.txt
@@ -30,6 +30,7 @@ src/transformers/models/poolformer/modeling_poolformer.py
 src/transformers/models/resnet/modeling_resnet.py
 src/transformers/models/resnet/modeling_resnet.py
 src/transformers/models/roberta/modeling_roberta.py
+src/transformers/models/roberta/modeling_tf_roberta.py
 src/transformers/models/segformer/modeling_segformer.py
 src/transformers/models/sew/modeling_sew.py
 src/transformers/models/sew_d/modeling_sew_d.py