Fix task summary doctest (#21200)

* add outputs to code snippets
* fix example text
* apply feedback
* style changes
* make style
2023-01-20 09:58:07 -08:00
parent 425ff71c4e
commit 142ad1a1cc
1 changed file with 92 additions and 18 deletions
--- a/docs/source/en/task_summary.mdx
+++ b/docs/source/en/task_summary.mdx
@@ -34,8 +34,14 @@ Audio classification is a task that labels audio data from a predefined set of c
 ```py
 >>> from transformers import pipeline

->>> classifier = pipeline(task="audio-classification")
->>> classifier("path/to/audio/file.mp3")
+>>> classifier = pipeline(task="audio-classification", model="superb/hubert-base-superb-er")
+>>> preds = classifier("https://huggingface.co/datasets/Narsil/asr_dummy/resolve/main/mlk.flac")
+>>> preds = [{"score": round(pred["score"], 4), "label": pred["label"]} for pred in preds]
+>>> preds
+[{'score': 0.4532, 'label': 'hap'},
+ {'score': 0.3622, 'label': 'sad'},
+ {'score': 0.0943, 'label': 'neu'},
+ {'score': 0.0903, 'label': 'ang'}]
 ```

 ### Automatic speech recognition
@@ -47,8 +53,9 @@ But one of the key challenges Transformer architectures have helped with is in l
 ```py
 >>> from transformers import pipeline

->>> transcriber = pipeline(task="automatic-speech-recognition")
->>> transcriber("path/to/audio/file.mp3")
+>>> transcriber = pipeline(task="automatic-speech-recognition", model="openai/whisper-small")
+>>> transcriber("https://huggingface.co/datasets/Narsil/asr_dummy/resolve/main/mlk.flac")
+{'text': ' I have a dream that one day this nation will rise up and live out the true meaning of its creed.'}
 ```

 ## Computer vision
@@ -73,7 +80,16 @@ Image classification labels an entire image from a predefined set of classes. Li
 >>> from transformers import pipeline

 >>> classifier = pipeline(task="image-classification")
->>> classifier("path/to/image/file.jpg")
+>>> preds = classifier(
+...     "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/pipeline-cat-chonk.jpeg"
+... )
+>>> preds = [{"score": round(pred["score"], 4), "label": pred["label"]} for pred in preds]
+>>> print(*preds, sep="\n")
+{'score': 0.4403, 'label': 'lynx, catamount'}
+{'score': 0.0343, 'label': 'cougar, puma, catamount, mountain lion, painter, panther, Felis concolor'}
+{'score': 0.0321, 'label': 'snow leopard, ounce, Panthera uncia'}
+{'score': 0.0235, 'label': 'Egyptian cat'}
+{'score': 0.023, 'label': 'tiger cat'}
 ```

 ### Object detection
@@ -88,7 +104,14 @@ Unlike image classification, object detection identifies multiple objects within
 >>> from transformers import pipeline

 >>> detector = pipeline(task="object-detection")
->>> detector("path/to/image/file.jpg")
+>>> preds = detector(
+...     "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/pipeline-cat-chonk.jpeg"
+... )
+>>> preds = [{"score": round(pred["score"], 4), "label": pred["label"], "box": pred["box"]} for pred in preds]
+>>> preds
+[{'score': 0.9865,
+  'label': 'cat',
+  'box': {'xmin': 178, 'ymin': 154, 'xmax': 882, 'ymax': 598}}]
 ```

 ### Image segmentation
@@ -104,7 +127,14 @@ Segmentation tasks are helpful in self-driving vehicles to create a pixel-level
 >>> from transformers import pipeline

 >>> segmenter = pipeline(task="image-segmentation")
->>> segmenter("path/to/image/file.jpg")
+>>> preds = segmenter(
+...     "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/pipeline-cat-chonk.jpeg"
+... )
+>>> preds = [{"score": round(pred["score"], 4), "label": pred["label"]} for pred in preds]
+>>> preds
+[{'score': 0.9856, 'label': 'LABEL_184'},
+ {'score': 0.9976, 'label': 'snow'},
+ {'score': 0.9962, 'label': 'cat'}]
 ```

 ### Depth estimation
@@ -120,7 +150,9 @@ There are two approaches to depth estimation:
 >>> from transformers import pipeline

 >>> depth_estimator = pipeline(task="depth-estimation")
->>> depth_estimator("path/to/image/file.jpg")
+>>> preds = depth_estimator(
+...     "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/pipeline-cat-chonk.jpeg"
+... )
 ```

 ## Natural language processing
@@ -138,7 +170,10 @@ Like classification tasks in any modality, text classification labels a sequence
 >>> from transformers import pipeline

 >>> classifier = pipeline(task="sentiment-analysis")
->>> classifier("Hugging Face is the best thing since sliced bread!")
+>>> preds = classifier("Hugging Face is the best thing since sliced bread!")
+>>> preds = [{"score": round(pred["score"], 4), "label": pred["label"]} for pred in preds]
+>>> preds
+[{'score': 0.9991, 'label': 'POSITIVE'}]
 ```

 ### Token classification
@@ -154,7 +189,26 @@ Two common types of token classification are:
 >>> from transformers import pipeline

 >>> classifier = pipeline(task="ner")
->>> classifier("Hugging Face is a French company based in New York City.")
+>>> preds = classifier("Hugging Face is a French company based in New York City.")
+>>> preds = [
+...     {
+...         "entity": pred["entity"],
+...         "score": round(pred["score"], 4),
+...         "index": pred["index"],
+...         "word": pred["word"],
+...         "start": pred["start"],
+...         "end": pred["end"],
+...     }
+...     for pred in preds
+... ]
+>>> print(*preds, sep="\n")
+{'entity': 'I-ORG', 'score': 0.9968, 'index': 1, 'word': 'Hu', 'start': 0, 'end': 2}
+{'entity': 'I-ORG', 'score': 0.9293, 'index': 2, 'word': '##gging', 'start': 2, 'end': 7}
+{'entity': 'I-ORG', 'score': 0.9763, 'index': 3, 'word': 'Face', 'start': 8, 'end': 12}
+{'entity': 'I-MISC', 'score': 0.9983, 'index': 6, 'word': 'French', 'start': 18, 'end': 24}
+{'entity': 'I-LOC', 'score': 0.999, 'index': 10, 'word': 'New', 'start': 42, 'end': 45}
+{'entity': 'I-LOC', 'score': 0.9987, 'index': 11, 'word': 'York', 'start': 46, 'end': 50}
+{'entity': 'I-LOC', 'score': 0.9992, 'index': 12, 'word': 'City', 'start': 51, 'end': 55}
 ```

 ### Question answering
@@ -171,10 +225,14 @@ There are two common types of question answering:
 >>> from transformers import pipeline

 >>> question_answerer = pipeline(task="question-answering")
->>> question_answerer(
+>>> preds = question_answerer(
 ...     question="What is the name of the repository?",
 ...     context="The name of the repository is huggingface/transformers",
 ... )
+>>> print(
+...     f"score: {round(preds['score'], 4)}, start: {preds['start']}, end: {preds['end']}, answer: {preds['answer']}"
+... )
+score: 0.9327, start: 30, end: 54, answer: huggingface/transformers
 ```

 ### Summarization
@@ -191,8 +249,9 @@ Like question answering, there are two types of summarization:

 >>> summarizer = pipeline(task="summarization")
 >>> summarizer(
-...     "Hugging Face is a French company based in New York City. Its headquarters are in DUMBO, therefore very close to the Manhattan Bridge which is visible from the window."
+...     "In this work, we presented the Transformer, the first sequence transduction model based entirely on attention, replacing the recurrent layers most commonly used in encoder-decoder architectures with multi-headed self-attention. For translation tasks, the Transformer can be trained significantly faster than architectures based on recurrent or convolutional layers. On both WMT 2014 English-to-German and WMT 2014 English-to-French translation tasks, we achieve a new state of the art. In the former task our best model outperforms even all previously reported ensembles."
 ... )
+[{'summary_text': ' The Transformer is the first sequence transduction model based entirely on attention . It replaces the recurrent layers most commonly used in encoder-decoder architectures with multi-headed self-attention . For translation tasks, the Transformer can be trained significantly faster than architectures based on recurrent or convolutional layers .'}]
 ```

 ### Translation
@@ -205,8 +264,9 @@ In the early days, translation models were mostly monolingual, but recently, the
 >>> from transformers import pipeline

 >>> text = "translate English to French: Hugging Face is a community-based open-source platform for machine learning."
->>> translator = pipeline(task="translation")
+>>> translator = pipeline(task="translation", model="t5-small")
 >>> translator(text)
+[{'translation_text': "Hugging Face est une tribune communautaire de l'apprentissage des machines."}]
 ```

 ### Language modeling
@@ -220,17 +280,31 @@ There are two types of language modeling:
    ```py
    >>> from transformers import pipeline

-    >>> prompt = "Hugging Face is a"
-    >>> text_generator = pipeline(task="text-generation")
-    >>> text_generator(prompt)
+    >>> prompt = "Hugging Face is a community-based open-source platform for machine learning."
+    >>> generator = pipeline(task="text-generation")
+    >>> generator(prompt)  # doctest: +SKIP
    ```

 * masked: the model's objective is to predict a masked token in a sequence with full access to the tokens in the sequence
    
    ```py
-    >>> text = "Hugging Face is a <mask> company based in New York City."
+    >>> text = "Hugging Face is a community-based open-source <mask> for machine learning."
    >>> fill_mask = pipeline(task="fill-mask")
-    >>> fill_mask(text, top_k=3)
+    >>> preds = fill_mask(text, top_k=1)
+    >>> preds = [
+    ...     {
+    ...         "score": round(pred["score"], 4),
+    ...         "token": pred["token"],
+    ...         "token_str": pred["token_str"],
+    ...         "sequence": pred["sequence"],
+    ...     }
+    ...     for pred in preds
+    ... ]
+    >>> preds
+    [{'score': 0.2236,
+      'token': 1761,
+      'token_str': ' platform',
+      'sequence': 'Hugging Face is a community-based open-source platform for machine learning.'}]
    ```

 Hopefully, this page has given you some more background information about all the types of tasks in each modality and the practical importance of each one. In the next [section](tasks_explained), you'll learn **how** 🤗 Transformers work to solve these tasks.