From 238449414f88d94ded35e80459bb6412d8ab42cf Mon Sep 17 00:00:00 2001
From: Maria Khalusova <kafooster@gmail.com>
Date: Wed, 25 Jan 2023 11:33:39 -0500
Subject: [PATCH] Documentation code sample fixes (#21302)

* Fixed the following:
pipe -> pipeline
out in pipe(data()) is a list of dicts, not a dict

* Fixed the TypeError: __init__() missing 1 required positional argument: 'key'

* Added a tip: code sample requires additional libraries to run

* Fixed custom config's name

* added seqeval to the required libraries

* fixed a missing dependency,
fixed metric naming,
added checkpoint to fix the datacollator

* added checkpoint to fix the datacollator,
added missing dependency
---
 docs/source/en/create_a_model.mdx             |  4 ++--
 docs/source/en/pipeline_tutorial.mdx          | 18 +++++++++++++++---
 docs/source/en/tasks/summarization.mdx        | 13 +++++++------
 docs/source/en/tasks/token_classification.mdx |  2 +-
 docs/source/en/tasks/translation.mdx          | 15 ++++++++-------
 5 files changed, 33 insertions(+), 19 deletions(-)
diff --git a/docs/source/en/create_a_model.mdx b/docs/source/en/create_a_model.mdx
index b0bafa4589..5c736f1d79 100644
--- a/docs/source/en/create_a_model.mdx
+++ b/docs/source/en/create_a_model.mdx
@@ -95,7 +95,7 @@ Once you are satisfied with your model configuration, you can save it with [`~Pr
 To reuse the configuration file, load it with [`~PretrainedConfig.from_pretrained`]:
 
 ```py
->>> my_config = DistilBertConfig.from_pretrained("./your_model_save_path/my_config.json")
+>>> my_config = DistilBertConfig.from_pretrained("./your_model_save_path/config.json")
 ```
 
 <Tip>
@@ -115,7 +115,7 @@ Load your custom configuration attributes into the model:
 ```py
 >>> from transformers import DistilBertModel
 
->>> my_config = DistilBertConfig.from_pretrained("./your_model_save_path/my_config.json")
+>>> my_config = DistilBertConfig.from_pretrained("./your_model_save_path/config.json")
 >>> model = DistilBertModel(my_config)
 ```
 
diff --git a/docs/source/en/pipeline_tutorial.mdx b/docs/source/en/pipeline_tutorial.mdx
index 4be43484e0..8560d856f3 100644
--- a/docs/source/en/pipeline_tutorial.mdx
+++ b/docs/source/en/pipeline_tutorial.mdx
@@ -156,10 +156,10 @@ def data():
         yield f"My example {i}"
 
 
-pipe = pipe(model="gpt2", device=0)
+pipe = pipeline(model="gpt2", device=0)
 generated_characters = 0
 for out in pipe(data()):
-    generated_characters += len(out["generated_text"])
+    generated_characters += len(out[0]["generated_text"])
 ```
 
 The iterator `data()` yields each result, and the pipeline automatically
@@ -175,11 +175,12 @@ The simplest way to iterate over a dataset is to just load one from 🤗 [Datase
 ```py
 # KeyDataset is a util that will just output the item we're interested in.
 from transformers.pipelines.pt_utils import KeyDataset
+from datasets import load_dataset
 
 pipe = pipeline(model="hf-internal-testing/tiny-random-wav2vec2", device=0)
 dataset = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation[:10]")
 
-for out in pipe(KeyDataset(dataset["audio"])):
+for out in pipe(KeyDataset(dataset, "audio")):
     print(out)
 ```
 
@@ -246,3 +247,14 @@ For example, if you use this [invoice image](https://huggingface.co/spaces/impir
 ... )
 [{'score': 0.42514941096305847, 'answer': 'us-001', 'start': 16, 'end': 16}]
 ```
+
+<Tip>
+
+To run the example above, you need to have [`pytesseract`](https://pypi.org/project/pytesseract/) installed in addition to 🤗 Transformers:
+
+```bash
+sudo apt install -y tesseract-ocr
+pip install pytesseract
+```
+
+</Tip>
\ No newline at end of file
diff --git a/docs/source/en/tasks/summarization.mdx b/docs/source/en/tasks/summarization.mdx
index 1663c1f713..879077d5cc 100644
--- a/docs/source/en/tasks/summarization.mdx
+++ b/docs/source/en/tasks/summarization.mdx
@@ -33,7 +33,7 @@ See the summarization [task page](https://huggingface.co/tasks/summarization) fo
 Before you begin, make sure you have all the necessary libraries installed:
 
 ```bash
-pip install transformers datasets evaluate
+pip install transformers datasets evaluate rouge_score
 ```
 
 We encourage you to login to your Hugging Face account so you can upload and share your model with the community. When prompted, enter your token to login:
@@ -81,7 +81,8 @@ The next step is to load a T5 tokenizer to process `text` and `summary`:
 ```py
 >>> from transformers import AutoTokenizer
 
->>> tokenizer = AutoTokenizer.from_pretrained("t5-small")
+>>> checkpoint = "t5-small"
+>>> tokenizer = AutoTokenizer.from_pretrained(checkpoint)
 ```
 
 The preprocessing function you want to create needs to:
@@ -117,14 +118,14 @@ Now create a batch of examples using [`DataCollatorForSeq2Seq`]. It's more effic
 ```py
 >>> from transformers import DataCollatorForSeq2Seq
 
->>> data_collator = DataCollatorForSeq2Seq(tokenizer=tokenizer, model=model)
+>>> data_collator = DataCollatorForSeq2Seq(tokenizer=tokenizer, model=checkpoint)
 ```
 </pt>
 <tf>
 ```py
 >>> from transformers import DataCollatorForSeq2Seq
 
->>> data_collator = DataCollatorForSeq2Seq(tokenizer=tokenizer, model=model, return_tensors="tf")
+>>> data_collator = DataCollatorForSeq2Seq(tokenizer=tokenizer, model=checkpoint, return_tensors="tf")
 ```
 </tf>
 </frameworkcontent>
@@ -175,7 +176,7 @@ You're ready to start training your model now! Load T5 with [`AutoModelForSeq2Se
 ```py
 >>> from transformers import AutoModelForSeq2SeqLM, Seq2SeqTrainingArguments, Seq2SeqTrainer
 
->>> model = AutoModelForSeq2SeqLM.from_pretrained("t5-small")
+>>> model = AutoModelForSeq2SeqLM.from_pretrained(checkpoint)
 ```
 
 At this point, only three steps remain:
@@ -237,7 +238,7 @@ Then you can load T5 with [`TFAutoModelForSeq2SeqLM`]:
 ```py
 >>> from transformers import TFAutoModelForSeq2SeqLM
 
->>> model = TFAutoModelForSeq2SeqLM.from_pretrained("t5-small")
+>>> model = TFAutoModelForSeq2SeqLM.from_pretrained(checkpoint)
 ```
 
 Convert your datasets to the `tf.data.Dataset` format with [`~transformers.TFPreTrainedModel.prepare_tf_dataset`]:
diff --git a/docs/source/en/tasks/token_classification.mdx b/docs/source/en/tasks/token_classification.mdx
index 8c7ceac48f..64ad1d2543 100644
--- a/docs/source/en/tasks/token_classification.mdx
+++ b/docs/source/en/tasks/token_classification.mdx
@@ -32,7 +32,7 @@ See the token classification [task page](https://huggingface.co/tasks/token-clas
 Before you begin, make sure you have all the necessary libraries installed:
 
 ```bash
-pip install transformers datasets evaluate
+pip install transformers datasets evaluate seqeval
 ```
 
 We encourage you to login to your Hugging Face account so you can upload and share your model with the community. When prompted, enter your token to login:
diff --git a/docs/source/en/tasks/translation.mdx b/docs/source/en/tasks/translation.mdx
index 318cb2b1a9..5f0a7fe385 100644
--- a/docs/source/en/tasks/translation.mdx
+++ b/docs/source/en/tasks/translation.mdx
@@ -30,7 +30,7 @@ See the translation [task page](https://huggingface.co/tasks/translation) for mo
 Before you begin, make sure you have all the necessary libraries installed:
 
 ```bash
-pip install transformers datasets evaluate
+pip install transformers datasets evaluate sacrebleu
 ```
 
 We encourage you to login to your Hugging Face account so you can upload and share your model with the community. When prompted, enter your token to login:
@@ -77,7 +77,8 @@ The next step is to load a T5 tokenizer to process the English-French language p
 ```py
 >>> from transformers import AutoTokenizer
 
->>> tokenizer = AutoTokenizer.from_pretrained("t5-small")
+>>> checkpoint = "t5-small"
+>>> tokenizer = AutoTokenizer.from_pretrained(checkpoint)
 ```
 
 The preprocessing function you want to create needs to:
@@ -112,7 +113,7 @@ Now create a batch of examples using [`DataCollatorForSeq2Seq`]. It's more effic
 ```py
 >>> from transformers import DataCollatorForSeq2Seq
 
->>> data_collator = DataCollatorForSeq2Seq(tokenizer=tokenizer, model=model)
+>>> data_collator = DataCollatorForSeq2Seq(tokenizer=tokenizer, model=checkpoint)
 ```
 </pt>
 <tf>
@@ -120,7 +121,7 @@ Now create a batch of examples using [`DataCollatorForSeq2Seq`]. It's more effic
 ```py
 >>> from transformers import DataCollatorForSeq2Seq
 
->>> data_collator = DataCollatorForSeq2Seq(tokenizer=tokenizer, model=model, return_tensors="tf")
+>>> data_collator = DataCollatorForSeq2Seq(tokenizer=tokenizer, model=checkpoint, return_tensors="tf")
 ```
 </tf>
 </frameworkcontent>
@@ -132,7 +133,7 @@ Including a metric during training is often helpful for evaluating your model's
 ```py
 >>> import evaluate
 
->>> sacrebleu = evaluate.load("sacrebleu")
+>>> metric = evaluate.load("sacrebleu")
 ```
 
 Then create a function that passes your predictions and labels to [`~evaluate.EvaluationModule.compute`] to calculate the SacreBLEU score:
@@ -184,7 +185,7 @@ You're ready to start training your model now! Load T5 with [`AutoModelForSeq2Se
 ```py
 >>> from transformers import AutoModelForSeq2SeqLM, Seq2SeqTrainingArguments, Seq2SeqTrainer
 
->>> model = AutoModelForSeq2SeqLM.from_pretrained("t5-small")
+>>> model = AutoModelForSeq2SeqLM.from_pretrained(checkpoint)
 ```
 
 At this point, only three steps remain:
@@ -246,7 +247,7 @@ Then you can load T5 with [`TFAutoModelForSeq2SeqLM`]:
 ```py
 >>> from transformers import TFAutoModelForSeq2SeqLM
 
->>> model = TFAutoModelForSeq2SeqLM.from_pretrained("t5-small")
+>>> model = TFAutoModelForSeq2SeqLM.from_pretrained(checkpoint)
 ```
 
 Convert your datasets to the `tf.data.Dataset` format with [`~transformers.TFPreTrainedModel.prepare_tf_dataset`]: