diff --git a/docs/source/task_summary.rst b/docs/source/task_summary.rst
index d92c849845..8ce9d239cb 100644
--- a/docs/source/task_summary.rst
+++ b/docs/source/task_summary.rst
@@ -513,7 +513,7 @@ Here, the model generates a random text with a total maximal length of *50* toke
 concerned, I will"*. The default arguments of ``PreTrainedModel.generate()`` can be directly overridden in the
 pipeline, as is shown above for the argument ``max_length``.
 
-Here is an example of text generation using ``XLNet`` and its tokenzier.
+Here is an example of text generation using ``XLNet`` and its tokenizer.
 
 .. code-block::
 
@@ -834,7 +834,7 @@ Here is an example of doing translation using a model and a tokenizer. The proce
 
 1. Instantiate a tokenizer and a model from the checkpoint name. Summarization is usually done using an encoder-decoder
    model, such as ``Bart`` or ``T5``.
-2. Define the article that should be summarizaed.
+2. Define the article that should be translated.
 3. Add the T5 specific prefix "translate English to German: "
 4. Use the ``PreTrainedModel.generate()`` method to perform the translation.
 
diff --git a/model_cards/joeddav/bart-large-mnli-yahoo-answers/README.md b/model_cards/joeddav/bart-large-mnli-yahoo-answers/README.md
index 3ec1be3432..8e2316f469 100644
--- a/model_cards/joeddav/bart-large-mnli-yahoo-answers/README.md
+++ b/model_cards/joeddav/bart-large-mnli-yahoo-answers/README.md
@@ -16,7 +16,7 @@ This model takes [facebook/bart-large-mnli](https://huggingface.co/facebook/bart
 
 You can play with an interactive demo of this zero-shot technique with this model, as well as the non-finetuned [facebook/bart-large-mnli](https://huggingface.co/facebook/bart-large-mnli), [here](https://huggingface.co/zero-shot/).
 
-## Inteded Usage
+## Intended Usage
 
 This model was fine-tuned on topic classification and will perform best at zero-shot topic classification. Use `hypothesis_template="This text is about {}."` as this is the template used during fine-tuning.
 
diff --git a/model_cards/joeddav/xlm-roberta-large-xnli/README.md b/model_cards/joeddav/xlm-roberta-large-xnli/README.md
index a6652004a2..2141795692 100644
--- a/model_cards/joeddav/xlm-roberta-large-xnli/README.md
+++ b/model_cards/joeddav/xlm-roberta-large-xnli/README.md
@@ -24,7 +24,7 @@ widget:
 
 This model takes [xlm-roberta-large](https://huggingface.co/xlm-roberta-large) and fine-tunes it on a combination of NLI data in 15 languages. It is intended to be used for zero-shot text classification, such as with the Hugging Face [ZeroShotClassificationPipeline](https://huggingface.co/transformers/master/main_classes/pipelines.html#transformers.ZeroShotClassificationPipeline).
 
-## Inteded Usage
+## Intended Usage
 
 This model is intended to be used for zero-shot text classification, especially in languages other than English. It is fine-tuned on XNLI, which is a multilingual NLI dataset. The model can therefore be used with any of the languages in the XNLI corpus:
 
diff --git a/src/transformers/modeling_tf_utils.py b/src/transformers/modeling_tf_utils.py
index 00aeafb8af..5ab039967d 100644
--- a/src/transformers/modeling_tf_utils.py
+++ b/src/transformers/modeling_tf_utils.py
@@ -396,7 +396,7 @@ class TFPreTrainedModel(tf.keras.Model, TFModelUtilsMixin, TFGenerationMixin):
             new_num_tokens (:obj:`int`, `optional`):
                 The number of new tokens in the embedding matrix. Increasing the size will add newly initialized
                 vectors at the end. Reducing the size will remove vectors from the end. If not provided or :obj:`None`,
-                just returns a pointer to the input tokens :obj:`tf.Variable` module of the model wihtout doing
+                just returns a pointer to the input tokens :obj:`tf.Variable` module of the model without doing
                 anything.
 
         Return:
@@ -442,7 +442,7 @@ class TFPreTrainedModel(tf.keras.Model, TFModelUtilsMixin, TFGenerationMixin):
 
                 Increasing the size will add newly initialized vectors at the end. Reducing the size will remove
                 vectors from the end. If not provided or :obj:`None`, just returns a pointer to the input tokens
-                :obj:`tf.Variable`` module of the model wihtout doing anything.
+                :obj:`tf.Variable` module of the model without doing anything.
 
         Return:
             :obj:`tf.Variable`: Pointer to the resized Embedding Module or the old Embedding Module if
diff --git a/src/transformers/modeling_utils.py b/src/transformers/modeling_utils.py
index 6cdafc7fc5..5a22b2215f 100755
--- a/src/transformers/modeling_utils.py
+++ b/src/transformers/modeling_utils.py
@@ -539,7 +539,7 @@ class PreTrainedModel(nn.Module, ModuleUtilsMixin, GenerationMixin):
                         ) != len(decoder_modules):
                             # this can happen if the name corresponds to the position in a list module list of layers
                             # in this case the decoder has added a cross-attention that the encoder does not have
-                            # thus skip this step and substract one layer pos from encoder
+                            # thus skip this step and subtract one layer pos from encoder
                             encoder_layer_pos -= 1
                             continue
                     elif name not in encoder_modules:
@@ -598,7 +598,7 @@ class PreTrainedModel(nn.Module, ModuleUtilsMixin, GenerationMixin):
             new_num_tokens (:obj:`int`, `optional`):
                 The number of new tokens in the embedding matrix. Increasing the size will add newly initialized
                 vectors at the end. Reducing the size will remove vectors from the end. If not provided or :obj:`None`,
-                just returns a pointer to the input tokens :obj:`torch.nn.Embedding` module of the model wihtout doing
+                just returns a pointer to the input tokens :obj:`torch.nn.Embedding` module of the model without doing
                 anything.
 
         Return:
@@ -639,7 +639,7 @@ class PreTrainedModel(nn.Module, ModuleUtilsMixin, GenerationMixin):
 
                 Increasing the size will add newly initialized vectors at the end. Reducing the size will remove
                 vectors from the end. If not provided or :obj:`None`, just returns a pointer to the input tokens
-                :obj:`torch.nn.Embedding`` module of the model wihtout doing anything.
+                :obj:`torch.nn.Embedding` module of the model without doing anything.
 
         Return:
             :obj:`torch.nn.Embedding`: Pointer to the resized Embedding Module or the old Embedding Module if
@@ -1366,7 +1366,7 @@ class SQuADHead(nn.Module):
                 Mask for tokens at invalid position, such as query and special symbols (PAD, SEP, CLS). 1.0 means token
                 should be masked.
             return_dict (:obj:`bool`, `optional`, defaults to :obj:`False`):
-                Whether or not to return a :class:`~transformers.file_utils.ModelOuput` instead of a plain tuple.
+                Whether or not to return a :class:`~transformers.file_utils.ModelOutput` instead of a plain tuple.
 
         Returns:
         """
@@ -1652,7 +1652,7 @@ def apply_chunking_to_forward(
             The input tensors of ``forward_fn`` which will be chunked
 
     Returns:
-        :obj:`torch.Tensor`: A tensor with the same shape as the :obj:`foward_fn` would have given if applied`.
+        :obj:`torch.Tensor`: A tensor with the same shape as the :obj:`forward_fn` would have given if applied.
 
 
     Examples::
@@ -1673,7 +1673,7 @@ def apply_chunking_to_forward(
         input_tensor.shape == tensor_shape for input_tensor in input_tensors
     ), "All input tenors have to be of the same shape"
 
-    # inspect.signature exist since python 3.5 and is a python method -> no problem with backward compability
+    # inspect.signature exists since python 3.5 and is a python method -> no problem with backward compatibility
     num_args_in_forward_chunk_fn = len(inspect.signature(forward_fn).parameters)
     assert num_args_in_forward_chunk_fn == len(
         input_tensors
diff --git a/src/transformers/pipelines.py b/src/transformers/pipelines.py
index f48edb060c..bf7377f4e6 100755
--- a/src/transformers/pipelines.py
+++ b/src/transformers/pipelines.py
@@ -1057,12 +1057,12 @@ class ZeroShotClassificationPipeline(Pipeline):
         return -1
 
     def _parse_and_tokenize(
-        self, sequences, candidal_labels, hypothesis_template, padding=True, add_special_tokens=True, **kwargs
+        self, sequences, candidate_labels, hypothesis_template, padding=True, add_special_tokens=True, **kwargs
     ):
         """
         Parse arguments and tokenize only_first so that hypothesis (label) is not truncated
         """
-        sequence_pairs = self._args_parser(sequences, candidal_labels, hypothesis_template)
+        sequence_pairs = self._args_parser(sequences, candidate_labels, hypothesis_template)
         inputs = self.tokenizer(
             sequence_pairs,
             add_special_tokens=add_special_tokens,
@@ -2758,7 +2758,9 @@ def pipeline(
             - :obj:`"fill-mask"`: will return a :class:`~transformers.FillMaskPipeline`.
             - :obj:`"summarization"`: will return a :class:`~transformers.SummarizationPipeline`.
             - :obj:`"translation_xx_to_yy"`: will return a :class:`~transformers.TranslationPipeline`.
+            - :obj:`"text2text-generation"`: will return a :class:`~transformers.Text2TextGenerationPipeline`.
             - :obj:`"text-generation"`: will return a :class:`~transformers.TextGenerationPipeline`.
+            - :obj:`"zero-shot-classification"`: will return a :class:`~transformers.ZeroShotClassificationPipeline`.
             - :obj:`"conversation"`: will return a :class:`~transformers.ConversationalPipeline`.
         model (:obj:`str` or :obj:`~transformers.PreTrainedModel` or :obj:`~transformers.TFPreTrainedModel`, `optional`):
             The model that will be used by the pipeline to make predictions. This can be a model identifier or an