From be236361f1ab5e27bda5c224160321fbd8b67f24 Mon Sep 17 00:00:00 2001
From: Nicolas Patry <patry.nicolas@protonmail.com>
Date: Fri, 29 Oct 2021 11:34:18 +0200
Subject: [PATCH] Adding `batch_size` support for (almost) all pipelines
 (#13724)

* Tentative enabling of `batch_size` for pipelines.

* Add systematic test for pipeline batching.

* Enabling batch_size on almost all pipelines

- Not `zero-shot` (it's already passing stuff as batched so trickier)
- Not `QA` (preprocess uses squad features, we need to switch to real
tensors at this boundary).

* Adding `min_length_for_response` for conversational.

* Making CTC, speech mappings available regardless of framework.

* Attempt at fixing automatic tests (ffmpeg not enabled for fast tests)

* Removing ffmpeg dependency in tests.

* Small fixes.

* Slight cleanup.

* Adding docs

and addressing comments.

* Quality.

* Update docs/source/main_classes/pipelines.rst

Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com>

* Update src/transformers/pipelines/question_answering.py

Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com>

* Update src/transformers/pipelines/zero_shot_classification.py

Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com>

* Improving docs.

* Update docs/source/main_classes/pipelines.rst

Co-authored-by: Philipp Schmid <32632186+philschmid@users.noreply.github.com>

* N -> observed_batch_size

softmax trick.

* Follow `padding_side`.

* Supporting image pipeline batching (and padding).

* Rename `unbatch` -> `loader_batch`.

* unbatch_size forgot.

* Custom padding for offset mappings.

* Attempt to remove librosa.

* Adding require_audio.

* torchaudio.

* Back to using datasets librosa.

* Adding help to set a pad_token on the tokenizer.

* Update src/transformers/pipelines/base.py

Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com>

* Update src/transformers/pipelines/base.py

Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com>

* Update src/transformers/pipelines/base.py

Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com>

* Quality.

Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com>
Co-authored-by: Philipp Schmid <32632186+philschmid@users.noreply.github.com>
---
 docs/source/main_classes/pipelines.rst        | 143 +++++++++++++++
 src/transformers/__init__.py                  |   4 +
 .../pipelines/automatic_speech_recognition.py |   4 +
 src/transformers/pipelines/base.py            | 172 +++++++++++++++++-
 src/transformers/pipelines/conversational.py  |   8 +-
 src/transformers/pipelines/fill_mask.py       |   4 +-
 .../pipelines/object_detection.py             |   5 +-
 .../pipelines/question_answering.py           |   6 +
 .../pipelines/token_classification.py         |  13 +-
 .../pipelines/zero_shot_classification.py     |   3 +
 src/transformers/utils/dummy_pt_objects.py    |   6 +
 tests/test_pipelines_audio_classification.py  |  23 ++-
 ..._pipelines_automatic_speech_recognition.py |  57 +++++-
 tests/test_pipelines_common.py                | 118 +++++++++++-
 tests/test_pipelines_conversational.py        |   5 +-
 tests/test_pipelines_feature_extraction.py    |  17 +-
 tests/test_pipelines_fill_mask.py             |  18 +-
 tests/test_pipelines_image_classification.py  |  12 +-
 tests/test_pipelines_object_detection.py      |   7 +-
 tests/test_pipelines_question_answering.py    |  11 +-
 tests/test_pipelines_summarization.py         |   6 +-
 tests/test_pipelines_text2text_generation.py  |   4 +-
 tests/test_pipelines_text_classification.py   |   5 +-
 tests/test_pipelines_text_generation.py       |   8 +-
 tests/test_pipelines_token_classification.py  |   7 +-
 tests/test_pipelines_translation.py           |  19 +-
 tests/test_pipelines_zero_shot.py             |   8 +-
 27 files changed, 629 insertions(+), 64 deletions(-)

diff --git a/docs/source/main_classes/pipelines.rst b/docs/source/main_classes/pipelines.rst
index 1bc60d340f..146c504861 100644
--- a/docs/source/main_classes/pipelines.rst
+++ b/docs/source/main_classes/pipelines.rst
@@ -71,6 +71,11 @@ GPU. If it doesn't don't hesitate to create an issue.
 
 .. code-block::
 
+    import datasets
+    from transformers import pipeline
+    from transformers.pipelines.base import KeyDataset
+    import tqdm
+
     pipe = pipeline("automatic-speech-recognition", model="facebook/wav2vec2-base-960h", device=0)
     dataset = datasets.load_dataset("superb", name="asr", split="test")
 
@@ -85,6 +90,144 @@ GPU. If it doesn't don't hesitate to create an issue.
 
 .. autofunction:: transformers.pipeline
 
+Pipeline batching
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+All pipelines (except `zero-shot-classification` and `question-answering` currently) can use batching. This will work
+whenever the pipeline uses its streaming ability (so when passing lists or :obj:`Dataset`).
+
+.. code-block::
+
+    from transformers import pipeline                                                   
+    from transformers.pipelines.base import KeyDataset
+    import datasets
+    import tqdm                                                                         
+
+    dataset = datasets.load_dataset("imdb", name="plain_text", split="unsupervised")
+    pipe = pipeline("text-classification", device=0)
+    for out in pipe(KeyDataset(dataset, "text"), batch_size=8, truncation="only_first"):
+        print(out)
+        # [{'label': 'POSITIVE', 'score': 0.9998743534088135}]
+        # Exactly the same output as before, but the contents are passed
+        # as batches to the model
+
+
+.. warning::
+
+    However, this is not automatically a win for performance. It can be either a 10x speedup or 5x slowdown depending
+    on hardware, data and the actual model being used.
+
+    Example where it's mostly a speedup:
+
+
+.. code-block::
+
+    from transformers import pipeline                                                   
+    from torch.utils.data import Dataset                                                
+    import tqdm                                                                         
+
+
+    pipe = pipeline("text-classification", device=0)                                    
+
+
+    class MyDataset(Dataset):                                                           
+        def __len__(self):                                                              
+            return 5000                                                                 
+
+        def __getitem__(self, i):                                                       
+            return "This is a test"                                                     
+
+
+    dataset = MyDataset()   
+
+    for batch_size in [1, 8, 64, 256]:
+        print("-" * 30)                                                                     
+        print(f"Streaming batch_size={batch_size}")    
+        for out in tqdm.tqdm(pipe(dataset, batch_size=batch_size), total=len(dataset)):              
+            pass
+
+
+.. code-block::
+
+    # On GTX 970
+    ------------------------------
+    Streaming no batching
+    100%|██████████████████████████████████████████████████████████████████████| 5000/5000 [00:26<00:00, 187.52it/s]
+    ------------------------------
+    Streaming batch_size=8
+    100%|█████████████████████████████████████████████████████████████████████| 5000/5000 [00:04<00:00, 1205.95it/s]
+    ------------------------------
+    Streaming batch_size=64
+    100%|█████████████████████████████████████████████████████████████████████| 5000/5000 [00:02<00:00, 2478.24it/s]
+    ------------------------------
+    Streaming batch_size=256
+    100%|█████████████████████████████████████████████████████████████████████| 5000/5000 [00:01<00:00, 2554.43it/s]
+    (diminishing returns, saturated the GPU)
+
+
+Example where it's mostly a slowdown:
+
+.. code-block::
+
+    class MyDataset(Dataset):                                                           
+        def __len__(self):                                                              
+            return 5000                                                                 
+
+        def __getitem__(self, i):                                                       
+            if i % 64 == 0:                                                          
+                n = 100                                                              
+            else:                                                                    
+                n = 1                                                                
+            return "This is a test" * n
+
+This is an occasional very long sentence compared to the others. In that case, the **whole** batch will need to be 400
+tokens long, so the whole batch will be [64, 400] instead of [64, 4], leading to the high slowdown. Even worse, on
+bigger batches, the program simply crashes.
+
+
+.. code-block::
+
+    ------------------------------
+    Streaming no batching
+    100%|█████████████████████████████████████████████████████████████████████| 1000/1000 [00:05<00:00, 183.69it/s]
+    ------------------------------
+    Streaming batch_size=8
+    100%|█████████████████████████████████████████████████████████████████████| 1000/1000 [00:03<00:00, 265.74it/s]
+    ------------------------------
+    Streaming batch_size=64
+    100%|██████████████████████████████████████████████████████████████████████| 1000/1000 [00:26<00:00, 37.80it/s]
+    ------------------------------
+    Streaming batch_size=256
+      0%|                                                                                 | 0/1000 [00:00<?, ?it/s]
+    Traceback (most recent call last):
+      File "/home/nicolas/src/transformers/test.py", line 42, in <module>
+        for out in tqdm.tqdm(pipe(dataset, batch_size=256), total=len(dataset)):
+    ....
+        q = q / math.sqrt(dim_per_head)  # (bs, n_heads, q_length, dim_per_head)
+    RuntimeError: CUDA out of memory. Tried to allocate 376.00 MiB (GPU 0; 3.95 GiB total capacity; 1.72 GiB already allocated; 354.88 MiB free; 2.46 GiB reserved in total by PyTorch)
+
+
+There are no good (general) solutions for this problem, and your mileage may vary depending on
+your use cases.
+
+For users, a rule of thumb is:
+
+- **Measure performance on your load, with your hardware. Measure, measure, and keep measuring. Real numbers are the
+  only way to go.**
+- If you are latency constrained (live product doing inference), don't batch
+- If you are using CPU, don't batch.
+- If you are optimizing for throughput (you want to run your model on a bunch of static data), on GPU, then:
+
+      - If you have no clue about the size of the sequence_length ("natural" data), by default don't batch, measure and
+        try tentatively to add it, add OOM checks to recover when it will fail (and it will at some point if you don't
+        control the sequence_length.)
+      - If your sequence_length is super regular, then batching is more likely to be VERY interesting, measure and push
+        it until you get OOMs.
+      - The larger the GPU the more likely batching is going to be more interesting
+- As soon as you enable batching, make sure you can handle OOMs nicely.
+
+
+
 Implementing a pipeline
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
diff --git a/src/transformers/__init__.py b/src/transformers/__init__.py
index ef83e85a9f..ffc4e4bda7 100755
--- a/src/transformers/__init__.py
+++ b/src/transformers/__init__.py
@@ -584,6 +584,7 @@ if is_torch_available():
         [
             "MODEL_FOR_AUDIO_CLASSIFICATION_MAPPING",
             "MODEL_FOR_CAUSAL_LM_MAPPING",
+            "MODEL_FOR_CTC_MAPPING",
             "MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING",
             "MODEL_FOR_IMAGE_SEGMENTATION_MAPPING",
             "MODEL_FOR_MASKED_LM_MAPPING",
@@ -594,6 +595,7 @@ if is_torch_available():
             "MODEL_FOR_QUESTION_ANSWERING_MAPPING",
             "MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING",
             "MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING",
+            "MODEL_FOR_SPEECH_SEQ_2_SEQ_MAPPING",
             "MODEL_FOR_TABLE_QUESTION_ANSWERING_MAPPING",
             "MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING",
             "MODEL_MAPPING",
@@ -2430,6 +2432,7 @@ if TYPE_CHECKING:
         from .models.auto import (
             MODEL_FOR_AUDIO_CLASSIFICATION_MAPPING,
             MODEL_FOR_CAUSAL_LM_MAPPING,
+            MODEL_FOR_CTC_MAPPING,
             MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING,
             MODEL_FOR_IMAGE_SEGMENTATION_MAPPING,
             MODEL_FOR_MASKED_LM_MAPPING,
@@ -2440,6 +2443,7 @@ if TYPE_CHECKING:
             MODEL_FOR_QUESTION_ANSWERING_MAPPING,
             MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING,
             MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING,
+            MODEL_FOR_SPEECH_SEQ_2_SEQ_MAPPING,
             MODEL_FOR_TABLE_QUESTION_ANSWERING_MAPPING,
             MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING,
             MODEL_MAPPING,
diff --git a/src/transformers/pipelines/automatic_speech_recognition.py b/src/transformers/pipelines/automatic_speech_recognition.py
index c5c249916b..f1dd39c46d 100644
--- a/src/transformers/pipelines/automatic_speech_recognition.py
+++ b/src/transformers/pipelines/automatic_speech_recognition.py
@@ -169,6 +169,10 @@ class AutomaticSpeechRecognitionPipeline(Pipeline):
         elif model_class in MODEL_FOR_CTC_MAPPING.values():
             outputs = self.model(**model_inputs)
             tokens = outputs.logits.squeeze(0).argmax(dim=-1)
+        else:
+            logger.warning("This is an unknown class, treating it as CTC.")
+            outputs = self.model(**model_inputs)
+            tokens = outputs.logits.squeeze(0).argmax(dim=-1)
         return tokens
 
     def postprocess(self, model_outputs):
diff --git a/src/transformers/pipelines/base.py b/src/transformers/pipelines/base.py
index 6497e71f56..cf4414103b 100644
--- a/src/transformers/pipelines/base.py
+++ b/src/transformers/pipelines/base.py
@@ -25,6 +25,7 @@ from contextlib import contextmanager
 from os.path import abspath, exists
 from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, Union
 
+import numpy as np
 from packaging import version
 
 from ..feature_extraction_utils import PreTrainedFeatureExtractor
@@ -59,12 +60,80 @@ if TYPE_CHECKING:
 logger = logging.get_logger(__name__)
 
 
-def collate_fn(items):
+def no_collate_fn(items):
     if len(items) != 1:
         raise ValueError("This collate_fn is meant to be used with batch_size=1")
     return items[0]
 
 
+def _pad(items, key, padding_value, padding_side):
+    batch_size = len(items)
+    if isinstance(items[0][key], torch.Tensor):
+        # Others include `attention_mask` etc...
+        shape = items[0][key].shape
+        dim = len(shape)
+        if dim == 4:
+            # This is probable image so padding shouldn't be necessary
+            # B, C, H, W
+            return torch.cat([item[key] for item in items], dim=0)
+        max_length = max(item[key].shape[1] for item in items)
+        dtype = items[0][key].dtype
+
+        if dim == 2:
+            tensor = torch.zeros((batch_size, max_length), dtype=dtype) + padding_value
+        elif dim == 3:
+            tensor = torch.zeros((batch_size, max_length, shape[-1]), dtype=dtype) + padding_value
+
+        for i, item in enumerate(items):
+            if dim == 2:
+                if padding_side == "left":
+                    tensor[i, -len(item[key][0]) :] = item[key][0].clone()
+                else:
+                    tensor[i, : len(item[key][0])] = item[key][0].clone()
+            elif dim == 3:
+                if padding_side == "left":
+                    tensor[i, -len(item[key][0]) :, :] = item[key][0].clone()
+                else:
+                    tensor[i, : len(item[key][0]), :] = item[key][0].clone()
+        return tensor
+    else:
+        return [item[key] for item in items]
+
+
+def pad_collate_fn(tokenizer, feature_extractor):
+    padding_side = "right"
+    if tokenizer is None and feature_extractor is None:
+        raise ValueError("Pipeline without tokenizer or feature_extractor cannot do batching")
+    if tokenizer is not None:
+        if tokenizer.pad_token_id is None:
+            raise ValueError(
+                "Pipeline with tokenizer without pad_token cannot do batching. You can try to set it with "
+                "`pipe.tokenizer.pad_token_id = model.config.eos_token_id`."
+            )
+        else:
+            padding_value = tokenizer.pad_token_id
+            padding_side = tokenizer.padding_side
+    if feature_extractor is not None:
+        # Feature extractor can be images, where no padding is expected
+        padding_value = getattr(feature_extractor, "padding_value", None)
+        padding_side = getattr(feature_extractor, "padding_side", None)
+
+    def inner(items):
+        keys = set(items[0].keys())
+        for item in items:
+            if set(item.keys()) != keys:
+                raise ValueError(
+                    f"The elements of the batch contain different keys. Cannot batch them ({set(item.keys())} != {keys})"
+                )
+        # input_values, input_pixels, input_ids, ...
+        padded = {
+            key: _pad(items, key, padding_value if key.startswith("input_") else 0, padding_side) for key in keys
+        }
+        return padded
+
+    return inner
+
+
 def infer_framework_load_model(
     model,
     config: AutoConfig,
@@ -591,6 +660,13 @@ PIPELINE_INIT_ARGS = r"""
             is provided.
         task (:obj:`str`, defaults to :obj:`""`):
             A task-identifier for the pipeline.
+        num_workers (:obj:`int`, `optional`, defaults to 8):
+            When the pipeline will use `DataLoader` (when passing a dataset, on GPU for a Pytorch model), the number of
+            workers to be used.
+        batch_size (:obj:`int`, `optional`, defaults to 1):
+            When the pipeline will use `DataLoader` (when passing a dataset, on GPU for a Pytorch model), the size of
+            the batch to use, for inference this is not always beneficial, please read `Batching with pipelines
+            <https://huggingface.co/transformers/main_classes/pipelines.html#pipeline-batching>`_ .
         args_parser (:class:`~transformers.pipelines.ArgumentHandler`, `optional`):
             Reference to the object in charge of parsing supplied pipeline parameters.
         device (:obj:`int`, `optional`, defaults to -1):
@@ -617,10 +693,44 @@ if is_torch_available():
             return processed
 
     class PipelineIterator(IterableDataset):
-        def __init__(self, loader, infer, params):
+        def __init__(self, loader, infer, params, loader_batch_size=None):
+            """
+            Roughly equivalent to
+
+            .. code-block::
+                for item in loader:
+                    yield infer(item, **params)
+
+            Arguments:
+                loader (:obj:`torch.utils.data.DataLoader` or any iterator):
+                    The iterator that will be used to apply :obj:`infer` on.
+                infer (any function):
+                    The function to apply of each element of :obj:`loader`.
+                params (:obj:`dict`):
+                    The parameters passed to :obj:`infer` along with every item
+                loader_batch_size (:obj:`int`, `optional`):
+                    If specified, the items of :obj:`loader` are supposed to come as batch, and are loader_batched here
+                    making it roughly behave as
+
+
+                    .. code-block::
+
+                        for items in loader:
+                            for i in loader_batch_size:
+                                item = items[i]
+                                yield infer(item, **params)
+            """
             self.loader = loader
             self.infer = infer
             self.params = params
+            if loader_batch_size == 1:
+                # Let's spare some time by deactivating altogether
+                loader_batch_size = None
+            self.loader_batch_size = loader_batch_size
+
+            # Internal bookkeeping
+            self._loader_batch_index = None
+            self._loader_batch_data = None
 
         def __len__(self):
             return len(self.loader)
@@ -629,10 +739,49 @@ if is_torch_available():
             self.iterator = iter(self.loader)
             return self
 
+        def loader_batch_item(self):
+            if isinstance(self._loader_batch_data, torch.Tensor):
+                result = self._loader_batch_data[self._loader_batch_index]
+            else:
+                loader_batched = {}
+                for k, element in self._loader_batch_data.items():
+                    if k == "past_key_values":
+                        continue
+                    if isinstance(element[self._loader_batch_index], torch.Tensor):
+                        loader_batched[k] = element[self._loader_batch_index].unsqueeze(0)
+                    elif isinstance(element[self._loader_batch_index], np.ndarray):
+                        loader_batched[k] = np.expand_dims(element[self._loader_batch_index], 0)
+                    else:
+                        loader_batched[k] = element[self._loader_batch_index]
+                result = self._loader_batch_data.__class__(loader_batched)
+            self._loader_batch_index += 1
+            return result
+
         def __next__(self):
+            if self._loader_batch_index is not None and self._loader_batch_index < self.loader_batch_size:
+                return self.loader_batch_item()
+
             item = next(self.iterator)
             processed = self.infer(item, **self.params)
-            return processed
+            if self.loader_batch_size is not None:
+                if isinstance(processed, torch.Tensor):
+                    first_tensor = processed
+                else:
+                    key = list(processed.keys())[0]
+                    first_tensor = processed[key]
+                if isinstance(first_tensor, list):
+                    observed_batch_size = len(first_tensor)
+                else:
+                    observed_batch_size = first_tensor.shape[0]
+                if 0 < observed_batch_size < self.loader_batch_size:
+                    # Could be last batch so we can't unroll as many
+                    # elements.
+                    self.loader_batch_size = observed_batch_size
+                self._loader_batch_data = processed
+                self._loader_batch_index = 0
+                return self.loader_batch_item()
+            else:
+                return processed
 
     class KeyDataset(Dataset):
         def __init__(self, dataset: Dataset, key: str):
@@ -881,17 +1030,20 @@ class Pipeline(_ScikitCompat):
                 raise ValueError(f"Framework {self.framework} is not supported")
         return model_outputs
 
-    def get_iterator(self, inputs, num_workers: int, preprocess_params, forward_params, postprocess_params):
+    def get_iterator(
+        self, inputs, num_workers: int, batch_size: int, preprocess_params, forward_params, postprocess_params
+    ):
         if "TOKENIZERS_PARALLELISM" not in os.environ:
             logger.info("Disabling tokenizer parallelism, we're using DataLoader multithreading already")
             os.environ["TOKENIZERS_PARALLELISM"] = "false"
         dataset = PipelineDataset(inputs, self.preprocess, preprocess_params)
-        dataloader = DataLoader(dataset, num_workers=num_workers, batch_size=1, collate_fn=collate_fn)
-        model_iterator = PipelineIterator(dataloader, self.forward, forward_params)
+        collate_fn = no_collate_fn if batch_size == 1 else pad_collate_fn(self.tokenizer, self.feature_extractor)
+        dataloader = DataLoader(dataset, num_workers=num_workers, batch_size=batch_size, collate_fn=collate_fn)
+        model_iterator = PipelineIterator(dataloader, self.forward, forward_params, loader_batch_size=batch_size)
         final_iterator = PipelineIterator(model_iterator, self.postprocess, postprocess_params)
         return final_iterator
 
-    def __call__(self, inputs, *args, num_workers=8, **kwargs):
+    def __call__(self, inputs, *args, num_workers=0, batch_size=1, **kwargs):
         if args:
             logger.warning(f"Ignoring args : {args}")
         preprocess_params, forward_params, postprocess_params = self._sanitize_parameters(**kwargs)
@@ -910,14 +1062,16 @@ class Pipeline(_ScikitCompat):
         if isinstance(inputs, list):
             if self.framework == "pt":
                 final_iterator = self.get_iterator(
-                    inputs, num_workers, preprocess_params, forward_params, postprocess_params
+                    inputs, num_workers, batch_size, preprocess_params, forward_params, postprocess_params
                 )
                 outputs = [output for output in final_iterator]
                 return outputs
             else:
                 return self.run_multi(inputs, preprocess_params, forward_params, postprocess_params)
         elif Dataset is not None and isinstance(inputs, Dataset):
-            return self.get_iterator(inputs, num_workers, preprocess_params, forward_params, postprocess_params)
+            return self.get_iterator(
+                inputs, num_workers, batch_size, preprocess_params, forward_params, postprocess_params
+            )
         else:
             return self.run_single(inputs, preprocess_params, forward_params, postprocess_params)
 
diff --git a/src/transformers/pipelines/conversational.py b/src/transformers/pipelines/conversational.py
index 4cb0e888a4..2fd90061a5 100644
--- a/src/transformers/pipelines/conversational.py
+++ b/src/transformers/pipelines/conversational.py
@@ -243,7 +243,7 @@ class ConversationalPipeline(Pipeline):
             return outputs[0]
         return outputs
 
-    def preprocess(self, conversation: Conversation) -> Dict[str, Any]:
+    def preprocess(self, conversation: Conversation, min_length_for_response=32) -> Dict[str, Any]:
         if not isinstance(conversation, Conversation):
             raise ValueError("ConversationalPipeline, expects Conversation as inputs")
         if conversation.new_user_input is None:
@@ -274,18 +274,18 @@ class ConversationalPipeline(Pipeline):
             if "attention_mask" in model_inputs:
                 model_inputs["attention_mask"] = model_inputs["attention_mask"][:, -trim:]
         conversation = model_inputs.pop("conversation")
-        model_inputs["max_length"] = max_length
+        generate_kwargs["max_length"] = max_length
         output_ids = self.model.generate(**model_inputs, **generate_kwargs)
         if self.model.config.is_encoder_decoder:
             start_position = 1
         else:
             start_position = n
-        return {"output_ids": output_ids[0, start_position:], "conversation": conversation}
+        return {"output_ids": output_ids[:, start_position:], "conversation": conversation}
 
     def postprocess(self, model_outputs, clean_up_tokenization_spaces=True):
         output_ids = model_outputs["output_ids"]
         answer = self.tokenizer.decode(
-            output_ids,
+            output_ids[0],
             skip_special_tokens=True,
             clean_up_tokenization_spaces=clean_up_tokenization_spaces,
         )
diff --git a/src/transformers/pipelines/fill_mask.py b/src/transformers/pipelines/fill_mask.py
index 5392db979b..c1a03f6220 100644
--- a/src/transformers/pipelines/fill_mask.py
+++ b/src/transformers/pipelines/fill_mask.py
@@ -89,14 +89,14 @@ class FillMaskPipeline(Pipeline):
 
     def _forward(self, model_inputs):
         model_outputs = self.model(**model_inputs)
-        model_outputs["input_ids"] = model_inputs["input_ids"][0]
+        model_outputs["input_ids"] = model_inputs["input_ids"]
         return model_outputs
 
     def postprocess(self, model_outputs, top_k=5, target_ids=None):
         # Cap top_k if there are targets
         if target_ids is not None and target_ids.shape[0] < top_k:
             top_k = target_ids.shape[0]
-        input_ids = model_outputs["input_ids"]
+        input_ids = model_outputs["input_ids"][0]
         outputs = model_outputs["logits"]
         result = []
 
diff --git a/src/transformers/pipelines/object_detection.py b/src/transformers/pipelines/object_detection.py
index a3496b304f..6ecdc41f38 100644
--- a/src/transformers/pipelines/object_detection.py
+++ b/src/transformers/pipelines/object_detection.py
@@ -114,11 +114,12 @@ class ObjectDetectionPipeline(Pipeline):
     def _forward(self, model_inputs):
         target_size = model_inputs.pop("target_size")
         outputs = self.model(**model_inputs)
-        model_outputs = {"outputs": outputs, "target_size": target_size}
+        model_outputs = outputs.__class__({"target_size": target_size, **outputs})
         return model_outputs
 
     def postprocess(self, model_outputs, threshold=0.9):
-        raw_annotations = self.feature_extractor.post_process(model_outputs["outputs"], model_outputs["target_size"])
+        target_size = model_outputs["target_size"]
+        raw_annotations = self.feature_extractor.post_process(model_outputs, target_size)
         raw_annotation = raw_annotations[0]
         keep = raw_annotation["scores"] > threshold
         scores = raw_annotation["scores"][keep]
diff --git a/src/transformers/pipelines/question_answering.py b/src/transformers/pipelines/question_answering.py
index 585fd78dd0..ec864e0a20 100644
--- a/src/transformers/pipelines/question_answering.py
+++ b/src/transformers/pipelines/question_answering.py
@@ -8,9 +8,12 @@ from ..data import SquadExample, SquadFeatures, squad_convert_examples_to_featur
 from ..file_utils import PaddingStrategy, add_end_docstrings, is_tf_available, is_torch_available
 from ..modelcard import ModelCard
 from ..tokenization_utils import PreTrainedTokenizer
+from ..utils import logging
 from .base import PIPELINE_INIT_ARGS, ArgumentHandler, Pipeline
 
 
+logger = logging.get_logger(__name__)
+
 if TYPE_CHECKING:
     from ..modeling_tf_utils import TFPreTrainedModel
     from ..modeling_utils import PreTrainedModel
@@ -241,6 +244,9 @@ class QuestionAnsweringPipeline(Pipeline):
             - **end** (:obj:`int`) -- The character end index of the answer (in the tokenized version of the input).
             - **answer** (:obj:`str`) -- The answer to the question.
         """
+        if kwargs.get("batch_size", 1) > 1:
+            logger.error("Batch_size > 1 is not supported for question answering pipeline, setting it to 1.")
+            kwargs["batch_size"] = 1
 
         # Convert inputs to features
         examples = self._args_parser(*args, **kwargs)
diff --git a/src/transformers/pipelines/token_classification.py b/src/transformers/pipelines/token_classification.py
index fc3fce2366..49ede61dc8 100644
--- a/src/transformers/pipelines/token_classification.py
+++ b/src/transformers/pipelines/token_classification.py
@@ -204,12 +204,12 @@ class TokenClassificationPipeline(Pipeline):
         offset_mapping = model_inputs.pop("offset_mapping", None)
         sentence = model_inputs.pop("sentence")
         if self.framework == "tf":
-            outputs = self.model(model_inputs.data)[0][0]
+            logits = self.model(model_inputs.data)[0]
         else:
-            outputs = self.model(**model_inputs)[0][0]
+            logits = self.model(**model_inputs)[0]
 
         return {
-            "outputs": outputs,
+            "logits": logits,
             "special_tokens_mask": special_tokens_mask,
             "offset_mapping": offset_mapping,
             "sentence": sentence,
@@ -217,13 +217,16 @@ class TokenClassificationPipeline(Pipeline):
         }
 
     def postprocess(self, model_outputs, aggregation_strategy=AggregationStrategy.NONE):
-        outputs = model_outputs["outputs"].numpy()
+        logits = model_outputs["logits"][0].numpy()
         sentence = model_outputs["sentence"]
         input_ids = model_outputs["input_ids"][0]
         offset_mapping = model_outputs["offset_mapping"][0] if model_outputs["offset_mapping"] is not None else None
         special_tokens_mask = model_outputs["special_tokens_mask"][0].numpy()
 
-        scores = np.exp(outputs) / np.exp(outputs).sum(-1, keepdims=True)
+        maxes = np.max(logits, axis=-1, keepdims=True)
+        shifted_exp = np.exp(logits - maxes)
+        scores = shifted_exp / shifted_exp.sum(axis=-1, keepdims=True)
+
         pre_entities = self.gather_pre_entities(
             sentence, input_ids, scores, offset_mapping, special_tokens_mask, aggregation_strategy
         )
diff --git a/src/transformers/pipelines/zero_shot_classification.py b/src/transformers/pipelines/zero_shot_classification.py
index f308c48b16..7d3aa1b43c 100644
--- a/src/transformers/pipelines/zero_shot_classification.py
+++ b/src/transformers/pipelines/zero_shot_classification.py
@@ -183,6 +183,9 @@ class ZeroShotClassificationPipeline(Pipeline):
             - **labels** (:obj:`List[str]`) -- The labels sorted by order of likelihood.
             - **scores** (:obj:`List[float]`) -- The probabilities for each of the labels.
         """
+        if kwargs.get("batch_size", 1) > 1:
+            logger.error("Batch size > 1 is not supported for zero-shot pipeline, setting batch_size=1.")
+            kwargs["batch_size"] = 1
 
         if len(args) == 0:
             pass
diff --git a/src/transformers/utils/dummy_pt_objects.py b/src/transformers/utils/dummy_pt_objects.py
index 3ac8fcbd0e..3a4354b953 100644
--- a/src/transformers/utils/dummy_pt_objects.py
+++ b/src/transformers/utils/dummy_pt_objects.py
@@ -313,6 +313,9 @@ MODEL_FOR_AUDIO_CLASSIFICATION_MAPPING = None
 MODEL_FOR_CAUSAL_LM_MAPPING = None
 
 
+MODEL_FOR_CTC_MAPPING = None
+
+
 MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING = None
 
 
@@ -343,6 +346,9 @@ MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING = None
 MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING = None
 
 
+MODEL_FOR_SPEECH_SEQ_2_SEQ_MAPPING = None
+
+
 MODEL_FOR_TABLE_QUESTION_ANSWERING_MAPPING = None
 
 
diff --git a/tests/test_pipelines_audio_classification.py b/tests/test_pipelines_audio_classification.py
index 561d333caa..07d6a3629d 100644
--- a/tests/test_pipelines_audio_classification.py
+++ b/tests/test_pipelines_audio_classification.py
@@ -24,6 +24,7 @@ from transformers.testing_utils import (
     require_datasets,
     require_tf,
     require_torch,
+    require_torchaudio,
     slow,
 )
 
@@ -35,15 +36,16 @@ from .test_pipelines_common import ANY, PipelineTestCaseMeta
 class AudioClassificationPipelineTests(unittest.TestCase, metaclass=PipelineTestCaseMeta):
     model_mapping = MODEL_FOR_AUDIO_CLASSIFICATION_MAPPING
 
-    @require_datasets
-    @slow
-    def run_pipeline_test(self, model, tokenizer, feature_extractor):
-        import datasets
-
+    def get_test_pipeline(self, model, tokenizer, feature_extractor):
         audio_classifier = AudioClassificationPipeline(model=model, feature_extractor=feature_extractor)
 
         # test with a raw waveform
         audio = np.zeros((34000,))
+        audio2 = np.zeros((14000,))
+        return audio_classifier, [audio2, audio]
+
+    def run_pipeline_test(self, audio_classifier, examples):
+        audio2, audio = examples
         output = audio_classifier(audio)
         # by default a model is initialized with num_labels=2
         self.assertEqual(
@@ -61,10 +63,17 @@ class AudioClassificationPipelineTests(unittest.TestCase, metaclass=PipelineTest
             ],
         )
 
+        self.run_torchaudio(audio_classifier)
+
+    @require_datasets
+    @require_torchaudio
+    def run_torchaudio(self, audio_classifier):
+        import datasets
+
         # test with a local file
         dataset = datasets.load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
-        filename = dataset[0]["file"]
-        output = audio_classifier(filename)
+        audio = dataset[0]["audio"]["array"]
+        output = audio_classifier(audio)
         self.assertEqual(
             output,
             [
diff --git a/tests/test_pipelines_automatic_speech_recognition.py b/tests/test_pipelines_automatic_speech_recognition.py
index e2f7644859..a50512887d 100644
--- a/tests/test_pipelines_automatic_speech_recognition.py
+++ b/tests/test_pipelines_automatic_speech_recognition.py
@@ -14,11 +14,28 @@
 
 import unittest
 
+import numpy as np
 import pytest
 
-from transformers import AutoFeatureExtractor, AutoTokenizer, Speech2TextForConditionalGeneration, Wav2Vec2ForCTC
+from transformers import (
+    MODEL_FOR_CTC_MAPPING,
+    MODEL_FOR_SPEECH_SEQ_2_SEQ_MAPPING,
+    AutoFeatureExtractor,
+    AutoTokenizer,
+    Speech2TextForConditionalGeneration,
+    Wav2Vec2ForCTC,
+)
 from transformers.pipelines import AutomaticSpeechRecognitionPipeline, pipeline
-from transformers.testing_utils import is_pipeline_test, require_datasets, require_torch, require_torchaudio, slow
+from transformers.testing_utils import (
+    is_pipeline_test,
+    require_datasets,
+    require_tf,
+    require_torch,
+    require_torchaudio,
+    slow,
+)
+
+from .test_pipelines_common import ANY, PipelineTestCaseMeta
 
 
 # We can't use this mixin because it assumes TF support.
@@ -26,14 +43,42 @@ from transformers.testing_utils import is_pipeline_test, require_datasets, requi
 
 
 @is_pipeline_test
-class AutomaticSpeechRecognitionPipelineTests(unittest.TestCase):
+class AutomaticSpeechRecognitionPipelineTests(unittest.TestCase, metaclass=PipelineTestCaseMeta):
+    model_mapping = {  # speech seq2seq + CTC models; stays empty when the mappings are None
+        k: v
+        for k, v in (list(MODEL_FOR_SPEECH_SEQ_2_SEQ_MAPPING.items()) if MODEL_FOR_SPEECH_SEQ_2_SEQ_MAPPING else [])
+        + (list(MODEL_FOR_CTC_MAPPING.items()) if MODEL_FOR_CTC_MAPPING else [])
+    }
+
+    def get_test_pipeline(self, model, tokenizer, feature_extractor):
+        if tokenizer is None:
+            # Side effect of there being no Fast Tokenizer class for these models, so skipping.
+            # The slow tokenizer tests should still run, as they're quite small.
+            self.skipTest("No tokenizer available")
+            return
+            # NOTE(review): `skipTest` raises `unittest.SkipTest`, so the `return` above is never reached.
+
+        speech_recognizer = AutomaticSpeechRecognitionPipeline(
+            model=model, tokenizer=tokenizer, feature_extractor=feature_extractor
+        )
+
+        # Two raw all-zero waveforms of different lengths, handed back as the examples
+        audio = np.zeros((34000,))
+        audio2 = np.zeros((14000,))
+        return speech_recognizer, [audio, audio2]
+
+    def run_pipeline_test(self, speech_recognizer, examples):
+        audio = np.zeros((34000,))  # fresh all-zero waveform; `examples` is not read here
+        outputs = speech_recognizer(audio)
+        self.assertEqual(outputs, {"text": ANY(str)})
+
     @require_torch
     @slow
     def test_pt_defaults(self):
         pipeline("automatic-speech-recognition", framework="pt")
 
     @require_torch
-    def test_torch_small(self):
+    def test_small_model_pt(self):
         import numpy as np
 
         speech_recognizer = pipeline(
@@ -46,6 +91,10 @@ class AutomaticSpeechRecognitionPipelineTests(unittest.TestCase):
         output = speech_recognizer(waveform)
         self.assertEqual(output, {"text": "(Applaudissements)"})
 
+    @require_tf
+    def test_small_model_tf(self):
+        self.skipTest("Tensorflow not supported yet.")
+
     @require_torch
     def test_torch_small_no_tokenizer_files(self):
         # test that model without tokenizer file cannot be loaded
diff --git a/tests/test_pipelines_common.py b/tests/test_pipelines_common.py
index d727fdbb7c..e64d4b8c09 100644
--- a/tests/test_pipelines_common.py
+++ b/tests/test_pipelines_common.py
@@ -12,8 +12,10 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+import copy
 import importlib
 import logging
+import random
 import string
 import unittest
 from abc import abstractmethod
@@ -21,6 +23,7 @@ from functools import lru_cache
 from unittest import skipIf
 
 from transformers import FEATURE_EXTRACTOR_MAPPING, TOKENIZER_MAPPING, AutoFeatureExtractor, AutoTokenizer, pipeline
+from transformers.pipelines.base import _pad
 from transformers.testing_utils import is_pipeline_test, require_torch
 
 
@@ -73,6 +76,12 @@ def get_tiny_config_from_class(configuration_class):
 @lru_cache(maxsize=100)
 def get_tiny_tokenizer_from_checkpoint(checkpoint):
     tokenizer = AutoTokenizer.from_pretrained(checkpoint)
+    if tokenizer.vocab_size < 300:
+        # For instance the Wav2Vec2ForCTC tokenizer or
+        # ByT5Tokenizer:
+        # these are already small enough and have no Fast version that can
+        # be retrained, so they are returned unchanged
+        return tokenizer
     logger.info("Training new from iterator ...")
     vocabulary = string.ascii_letters + string.digits + " "
     tokenizer = tokenizer.train_new_from_iterator(vocabulary, vocab_size=len(vocabulary), show_progress=False)
@@ -87,6 +96,12 @@ def get_tiny_feature_extractor_from_checkpoint(checkpoint, tiny_config):
         feature_extractor = None
     if hasattr(tiny_config, "image_size") and feature_extractor:
         feature_extractor = feature_extractor.__class__(size=tiny_config.image_size, crop_size=tiny_config.image_size)
+
+    # Speech2TextModel specific.
+    if hasattr(tiny_config, "input_feat_per_channel") and feature_extractor:
+        feature_extractor = feature_extractor.__class__(
+            feature_size=tiny_config.input_feat_per_channel, num_mel_bins=tiny_config.input_feat_per_channel
+        )
     return feature_extractor
 
 
@@ -136,7 +151,26 @@ class PipelineTestCaseMeta(type):
                 else:
                     tokenizer = None
                 feature_extractor = get_tiny_feature_extractor_from_checkpoint(checkpoint, tiny_config)
-                self.run_pipeline_test(model, tokenizer, feature_extractor)
+                pipeline, examples = self.get_test_pipeline(model, tokenizer, feature_extractor)
+                if pipeline is None:
+                    # The test can disable itself, but this should stay very marginal
+                    # Concerns: Wav2Vec2ForCTC without tokenizer test (no FastTokenizer exists)
+                    return
+                self.run_pipeline_test(pipeline, examples)
+
+                def run_batch_test(pipeline, examples):
+                    # A tokenizer without a pad token is not exercised with batches
+                    if pipeline.tokenizer is not None and pipeline.tokenizer.pad_token_id is None:
+                        return  # No batching for this and it's OK
+
+                    # 10 examples with batch size 4 means there needs to be an unfinished batch, which is
+                    # important for the unbatcher. Examples are deep-copied as `Conversation` are stateful
+                    dataset = [copy.deepcopy(random.choice(examples)) for i in range(10)]
+
+                    for item in pipeline(dataset, batch_size=4):
+                        pass
+
+                run_batch_test(pipeline, examples)
 
             return test
 
@@ -211,3 +245,85 @@ class CommonPipelineTest(unittest.TestCase):
         dataset = MyDataset()
         for output in text_classifier(dataset):
             self.assertEqual(output, {"label": ANY(str), "score": ANY(float)})
+
+
+@is_pipeline_test
+class PipelinePadTest(unittest.TestCase):  # unit tests for the `_pad` batching helper
+    @require_torch
+    def test_pipeline_padding(self):
+        """Non-tensor values come back as a list; 2D tensors are padded on the requested side."""
+        import torch
+        items = [
+            {
+                "label": "label1",
+                "input_ids": torch.LongTensor([[1, 23, 24, 2]]),
+                "attention_mask": torch.LongTensor([[0, 1, 1, 0]]),
+            },
+            {
+                "label": "label2",
+                "input_ids": torch.LongTensor([[1, 23, 24, 43, 44, 2]]),
+                "attention_mask": torch.LongTensor([[0, 1, 1, 1, 1, 0]]),
+            },
+        ]
+
+        self.assertEqual(_pad(items, "label", 0, "right"), ["label1", "label2"])
+        self.assertTrue(
+            torch.equal(
+                _pad(items, "input_ids", 10, "right"),
+                torch.LongTensor([[1, 23, 24, 2, 10, 10], [1, 23, 24, 43, 44, 2]]),
+            )
+        )
+        self.assertTrue(
+            torch.equal(
+                _pad(items, "input_ids", 10, "left"),
+                torch.LongTensor([[10, 10, 1, 23, 24, 2], [1, 23, 24, 43, 44, 2]]),
+            )
+        )
+        self.assertTrue(
+            torch.equal(
+                _pad(items, "attention_mask", 0, "right"), torch.LongTensor([[0, 1, 1, 0, 0, 0], [0, 1, 1, 1, 1, 0]])
+            )
+        )
+
+    @require_torch
+    def test_pipeline_image_padding(self):
+        """Two same-sized 4D `pixel_values` items end up as a single (2, 3, 10, 10) batch."""
+        import torch
+        items = [
+            {
+                "label": "label1",
+                "pixel_values": torch.zeros((1, 3, 10, 10)),
+            },
+            {
+                "label": "label2",
+                "pixel_values": torch.zeros((1, 3, 10, 10)),
+            },
+        ]
+
+        self.assertEqual(_pad(items, "label", 0, "right"), ["label1", "label2"])
+        self.assertTrue(
+            torch.equal(  # unlike `allclose` (which broadcasts), this also fails on a shape mismatch
+                _pad(items, "pixel_values", 10, "right"),
+                torch.zeros((2, 3, 10, 10)),
+            )
+        )
+
+    @require_torch
+    def test_pipeline_offset_mapping(self):
+        """3D offset mappings are padded along their second dimension to the longest item."""
+        import torch
+        items = [
+            {
+                "offset_mappings": torch.zeros([1, 11, 2], dtype=torch.long),
+            },
+            {
+                "offset_mappings": torch.zeros([1, 4, 2], dtype=torch.long),
+            },
+        ]
+
+        self.assertTrue(
+            torch.equal(
+                _pad(items, "offset_mappings", 0, "right"),
+                torch.zeros((2, 11, 2), dtype=torch.long),
+            ),
+        )
diff --git a/tests/test_pipelines_conversational.py b/tests/test_pipelines_conversational.py
index 5d7e1fd3a4..342a09e2e6 100644
--- a/tests/test_pipelines_conversational.py
+++ b/tests/test_pipelines_conversational.py
@@ -54,8 +54,11 @@ class ConversationalPipelineTests(unittest.TestCase, metaclass=PipelineTestCaseM
         else []
     )
 
-    def run_pipeline_test(self, model, tokenizer, feature_extractor):
+    def get_test_pipeline(self, model, tokenizer, feature_extractor):
         conversation_agent = ConversationalPipeline(model=model, tokenizer=tokenizer)
+        return conversation_agent, [Conversation("Hi there!")]
+
+    def run_pipeline_test(self, conversation_agent, _):
         # Simple
         outputs = conversation_agent(Conversation("Hi there!"))
         self.assertEqual(outputs, Conversation(past_user_inputs=["Hi there!"], generated_responses=[ANY(str)]))
diff --git a/tests/test_pipelines_feature_extraction.py b/tests/test_pipelines_feature_extraction.py
index 4a1ed8f969..87b2b10ba5 100644
--- a/tests/test_pipelines_feature_extraction.py
+++ b/tests/test_pipelines_feature_extraction.py
@@ -14,7 +14,15 @@
 
 import unittest
 
-from transformers import MODEL_MAPPING, TF_MODEL_MAPPING, CLIPConfig, FeatureExtractionPipeline, LxmertConfig, pipeline
+from transformers import (
+    MODEL_MAPPING,
+    TF_MODEL_MAPPING,
+    CLIPConfig,
+    FeatureExtractionPipeline,
+    LxmertConfig,
+    Wav2Vec2Config,
+    pipeline,
+)
 from transformers.testing_utils import is_pipeline_test, nested_simplify, require_tf, require_torch
 
 from .test_pipelines_common import PipelineTestCaseMeta
@@ -61,12 +69,12 @@ class FeatureExtractionPipelineTests(unittest.TestCase, metaclass=PipelineTestCa
             raise ValueError("We expect lists of floats, nothing else")
         return shape
 
-    def run_pipeline_test(self, model, tokenizer, feature_extractor):
+    def get_test_pipeline(self, model, tokenizer, feature_extractor):
         if tokenizer is None:
             self.skipTest("No tokenizer")
             return
 
-        elif isinstance(model.config, (LxmertConfig, CLIPConfig)):
+        elif isinstance(model.config, (LxmertConfig, CLIPConfig, Wav2Vec2Config)):
             self.skipTest(
                 "This is an Lxmert bimodal model, we need to find a more consistent way to switch on those models."
             )
@@ -81,11 +89,12 @@ class FeatureExtractionPipelineTests(unittest.TestCase, metaclass=PipelineTestCa
             )
 
             return
-
         feature_extractor = FeatureExtractionPipeline(
             model=model, tokenizer=tokenizer, feature_extractor=feature_extractor
         )
+        return feature_extractor, ["This is a test", "This is another test"]
 
+    def run_pipeline_test(self, feature_extractor, examples):
         outputs = feature_extractor("This is a test")
 
         shape = self.get_shape(outputs)
diff --git a/tests/test_pipelines_fill_mask.py b/tests/test_pipelines_fill_mask.py
index fb48fe52cd..43801ef0c1 100644
--- a/tests/test_pipelines_fill_mask.py
+++ b/tests/test_pipelines_fill_mask.py
@@ -159,22 +159,32 @@ class FillMaskPipelineTests(unittest.TestCase, metaclass=PipelineTestCaseMeta):
         unmasker = pipeline(task="fill-mask", model="sshleifer/tiny-distilroberta-base", framework="pt")
         unmasker.tokenizer.pad_token_id = None
         unmasker.tokenizer.pad_token = None
-        self.run_pipeline_test(unmasker.model, unmasker.tokenizer, None)
+        self.run_pipeline_test(unmasker, [])
 
     @require_tf
     def test_model_no_pad_tf(self):
         unmasker = pipeline(task="fill-mask", model="sshleifer/tiny-distilroberta-base", framework="tf")
         unmasker.tokenizer.pad_token_id = None
         unmasker.tokenizer.pad_token = None
-        self.run_pipeline_test(unmasker.model, unmasker.tokenizer, None)
+        self.run_pipeline_test(unmasker, [])
 
-    def run_pipeline_test(self, model, tokenizer, feature_extractor):
+    def get_test_pipeline(self, model, tokenizer, feature_extractor):
         if tokenizer is None or tokenizer.mask_token_id is None:
             self.skipTest("The provided tokenizer has no mask token, (probably reformer or wav2vec2)")
 
         fill_masker = FillMaskPipeline(model=model, tokenizer=tokenizer)
+        examples = [
+            f"This is another {tokenizer.mask_token} test",
+        ]
+        return fill_masker, examples
 
-        outputs = fill_masker(f"This is a {tokenizer.mask_token}")
+    def run_pipeline_test(self, fill_masker, examples):
+        tokenizer = fill_masker.tokenizer
+        model = fill_masker.model
+
+        outputs = fill_masker(
+            f"This is a {tokenizer.mask_token}",
+        )
         self.assertEqual(
             outputs,
             [
diff --git a/tests/test_pipelines_image_classification.py b/tests/test_pipelines_image_classification.py
index e06dec9d0b..81cdde3f3f 100644
--- a/tests/test_pipelines_image_classification.py
+++ b/tests/test_pipelines_image_classification.py
@@ -44,9 +44,17 @@ else:
 class ImageClassificationPipelineTests(unittest.TestCase, metaclass=PipelineTestCaseMeta):
     model_mapping = MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING
 
-    @require_datasets
-    def run_pipeline_test(self, model, tokenizer, feature_extractor):
+    def get_test_pipeline(self, model, tokenizer, feature_extractor):
+
         image_classifier = ImageClassificationPipeline(model=model, feature_extractor=feature_extractor)
+        examples = [
+            Image.open("./tests/fixtures/tests_samples/COCO/000000039769.png"),
+            "http://images.cocodataset.org/val2017/000000039769.jpg",
+        ]
+        return image_classifier, examples
+
+    @require_datasets
+    def run_pipeline_test(self, image_classifier, examples):
         outputs = image_classifier("./tests/fixtures/tests_samples/COCO/000000039769.png")
 
         self.assertEqual(
diff --git a/tests/test_pipelines_object_detection.py b/tests/test_pipelines_object_detection.py
index b20bebbf25..19f3447e97 100644
--- a/tests/test_pipelines_object_detection.py
+++ b/tests/test_pipelines_object_detection.py
@@ -53,9 +53,12 @@ else:
 class ObjectDetectionPipelineTests(unittest.TestCase, metaclass=PipelineTestCaseMeta):
     model_mapping = MODEL_FOR_OBJECT_DETECTION_MAPPING
 
-    @require_datasets
-    def run_pipeline_test(self, model, tokenizer, feature_extractor):
+    def get_test_pipeline(self, model, tokenizer, feature_extractor):
         object_detector = ObjectDetectionPipeline(model=model, feature_extractor=feature_extractor)
+        return object_detector, ["./tests/fixtures/tests_samples/COCO/000000039769.png"]
+
+    @require_datasets
+    def run_pipeline_test(self, object_detector, examples):
         outputs = object_detector("./tests/fixtures/tests_samples/COCO/000000039769.png", threshold=0.0)
 
         self.assertGreater(len(outputs), 0)
diff --git a/tests/test_pipelines_question_answering.py b/tests/test_pipelines_question_answering.py
index cd0e7acde1..ca85c89ac9 100644
--- a/tests/test_pipelines_question_answering.py
+++ b/tests/test_pipelines_question_answering.py
@@ -32,13 +32,20 @@ class QAPipelineTests(unittest.TestCase, metaclass=PipelineTestCaseMeta):
     model_mapping = MODEL_FOR_QUESTION_ANSWERING_MAPPING
     tf_model_mapping = TF_MODEL_FOR_QUESTION_ANSWERING_MAPPING
 
-    def run_pipeline_test(self, model, tokenizer, feature_extractor):
+    def get_test_pipeline(self, model, tokenizer, feature_extractor):
         if isinstance(model.config, LxmertConfig):
             # This is an bimodal model, we need to find a more consistent way
             # to switch on those models.
-            return
+            return None, None
         question_answerer = QuestionAnsweringPipeline(model, tokenizer)
 
+        examples = [
+            {"question": "Where was HuggingFace founded ?", "context": "HuggingFace was founded in Paris."},
+            {"question": "In what field is HuggingFace ?", "context": "HuggingFace is  an AI startup."},
+        ]
+        return question_answerer, examples
+
+    def run_pipeline_test(self, question_answerer, _):
         outputs = question_answerer(
             question="Where was HuggingFace founded ?", context="HuggingFace was founded in Paris."
         )
diff --git a/tests/test_pipelines_summarization.py b/tests/test_pipelines_summarization.py
index f3f77410c7..e434ed742d 100644
--- a/tests/test_pipelines_summarization.py
+++ b/tests/test_pipelines_summarization.py
@@ -36,8 +36,12 @@ class SummarizationPipelineTests(unittest.TestCase, metaclass=PipelineTestCaseMe
     model_mapping = MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING
     tf_model_mapping = TF_MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING
 
-    def run_pipeline_test(self, model, tokenizer, feature_extractor):
+    def get_test_pipeline(self, model, tokenizer, feature_extractor):
         summarizer = SummarizationPipeline(model=model, tokenizer=tokenizer)
+        return summarizer, ["(CNN)The Palestinian Authority officially became", "Some other text"]
+
+    def run_pipeline_test(self, summarizer, _):
+        model = summarizer.model
 
         outputs = summarizer("(CNN)The Palestinian Authority officially became")
         self.assertEqual(outputs, [{"summary_text": ANY(str)}])
diff --git a/tests/test_pipelines_text2text_generation.py b/tests/test_pipelines_text2text_generation.py
index 2ca6f93e9f..7de2b263b2 100644
--- a/tests/test_pipelines_text2text_generation.py
+++ b/tests/test_pipelines_text2text_generation.py
@@ -30,9 +30,11 @@ class Text2TextGenerationPipelineTests(unittest.TestCase, metaclass=PipelineTest
     model_mapping = MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING
     tf_model_mapping = TF_MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING
 
-    def run_pipeline_test(self, model, tokenizer, feature_extractor):
+    def get_test_pipeline(self, model, tokenizer, feature_extractor):
         generator = Text2TextGenerationPipeline(model=model, tokenizer=tokenizer)
+        return generator, ["Something to write", "Something else"]
 
+    def run_pipeline_test(self, generator, _):
         outputs = generator("Something there")
         self.assertEqual(outputs, [{"generated_text": ANY(str)}])
         # These are encoder decoder, they don't just append to incoming string
diff --git a/tests/test_pipelines_text_classification.py b/tests/test_pipelines_text_classification.py
index 3daad11597..7bc794b4d3 100644
--- a/tests/test_pipelines_text_classification.py
+++ b/tests/test_pipelines_text_classification.py
@@ -72,9 +72,12 @@ class TextClassificationPipelineTests(unittest.TestCase, metaclass=PipelineTestC
         outputs = text_classifier("Birds are a type of animal")
         self.assertEqual(nested_simplify(outputs), [{"label": "POSITIVE", "score": 0.988}])
 
-    def run_pipeline_test(self, model, tokenizer, feature_extractor):
+    def get_test_pipeline(self, model, tokenizer, feature_extractor):
         text_classifier = TextClassificationPipeline(model=model, tokenizer=tokenizer)
+        return text_classifier, ["HuggingFace is in", "This is another test"]
 
+    def run_pipeline_test(self, text_classifier, _):
+        model = text_classifier.model
         # Small inputs because BartTokenizer tiny has maximum position embeddings = 22
         valid_inputs = "HuggingFace is in"
         outputs = text_classifier(valid_inputs)
diff --git a/tests/test_pipelines_text_generation.py b/tests/test_pipelines_text_generation.py
index 3618a2be73..ebe71a5591 100644
--- a/tests/test_pipelines_text_generation.py
+++ b/tests/test_pipelines_text_generation.py
@@ -88,8 +88,14 @@ class TextGenerationPipelineTests(unittest.TestCase, metaclass=PipelineTestCaseM
             ],
         )
 
-    def run_pipeline_test(self, model, tokenizer, feature_extractor):
+    def get_test_pipeline(self, model, tokenizer, feature_extractor):
         text_generator = TextGenerationPipeline(model=model, tokenizer=tokenizer)
+        return text_generator, ["This is a test", "Another test"]
+
+    def run_pipeline_test(self, text_generator, _):
+        model = text_generator.model
+        tokenizer = text_generator.tokenizer
+
         outputs = text_generator("This is a test")
         self.assertEqual(outputs, [{"generated_text": ANY(str)}])
         self.assertTrue(outputs[0]["generated_text"].startswith("This is a test"))
diff --git a/tests/test_pipelines_token_classification.py b/tests/test_pipelines_token_classification.py
index d94e4cc7f8..caeef47d95 100644
--- a/tests/test_pipelines_token_classification.py
+++ b/tests/test_pipelines_token_classification.py
@@ -45,8 +45,13 @@ class TokenClassificationPipelineTests(unittest.TestCase, metaclass=PipelineTest
     model_mapping = MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING
     tf_model_mapping = TF_MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING
 
-    def run_pipeline_test(self, model, tokenizer, feature_extractor):
+    def get_test_pipeline(self, model, tokenizer, feature_extractor):
         token_classifier = TokenClassificationPipeline(model=model, tokenizer=tokenizer)
+        return token_classifier, ["A simple string", "A simple string that is quite a bit longer"]
+
+    def run_pipeline_test(self, token_classifier, _):
+        model = token_classifier.model
+        tokenizer = token_classifier.tokenizer
 
         outputs = token_classifier("A simple string")
         self.assertIsInstance(outputs, list)
diff --git a/tests/test_pipelines_translation.py b/tests/test_pipelines_translation.py
index 50b13331f3..7185fe9c18 100644
--- a/tests/test_pipelines_translation.py
+++ b/tests/test_pipelines_translation.py
@@ -20,6 +20,7 @@ from transformers import (
     MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING,
     TF_MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING,
     MBart50TokenizerFast,
+    MBartConfig,
     MBartForConditionalGeneration,
     TranslationPipeline,
     pipeline,
@@ -34,14 +35,16 @@ class TranslationPipelineTests(unittest.TestCase, metaclass=PipelineTestCaseMeta
     model_mapping = MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING
     tf_model_mapping = TF_MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING
 
-    def run_pipeline_test(self, model, tokenizer, feature_extractor):
-        translator = TranslationPipeline(model=model, tokenizer=tokenizer)
-        try:
-            outputs = translator("Some string")
-        except ValueError:
-            # Triggered by m2m langages
-            src_lang, tgt_lang = list(translator.tokenizer.lang_code_to_id.keys())[:2]
-            outputs = translator("Some string", src_lang=src_lang, tgt_lang=tgt_lang)
+    def get_test_pipeline(self, model, tokenizer, feature_extractor):
+        lang_kwargs = {}  # MBart configs get explicit languages: the first two codes known to the tokenizer
+        if isinstance(model.config, MBartConfig):
+            src_lang, tgt_lang = list(tokenizer.lang_code_to_id.keys())[:2]
+            lang_kwargs = {"src_lang": src_lang, "tgt_lang": tgt_lang}
+        translator = TranslationPipeline(model=model, tokenizer=tokenizer, **lang_kwargs)
+        return translator, ["Some string", "Some other text"]
+
+    def run_pipeline_test(self, translator, _):
+        outputs = translator("Some string")
         self.assertEqual(outputs, [{"translation_text": ANY(str)}])
 
     @require_torch
diff --git a/tests/test_pipelines_zero_shot.py b/tests/test_pipelines_zero_shot.py
index ae47eb626c..ed564581e5 100644
--- a/tests/test_pipelines_zero_shot.py
+++ b/tests/test_pipelines_zero_shot.py
@@ -31,9 +31,13 @@ class ZeroShotClassificationPipelineTests(unittest.TestCase, metaclass=PipelineT
     model_mapping = MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING
     tf_model_mapping = TF_MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING
 
-    def run_pipeline_test(self, model, tokenizer, feature_extractor):
-        classifier = ZeroShotClassificationPipeline(model=model, tokenizer=tokenizer)
+    def get_test_pipeline(self, model, tokenizer, feature_extractor):
+        classifier = ZeroShotClassificationPipeline(
+            model=model, tokenizer=tokenizer, candidate_labels=["politics", "health"]
+        )  # labels are fixed at construction; the returned examples are plain sequences
+        return classifier, ["Who are you voting for in 2020?", "My stomach hurts."]
 
+    def run_pipeline_test(self, classifier, _):
         outputs = classifier("Who are you voting for in 2020?", candidate_labels="politics")
         self.assertEqual(outputs, {"sequence": ANY(str), "labels": [ANY(str)], "scores": [ANY(float)]})