Adding batch_size support for (almost) all pipelines (#13724)

* Tentative enabling of `batch_size` for pipelines. * Add systematic test for pipeline batching. * Enabling batch_size on almost all pipelines - Not `zero-shot` (it's already passing stuff as batched so trickier) - Not `QA` (preprocess uses squad features, we need to switch to real tensors at this boundary. * Adding `min_length_for_response` for conversational. * Making CTC, speech mappings avaiable regardless of framework. * Attempt at fixing automatic tests (ffmpeg not enabled for fast tests) * Removing ffmpeg dependency in tests. * Small fixes. * Slight cleanup. * Adding docs and adressing comments. * Quality. * Update docs/source/main_classes/pipelines.rst Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com> * Update src/transformers/pipelines/question_answering.py Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com> * Update src/transformers/pipelines/zero_shot_classification.py Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com> * Improving docs. * Update docs/source/main_classes/pipelines.rst Co-authored-by: Philipp Schmid <32632186+philschmid@users.noreply.github.com> * N -> oberved_batch_size softmax trick. * Follow `padding_side`. * Supporting image pipeline batching (and padding). * Rename `unbatch` -> `loader_batch`. * unbatch_size forgot. * Custom padding for offset mappings. * Attempt to remove librosa. * Adding require_audio. * torchaudio. * Back to using datasets librosa. * Adding help to set a pad_token on the tokenizer. * Update src/transformers/pipelines/base.py Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com> * Update src/transformers/pipelines/base.py Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com> * Update src/transformers/pipelines/base.py Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com> * Quality. 
Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com> Co-authored-by: Philipp Schmid <32632186+philschmid@users.noreply.github.com>
2021-10-29 11:34:18 +02:00
parent 4469010c1b
commit be236361f1
27 changed files with 629 additions and 64 deletions
--- a/docs/source/main_classes/pipelines.rst
+++ b/docs/source/main_classes/pipelines.rst
@@ -71,6 +71,11 @@ GPU. If it doesn't don't hesitate to create an issue.
 .. code-block::
    import datasets
    from transformers import pipeline
    from transformers.pipelines.base import KeyDataset
    import tqdm
    pipe = pipeline("automatic-speech-recognition", model="facebook/wav2vec2-base-960h", device=0)
    dataset = datasets.load_dataset("superb", name="asr", split="test")
@@ -85,6 +90,144 @@ GPU. If it doesn't don't hesitate to create an issue.
 .. autofunction:: transformers.pipeline
 Pipeline batching
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 All pipelines (except `zero-shot-classification` and `question-answering` currently) can use batching. This will work
 whenever the pipeline uses its streaming ability (so when passing lists or :obj:`Dataset`).
 .. code-block::
    from transformers import pipeline                                                   
    from transformers.pipelines.base import KeyDataset
    import datasets
    import tqdm                                                                         
    dataset = datasets.load_dataset("imdb", name="plain_text", split="unsupervised")
    pipe = pipeline("text-classification", device=0)
    for out in pipe(KeyDataset(dataset, "text"), batch_size=8, truncation="only_first"):
        print(out)
        # [{'label': 'POSITIVE', 'score': 0.9998743534088135}]
        # Exactly the same output as before, but the contents are passed
        # to the model as batches
 .. warning::
    However, this is not automatically a win for performance. It can be either a 10x speedup or 5x slowdown depending
    on hardware, data and the actual model being used.
    Example where it's mostly a speedup:
 .. code-block::
    from transformers import pipeline                                                   
    from torch.utils.data import Dataset                                                
    import tqdm                                                                         
    pipe = pipeline("text-classification", device=0)                                    
    class MyDataset(Dataset):                                                           
        def __len__(self):                                                              
            return 5000                                                                 
        def __getitem__(self, i):                                                       
            return "This is a test"                                                     
    dataset = MyDataset()   
    for batch_size in [1, 8, 64, 256]:
        print("-" * 30)                                                                     
        print(f"Streaming batch_size={batch_size}")    
        for out in tqdm.tqdm(pipe(dataset, batch_size=batch_size), total=len(dataset)):              
            pass
 .. code-block::
    # On GTX 970
    ------------------------------
    Streaming no batching
    100%|██████████████████████████████████████████████████████████████████████| 5000/5000 [00:26<00:00, 187.52it/s]
    ------------------------------
    Streaming batch_size=8
    100%|█████████████████████████████████████████████████████████████████████| 5000/5000 [00:04<00:00, 1205.95it/s]
    ------------------------------
    Streaming batch_size=64
    100%|█████████████████████████████████████████████████████████████████████| 5000/5000 [00:02<00:00, 2478.24it/s]
    ------------------------------
    Streaming batch_size=256
    100%|█████████████████████████████████████████████████████████████████████| 5000/5000 [00:01<00:00, 2554.43it/s]
    (diminishing returns, saturated the GPU)
 Example where it's mostly a slowdown:
 .. code-block::
    class MyDataset(Dataset):                                                           
        def __len__(self):                                                              
            return 5000                                                                 
        def __getitem__(self, i):                                                       
            if i % 64 == 0:                                                          
                n = 100                                                              
            else:                                                                    
                n = 1                                                                
            return "This is a test" * n
 This is an occasional very long sentence compared to the others. In that case, the **whole** batch will need to be
 400 tokens long, so the whole batch will be [64, 400] instead of [64, 4], leading to the high slowdown. Even worse,
 on bigger batches, the program simply crashes.
 .. code-block::
    ------------------------------
    Streaming no batching
    100%|█████████████████████████████████████████████████████████████████████| 1000/1000 [00:05<00:00, 183.69it/s]
    ------------------------------
    Streaming batch_size=8
    100%|█████████████████████████████████████████████████████████████████████| 1000/1000 [00:03<00:00, 265.74it/s]
    ------------------------------
    Streaming batch_size=64
    100%|██████████████████████████████████████████████████████████████████████| 1000/1000 [00:26<00:00, 37.80it/s]
    ------------------------------
    Streaming batch_size=256
      0%|                                                                                 | 0/1000 [00:00<?, ?it/s]
    Traceback (most recent call last):
      File "/home/nicolas/src/transformers/test.py", line 42, in <module>
        for out in tqdm.tqdm(pipe(dataset, batch_size=256), total=len(dataset)):
    ....
        q = q / math.sqrt(dim_per_head)  # (bs, n_heads, q_length, dim_per_head)
    RuntimeError: CUDA out of memory. Tried to allocate 376.00 MiB (GPU 0; 3.95 GiB total capacity; 1.72 GiB already allocated; 354.88 MiB free; 2.46 GiB reserved in total by PyTorch)
 There are no good (general) solutions for this problem, and your mileage may vary depending on your use cases. For
 users, a rule of thumb is:
 - **Measure performance on your load, with your hardware. Measure, measure, and keep measuring. Real numbers are the
  only way to go.**
 - If you are latency constrained (live product doing inference), don't batch
 - If you are using CPU, don't batch.
 - If you are optimizing for throughput (you want to run your model on a bunch of static data), on GPU, then:
      - If you have no clue about the size of the sequence_length ("natural" data), by default don't batch, measure and
        try tentatively to add it, add OOM checks to recover when it will fail (and it will at some point if you don't
        control the sequence_length.)
      - If your sequence_length is super regular, then batching is more likely to be VERY interesting, measure and push
        it until you get OOMs.
      - The larger the GPU the more likely batching is going to be more interesting
 - As soon as you enable batching, make sure you can handle OOMs nicely.
 Implementing a pipeline
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
--- a/src/transformers/init.py
+++ b/src/transformers/init.py
@@ -584,6 +584,7 @@ if is_torch_available():
        [
            "MODEL_FOR_AUDIO_CLASSIFICATION_MAPPING",
            "MODEL_FOR_CAUSAL_LM_MAPPING",
            "MODEL_FOR_CTC_MAPPING",
            "MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING",
            "MODEL_FOR_IMAGE_SEGMENTATION_MAPPING",
            "MODEL_FOR_MASKED_LM_MAPPING",
@@ -594,6 +595,7 @@ if is_torch_available():
            "MODEL_FOR_QUESTION_ANSWERING_MAPPING",
            "MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING",
            "MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING",
            "MODEL_FOR_SPEECH_SEQ_2_SEQ_MAPPING",
            "MODEL_FOR_TABLE_QUESTION_ANSWERING_MAPPING",
            "MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING",
            "MODEL_MAPPING",
@@ -2430,6 +2432,7 @@ if TYPE_CHECKING:
        from .models.auto import (
            MODEL_FOR_AUDIO_CLASSIFICATION_MAPPING,
            MODEL_FOR_CAUSAL_LM_MAPPING,
            MODEL_FOR_CTC_MAPPING,
            MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING,
            MODEL_FOR_IMAGE_SEGMENTATION_MAPPING,
            MODEL_FOR_MASKED_LM_MAPPING,
@@ -2440,6 +2443,7 @@ if TYPE_CHECKING:
            MODEL_FOR_QUESTION_ANSWERING_MAPPING,
            MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING,
            MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING,
            MODEL_FOR_SPEECH_SEQ_2_SEQ_MAPPING,
            MODEL_FOR_TABLE_QUESTION_ANSWERING_MAPPING,
            MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING,
            MODEL_MAPPING,
--- a/src/transformers/pipelines/automatic_speech_recognition.py
+++ b/src/transformers/pipelines/automatic_speech_recognition.py
@@ -169,6 +169,10 @@ class AutomaticSpeechRecognitionPipeline(Pipeline):
        elif model_class in MODEL_FOR_CTC_MAPPING.values():
            outputs = self.model(**model_inputs)
            tokens = outputs.logits.squeeze(0).argmax(dim=-1)
        else:
            logger.warning("This is an unknown class, treating it as CTC.")
            outputs = self.model(**model_inputs)
            tokens = outputs.logits.squeeze(0).argmax(dim=-1)
        return tokens
    def postprocess(self, model_outputs):
--- a/src/transformers/pipelines/base.py
+++ b/src/transformers/pipelines/base.py
@@ -25,6 +25,7 @@ from contextlib import contextmanager
 from os.path import abspath, exists
 from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, Union
 import numpy as np
 from packaging import version
 from ..feature_extraction_utils import PreTrainedFeatureExtractor
@@ -59,12 +60,80 @@ if TYPE_CHECKING:
 logger = logging.get_logger(__name__)
-def collate_fn(items):
+def no_collate_fn(items):
    if len(items) != 1:
        raise ValueError("This collate_fn is meant to be used with batch_size=1")
    return items[0]
 def _pad(items, key, padding_value, padding_side):
    batch_size = len(items)
    if isinstance(items[0][key], torch.Tensor):
        # Others include `attention_mask` etc...
        shape = items[0][key].shape
        dim = len(shape)
        if dim == 4:
            # This is probable image so padding shouldn't be necessary
            # B, C, H, W
            return torch.cat([item[key] for item in items], dim=0)
        max_length = max(item[key].shape[1] for item in items)
        dtype = items[0][key].dtype
        if dim == 2:
            tensor = torch.zeros((batch_size, max_length), dtype=dtype) + padding_value
        elif dim == 3:
            tensor = torch.zeros((batch_size, max_length, shape[-1]), dtype=dtype) + padding_value
        for i, item in enumerate(items):
            if dim == 2:
                if padding_side == "left":
                    tensor[i, -len(item[key][0]) :] = item[key][0].clone()
                else:
                    tensor[i, : len(item[key][0])] = item[key][0].clone()
            elif dim == 3:
                if padding_side == "left":
                    tensor[i, -len(item[key][0]) :, :] = item[key][0].clone()
                else:
                    tensor[i, : len(item[key][0]), :] = item[key][0].clone()
        return tensor
    else:
        return [item[key] for item in items]
 def pad_collate_fn(tokenizer, feature_extractor):
    """
    Build a ``collate_fn`` that pads and batches the dictionaries produced for individual items.

    The padding value and padding side are read from the tokenizer (``pad_token_id`` / ``padding_side``) and then
    from the feature extractor (``padding_value`` / ``padding_side``). A feature extractor only overrides the
    tokenizer settings for the attributes it actually defines, so an extractor without padding information (images
    for instance) no longer erases a valid tokenizer configuration.

    Args:
        tokenizer: Object exposing ``pad_token_id`` and ``padding_side``, or :obj:`None`.
        feature_extractor: Object optionally exposing ``padding_value`` and ``padding_side``, or :obj:`None`.

    Returns:
        A function taking a list of dictionaries sharing the same keys and returning one dictionary of batched
        values.

    Raises:
        :obj:`ValueError`: If both arguments are :obj:`None`, or if the tokenizer has no ``pad_token_id``.
    """
    if tokenizer is None and feature_extractor is None:
        raise ValueError("Pipeline without tokenizer or feature_extractor cannot do batching")

    padding_value = None
    padding_side = "right"
    if tokenizer is not None:
        if tokenizer.pad_token_id is None:
            raise ValueError(
                "Pipeline with tokenizer without pad_token cannot do batching. You can try to set it with "
                "`pipe.tokenizer.pad_token_id = model.config.eos_token_id`."
            )
        padding_value = tokenizer.pad_token_id
        padding_side = tokenizer.padding_side
    if feature_extractor is not None:
        # Feature extractor can be images, where no padding is expected: only override what it really defines.
        extractor_padding_value = getattr(feature_extractor, "padding_value", None)
        extractor_padding_side = getattr(feature_extractor, "padding_side", None)
        if extractor_padding_value is not None:
            padding_value = extractor_padding_value
        if extractor_padding_side is not None:
            padding_side = extractor_padding_side

    def inner(items):
        keys = set(items[0].keys())
        for item in items:
            if set(item.keys()) != keys:
                raise ValueError(
                    f"The elements of the batch contain different keys. Cannot batch them ({set(item.keys())} != {keys})"
                )
        # input_values, input_pixels, input_ids, ... use the model padding value, every other entry is padded with 0
        padded = {
            key: _pad(items, key, padding_value if key.startswith("input_") else 0, padding_side) for key in keys
        }
        return padded

    return inner
 def infer_framework_load_model(
    model,
    config: AutoConfig,
@@ -591,6 +660,13 @@ PIPELINE_INIT_ARGS = r"""
            is provided.
        task (:obj:`str`, defaults to :obj:`""`):
            A task-identifier for the pipeline.
        num_workers (:obj:`int`, `optional`, defaults to 8):
            When the pipeline will use `DataLoader` (when passing a dataset, on GPU for a Pytorch model), the number of
            workers to be used.
        batch_size (:obj:`int`, `optional`, defaults to 1):
            When the pipeline will use `DataLoader` (when passing a dataset, on GPU for a Pytorch model), the size of
            the batch to use, for inference this is not always beneficial, please read `Batching with pipelines
            <https://huggingface.co/transformers/main_classes/pipelines.html#pipeline-batching>`_ .
        args_parser (:class:`~transformers.pipelines.ArgumentHandler`, `optional`):
            Reference to the object in charge of parsing supplied pipeline parameters.
        device (:obj:`int`, `optional`, defaults to -1):
@@ -617,10 +693,44 @@ if is_torch_available():
            return processed
    class PipelineIterator(IterableDataset):
-        def __init__(self, loader, infer, params):
+        def __init__(self, loader, infer, params, loader_batch_size=None):
            """
            Roughly equivalent to
            .. code-block::
                for item in loader:
                    yield infer(item, **params)
            Arguments:
                loader (:obj:`torch.utils.data.DataLoader` or any iterator):
                    The iterator that will be used to apply :obj:`infer` on.
                infer (any function):
                    The function to apply of each element of :obj:`loader`.
                params (:obj:`dict`):
                    The parameters passed to :obj:`infer` along with every item
                loader_batch_size (:obj:`int`, `optional`):
                    If specified, the items of :obj:`loader` are supposed to come as batch, and are loader_batched here
                    making it roughly behave as
                    .. code-block::
                        for items in loader:
                            for i in loader_batch_size:
                                item = items[i]
                                yield infer(item, **params)
            """
            self.loader = loader
            self.infer = infer
            self.params = params
            if loader_batch_size == 1:
                # Let's spare some time by deactivating altogether
                loader_batch_size = None
            self.loader_batch_size = loader_batch_size
            # Internal bookkeeping
            self._loader_batch_index = None
            self._loader_batch_data = None
        def __len__(self):
            return len(self.loader)
@@ -629,9 +739,48 @@ if is_torch_available():
            self.iterator = iter(self.loader)
            return self
        def loader_batch_item(self):
            if isinstance(self._loader_batch_data, torch.Tensor):
                result = self._loader_batch_data[self._loader_batch_index]
            else:
                loader_batched = {}
                for k, element in self._loader_batch_data.items():
                    if k == "past_key_values":
                        continue
                    if isinstance(element[self._loader_batch_index], torch.Tensor):
                        loader_batched[k] = element[self._loader_batch_index].unsqueeze(0)
                    elif isinstance(element[self._loader_batch_index], np.ndarray):
                        loader_batched[k] = np.expand_dims(element[self._loader_batch_index], 0)
                    else:
                        loader_batched[k] = element[self._loader_batch_index]
                result = self._loader_batch_data.__class__(loader_batched)
            self._loader_batch_index += 1
            return result
        def __next__(self):
            if self._loader_batch_index is not None and self._loader_batch_index < self.loader_batch_size:
                return self.loader_batch_item()
            item = next(self.iterator)
            processed = self.infer(item, **self.params)
            if self.loader_batch_size is not None:
                if isinstance(processed, torch.Tensor):
                    first_tensor = processed
                else:
                    key = list(processed.keys())[0]
                    first_tensor = processed[key]
                if isinstance(first_tensor, list):
                    observed_batch_size = len(first_tensor)
                else:
                    observed_batch_size = first_tensor.shape[0]
                if 0 < observed_batch_size < self.loader_batch_size:
                    # Could be last batch so we can't unroll as many
                    # elements.
                    self.loader_batch_size = observed_batch_size
                self._loader_batch_data = processed
                self._loader_batch_index = 0
                return self.loader_batch_item()
            else:
                return processed
    class KeyDataset(Dataset):
@@ -881,17 +1030,20 @@ class Pipeline(_ScikitCompat):
                raise ValueError(f"Framework {self.framework} is not supported")
        return model_outputs
-    def get_iterator(self, inputs, num_workers: int, preprocess_params, forward_params, postprocess_params):
+    def get_iterator(
        self, inputs, num_workers: int, batch_size: int, preprocess_params, forward_params, postprocess_params
    ):
        if "TOKENIZERS_PARALLELISM" not in os.environ:
            logger.info("Disabling tokenizer parallelism, we're using DataLoader multithreading already")
            os.environ["TOKENIZERS_PARALLELISM"] = "false"
        dataset = PipelineDataset(inputs, self.preprocess, preprocess_params)
-        dataloader = DataLoader(dataset, num_workers=num_workers, batch_size=1, collate_fn=collate_fn)
+        collate_fn = no_collate_fn if batch_size == 1 else pad_collate_fn(self.tokenizer, self.feature_extractor)
-        model_iterator = PipelineIterator(dataloader, self.forward, forward_params)
+        dataloader = DataLoader(dataset, num_workers=num_workers, batch_size=batch_size, collate_fn=collate_fn)
        model_iterator = PipelineIterator(dataloader, self.forward, forward_params, loader_batch_size=batch_size)
        final_iterator = PipelineIterator(model_iterator, self.postprocess, postprocess_params)
        return final_iterator
-    def __call__(self, inputs, *args, num_workers=8, **kwargs):
+    def __call__(self, inputs, *args, num_workers=0, batch_size=1, **kwargs):
        if args:
            logger.warning(f"Ignoring args : {args}")
        preprocess_params, forward_params, postprocess_params = self._sanitize_parameters(**kwargs)
@@ -910,14 +1062,16 @@ class Pipeline(_ScikitCompat):
        if isinstance(inputs, list):
            if self.framework == "pt":
                final_iterator = self.get_iterator(
-                    inputs, num_workers, preprocess_params, forward_params, postprocess_params
+                    inputs, num_workers, batch_size, preprocess_params, forward_params, postprocess_params
                )
                outputs = [output for output in final_iterator]
                return outputs
            else:
                return self.run_multi(inputs, preprocess_params, forward_params, postprocess_params)
        elif Dataset is not None and isinstance(inputs, Dataset):
-            return self.get_iterator(inputs, num_workers, preprocess_params, forward_params, postprocess_params)
+            return self.get_iterator(
                inputs, num_workers, batch_size, preprocess_params, forward_params, postprocess_params
            )
        else:
            return self.run_single(inputs, preprocess_params, forward_params, postprocess_params)
--- a/src/transformers/pipelines/conversational.py
+++ b/src/transformers/pipelines/conversational.py
@@ -243,7 +243,7 @@ class ConversationalPipeline(Pipeline):
            return outputs[0]
        return outputs
-    def preprocess(self, conversation: Conversation) -> Dict[str, Any]:
+    def preprocess(self, conversation: Conversation, min_length_for_response=32) -> Dict[str, Any]:
        if not isinstance(conversation, Conversation):
            raise ValueError("ConversationalPipeline, expects Conversation as inputs")
        if conversation.new_user_input is None:
@@ -274,18 +274,18 @@ class ConversationalPipeline(Pipeline):
            if "attention_mask" in model_inputs:
                model_inputs["attention_mask"] = model_inputs["attention_mask"][:, -trim:]
        conversation = model_inputs.pop("conversation")
-        model_inputs["max_length"] = max_length
+        generate_kwargs["max_length"] = max_length
        output_ids = self.model.generate(**model_inputs, **generate_kwargs)
        if self.model.config.is_encoder_decoder:
            start_position = 1
        else:
            start_position = n
-        return {"output_ids": output_ids[0, start_position:], "conversation": conversation}
+        return {"output_ids": output_ids[:, start_position:], "conversation": conversation}
    def postprocess(self, model_outputs, clean_up_tokenization_spaces=True):
        output_ids = model_outputs["output_ids"]
        answer = self.tokenizer.decode(
-            output_ids,
+            output_ids[0],
            skip_special_tokens=True,
            clean_up_tokenization_spaces=clean_up_tokenization_spaces,
        )
--- a/src/transformers/pipelines/fill_mask.py
+++ b/src/transformers/pipelines/fill_mask.py
@@ -89,14 +89,14 @@ class FillMaskPipeline(Pipeline):
    def _forward(self, model_inputs):
        model_outputs = self.model(**model_inputs)
-        model_outputs["input_ids"] = model_inputs["input_ids"][0]
+        model_outputs["input_ids"] = model_inputs["input_ids"]
        return model_outputs
    def postprocess(self, model_outputs, top_k=5, target_ids=None):
        # Cap top_k if there are targets
        if target_ids is not None and target_ids.shape[0] < top_k:
            top_k = target_ids.shape[0]
-        input_ids = model_outputs["input_ids"]
+        input_ids = model_outputs["input_ids"][0]
        outputs = model_outputs["logits"]
        result = []
--- a/src/transformers/pipelines/object_detection.py
+++ b/src/transformers/pipelines/object_detection.py
@@ -114,11 +114,12 @@ class ObjectDetectionPipeline(Pipeline):
    def _forward(self, model_inputs):
        target_size = model_inputs.pop("target_size")
        outputs = self.model(**model_inputs)
-        model_outputs = {"outputs": outputs, "target_size": target_size}
+        model_outputs = outputs.__class__({"target_size": target_size, **outputs})
        return model_outputs
    def postprocess(self, model_outputs, threshold=0.9):
-        raw_annotations = self.feature_extractor.post_process(model_outputs["outputs"], model_outputs["target_size"])
+        target_size = model_outputs["target_size"]
        raw_annotations = self.feature_extractor.post_process(model_outputs, target_size)
        raw_annotation = raw_annotations[0]
        keep = raw_annotation["scores"] > threshold
        scores = raw_annotation["scores"][keep]
--- a/src/transformers/pipelines/question_answering.py
+++ b/src/transformers/pipelines/question_answering.py
@@ -8,9 +8,12 @@ from ..data import SquadExample, SquadFeatures, squad_convert_examples_to_featur
 from ..file_utils import PaddingStrategy, add_end_docstrings, is_tf_available, is_torch_available
 from ..modelcard import ModelCard
 from ..tokenization_utils import PreTrainedTokenizer
 from ..utils import logging
 from .base import PIPELINE_INIT_ARGS, ArgumentHandler, Pipeline
 logger = logging.get_logger(__name__)
 if TYPE_CHECKING:
    from ..modeling_tf_utils import TFPreTrainedModel
    from ..modeling_utils import PreTrainedModel
@@ -241,6 +244,9 @@ class QuestionAnsweringPipeline(Pipeline):
            - **end** (:obj:`int`) -- The character end index of the answer (in the tokenized version of the input).
            - **answer** (:obj:`str`) -- The answer to the question.
        """
        if kwargs.get("batch_size", 1) > 1:
            logger.error("Batch_size > 1 is not supported for question answering pipeline, setting it to 1.")
            kwargs["batch_size"] = 1
        # Convert inputs to features
        examples = self._args_parser(*args, **kwargs)
--- a/src/transformers/pipelines/token_classification.py
+++ b/src/transformers/pipelines/token_classification.py
@@ -204,12 +204,12 @@ class TokenClassificationPipeline(Pipeline):
        offset_mapping = model_inputs.pop("offset_mapping", None)
        sentence = model_inputs.pop("sentence")
        if self.framework == "tf":
-            outputs = self.model(model_inputs.data)[0][0]
+            logits = self.model(model_inputs.data)[0]
        else:
-            outputs = self.model(**model_inputs)[0][0]
+            logits = self.model(**model_inputs)[0]
        return {
-            "outputs": outputs,
+            "logits": logits,
            "special_tokens_mask": special_tokens_mask,
            "offset_mapping": offset_mapping,
            "sentence": sentence,
@@ -217,13 +217,16 @@ class TokenClassificationPipeline(Pipeline):
        }
    def postprocess(self, model_outputs, aggregation_strategy=AggregationStrategy.NONE):
-        outputs = model_outputs["outputs"].numpy()
+        logits = model_outputs["logits"][0].numpy()
        sentence = model_outputs["sentence"]
        input_ids = model_outputs["input_ids"][0]
        offset_mapping = model_outputs["offset_mapping"][0] if model_outputs["offset_mapping"] is not None else None
        special_tokens_mask = model_outputs["special_tokens_mask"][0].numpy()
-        scores = np.exp(outputs) / np.exp(outputs).sum(-1, keepdims=True)
+        maxes = np.max(logits, axis=-1, keepdims=True)
        shifted_exp = np.exp(logits - maxes)
        scores = shifted_exp / shifted_exp.sum(axis=-1, keepdims=True)
        pre_entities = self.gather_pre_entities(
            sentence, input_ids, scores, offset_mapping, special_tokens_mask, aggregation_strategy
        )
--- a/src/transformers/pipelines/zero_shot_classification.py
+++ b/src/transformers/pipelines/zero_shot_classification.py
@@ -183,6 +183,9 @@ class ZeroShotClassificationPipeline(Pipeline):
            - **labels** (:obj:`List[str]`) -- The labels sorted by order of likelihood.
            - **scores** (:obj:`List[float]`) -- The probabilities for each of the labels.
        """
        if kwargs.get("batch_size", 1) > 1:
            logger.error("Batch size > 1 is not supported for zero-shot pipeline, setting batch_size=1.")
            kwargs["batch_size"] = 1
        if len(args) == 0:
            pass
--- a/src/transformers/utils/dummy_pt_objects.py
+++ b/src/transformers/utils/dummy_pt_objects.py
@@ -313,6 +313,9 @@ MODEL_FOR_AUDIO_CLASSIFICATION_MAPPING = None
 MODEL_FOR_CAUSAL_LM_MAPPING = None
 MODEL_FOR_CTC_MAPPING = None
 MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING = None
@@ -343,6 +346,9 @@ MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING = None
 MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING = None
 MODEL_FOR_SPEECH_SEQ_2_SEQ_MAPPING = None
 MODEL_FOR_TABLE_QUESTION_ANSWERING_MAPPING = None
--- a/tests/test_pipelines_audio_classification.py
+++ b/tests/test_pipelines_audio_classification.py
@@ -24,6 +24,7 @@ from transformers.testing_utils import (
    require_datasets,
    require_tf,
    require_torch,
    require_torchaudio,
    slow,
 )
@@ -35,15 +36,16 @@ from .test_pipelines_common import ANY, PipelineTestCaseMeta
 class AudioClassificationPipelineTests(unittest.TestCase, metaclass=PipelineTestCaseMeta):
    model_mapping = MODEL_FOR_AUDIO_CLASSIFICATION_MAPPING
-    @require_datasets
+    def get_test_pipeline(self, model, tokenizer, feature_extractor):
    @slow
    def run_pipeline_test(self, model, tokenizer, feature_extractor):
        import datasets
        audio_classifier = AudioClassificationPipeline(model=model, feature_extractor=feature_extractor)
        # test with a raw waveform
        audio = np.zeros((34000,))
        audio2 = np.zeros((14000,))
        return audio_classifier, [audio2, audio]
    def run_pipeline_test(self, audio_classifier, examples):
        audio2, audio = examples
        output = audio_classifier(audio)
        # by default a model is initialized with num_labels=2
        self.assertEqual(
@@ -61,10 +63,17 @@ class AudioClassificationPipelineTests(unittest.TestCase, metaclass=PipelineTest
            ],
        )
        self.run_torchaudio(audio_classifier)
    @require_datasets
    @require_torchaudio
    def run_torchaudio(self, audio_classifier):
        import datasets
        # test with a local file
        dataset = datasets.load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
-        filename = dataset[0]["file"]
+        audio = dataset[0]["audio"]["array"]
-        output = audio_classifier(filename)
+        output = audio_classifier(audio)
        self.assertEqual(
            output,
            [
--- a/tests/test_pipelines_automatic_speech_recognition.py
+++ b/tests/test_pipelines_automatic_speech_recognition.py
@@ -14,11 +14,28 @@
 import unittest
 import numpy as np
 import pytest
-from transformers import AutoFeatureExtractor, AutoTokenizer, Speech2TextForConditionalGeneration, Wav2Vec2ForCTC
+from transformers import (
    MODEL_FOR_CTC_MAPPING,
    MODEL_FOR_SPEECH_SEQ_2_SEQ_MAPPING,
    AutoFeatureExtractor,
    AutoTokenizer,
    Speech2TextForConditionalGeneration,
    Wav2Vec2ForCTC,
 )
 from transformers.pipelines import AutomaticSpeechRecognitionPipeline, pipeline
-from transformers.testing_utils import is_pipeline_test, require_datasets, require_torch, require_torchaudio, slow
+from transformers.testing_utils import (
    is_pipeline_test,
    require_datasets,
    require_tf,
    require_torch,
    require_torchaudio,
    slow,
 )
 from .test_pipelines_common import ANY, PipelineTestCaseMeta
 # We can't use this mixin because it assumes TF support.
@@ -26,14 +43,42 @@ from transformers.testing_utils import is_pipeline_test, require_datasets, requi
@is_pipeline_test
-class AutomaticSpeechRecognitionPipelineTests(unittest.TestCase):
+class AutomaticSpeechRecognitionPipelineTests(unittest.TestCase, metaclass=PipelineTestCaseMeta):
    model_mapping = {
        k: v
        for k, v in (list(MODEL_FOR_SPEECH_SEQ_2_SEQ_MAPPING.items()) if MODEL_FOR_SPEECH_SEQ_2_SEQ_MAPPING else [])
        + (MODEL_FOR_CTC_MAPPING.items() if MODEL_FOR_CTC_MAPPING else [])
    }
    def get_test_pipeline(self, model, tokenizer, feature_extractor):
        if tokenizer is None:
            # Side effect of there being no Fast Tokenizer class for these models, so skipping
            # But the slow tokenizer test should still run as they're quite small
            self.skipTest("No tokenizer available")
            return
            # return None, None
        speech_recognizer = AutomaticSpeechRecognitionPipeline(
            model=model, tokenizer=tokenizer, feature_extractor=feature_extractor
        )
        # test with a raw waveform
        audio = np.zeros((34000,))
        audio2 = np.zeros((14000,))
        return speech_recognizer, [audio, audio2]
    def run_pipeline_test(self, speech_recognizer, examples):
        audio = np.zeros((34000,))
        outputs = speech_recognizer(audio)
        self.assertEqual(outputs, {"text": ANY(str)})
    @require_torch
    @slow
    def test_pt_defaults(self):
        pipeline("automatic-speech-recognition", framework="pt")
    @require_torch
-    def test_torch_small(self):
+    def test_small_model_pt(self):
        import numpy as np
        speech_recognizer = pipeline(
@@ -46,6 +91,10 @@ class AutomaticSpeechRecognitionPipelineTests(unittest.TestCase):
        output = speech_recognizer(waveform)
        self.assertEqual(output, {"text": "(Applaudissements)"})
    @require_tf
    def test_small_model_tf(self):
        self.skipTest("Tensorflow not supported yet.")
    @require_torch
    def test_torch_small_no_tokenizer_files(self):
        # test that model without tokenizer file cannot be loaded
--- a/tests/test_pipelines_common.py
+++ b/tests/test_pipelines_common.py
@@ -12,8 +12,10 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import copy
 import importlib
 import logging
 import random
 import string
 import unittest
 from abc import abstractmethod
@@ -21,6 +23,7 @@ from functools import lru_cache
 from unittest import skipIf
 from transformers import FEATURE_EXTRACTOR_MAPPING, TOKENIZER_MAPPING, AutoFeatureExtractor, AutoTokenizer, pipeline
 from transformers.pipelines.base import _pad
 from transformers.testing_utils import is_pipeline_test, require_torch
@@ -73,6 +76,12 @@ def get_tiny_config_from_class(configuration_class):
@lru_cache(maxsize=100)
 def get_tiny_tokenizer_from_checkpoint(checkpoint):
    tokenizer = AutoTokenizer.from_pretrained(checkpoint)
    if tokenizer.vocab_size < 300:
        # Wav2Vec2ForCTC for instance
        # ByT5Tokenizer
        # all are already small enough and have no Fast version that can
        # be retrained
        return tokenizer
    logger.info("Training new from iterator ...")
    vocabulary = string.ascii_letters + string.digits + " "
    tokenizer = tokenizer.train_new_from_iterator(vocabulary, vocab_size=len(vocabulary), show_progress=False)
@@ -87,6 +96,12 @@ def get_tiny_feature_extractor_from_checkpoint(checkpoint, tiny_config):
        feature_extractor = None
    if hasattr(tiny_config, "image_size") and feature_extractor:
        feature_extractor = feature_extractor.__class__(size=tiny_config.image_size, crop_size=tiny_config.image_size)
    # Speech2TextModel specific.
    if hasattr(tiny_config, "input_feat_per_channel") and feature_extractor:
        feature_extractor = feature_extractor.__class__(
            feature_size=tiny_config.input_feat_per_channel, num_mel_bins=tiny_config.input_feat_per_channel
        )
    return feature_extractor
@@ -136,7 +151,26 @@ class PipelineTestCaseMeta(type):
                else:
                    tokenizer = None
                feature_extractor = get_tiny_feature_extractor_from_checkpoint(checkpoint, tiny_config)
-                self.run_pipeline_test(model, tokenizer, feature_extractor)
+                pipeline, examples = self.get_test_pipeline(model, tokenizer, feature_extractor)
                if pipeline is None:
                    # The test can disable itself, but it should be very marginal
                    # Concerns: Wav2Vec2ForCTC without tokenizer test (a FastTokenizer doesn't exist)
                    return
                self.run_pipeline_test(pipeline, examples)
                def run_batch_test(pipeline, examples):
                    """
                    Smoke-test batched inference: feed `pipeline` 10 examples sampled from `examples`
                    with `batch_size=4` and consume every output.
                    """
                    # A tokenizer without a pad token id means this pipeline is skipped here
                    if pipeline.tokenizer is not None and pipeline.tokenizer.pad_token_id is None:
                        return  # No batching for this and it's OK
                    # 10 examples with batch size 4 means there needs to be an unfinished batch
                    # which is important for the unbatcher
                    # Each sampled example is deep-copied because `Conversation` objects are stateful
                    dataset = [copy.deepcopy(random.choice(examples)) for i in range(10)]
                    # Outputs are not inspected; the loop only consumes them
                    for item in pipeline(dataset, batch_size=4):
                        pass
                run_batch_test(pipeline, examples)
            return test
@@ -211,3 +245,85 @@ class CommonPipelineTest(unittest.TestCase):
        dataset = MyDataset()
        for output in text_classifier(dataset):
            self.assertEqual(output, {"label": ANY(str), "score": ANY(float)})
@is_pipeline_test
 class PipelinePadTest(unittest.TestCase):
    @require_torch
    def test_pipeline_padding(self):
        """
        Check `_pad` on two tokenized items of different lengths (4 and 6 tokens): non-tensor values are
        expected back as a plain list, tensors are expected padded to the longest item on the requested side.
        """
        import torch
        # Each item holds tensors with a leading dimension of 1, plus a non-tensor "label" entry
        items = [
            {
                "label": "label1",
                "input_ids": torch.LongTensor([[1, 23, 24, 2]]),
                "attention_mask": torch.LongTensor([[0, 1, 1, 0]]),
            },
            {
                "label": "label2",
                "input_ids": torch.LongTensor([[1, 23, 24, 43, 44, 2]]),
                "attention_mask": torch.LongTensor([[0, 1, 1, 1, 1, 0]]),
            },
        ]
        # Non-tensor values: the padding value and side are irrelevant, a list of the raw values is expected
        self.assertEqual(_pad(items, "label", 0, "right"), ["label1", "label2"])
        # Right padding: the shorter row gets two trailing pad values (10)
        self.assertTrue(
            torch.allclose(
                _pad(items, "input_ids", 10, "right"),
                torch.LongTensor([[1, 23, 24, 2, 10, 10], [1, 23, 24, 43, 44, 2]]),
            )
        )
        # Left padding: the shorter row gets two leading pad values (10)
        self.assertTrue(
            torch.allclose(
                _pad(items, "input_ids", 10, "left"),
                torch.LongTensor([[10, 10, 1, 23, 24, 2], [1, 23, 24, 43, 44, 2]]),
            )
        )
        # Same check on the attention mask, this time padded with 0 on the right
        self.assertTrue(
            torch.allclose(
                _pad(items, "attention_mask", 0, "right"), torch.LongTensor([[0, 1, 1, 0, 0, 0], [0, 1, 1, 1, 1, 0]])
            )
        )
    @require_torch
    def test_pipeline_image_padding(self):
        """
        Check `_pad` on two image items of identical shape `(1, 3, 10, 10)`: the expected result is a single
        `(2, 3, 10, 10)` tensor.
        """
        import torch
        items = [
            {
                "label": "label1",
                "pixel_values": torch.zeros((1, 3, 10, 10)),
            },
            {
                "label": "label2",
                "pixel_values": torch.zeros((1, 3, 10, 10)),
            },
        ]
        # Non-tensor values are expected back as a plain list
        self.assertEqual(_pad(items, "label", 0, "right"), ["label1", "label2"])
        # Both images already share the same shape, so the padding value (10) must not appear in the
        # output: the expected tensor is all zeros.
        self.assertTrue(
            torch.allclose(
                _pad(items, "pixel_values", 10, "right"),
                torch.zeros((2, 3, 10, 10)),
            )
        )
    @require_torch
    def test_pipeline_offset_mapping(self):
        """
        Check `_pad` on 3-dimensional `offset_mappings` tensors of shapes `(1, 11, 2)` and `(1, 4, 2)`: the
        expected result has shape `(2, 11, 2)`.
        """
        import torch
        items = [
            {
                "offset_mappings": torch.zeros([1, 11, 2], dtype=torch.long),
            },
            {
                "offset_mappings": torch.zeros([1, 4, 2], dtype=torch.long),
            },
        ]
        # Inputs and padding value are all zeros, so this effectively verifies the padded shape:
        # the shorter item must be extended along dimension 1 from 4 to 11.
        self.assertTrue(
            torch.allclose(
                _pad(items, "offset_mappings", 0, "right"),
                torch.zeros((2, 11, 2), dtype=torch.long),
            ),
        )
--- a/tests/test_pipelines_conversational.py
+++ b/tests/test_pipelines_conversational.py
@@ -54,8 +54,11 @@ class ConversationalPipelineTests(unittest.TestCase, metaclass=PipelineTestCaseM
        else []
    )
-    def run_pipeline_test(self, model, tokenizer, feature_extractor):
+    def get_test_pipeline(self, model, tokenizer, feature_extractor):
        conversation_agent = ConversationalPipeline(model=model, tokenizer=tokenizer)
        return conversation_agent, [Conversation("Hi there!")]
    def run_pipeline_test(self, conversation_agent, _):
        # Simple
        outputs = conversation_agent(Conversation("Hi there!"))
        self.assertEqual(outputs, Conversation(past_user_inputs=["Hi there!"], generated_responses=[ANY(str)]))
--- a/tests/test_pipelines_feature_extraction.py
+++ b/tests/test_pipelines_feature_extraction.py
@@ -14,7 +14,15 @@
 import unittest
-from transformers import MODEL_MAPPING, TF_MODEL_MAPPING, CLIPConfig, FeatureExtractionPipeline, LxmertConfig, pipeline
+from transformers import (
    MODEL_MAPPING,
    TF_MODEL_MAPPING,
    CLIPConfig,
    FeatureExtractionPipeline,
    LxmertConfig,
    Wav2Vec2Config,
    pipeline,
 )
 from transformers.testing_utils import is_pipeline_test, nested_simplify, require_tf, require_torch
 from .test_pipelines_common import PipelineTestCaseMeta
@@ -61,12 +69,12 @@ class FeatureExtractionPipelineTests(unittest.TestCase, metaclass=PipelineTestCa
            raise ValueError("We expect lists of floats, nothing else")
        return shape
-    def run_pipeline_test(self, model, tokenizer, feature_extractor):
+    def get_test_pipeline(self, model, tokenizer, feature_extractor):
        if tokenizer is None:
            self.skipTest("No tokenizer")
            return
-        elif isinstance(model.config, (LxmertConfig, CLIPConfig)):
+        elif isinstance(model.config, (LxmertConfig, CLIPConfig, Wav2Vec2Config)):
            self.skipTest(
                "This is an Lxmert bimodal model, we need to find a more consistent way to switch on those models."
            )
@@ -81,11 +89,12 @@ class FeatureExtractionPipelineTests(unittest.TestCase, metaclass=PipelineTestCa
            )
            return
        feature_extractor = FeatureExtractionPipeline(
            model=model, tokenizer=tokenizer, feature_extractor=feature_extractor
        )
        return feature_extractor, ["This is a test", "This is another test"]
    def run_pipeline_test(self, feature_extractor, examples):
        outputs = feature_extractor("This is a test")
        shape = self.get_shape(outputs)
--- a/tests/test_pipelines_fill_mask.py
+++ b/tests/test_pipelines_fill_mask.py
@@ -159,22 +159,32 @@ class FillMaskPipelineTests(unittest.TestCase, metaclass=PipelineTestCaseMeta):
        unmasker = pipeline(task="fill-mask", model="sshleifer/tiny-distilroberta-base", framework="pt")
        unmasker.tokenizer.pad_token_id = None
        unmasker.tokenizer.pad_token = None
-        self.run_pipeline_test(unmasker.model, unmasker.tokenizer, None)
+        self.run_pipeline_test(unmasker, [])
    @require_tf
    def test_model_no_pad_tf(self):
        unmasker = pipeline(task="fill-mask", model="sshleifer/tiny-distilroberta-base", framework="tf")
        unmasker.tokenizer.pad_token_id = None
        unmasker.tokenizer.pad_token = None
-        self.run_pipeline_test(unmasker.model, unmasker.tokenizer, None)
+        self.run_pipeline_test(unmasker, [])
-    def run_pipeline_test(self, model, tokenizer, feature_extractor):
+    def get_test_pipeline(self, model, tokenizer, feature_extractor):
        if tokenizer is None or tokenizer.mask_token_id is None:
            self.skipTest("The provided tokenizer has no mask token, (probably reformer or wav2vec2)")
        fill_masker = FillMaskPipeline(model=model, tokenizer=tokenizer)
        examples = [
            f"This is another {tokenizer.mask_token} test",
        ]
        return fill_masker, examples
-        outputs = fill_masker(f"This is a {tokenizer.mask_token}")
+    def run_pipeline_test(self, fill_masker, examples):
        tokenizer = fill_masker.tokenizer
        model = fill_masker.model
        outputs = fill_masker(
            f"This is a {tokenizer.mask_token}",
        )
        self.assertEqual(
            outputs,
            [
--- a/tests/test_pipelines_image_classification.py
+++ b/tests/test_pipelines_image_classification.py
@@ -44,9 +44,17 @@ else:
 class ImageClassificationPipelineTests(unittest.TestCase, metaclass=PipelineTestCaseMeta):
    model_mapping = MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING
-    @require_datasets
+    def get_test_pipeline(self, model, tokenizer, feature_extractor):
-    def run_pipeline_test(self, model, tokenizer, feature_extractor):
+
        image_classifier = ImageClassificationPipeline(model=model, feature_extractor=feature_extractor)
        examples = [
            Image.open("./tests/fixtures/tests_samples/COCO/000000039769.png"),
            "http://images.cocodataset.org/val2017/000000039769.jpg",
        ]
        return image_classifier, examples
    @require_datasets
    def run_pipeline_test(self, image_classifier, examples):
        outputs = image_classifier("./tests/fixtures/tests_samples/COCO/000000039769.png")
        self.assertEqual(
--- a/tests/test_pipelines_object_detection.py
+++ b/tests/test_pipelines_object_detection.py
@@ -53,9 +53,12 @@ else:
 class ObjectDetectionPipelineTests(unittest.TestCase, metaclass=PipelineTestCaseMeta):
    model_mapping = MODEL_FOR_OBJECT_DETECTION_MAPPING
-    @require_datasets
+    def get_test_pipeline(self, model, tokenizer, feature_extractor):
    def run_pipeline_test(self, model, tokenizer, feature_extractor):
        object_detector = ObjectDetectionPipeline(model=model, feature_extractor=feature_extractor)
        return object_detector, ["./tests/fixtures/tests_samples/COCO/000000039769.png"]
    @require_datasets
    def run_pipeline_test(self, object_detector, examples):
        outputs = object_detector("./tests/fixtures/tests_samples/COCO/000000039769.png", threshold=0.0)
        self.assertGreater(len(outputs), 0)
--- a/tests/test_pipelines_question_answering.py
+++ b/tests/test_pipelines_question_answering.py
@@ -32,13 +32,20 @@ class QAPipelineTests(unittest.TestCase, metaclass=PipelineTestCaseMeta):
    model_mapping = MODEL_FOR_QUESTION_ANSWERING_MAPPING
    tf_model_mapping = TF_MODEL_FOR_QUESTION_ANSWERING_MAPPING
-    def run_pipeline_test(self, model, tokenizer, feature_extractor):
+    def get_test_pipeline(self, model, tokenizer, feature_extractor):
        if isinstance(model.config, LxmertConfig):
            # This is a bimodal model, we need to find a more consistent way
            # to switch on those models.
-            return
+            return None, None
        question_answerer = QuestionAnsweringPipeline(model, tokenizer)
        examples = [
            {"question": "Where was HuggingFace founded ?", "context": "HuggingFace was founded in Paris."},
            {"question": "In what field is HuggingFace ?", "context": "HuggingFace is  an AI startup."},
        ]
        return question_answerer, examples
    def run_pipeline_test(self, question_answerer, _):
        outputs = question_answerer(
            question="Where was HuggingFace founded ?", context="HuggingFace was founded in Paris."
        )
--- a/tests/test_pipelines_summarization.py
+++ b/tests/test_pipelines_summarization.py
@@ -36,8 +36,12 @@ class SummarizationPipelineTests(unittest.TestCase, metaclass=PipelineTestCaseMe
    model_mapping = MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING
    tf_model_mapping = TF_MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING
-    def run_pipeline_test(self, model, tokenizer, feature_extractor):
+    def get_test_pipeline(self, model, tokenizer, feature_extractor):
        summarizer = SummarizationPipeline(model=model, tokenizer=tokenizer)
        return summarizer, ["(CNN)The Palestinian Authority officially became", "Some other text"]
    def run_pipeline_test(self, summarizer, _):
        model = summarizer.model
        outputs = summarizer("(CNN)The Palestinian Authority officially became")
        self.assertEqual(outputs, [{"summary_text": ANY(str)}])
--- a/tests/test_pipelines_text2text_generation.py
+++ b/tests/test_pipelines_text2text_generation.py
@@ -30,9 +30,11 @@ class Text2TextGenerationPipelineTests(unittest.TestCase, metaclass=PipelineTest
    model_mapping = MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING
    tf_model_mapping = TF_MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING
-    def run_pipeline_test(self, model, tokenizer, feature_extractor):
+    def get_test_pipeline(self, model, tokenizer, feature_extractor):
        generator = Text2TextGenerationPipeline(model=model, tokenizer=tokenizer)
        return generator, ["Something to write", "Something else"]
    def run_pipeline_test(self, generator, _):
        outputs = generator("Something there")
        self.assertEqual(outputs, [{"generated_text": ANY(str)}])
        # These are encoder decoder, they don't just append to incoming string
--- a/tests/test_pipelines_text_classification.py
+++ b/tests/test_pipelines_text_classification.py
@@ -72,9 +72,12 @@ class TextClassificationPipelineTests(unittest.TestCase, metaclass=PipelineTestC
        outputs = text_classifier("Birds are a type of animal")
        self.assertEqual(nested_simplify(outputs), [{"label": "POSITIVE", "score": 0.988}])
-    def run_pipeline_test(self, model, tokenizer, feature_extractor):
+    def get_test_pipeline(self, model, tokenizer, feature_extractor):
        text_classifier = TextClassificationPipeline(model=model, tokenizer=tokenizer)
        return text_classifier, ["HuggingFace is in", "This is another test"]
    def run_pipeline_test(self, text_classifier, _):
        model = text_classifier.model
        # Small inputs because BartTokenizer tiny has maximum position embeddings = 22
        valid_inputs = "HuggingFace is in"
        outputs = text_classifier(valid_inputs)
--- a/tests/test_pipelines_text_generation.py
+++ b/tests/test_pipelines_text_generation.py
@@ -88,8 +88,14 @@ class TextGenerationPipelineTests(unittest.TestCase, metaclass=PipelineTestCaseM
            ],
        )
-    def run_pipeline_test(self, model, tokenizer, feature_extractor):
+    def get_test_pipeline(self, model, tokenizer, feature_extractor):
        text_generator = TextGenerationPipeline(model=model, tokenizer=tokenizer)
        return text_generator, ["This is a test", "Another test"]
    def run_pipeline_test(self, text_generator, _):
        model = text_generator.model
        tokenizer = text_generator.tokenizer
        outputs = text_generator("This is a test")
        self.assertEqual(outputs, [{"generated_text": ANY(str)}])
        self.assertTrue(outputs[0]["generated_text"].startswith("This is a test"))
--- a/tests/test_pipelines_token_classification.py
+++ b/tests/test_pipelines_token_classification.py
@@ -45,8 +45,13 @@ class TokenClassificationPipelineTests(unittest.TestCase, metaclass=PipelineTest
    model_mapping = MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING
    tf_model_mapping = TF_MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING
-    def run_pipeline_test(self, model, tokenizer, feature_extractor):
+    def get_test_pipeline(self, model, tokenizer, feature_extractor):
        token_classifier = TokenClassificationPipeline(model=model, tokenizer=tokenizer)
        return token_classifier, ["A simple string", "A simple string that is quite a bit longer"]
    def run_pipeline_test(self, token_classifier, _):
        model = token_classifier.model
        tokenizer = token_classifier.tokenizer
        outputs = token_classifier("A simple string")
        self.assertIsInstance(outputs, list)
--- a/tests/test_pipelines_translation.py
+++ b/tests/test_pipelines_translation.py
@@ -20,6 +20,7 @@ from transformers import (
    MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING,
    TF_MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING,
    MBart50TokenizerFast,
    MBartConfig,
    MBartForConditionalGeneration,
    TranslationPipeline,
    pipeline,
@@ -34,14 +35,16 @@ class TranslationPipelineTests(unittest.TestCase, metaclass=PipelineTestCaseMeta
    model_mapping = MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING
    tf_model_mapping = TF_MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING
-    def run_pipeline_test(self, model, tokenizer, feature_extractor):
+    def get_test_pipeline(self, model, tokenizer, feature_extractor):
        if isinstance(model.config, MBartConfig):
            src_lang, tgt_lang = list(tokenizer.lang_code_to_id.keys())[:2]
            translator = TranslationPipeline(model=model, tokenizer=tokenizer, src_lang=src_lang, tgt_lang=tgt_lang)
        else:
            translator = TranslationPipeline(model=model, tokenizer=tokenizer)
-        try:
+        return translator, ["Some string", "Some other text"]
    def run_pipeline_test(self, translator, _):
        outputs = translator("Some string")
        except ValueError:
            # Triggered by m2m languages
            src_lang, tgt_lang = list(translator.tokenizer.lang_code_to_id.keys())[:2]
            outputs = translator("Some string", src_lang=src_lang, tgt_lang=tgt_lang)
        self.assertEqual(outputs, [{"translation_text": ANY(str)}])
    @require_torch
--- a/tests/test_pipelines_zero_shot.py
+++ b/tests/test_pipelines_zero_shot.py
@@ -31,9 +31,13 @@ class ZeroShotClassificationPipelineTests(unittest.TestCase, metaclass=PipelineT
    model_mapping = MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING
    tf_model_mapping = TF_MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING
-    def run_pipeline_test(self, model, tokenizer, feature_extractor):
+    def get_test_pipeline(self, model, tokenizer, feature_extractor):
-        classifier = ZeroShotClassificationPipeline(model=model, tokenizer=tokenizer)
+        classifier = ZeroShotClassificationPipeline(
            model=model, tokenizer=tokenizer, candidate_labels=["polics", "health"]
        )
        return classifier, ["Who are you voting for in 2020?", "My stomach hurts."]
    def run_pipeline_test(self, classifier, _):
        outputs = classifier("Who are you voting for in 2020?", candidate_labels="politics")
        self.assertEqual(outputs, {"sequence": ANY(str), "labels": [ANY(str)], "scores": [ANY(float)]})