[Test refactor 1/5] Per-folder tests reorganization (#15725)

* Per-folder tests reorganization Co-authored-by: sgugger <sylvain.gugger@gmail.com> Co-authored-by: Stas Bekman <stas@stason.org>
2022-02-23 15:46:28 -05:00
parent fecb08c2b8
commit 29c10a41d0
438 changed files with 636 additions and 565 deletions
--- a/tests/pipelines/init.py
+++ b/tests/pipelines/init.py
--- a/tests/pipelines/test_pipelines_audio_classification.py
+++ b/tests/pipelines/test_pipelines_audio_classification.py
@@ -0,0 +1,126 @@
+# Copyright 2021 The HuggingFace Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import unittest
+
+import numpy as np
+
+from transformers import MODEL_FOR_AUDIO_CLASSIFICATION_MAPPING
+from transformers.pipelines import AudioClassificationPipeline, pipeline
+from transformers.testing_utils import (
+    is_pipeline_test,
+    nested_simplify,
+    require_tf,
+    require_torch,
+    require_torchaudio,
+    slow,
+)
+
+from .test_pipelines_common import ANY, PipelineTestCaseMeta
+
+
+@is_pipeline_test
+@require_torch
+class AudioClassificationPipelineTests(unittest.TestCase, metaclass=PipelineTestCaseMeta):
+    model_mapping = MODEL_FOR_AUDIO_CLASSIFICATION_MAPPING
+
+    def get_test_pipeline(self, model, tokenizer, feature_extractor):
+        audio_classifier = AudioClassificationPipeline(model=model, feature_extractor=feature_extractor)
+
+        # test with a raw waveform
+        audio = np.zeros((34000,))
+        audio2 = np.zeros((14000,))
+        return audio_classifier, [audio2, audio]
+
+    def run_pipeline_test(self, audio_classifier, examples):
+        audio2, audio = examples
+        output = audio_classifier(audio)
+        # by default a model is initialized with num_labels=2
+        self.assertEqual(
+            output,
+            [
+                {"score": ANY(float), "label": ANY(str)},
+                {"score": ANY(float), "label": ANY(str)},
+            ],
+        )
+        output = audio_classifier(audio, top_k=1)
+        self.assertEqual(
+            output,
+            [
+                {"score": ANY(float), "label": ANY(str)},
+            ],
+        )
+
+        self.run_torchaudio(audio_classifier)
+
+    @require_torchaudio
+    def run_torchaudio(self, audio_classifier):
+        import datasets
+
+        # test with a local file
+        dataset = datasets.load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
+        audio = dataset[0]["audio"]["array"]
+        output = audio_classifier(audio)
+        self.assertEqual(
+            output,
+            [
+                {"score": ANY(float), "label": ANY(str)},
+                {"score": ANY(float), "label": ANY(str)},
+            ],
+        )
+
+    @require_torch
+    def test_small_model_pt(self):
+        model = "anton-l/wav2vec2-random-tiny-classifier"
+
+        audio_classifier = pipeline("audio-classification", model=model)
+
+        audio = np.ones((8000,))
+        output = audio_classifier(audio, top_k=4)
+        self.assertEqual(
+            nested_simplify(output, decimals=4),
+            [
+                {"score": 0.0842, "label": "no"},
+                {"score": 0.0838, "label": "up"},
+                {"score": 0.0837, "label": "go"},
+                {"score": 0.0834, "label": "right"},
+            ],
+        )
+
+    @require_torch
+    @slow
+    def test_large_model_pt(self):
+        import datasets
+
+        model = "superb/wav2vec2-base-superb-ks"
+
+        audio_classifier = pipeline("audio-classification", model=model)
+        dataset = datasets.load_dataset("anton-l/superb_dummy", "ks", split="test")
+
+        audio = np.array(dataset[3]["speech"], dtype=np.float32)
+        output = audio_classifier(audio, top_k=4)
+        self.assertEqual(
+            nested_simplify(output, decimals=3),
+            [
+                {"score": 0.981, "label": "go"},
+                {"score": 0.007, "label": "up"},
+                {"score": 0.006, "label": "_unknown_"},
+                {"score": 0.001, "label": "down"},
+            ],
+        )
+
+    @require_tf
+    @unittest.skip("Audio classification is not implemented for TF")
+    def test_small_model_tf(self):
+        pass
--- a/tests/pipelines/test_pipelines_automatic_speech_recognition.py
+++ b/tests/pipelines/test_pipelines_automatic_speech_recognition.py
@@ -0,0 +1,643 @@
+# Copyright 2021 The HuggingFace Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import unittest
+
+import numpy as np
+import pytest
+from datasets import load_dataset
+
+from huggingface_hub import snapshot_download
+from transformers import (
+    MODEL_FOR_CTC_MAPPING,
+    MODEL_FOR_SPEECH_SEQ_2_SEQ_MAPPING,
+    AutoFeatureExtractor,
+    AutoTokenizer,
+    Speech2TextForConditionalGeneration,
+    Wav2Vec2ForCTC,
+)
+from transformers.pipelines import AutomaticSpeechRecognitionPipeline, pipeline
+from transformers.pipelines.audio_utils import chunk_bytes_iter
+from transformers.pipelines.automatic_speech_recognition import apply_stride, chunk_iter
+from transformers.testing_utils import (
+    is_pipeline_test,
+    is_torch_available,
+    nested_simplify,
+    require_pyctcdecode,
+    require_tf,
+    require_torch,
+    require_torchaudio,
+    slow,
+)
+
+from .test_pipelines_common import ANY, PipelineTestCaseMeta
+
+
+if is_torch_available():
+    import torch
+
+
+# We can't use this mixin because it assumes TF support.
+# from .test_pipelines_common import CustomInputPipelineCommonMixin
+
+
+@is_pipeline_test
+class AutomaticSpeechRecognitionPipelineTests(unittest.TestCase, metaclass=PipelineTestCaseMeta):
+    model_mapping = {
+        k: v
+        for k, v in (list(MODEL_FOR_SPEECH_SEQ_2_SEQ_MAPPING.items()) if MODEL_FOR_SPEECH_SEQ_2_SEQ_MAPPING else [])
+        + (MODEL_FOR_CTC_MAPPING.items() if MODEL_FOR_CTC_MAPPING else [])
+    }
+
+    def get_test_pipeline(self, model, tokenizer, feature_extractor):
+        if tokenizer is None:
+            # Side effect of no Fast Tokenizer class for these model, so skipping
+            # But the slow tokenizer test should still run as they're quite small
+            self.skipTest("No tokenizer available")
+            return
+            # return None, None
+
+        speech_recognizer = AutomaticSpeechRecognitionPipeline(
+            model=model, tokenizer=tokenizer, feature_extractor=feature_extractor
+        )
+
+        # test with a raw waveform
+        audio = np.zeros((34000,))
+        audio2 = np.zeros((14000,))
+        return speech_recognizer, [audio, audio2]
+
+    def run_pipeline_test(self, speech_recognizer, examples):
+        audio = np.zeros((34000,))
+        outputs = speech_recognizer(audio)
+        self.assertEqual(outputs, {"text": ANY(str)})
+
+        audio = {"raw": audio, "stride": (0, 4000), "sampling_rate": speech_recognizer.feature_extractor.sampling_rate}
+        if speech_recognizer.type == "ctc":
+            outputs = speech_recognizer(audio)
+            self.assertEqual(outputs, {"text": ANY(str)})
+        else:
+            # Non CTC models cannot use striding.
+            with self.assertRaises(ValueError):
+                outputs = speech_recognizer(audio)
+
+    @require_torch
+    @slow
+    def test_pt_defaults(self):
+        pipeline("automatic-speech-recognition", framework="pt")
+
+    @require_torch
+    def test_small_model_pt(self):
+        speech_recognizer = pipeline(
+            task="automatic-speech-recognition",
+            model="facebook/s2t-small-mustc-en-fr-st",
+            tokenizer="facebook/s2t-small-mustc-en-fr-st",
+            framework="pt",
+        )
+        waveform = np.tile(np.arange(1000, dtype=np.float32), 34)
+        output = speech_recognizer(waveform)
+        self.assertEqual(output, {"text": "(Applaudissements)"})
+
+    @require_torch
+    def test_small_model_pt_seq2seq(self):
+        model_id = "hf-internal-testing/tiny-random-speech-encoder-decoder"
+        tokenizer = AutoTokenizer.from_pretrained(model_id)
+        feature_extractor = AutoFeatureExtractor.from_pretrained(model_id)
+
+        speech_recognizer = pipeline(
+            task="automatic-speech-recognition",
+            model=model_id,
+            tokenizer=tokenizer,
+            feature_extractor=feature_extractor,
+            framework="pt",
+        )
+
+        waveform = np.tile(np.arange(1000, dtype=np.float32), 34)
+        output = speech_recognizer(waveform)
+        self.assertEqual(output, {"text": "あл ش 湯 清 ه ܬ া लᆨしث ल eか u w 全 u"})
+
+    @slow
+    @require_torch
+    @require_pyctcdecode
+    def test_large_model_pt_with_lm(self):
+        dataset = load_dataset("Narsil/asr_dummy")
+        filename = dataset["test"][3]["file"]
+
+        speech_recognizer = pipeline(
+            task="automatic-speech-recognition",
+            model="patrickvonplaten/wav2vec2-large-xlsr-53-spanish-with-lm",
+            framework="pt",
+        )
+        self.assertEqual(speech_recognizer.type, "ctc_with_lm")
+
+        output = speech_recognizer(filename)
+        self.assertEqual(
+            output,
+            {"text": "y en las ramas medio sumergidas revoloteaban algunos pájaros de quimérico y legendario plumaje"},
+        )
+
+        # Override back to pure CTC
+        speech_recognizer.type = "ctc"
+        output = speech_recognizer(filename)
+        # plumajre != plumaje
+        self.assertEqual(
+            output,
+            {
+                "text": "y en las ramas medio sumergidas revoloteaban algunos pájaros de quimérico y legendario plumajre"
+            },
+        )
+
+    @require_tf
+    def test_small_model_tf(self):
+        self.skipTest("Tensorflow not supported yet.")
+
+    @require_torch
+    def test_torch_small_no_tokenizer_files(self):
+        # test that model without tokenizer file cannot be loaded
+        with pytest.raises(OSError):
+            pipeline(
+                task="automatic-speech-recognition",
+                model="patrickvonplaten/tiny-wav2vec2-no-tokenizer",
+                framework="pt",
+            )
+
+    @require_torch
+    @slow
+    def test_torch_large(self):
+
+        speech_recognizer = pipeline(
+            task="automatic-speech-recognition",
+            model="facebook/wav2vec2-base-960h",
+            tokenizer="facebook/wav2vec2-base-960h",
+            framework="pt",
+        )
+        waveform = np.tile(np.arange(1000, dtype=np.float32), 34)
+        output = speech_recognizer(waveform)
+        self.assertEqual(output, {"text": ""})
+
+        ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation").sort("id")
+        filename = ds[40]["file"]
+        output = speech_recognizer(filename)
+        self.assertEqual(output, {"text": "A MAN SAID TO THE UNIVERSE SIR I EXIST"})
+
+    @require_torch
+    @slow
+    def test_torch_speech_encoder_decoder(self):
+        speech_recognizer = pipeline(
+            task="automatic-speech-recognition",
+            model="facebook/s2t-wav2vec2-large-en-de",
+            feature_extractor="facebook/s2t-wav2vec2-large-en-de",
+            framework="pt",
+        )
+
+        ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation").sort("id")
+        filename = ds[40]["file"]
+        output = speech_recognizer(filename)
+        self.assertEqual(output, {"text": 'Ein Mann sagte zum Universum : " Sir, ich existiert! "'})
+
+    @slow
+    @require_torch
+    def test_simple_wav2vec2(self):
+        model = Wav2Vec2ForCTC.from_pretrained("facebook/wav2vec2-base-960h")
+        tokenizer = AutoTokenizer.from_pretrained("facebook/wav2vec2-base-960h")
+        feature_extractor = AutoFeatureExtractor.from_pretrained("facebook/wav2vec2-base-960h")
+
+        asr = AutomaticSpeechRecognitionPipeline(model=model, tokenizer=tokenizer, feature_extractor=feature_extractor)
+
+        waveform = np.tile(np.arange(1000, dtype=np.float32), 34)
+        output = asr(waveform)
+        self.assertEqual(output, {"text": ""})
+
+        ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation").sort("id")
+        filename = ds[40]["file"]
+        output = asr(filename)
+        self.assertEqual(output, {"text": "A MAN SAID TO THE UNIVERSE SIR I EXIST"})
+
+        filename = ds[40]["file"]
+        with open(filename, "rb") as f:
+            data = f.read()
+        output = asr(data)
+        self.assertEqual(output, {"text": "A MAN SAID TO THE UNIVERSE SIR I EXIST"})
+
+    @slow
+    @require_torch
+    @require_torchaudio
+    def test_simple_s2t(self):
+
+        model = Speech2TextForConditionalGeneration.from_pretrained("facebook/s2t-small-mustc-en-it-st")
+        tokenizer = AutoTokenizer.from_pretrained("facebook/s2t-small-mustc-en-it-st")
+        feature_extractor = AutoFeatureExtractor.from_pretrained("facebook/s2t-small-mustc-en-it-st")
+
+        asr = AutomaticSpeechRecognitionPipeline(model=model, tokenizer=tokenizer, feature_extractor=feature_extractor)
+
+        waveform = np.tile(np.arange(1000, dtype=np.float32), 34)
+
+        output = asr(waveform)
+        self.assertEqual(output, {"text": "(Applausi)"})
+
+        ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation").sort("id")
+        filename = ds[40]["file"]
+        output = asr(filename)
+        self.assertEqual(output, {"text": "Un uomo disse all'universo: \"Signore, io esisto."})
+
+        filename = ds[40]["file"]
+        with open(filename, "rb") as f:
+            data = f.read()
+        output = asr(data)
+        self.assertEqual(output, {"text": "Un uomo disse all'universo: \"Signore, io esisto."})
+
+    @slow
+    @require_torch
+    @require_torchaudio
+    def test_xls_r_to_en(self):
+        speech_recognizer = pipeline(
+            task="automatic-speech-recognition",
+            model="facebook/wav2vec2-xls-r-1b-21-to-en",
+            feature_extractor="facebook/wav2vec2-xls-r-1b-21-to-en",
+            framework="pt",
+        )
+
+        ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation").sort("id")
+        filename = ds[40]["file"]
+        output = speech_recognizer(filename)
+        self.assertEqual(output, {"text": "A man said to the universe: “Sir, I exist."})
+
+    @slow
+    @require_torch
+    @require_torchaudio
+    def test_xls_r_from_en(self):
+        speech_recognizer = pipeline(
+            task="automatic-speech-recognition",
+            model="facebook/wav2vec2-xls-r-1b-en-to-15",
+            feature_extractor="facebook/wav2vec2-xls-r-1b-en-to-15",
+            framework="pt",
+        )
+
+        ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation").sort("id")
+        filename = ds[40]["file"]
+        output = speech_recognizer(filename)
+        self.assertEqual(output, {"text": "Ein Mann sagte zu dem Universum, Sir, ich bin da."})
+
+    @slow
+    @require_torch
+    @require_torchaudio
+    def test_speech_to_text_leveraged(self):
+        speech_recognizer = pipeline(
+            task="automatic-speech-recognition",
+            model="patrickvonplaten/wav2vec2-2-bart-base",
+            feature_extractor="patrickvonplaten/wav2vec2-2-bart-base",
+            tokenizer=AutoTokenizer.from_pretrained("patrickvonplaten/wav2vec2-2-bart-base"),
+            framework="pt",
+        )
+
+        ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation").sort("id")
+        filename = ds[40]["file"]
+        output = speech_recognizer(filename)
+        self.assertEqual(output, {"text": "a man said to the universe sir i exist"})
+
+    @require_torch
+    def test_chunking_fast(self):
+        speech_recognizer = pipeline(
+            task="automatic-speech-recognition",
+            model="hf-internal-testing/tiny-random-wav2vec2",
+            chunk_length_s=10.0,
+        )
+
+        ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation").sort("id")
+        audio = ds[40]["audio"]["array"]
+
+        n_repeats = 2
+        audio_tiled = np.tile(audio, n_repeats)
+        output = speech_recognizer([audio_tiled], batch_size=2)
+        self.assertEqual(output, [{"text": ANY(str)}])
+        self.assertEqual(output[0]["text"][:6], "ZBT ZC")
+
+    @require_torch
+    @require_pyctcdecode
+    def test_chunking_fast_with_lm(self):
+        speech_recognizer = pipeline(
+            model="hf-internal-testing/processor_with_lm",
+            chunk_length_s=10.0,
+        )
+
+        ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation").sort("id")
+        audio = ds[40]["audio"]["array"]
+
+        n_repeats = 2
+        audio_tiled = np.tile(audio, n_repeats)
+        # Batch_size = 1
+        output1 = speech_recognizer([audio_tiled], batch_size=1)
+        self.assertEqual(output1, [{"text": ANY(str)}])
+        self.assertEqual(output1[0]["text"][:6], "<s> <s")
+
+        # batch_size = 2
+        output2 = speech_recognizer([audio_tiled], batch_size=2)
+        self.assertEqual(output2, [{"text": ANY(str)}])
+        self.assertEqual(output2[0]["text"][:6], "<s> <s")
+
+        # TODO There is an offby one error because of the ratio.
+        # Maybe logits get affected by the padding on this random
+        # model is more likely. Add some masking ?
+        # self.assertEqual(output1, output2)
+
+    @require_torch
+    @require_pyctcdecode
+    def test_with_lm_fast(self):
+        speech_recognizer = pipeline(
+            model="hf-internal-testing/processor_with_lm",
+        )
+        self.assertEqual(speech_recognizer.type, "ctc_with_lm")
+
+        ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation").sort("id")
+        audio = ds[40]["audio"]["array"]
+
+        n_repeats = 2
+        audio_tiled = np.tile(audio, n_repeats)
+
+        output = speech_recognizer([audio_tiled], batch_size=2)
+        self.assertEqual(output, [{"text": ANY(str)}])
+        self.assertEqual(output[0]["text"][:6], "<s> <s")
+
+        # Making sure the argument are passed to the decoder
+        # Since no change happens in the result, check the error comes from
+        # the `decode_beams` function.
+        with self.assertRaises(TypeError) as e:
+            output = speech_recognizer([audio_tiled], decoder_kwargs={"num_beams": 2})
+            self.assertContains(e.msg, "TypeError: decode_beams() got an unexpected keyword argument 'num_beams'")
+        output = speech_recognizer([audio_tiled], decoder_kwargs={"beam_width": 2})
+
+    @require_torch
+    @require_pyctcdecode
+    def test_with_local_lm_fast(self):
+        local_dir = snapshot_download("hf-internal-testing/processor_with_lm")
+        speech_recognizer = pipeline(
+            task="automatic-speech-recognition",
+            model=local_dir,
+        )
+        self.assertEqual(speech_recognizer.type, "ctc_with_lm")
+
+        ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation").sort("id")
+        audio = ds[40]["audio"]["array"]
+
+        n_repeats = 2
+        audio_tiled = np.tile(audio, n_repeats)
+
+        output = speech_recognizer([audio_tiled], batch_size=2)
+
+        self.assertEqual(output, [{"text": ANY(str)}])
+        self.assertEqual(output[0]["text"][:6], "<s> <s")
+
+    @require_torch
+    @slow
+    def test_chunking(self):
+        model = Wav2Vec2ForCTC.from_pretrained("facebook/wav2vec2-base-960h")
+        tokenizer = AutoTokenizer.from_pretrained("facebook/wav2vec2-base-960h")
+        feature_extractor = AutoFeatureExtractor.from_pretrained("facebook/wav2vec2-base-960h")
+        speech_recognizer = pipeline(
+            task="automatic-speech-recognition",
+            model=model,
+            tokenizer=tokenizer,
+            feature_extractor=feature_extractor,
+            framework="pt",
+            chunk_length_s=10.0,
+        )
+
+        ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation").sort("id")
+        audio = ds[40]["audio"]["array"]
+
+        n_repeats = 10
+        audio = np.tile(audio, n_repeats)
+        output = speech_recognizer([audio], batch_size=2)
+        expected_text = "A MAN SAID TO THE UNIVERSE SIR I EXIST " * n_repeats
+        expected = [{"text": expected_text.strip()}]
+        self.assertEqual(output, expected)
+
+    @require_torch
+    @slow
+    def test_chunking_with_lm(self):
+        speech_recognizer = pipeline(
+            task="automatic-speech-recognition",
+            model="patrickvonplaten/wav2vec2-base-100h-with-lm",
+            chunk_length_s=10.0,
+        )
+        ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation").sort("id")
+        audio = ds[40]["audio"]["array"]
+
+        n_repeats = 10
+        audio = np.tile(audio, n_repeats)
+        output = speech_recognizer([audio], batch_size=2)
+        expected_text = "A MAN SAID TO THE UNIVERSE SIR I EXIST " * n_repeats
+        expected = [{"text": expected_text.strip()}]
+        self.assertEqual(output, expected)
+
+    @require_torch
+    def test_chunk_iterator(self):
+        feature_extractor = AutoFeatureExtractor.from_pretrained("facebook/wav2vec2-base-960h")
+        inputs = torch.arange(100).long()
+
+        outs = list(chunk_iter(inputs, feature_extractor, 100, 0, 0))
+        self.assertEqual(len(outs), 1)
+        self.assertEqual([o["stride"] for o in outs], [(100, 0, 0)])
+        self.assertEqual([o["input_values"].shape for o in outs], [(1, 100)])
+        self.assertEqual([o["is_last"] for o in outs], [True])
+
+        # two chunks no stride
+        outs = list(chunk_iter(inputs, feature_extractor, 50, 0, 0))
+        self.assertEqual(len(outs), 2)
+        self.assertEqual([o["stride"] for o in outs], [(50, 0, 0), (50, 0, 0)])
+        self.assertEqual([o["input_values"].shape for o in outs], [(1, 50), (1, 50)])
+        self.assertEqual([o["is_last"] for o in outs], [False, True])
+
+        # two chunks incomplete last
+        outs = list(chunk_iter(inputs, feature_extractor, 80, 0, 0))
+        self.assertEqual(len(outs), 2)
+        self.assertEqual([o["stride"] for o in outs], [(80, 0, 0), (20, 0, 0)])
+        self.assertEqual([o["input_values"].shape for o in outs], [(1, 80), (1, 20)])
+        self.assertEqual([o["is_last"] for o in outs], [False, True])
+
+    @require_torch
+    def test_chunk_iterator_stride(self):
+        feature_extractor = AutoFeatureExtractor.from_pretrained("facebook/wav2vec2-base-960h")
+        inputs = torch.arange(100).long()
+        input_values = feature_extractor(inputs, sampling_rate=feature_extractor.sampling_rate, return_tensors="pt")[
+            "input_values"
+        ]
+
+        outs = list(chunk_iter(inputs, feature_extractor, 100, 20, 10))
+        self.assertEqual(len(outs), 2)
+        self.assertEqual([o["stride"] for o in outs], [(100, 0, 10), (30, 20, 0)])
+        self.assertEqual([o["input_values"].shape for o in outs], [(1, 100), (1, 30)])
+        self.assertEqual([o["is_last"] for o in outs], [False, True])
+
+        outs = list(chunk_iter(inputs, feature_extractor, 80, 20, 10))
+        self.assertEqual(len(outs), 2)
+        self.assertEqual([o["stride"] for o in outs], [(80, 0, 10), (50, 20, 0)])
+        self.assertEqual([o["input_values"].shape for o in outs], [(1, 80), (1, 50)])
+        self.assertEqual([o["is_last"] for o in outs], [False, True])
+
+        outs = list(chunk_iter(inputs, feature_extractor, 90, 20, 0))
+        self.assertEqual(len(outs), 2)
+        self.assertEqual([o["stride"] for o in outs], [(90, 0, 0), (30, 20, 0)])
+        self.assertEqual([o["input_values"].shape for o in outs], [(1, 90), (1, 30)])
+
+        inputs = torch.LongTensor([i % 2 for i in range(100)])
+        input_values = feature_extractor(inputs, sampling_rate=feature_extractor.sampling_rate, return_tensors="pt")[
+            "input_values"
+        ]
+        outs = list(chunk_iter(inputs, feature_extractor, 30, 5, 5))
+        self.assertEqual(len(outs), 5)
+        self.assertEqual([o["stride"] for o in outs], [(30, 0, 5), (30, 5, 5), (30, 5, 5), (30, 5, 5), (20, 5, 0)])
+        self.assertEqual([o["input_values"].shape for o in outs], [(1, 30), (1, 30), (1, 30), (1, 30), (1, 20)])
+        self.assertEqual([o["is_last"] for o in outs], [False, False, False, False, True])
+        # (0, 25)
+        self.assertEqual(nested_simplify(input_values[:, :30]), nested_simplify(outs[0]["input_values"]))
+        # (25, 45)
+        self.assertEqual(nested_simplify(input_values[:, 20:50]), nested_simplify(outs[1]["input_values"]))
+        # (45, 65)
+        self.assertEqual(nested_simplify(input_values[:, 40:70]), nested_simplify(outs[2]["input_values"]))
+        # (65, 85)
+        self.assertEqual(nested_simplify(input_values[:, 60:90]), nested_simplify(outs[3]["input_values"]))
+        # (85, 100)
+        self.assertEqual(nested_simplify(input_values[:, 80:100]), nested_simplify(outs[4]["input_values"]))
+
+    @require_torch
+    def test_stride(self):
+        speech_recognizer = pipeline(
+            task="automatic-speech-recognition",
+            model="hf-internal-testing/tiny-random-wav2vec2",
+        )
+        waveform = np.tile(np.arange(1000, dtype=np.float32), 10)
+        output = speech_recognizer({"raw": waveform, "stride": (0, 0), "sampling_rate": 16_000})
+        self.assertEqual(output, {"text": "OB XB  B EB BB  B EB B OB X"})
+
+        # 0 effective ids Just take the middle one
+        output = speech_recognizer({"raw": waveform, "stride": (5000, 5000), "sampling_rate": 16_000})
+        self.assertEqual(output, {"text": "B"})
+
+        # Only 1 arange.
+        output = speech_recognizer({"raw": waveform, "stride": (0, 9000), "sampling_rate": 16_000})
+        self.assertEqual(output, {"text": "O"})
+
+        # 2nd arange
+        output = speech_recognizer({"raw": waveform, "stride": (1000, 8000), "sampling_rate": 16_000})
+        self.assertEqual(output, {"text": "B XB"})
+
+
+@require_torch
+class ApplyStrideTest(unittest.TestCase):
+    def test_apply_stride(self):
+        tokens = torch.arange(10).long().reshape((2, 5))
+
+        # No stride
+        apply_stride(tokens, [(100, 0, 0), (100, 0, 0)])
+
+        expected = torch.arange(10).long().reshape((2, 5))
+        self.assertEqual(expected.tolist(), tokens.tolist())
+
+    def test_apply_stride_real_stride(self):
+        # Stride aligned
+        tokens = torch.arange(10).long().reshape((2, 5))
+        apply_stride(tokens, [(100, 20, 0), (100, 0, 20)])
+        self.assertEqual([[1, 1, 2, 3, 4], [5, 6, 7, 8, 8]], tokens.tolist())
+
+        # Stride rounded
+        tokens = torch.arange(10).long().reshape((2, 5))
+        apply_stride(tokens, [(100, 15, 0), (100, 0, 15)])
+        self.assertEqual([[1, 1, 2, 3, 4], [5, 6, 7, 8, 8]], tokens.tolist())
+
+        # No stride rounded
+        tokens = torch.arange(10).long().reshape((2, 5))
+        apply_stride(tokens, [(100, 5, 0), (100, 0, 5)])
+        self.assertEqual([[0, 1, 2, 3, 4], [5, 6, 7, 8, 9]], tokens.tolist())
+
+    def test_apply_stride_with_padding(self):
+        # Stride aligned
+        tokens = torch.arange(10).long().reshape((2, 5))
+        apply_stride(tokens, [(100, 20, 0), (60, 0, 20)])
+        self.assertEqual([[1, 1, 2, 3, 4], [5, 6, 6, 6, 6]], tokens.tolist())
+
+
+def require_ffmpeg(test_case):
+    """
+    Decorator marking a test that requires FFmpeg.
+
+    These tests are skipped when FFmpeg isn't installed.
+
+    """
+    import subprocess
+
+    try:
+        subprocess.check_output(["ffmpeg", "-h"], stderr=subprocess.DEVNULL)
+        return test_case
+    except Exception:
+        return unittest.skip("test requires ffmpeg")(test_case)
+
+
+def bytes_iter(chunk_size, chunks):
+    for i in range(chunks):
+        yield bytes(range(i * chunk_size, (i + 1) * chunk_size))
+
+
+@require_ffmpeg
+class AudioUtilsTest(unittest.TestCase):
+    def test_chunk_bytes_iter_too_big(self):
+        iter_ = iter(chunk_bytes_iter(bytes_iter(chunk_size=3, chunks=2), 10, stride=(0, 0)))
+        self.assertEqual(next(iter_), {"raw": b"\x00\x01\x02\x03\x04\x05", "stride": (0, 0)})
+        with self.assertRaises(StopIteration):
+            next(iter_)
+
+    def test_chunk_bytes_iter(self):
+        iter_ = iter(chunk_bytes_iter(bytes_iter(chunk_size=3, chunks=2), 3, stride=(0, 0)))
+        self.assertEqual(next(iter_), {"raw": b"\x00\x01\x02", "stride": (0, 0)})
+        self.assertEqual(next(iter_), {"raw": b"\x03\x04\x05", "stride": (0, 0)})
+        with self.assertRaises(StopIteration):
+            next(iter_)
+
+    def test_chunk_bytes_iter_stride(self):
+        iter_ = iter(chunk_bytes_iter(bytes_iter(chunk_size=3, chunks=2), 3, stride=(1, 1)))
+        self.assertEqual(next(iter_), {"raw": b"\x00\x01\x02", "stride": (0, 1)})
+        self.assertEqual(next(iter_), {"raw": b"\x01\x02\x03", "stride": (1, 1)})
+        self.assertEqual(next(iter_), {"raw": b"\x02\x03\x04", "stride": (1, 1)})
+        # This is finished, but the chunk_bytes doesn't know it yet.
+        self.assertEqual(next(iter_), {"raw": b"\x03\x04\x05", "stride": (1, 1)})
+        self.assertEqual(next(iter_), {"raw": b"\x04\x05", "stride": (1, 0)})
+        with self.assertRaises(StopIteration):
+            next(iter_)
+
+    def test_chunk_bytes_iter_stride_stream(self):
+        iter_ = iter(chunk_bytes_iter(bytes_iter(chunk_size=3, chunks=2), 5, stride=(1, 1), stream=True))
+        self.assertEqual(next(iter_), {"raw": b"\x00\x01\x02", "stride": (0, 0), "partial": True})
+        self.assertEqual(next(iter_), {"raw": b"\x00\x01\x02\x03\x04", "stride": (0, 1), "partial": False})
+        self.assertEqual(next(iter_), {"raw": b"\x03\x04\x05", "stride": (1, 0), "partial": False})
+        with self.assertRaises(StopIteration):
+            next(iter_)
+
+        iter_ = iter(chunk_bytes_iter(bytes_iter(chunk_size=3, chunks=3), 5, stride=(1, 1), stream=True))
+        self.assertEqual(next(iter_), {"raw": b"\x00\x01\x02", "stride": (0, 0), "partial": True})
+        self.assertEqual(next(iter_), {"raw": b"\x00\x01\x02\x03\x04", "stride": (0, 1), "partial": False})
+        self.assertEqual(next(iter_), {"raw": b"\x03\x04\x05\x06\x07", "stride": (1, 1), "partial": False})
+        self.assertEqual(next(iter_), {"raw": b"\x06\x07\x08", "stride": (1, 0), "partial": False})
+        with self.assertRaises(StopIteration):
+            next(iter_)
+
+        iter_ = iter(chunk_bytes_iter(bytes_iter(chunk_size=3, chunks=3), 10, stride=(1, 1), stream=True))
+        self.assertEqual(next(iter_), {"raw": b"\x00\x01\x02", "stride": (0, 0), "partial": True})
+        self.assertEqual(next(iter_), {"raw": b"\x00\x01\x02\x03\x04\x05", "stride": (0, 0), "partial": True})
+        self.assertEqual(
+            next(iter_), {"raw": b"\x00\x01\x02\x03\x04\x05\x06\x07\x08", "stride": (0, 0), "partial": True}
+        )
+        self.assertEqual(
+            next(iter_), {"raw": b"\x00\x01\x02\x03\x04\x05\x06\x07\x08", "stride": (0, 0), "partial": False}
+        )
+        with self.assertRaises(StopIteration):
+            next(iter_)
--- a/tests/pipelines/test_pipelines_common.py
+++ b/tests/pipelines/test_pipelines_common.py
@@ -0,0 +1,607 @@
+# Copyright 2020 The HuggingFace Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import copy
+import importlib
+import logging
+import random
+import string
+import unittest
+from abc import abstractmethod
+from functools import lru_cache
+from unittest import skipIf
+
+from transformers import (
+    FEATURE_EXTRACTOR_MAPPING,
+    TOKENIZER_MAPPING,
+    AutoFeatureExtractor,
+    AutoTokenizer,
+    DistilBertForSequenceClassification,
+    IBertConfig,
+    RobertaConfig,
+    TextClassificationPipeline,
+    pipeline,
+)
+from transformers.pipelines import get_task
+from transformers.pipelines.base import _pad
+from transformers.testing_utils import is_pipeline_test, nested_simplify, require_tf, require_torch
+
+
+logger = logging.getLogger(__name__)
+
+
+def get_checkpoint_from_architecture(architecture):
+    try:
+        module = importlib.import_module(architecture.__module__)
+    except ImportError:
+        logger.error(f"Ignoring architecture {architecture}")
+        return
+
+    if hasattr(module, "_CHECKPOINT_FOR_DOC"):
+        return module._CHECKPOINT_FOR_DOC
+    else:
+        logger.warning(f"Can't retrieve checkpoint from {architecture.__name__}")
+
+
+def get_tiny_config_from_class(configuration_class):
+    if "OpenAIGPT" in configuration_class.__name__:
+        # This is the only file that is inconsistent with the naming scheme.
+        # Will rename this file if we decide this is the way to go
+        return
+
+    model_type = configuration_class.model_type
+    camel_case_model_name = configuration_class.__name__.split("Config")[0]
+
+    try:
+        module = importlib.import_module(f".test_modeling_{model_type.replace('-', '_')}", package="tests")
+        model_tester_class = getattr(module, f"{camel_case_model_name}ModelTester", None)
+    except (ImportError, AttributeError):
+        logger.error(f"No model tester class for {configuration_class.__name__}")
+        return
+
+    if model_tester_class is None:
+        logger.warning(f"No model tester class for {configuration_class.__name__}")
+        return
+
+    model_tester = model_tester_class(parent=None)
+
+    if hasattr(model_tester, "get_pipeline_config"):
+        config = model_tester.get_pipeline_config()
+    elif hasattr(model_tester, "get_config"):
+        config = model_tester.get_config()
+    else:
+        config = None
+        logger.warning(f"Model tester {model_tester_class.__name__} has no `get_config()`.")
+
+    return config
+
+
+@lru_cache(maxsize=100)
+def get_tiny_tokenizer_from_checkpoint(checkpoint):
+    tokenizer = AutoTokenizer.from_pretrained(checkpoint)
+    if tokenizer.vocab_size < 300:
+        # Wav2Vec2ForCTC for instance
+        # ByT5Tokenizer
+        # all are already small enough and have no Fast version that can
+        # be retrained
+        return tokenizer
+    logger.info("Training new from iterator ...")
+    vocabulary = string.ascii_letters + string.digits + " "
+    tokenizer = tokenizer.train_new_from_iterator(vocabulary, vocab_size=len(vocabulary), show_progress=False)
+    logger.info("Trained.")
+    return tokenizer
+
+
+def get_tiny_feature_extractor_from_checkpoint(checkpoint, tiny_config, feature_extractor_class):
+    try:
+        feature_extractor = AutoFeatureExtractor.from_pretrained(checkpoint)
+    except Exception:
+        try:
+            if feature_extractor_class is not None:
+                feature_extractor = feature_extractor_class()
+            else:
+                feature_extractor = None
+        except Exception:
+            feature_extractor = None
+    if hasattr(tiny_config, "image_size") and feature_extractor:
+        feature_extractor = feature_extractor.__class__(size=tiny_config.image_size, crop_size=tiny_config.image_size)
+
+    # Speech2TextModel specific.
+    if hasattr(tiny_config, "input_feat_per_channel") and feature_extractor:
+        feature_extractor = feature_extractor.__class__(
+            feature_size=tiny_config.input_feat_per_channel, num_mel_bins=tiny_config.input_feat_per_channel
+        )
+    return feature_extractor
+
+
+class ANY:
+    def __init__(self, *_types):
+        self._types = _types
+
+    def __eq__(self, other):
+        return isinstance(other, self._types)
+
+    def __repr__(self):
+        return f"ANY({', '.join(_type.__name__ for _type in self._types)})"
+
+
+class PipelineTestCaseMeta(type):
+    def __new__(mcs, name, bases, dct):
+        def gen_test(ModelClass, checkpoint, tiny_config, tokenizer_class, feature_extractor_class):
+            @skipIf(tiny_config is None, "TinyConfig does not exist")
+            @skipIf(checkpoint is None, "checkpoint does not exist")
+            def test(self):
+                if ModelClass.__name__.endswith("ForCausalLM"):
+                    tiny_config.is_encoder_decoder = False
+                    if hasattr(tiny_config, "encoder_no_repeat_ngram_size"):
+                        # specific for blenderbot which supports both decoder-only
+                        # encoder/decoder but the test config  only reflects
+                        # encoder/decoder arch
+                        tiny_config.encoder_no_repeat_ngram_size = 0
+                if ModelClass.__name__.endswith("WithLMHead"):
+                    tiny_config.is_decoder = True
+                try:
+                    model = ModelClass(tiny_config)
+                except ImportError as e:
+                    self.skipTest(
+                        f"Cannot run with {tiny_config} as the model requires a library that isn't installed: {e}"
+                    )
+                if hasattr(model, "eval"):
+                    model = model.eval()
+                if tokenizer_class is not None:
+                    try:
+                        tokenizer = get_tiny_tokenizer_from_checkpoint(checkpoint)
+                        # XLNet actually defines it as -1.
+                        if isinstance(model.config, (RobertaConfig, IBertConfig)):
+                            tokenizer.model_max_length = model.config.max_position_embeddings - 2
+                        elif (
+                            hasattr(model.config, "max_position_embeddings")
+                            and model.config.max_position_embeddings > 0
+                        ):
+                            tokenizer.model_max_length = model.config.max_position_embeddings
+                    # Rust Panic exception are NOT Exception subclass
+                    # Some test tokenizer contain broken vocabs or custom PreTokenizer, so we
+                    # provide some default tokenizer and hope for the best.
+                    except:  # noqa: E722
+                        self.skipTest(f"Ignoring {ModelClass}, cannot create a simple tokenizer")
+                else:
+                    tokenizer = None
+                feature_extractor = get_tiny_feature_extractor_from_checkpoint(
+                    checkpoint, tiny_config, feature_extractor_class
+                )
+
+                if tokenizer is None and feature_extractor is None:
+                    self.skipTest(
+                        f"Ignoring {ModelClass}, cannot create a tokenizer or feature_extractor (PerceiverConfig with no FastTokenizer ?)"
+                    )
+                pipeline, examples = self.get_test_pipeline(model, tokenizer, feature_extractor)
+                if pipeline is None:
+                    # The test can disable itself, but it should be very marginal
+                    # Concerns: Wav2Vec2ForCTC without tokenizer test (FastTokenizer don't exist)
+                    return
+                self.run_pipeline_test(pipeline, examples)
+
+                def run_batch_test(pipeline, examples):
+                    # Need to copy because `Conversation` are stateful
+                    if pipeline.tokenizer is not None and pipeline.tokenizer.pad_token_id is None:
+                        return  # No batching for this and it's OK
+
+                    # 10 examples with batch size 4 means there needs to be a unfinished batch
+                    # which is important for the unbatcher
+                    def data(n):
+                        for _ in range(n):
+                            # Need to copy because Conversation object is mutated
+                            yield copy.deepcopy(random.choice(examples))
+
+                    out = []
+                    for item in pipeline(data(10), batch_size=4):
+                        out.append(item)
+                    self.assertEqual(len(out), 10)
+
+                run_batch_test(pipeline, examples)
+
+            return test
+
+        for prefix, key in [("pt", "model_mapping"), ("tf", "tf_model_mapping")]:
+            mapping = dct.get(key, {})
+            if mapping:
+                for configuration, model_architectures in mapping.items():
+                    if not isinstance(model_architectures, tuple):
+                        model_architectures = (model_architectures,)
+
+                    for model_architecture in model_architectures:
+                        checkpoint = get_checkpoint_from_architecture(model_architecture)
+                        tiny_config = get_tiny_config_from_class(configuration)
+                        tokenizer_classes = TOKENIZER_MAPPING.get(configuration, [])
+                        feature_extractor_class = FEATURE_EXTRACTOR_MAPPING.get(configuration, None)
+                        feature_extractor_name = (
+                            feature_extractor_class.__name__ if feature_extractor_class else "nofeature_extractor"
+                        )
+                        if not tokenizer_classes:
+                            # We need to test even if there are no tokenizers.
+                            tokenizer_classes = [None]
+                        else:
+                            # Remove the non defined tokenizers
+                            # ByT5 and Perceiver are bytes-level and don't define
+                            # FastTokenizer, we can just ignore those.
+                            tokenizer_classes = [
+                                tokenizer_class for tokenizer_class in tokenizer_classes if tokenizer_class is not None
+                            ]
+
+                        for tokenizer_class in tokenizer_classes:
+                            if tokenizer_class is not None:
+                                tokenizer_name = tokenizer_class.__name__
+                            else:
+                                tokenizer_name = "notokenizer"
+
+                            test_name = f"test_{prefix}_{configuration.__name__}_{model_architecture.__name__}_{tokenizer_name}_{feature_extractor_name}"
+
+                            if tokenizer_class is not None or feature_extractor_class is not None:
+                                dct[test_name] = gen_test(
+                                    model_architecture,
+                                    checkpoint,
+                                    tiny_config,
+                                    tokenizer_class,
+                                    feature_extractor_class,
+                                )
+
+        @abstractmethod
+        def inner(self):
+            raise NotImplementedError("Not implemented test")
+
+        # Force these 2 methods to exist
+        dct["test_small_model_pt"] = dct.get("test_small_model_pt", inner)
+        dct["test_small_model_tf"] = dct.get("test_small_model_tf", inner)
+
+        return type.__new__(mcs, name, bases, dct)
+
+
+@is_pipeline_test
+class CommonPipelineTest(unittest.TestCase):
+    @require_torch
+    def test_pipeline_iteration(self):
+        from torch.utils.data import Dataset
+
+        class MyDataset(Dataset):
+            data = [
+                "This is a test",
+                "This restaurant is great",
+                "This restaurant is awful",
+            ]
+
+            def __len__(self):
+                return 3
+
+            def __getitem__(self, i):
+                return self.data[i]
+
+        text_classifier = pipeline(
+            task="text-classification", model="hf-internal-testing/tiny-random-distilbert", framework="pt"
+        )
+        dataset = MyDataset()
+        for output in text_classifier(dataset):
+            self.assertEqual(output, {"label": ANY(str), "score": ANY(float)})
+
+    @require_torch
+    def test_check_task_auto_inference(self):
+        pipe = pipeline(model="hf-internal-testing/tiny-random-distilbert")
+
+        self.assertIsInstance(pipe, TextClassificationPipeline)
+
+    @require_torch
+    def test_pipeline_batch_size_global(self):
+        pipe = pipeline(model="hf-internal-testing/tiny-random-distilbert")
+        self.assertEqual(pipe._batch_size, None)
+        self.assertEqual(pipe._num_workers, None)
+
+        pipe = pipeline(model="hf-internal-testing/tiny-random-distilbert", batch_size=2, num_workers=1)
+        self.assertEqual(pipe._batch_size, 2)
+        self.assertEqual(pipe._num_workers, 1)
+
+    @require_torch
+    def test_pipeline_override(self):
+        class MyPipeline(TextClassificationPipeline):
+            pass
+
+        text_classifier = pipeline(model="hf-internal-testing/tiny-random-distilbert", pipeline_class=MyPipeline)
+
+        self.assertIsInstance(text_classifier, MyPipeline)
+
+    def test_check_task(self):
+        task = get_task("gpt2")
+        self.assertEqual(task, "text-generation")
+
+        with self.assertRaises(RuntimeError):
+            # Wrong framework
+            get_task("espnet/siddhana_slurp_entity_asr_train_asr_conformer_raw_en_word_valid.acc.ave_10best")
+
+    @require_torch
+    def test_iterator_data(self):
+        def data(n: int):
+            for _ in range(n):
+                yield "This is a test"
+
+        pipe = pipeline(model="hf-internal-testing/tiny-random-distilbert")
+
+        results = []
+        for out in pipe(data(10)):
+            self.assertEqual(nested_simplify(out), {"label": "LABEL_0", "score": 0.504})
+            results.append(out)
+        self.assertEqual(len(results), 10)
+
+        # When using multiple workers on streamable data it should still work
+        # This will force using `num_workers=1` with a warning for now.
+        results = []
+        for out in pipe(data(10), num_workers=2):
+            self.assertEqual(nested_simplify(out), {"label": "LABEL_0", "score": 0.504})
+            results.append(out)
+        self.assertEqual(len(results), 10)
+
+    @require_tf
+    def test_iterator_data_tf(self):
+        def data(n: int):
+            for _ in range(n):
+                yield "This is a test"
+
+        pipe = pipeline(model="hf-internal-testing/tiny-random-distilbert", framework="tf")
+        out = pipe("This is a test")
+        results = []
+        for out in pipe(data(10)):
+            self.assertEqual(nested_simplify(out), {"label": "LABEL_0", "score": 0.504})
+            results.append(out)
+        self.assertEqual(len(results), 10)
+
+    @require_torch
+    def test_unbatch_attentions_hidden_states(self):
+        model = DistilBertForSequenceClassification.from_pretrained(
+            "hf-internal-testing/tiny-random-distilbert", output_hidden_states=True, output_attentions=True
+        )
+        tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-distilbert")
+        text_classifier = TextClassificationPipeline(model=model, tokenizer=tokenizer)
+
+        # Used to throw an error because `hidden_states` are a tuple of tensors
+        # instead of the expected tensor.
+        outputs = text_classifier(["This is great !"] * 20, batch_size=32)
+        self.assertEqual(len(outputs), 20)
+
+
+@is_pipeline_test
+class PipelinePadTest(unittest.TestCase):
+    @require_torch
+    def test_pipeline_padding(self):
+        import torch
+
+        items = [
+            {
+                "label": "label1",
+                "input_ids": torch.LongTensor([[1, 23, 24, 2]]),
+                "attention_mask": torch.LongTensor([[0, 1, 1, 0]]),
+            },
+            {
+                "label": "label2",
+                "input_ids": torch.LongTensor([[1, 23, 24, 43, 44, 2]]),
+                "attention_mask": torch.LongTensor([[0, 1, 1, 1, 1, 0]]),
+            },
+        ]
+
+        self.assertEqual(_pad(items, "label", 0, "right"), ["label1", "label2"])
+        self.assertTrue(
+            torch.allclose(
+                _pad(items, "input_ids", 10, "right"),
+                torch.LongTensor([[1, 23, 24, 2, 10, 10], [1, 23, 24, 43, 44, 2]]),
+            )
+        )
+        self.assertTrue(
+            torch.allclose(
+                _pad(items, "input_ids", 10, "left"),
+                torch.LongTensor([[10, 10, 1, 23, 24, 2], [1, 23, 24, 43, 44, 2]]),
+            )
+        )
+        self.assertTrue(
+            torch.allclose(
+                _pad(items, "attention_mask", 0, "right"), torch.LongTensor([[0, 1, 1, 0, 0, 0], [0, 1, 1, 1, 1, 0]])
+            )
+        )
+
+    @require_torch
+    def test_pipeline_image_padding(self):
+        import torch
+
+        items = [
+            {
+                "label": "label1",
+                "pixel_values": torch.zeros((1, 3, 10, 10)),
+            },
+            {
+                "label": "label2",
+                "pixel_values": torch.zeros((1, 3, 10, 10)),
+            },
+        ]
+
+        self.assertEqual(_pad(items, "label", 0, "right"), ["label1", "label2"])
+        self.assertTrue(
+            torch.allclose(
+                _pad(items, "pixel_values", 10, "right"),
+                torch.zeros((2, 3, 10, 10)),
+            )
+        )
+
+    @require_torch
+    def test_pipeline_offset_mapping(self):
+        import torch
+
+        items = [
+            {
+                "offset_mappings": torch.zeros([1, 11, 2], dtype=torch.long),
+            },
+            {
+                "offset_mappings": torch.zeros([1, 4, 2], dtype=torch.long),
+            },
+        ]
+
+        self.assertTrue(
+            torch.allclose(
+                _pad(items, "offset_mappings", 0, "right"),
+                torch.zeros((2, 11, 2), dtype=torch.long),
+            ),
+        )
+
+
+@is_pipeline_test
+@require_torch
+class PipelineUtilsTest(unittest.TestCase):
+    def test_pipeline_dataset(self):
+        from transformers.pipelines.pt_utils import PipelineDataset
+
+        dummy_dataset = [0, 1, 2, 3]
+
+        def add(number, extra=0):
+            return number + extra
+
+        dataset = PipelineDataset(dummy_dataset, add, {"extra": 2})
+        self.assertEqual(len(dataset), 4)
+        outputs = [dataset[i] for i in range(4)]
+        self.assertEqual(outputs, [2, 3, 4, 5])
+
+    def test_pipeline_iterator(self):
+        from transformers.pipelines.pt_utils import PipelineIterator
+
+        dummy_dataset = [0, 1, 2, 3]
+
+        def add(number, extra=0):
+            return number + extra
+
+        dataset = PipelineIterator(dummy_dataset, add, {"extra": 2})
+        self.assertEqual(len(dataset), 4)
+
+        outputs = [item for item in dataset]
+        self.assertEqual(outputs, [2, 3, 4, 5])
+
+    def test_pipeline_iterator_no_len(self):
+        from transformers.pipelines.pt_utils import PipelineIterator
+
+        def dummy_dataset():
+            for i in range(4):
+                yield i
+
+        def add(number, extra=0):
+            return number + extra
+
+        dataset = PipelineIterator(dummy_dataset(), add, {"extra": 2})
+        with self.assertRaises(TypeError):
+            len(dataset)
+
+        outputs = [item for item in dataset]
+        self.assertEqual(outputs, [2, 3, 4, 5])
+
+    def test_pipeline_batch_unbatch_iterator(self):
+        from transformers.pipelines.pt_utils import PipelineIterator
+
+        dummy_dataset = [{"id": [0, 1, 2]}, {"id": [3]}]
+
+        def add(number, extra=0):
+            return {"id": [i + extra for i in number["id"]]}
+
+        dataset = PipelineIterator(dummy_dataset, add, {"extra": 2}, loader_batch_size=3)
+
+        outputs = [item for item in dataset]
+        self.assertEqual(outputs, [{"id": 2}, {"id": 3}, {"id": 4}, {"id": 5}])
+
+    def test_pipeline_batch_unbatch_iterator_tensors(self):
+        import torch
+
+        from transformers.pipelines.pt_utils import PipelineIterator
+
+        dummy_dataset = [{"id": torch.LongTensor([[10, 20], [0, 1], [0, 2]])}, {"id": torch.LongTensor([[3]])}]
+
+        def add(number, extra=0):
+            return {"id": number["id"] + extra}
+
+        dataset = PipelineIterator(dummy_dataset, add, {"extra": 2}, loader_batch_size=3)
+
+        outputs = [item for item in dataset]
+        self.assertEqual(
+            nested_simplify(outputs), [{"id": [[12, 22]]}, {"id": [[2, 3]]}, {"id": [[2, 4]]}, {"id": [[5]]}]
+        )
+
+    def test_pipeline_chunk_iterator(self):
+        from transformers.pipelines.pt_utils import PipelineChunkIterator
+
+        def preprocess_chunk(n: int):
+            for i in range(n):
+                yield i
+
+        dataset = [2, 3]
+
+        dataset = PipelineChunkIterator(dataset, preprocess_chunk, {}, loader_batch_size=3)
+
+        outputs = [item for item in dataset]
+
+        self.assertEqual(outputs, [0, 1, 0, 1, 2])
+
+    def test_pipeline_pack_iterator(self):
+        from transformers.pipelines.pt_utils import PipelinePackIterator
+
+        def pack(item):
+            return {"id": item["id"] + 1, "is_last": item["is_last"]}
+
+        dataset = [
+            {"id": 0, "is_last": False},
+            {"id": 1, "is_last": True},
+            {"id": 0, "is_last": False},
+            {"id": 1, "is_last": False},
+            {"id": 2, "is_last": True},
+        ]
+
+        dataset = PipelinePackIterator(dataset, pack, {})
+
+        outputs = [item for item in dataset]
+        self.assertEqual(
+            outputs,
+            [
+                [
+                    {"id": 1},
+                    {"id": 2},
+                ],
+                [
+                    {"id": 1},
+                    {"id": 2},
+                    {"id": 3},
+                ],
+            ],
+        )
+
+    def test_pipeline_pack_unbatch_iterator(self):
+        from transformers.pipelines.pt_utils import PipelinePackIterator
+
+        dummy_dataset = [{"id": [0, 1, 2], "is_last": [False, True, False]}, {"id": [3], "is_last": [True]}]
+
+        def add(number, extra=0):
+            return {"id": [i + extra for i in number["id"]], "is_last": number["is_last"]}
+
+        dataset = PipelinePackIterator(dummy_dataset, add, {"extra": 2}, loader_batch_size=3)
+
+        outputs = [item for item in dataset]
+        self.assertEqual(outputs, [[{"id": 2}, {"id": 3}], [{"id": 4}, {"id": 5}]])
+
+        # is_false Across batch
+        dummy_dataset = [{"id": [0, 1, 2], "is_last": [False, False, False]}, {"id": [3], "is_last": [True]}]
+
+        def add(number, extra=0):
+            return {"id": [i + extra for i in number["id"]], "is_last": number["is_last"]}
+
+        dataset = PipelinePackIterator(dummy_dataset, add, {"extra": 2}, loader_batch_size=3)
+
+        outputs = [item for item in dataset]
+        self.assertEqual(outputs, [[{"id": 2}, {"id": 3}, {"id": 4}, {"id": 5}]])
--- a/tests/pipelines/test_pipelines_conversational.py
+++ b/tests/pipelines/test_pipelines_conversational.py
@@ -0,0 +1,384 @@
+# Copyright 2020 The HuggingFace Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import unittest
+
+from transformers import (
+    MODEL_FOR_CAUSAL_LM_MAPPING,
+    MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING,
+    TF_MODEL_FOR_CAUSAL_LM_MAPPING,
+    TF_MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING,
+    AutoModelForCausalLM,
+    AutoModelForSeq2SeqLM,
+    AutoTokenizer,
+    BlenderbotSmallForConditionalGeneration,
+    BlenderbotSmallTokenizer,
+    Conversation,
+    ConversationalPipeline,
+    TFAutoModelForCausalLM,
+    pipeline,
+)
+from transformers.testing_utils import is_pipeline_test, require_tf, require_torch, slow, torch_device
+
+from .test_pipelines_common import ANY, PipelineTestCaseMeta
+
+
+DEFAULT_DEVICE_NUM = -1 if torch_device == "cpu" else 0
+
+
+@is_pipeline_test
+class ConversationalPipelineTests(unittest.TestCase, metaclass=PipelineTestCaseMeta):
+    model_mapping = dict(
+        list(MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING.items())
+        if MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING
+        else [] + list(MODEL_FOR_CAUSAL_LM_MAPPING.items())
+        if MODEL_FOR_CAUSAL_LM_MAPPING
+        else []
+    )
+    tf_model_mapping = dict(
+        list(TF_MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING.items())
+        if TF_MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING
+        else [] + list(TF_MODEL_FOR_CAUSAL_LM_MAPPING.items())
+        if TF_MODEL_FOR_CAUSAL_LM_MAPPING
+        else []
+    )
+
+    def get_test_pipeline(self, model, tokenizer, feature_extractor):
+        conversation_agent = ConversationalPipeline(model=model, tokenizer=tokenizer)
+        return conversation_agent, [Conversation("Hi there!")]
+
+    def run_pipeline_test(self, conversation_agent, _):
+        # Simple
+        outputs = conversation_agent(Conversation("Hi there!"))
+        self.assertEqual(outputs, Conversation(past_user_inputs=["Hi there!"], generated_responses=[ANY(str)]))
+
+        # Single list
+        outputs = conversation_agent([Conversation("Hi there!")])
+        self.assertEqual(outputs, Conversation(past_user_inputs=["Hi there!"], generated_responses=[ANY(str)]))
+
+        # Batch
+        conversation_1 = Conversation("Going to the movies tonight - any suggestions?")
+        conversation_2 = Conversation("What's the last book you have read?")
+        self.assertEqual(len(conversation_1.past_user_inputs), 0)
+        self.assertEqual(len(conversation_2.past_user_inputs), 0)
+
+        outputs = conversation_agent([conversation_1, conversation_2])
+        self.assertEqual(outputs, [conversation_1, conversation_2])
+        self.assertEqual(
+            outputs,
+            [
+                Conversation(
+                    past_user_inputs=["Going to the movies tonight - any suggestions?"],
+                    generated_responses=[ANY(str)],
+                ),
+                Conversation(past_user_inputs=["What's the last book you have read?"], generated_responses=[ANY(str)]),
+            ],
+        )
+
+        # One conversation with history
+        conversation_2.add_user_input("Why do you recommend it?")
+        outputs = conversation_agent(conversation_2)
+        self.assertEqual(outputs, conversation_2)
+        self.assertEqual(
+            outputs,
+            Conversation(
+                past_user_inputs=["What's the last book you have read?", "Why do you recommend it?"],
+                generated_responses=[ANY(str), ANY(str)],
+            ),
+        )
+        with self.assertRaises(ValueError):
+            conversation_agent("Hi there!")
+        with self.assertRaises(ValueError):
+            conversation_agent(Conversation())
+        # Conversation have been consumed and are not valid anymore
+        # Inactive conversations passed to the pipeline raise a ValueError
+        with self.assertRaises(ValueError):
+            conversation_agent(conversation_2)
+
+    @require_torch
+    @slow
+    def test_integration_torch_conversation(self):
+        # When
+        conversation_agent = pipeline(task="conversational", device=DEFAULT_DEVICE_NUM)
+        conversation_1 = Conversation("Going to the movies tonight - any suggestions?")
+        conversation_2 = Conversation("What's the last book you have read?")
+        # Then
+        self.assertEqual(len(conversation_1.past_user_inputs), 0)
+        self.assertEqual(len(conversation_2.past_user_inputs), 0)
+        # When
+        result = conversation_agent([conversation_1, conversation_2], do_sample=False, max_length=1000)
+        # Then
+        self.assertEqual(result, [conversation_1, conversation_2])
+        self.assertEqual(len(result[0].past_user_inputs), 1)
+        self.assertEqual(len(result[1].past_user_inputs), 1)
+        self.assertEqual(len(result[0].generated_responses), 1)
+        self.assertEqual(len(result[1].generated_responses), 1)
+        self.assertEqual(result[0].past_user_inputs[0], "Going to the movies tonight - any suggestions?")
+        self.assertEqual(result[0].generated_responses[0], "The Big Lebowski")
+        self.assertEqual(result[1].past_user_inputs[0], "What's the last book you have read?")
+        self.assertEqual(result[1].generated_responses[0], "The Last Question")
+        # When
+        conversation_2.add_user_input("Why do you recommend it?")
+        result = conversation_agent(conversation_2, do_sample=False, max_length=1000)
+        # Then
+        self.assertEqual(result, conversation_2)
+        self.assertEqual(len(result.past_user_inputs), 2)
+        self.assertEqual(len(result.generated_responses), 2)
+        self.assertEqual(result.past_user_inputs[1], "Why do you recommend it?")
+        self.assertEqual(result.generated_responses[1], "It's a good book.")
+
+    @require_torch
+    @slow
+    def test_integration_torch_conversation_truncated_history(self):
+        # When
+        conversation_agent = pipeline(task="conversational", min_length_for_response=24, device=DEFAULT_DEVICE_NUM)
+        conversation_1 = Conversation("Going to the movies tonight - any suggestions?")
+        # Then
+        self.assertEqual(len(conversation_1.past_user_inputs), 0)
+        # When
+        result = conversation_agent(conversation_1, do_sample=False, max_length=36)
+        # Then
+        self.assertEqual(result, conversation_1)
+        self.assertEqual(len(result.past_user_inputs), 1)
+        self.assertEqual(len(result.generated_responses), 1)
+        self.assertEqual(result.past_user_inputs[0], "Going to the movies tonight - any suggestions?")
+        self.assertEqual(result.generated_responses[0], "The Big Lebowski")
+        # When
+        conversation_1.add_user_input("Is it an action movie?")
+        result = conversation_agent(conversation_1, do_sample=False, max_length=36)
+        # Then
+        self.assertEqual(result, conversation_1)
+        self.assertEqual(len(result.past_user_inputs), 2)
+        self.assertEqual(len(result.generated_responses), 2)
+        self.assertEqual(result.past_user_inputs[1], "Is it an action movie?")
+        self.assertEqual(result.generated_responses[1], "It's a comedy.")
+
+    @require_torch
+    def test_small_model_pt(self):
+        tokenizer = AutoTokenizer.from_pretrained("microsoft/DialoGPT-small")
+        model = AutoModelForCausalLM.from_pretrained("microsoft/DialoGPT-small")
+        conversation_agent = ConversationalPipeline(model=model, tokenizer=tokenizer)
+        conversation = Conversation("hello")
+        output = conversation_agent(conversation)
+        self.assertEqual(output, Conversation(past_user_inputs=["hello"], generated_responses=["Hi"]))
+
+    @require_tf
+    def test_small_model_tf(self):
+        tokenizer = AutoTokenizer.from_pretrained("microsoft/DialoGPT-small")
+        model = TFAutoModelForCausalLM.from_pretrained("microsoft/DialoGPT-small")
+        conversation_agent = ConversationalPipeline(model=model, tokenizer=tokenizer)
+        conversation = Conversation("hello")
+        output = conversation_agent(conversation)
+        self.assertEqual(output, Conversation(past_user_inputs=["hello"], generated_responses=["Hi"]))
+
+    @require_torch
+    @slow
+    def test_integration_torch_conversation_dialogpt_input_ids(self):
+        tokenizer = AutoTokenizer.from_pretrained("microsoft/DialoGPT-small")
+        model = AutoModelForCausalLM.from_pretrained("microsoft/DialoGPT-small")
+        conversation_agent = ConversationalPipeline(model=model, tokenizer=tokenizer)
+
+        conversation_1 = Conversation("hello")
+        inputs = conversation_agent.preprocess(conversation_1)
+        self.assertEqual(inputs["input_ids"].tolist(), [[31373, 50256]])
+
+        conversation_2 = Conversation("how are you ?", past_user_inputs=["hello"], generated_responses=["Hi there!"])
+        inputs = conversation_agent.preprocess(conversation_2)
+        self.assertEqual(
+            inputs["input_ids"].tolist(), [[31373, 50256, 17250, 612, 0, 50256, 4919, 389, 345, 5633, 50256]]
+        )
+
+    @require_torch
+    @slow
+    def test_integration_torch_conversation_blenderbot_400M_input_ids(self):
+        tokenizer = AutoTokenizer.from_pretrained("facebook/blenderbot-400M-distill")
+        model = AutoModelForSeq2SeqLM.from_pretrained("facebook/blenderbot-400M-distill")
+        conversation_agent = ConversationalPipeline(model=model, tokenizer=tokenizer)
+
+        # test1
+        conversation_1 = Conversation("hello")
+        inputs = conversation_agent.preprocess(conversation_1)
+        self.assertEqual(inputs["input_ids"].tolist(), [[1710, 86, 2]])
+
+        # test2
+        conversation_1 = Conversation(
+            "I like lasagne.",
+            past_user_inputs=["hello"],
+            generated_responses=[
+                " Do you like lasagne? It is a traditional Italian dish consisting of a shepherd's pie."
+            ],
+        )
+        inputs = conversation_agent.preprocess(conversation_1)
+        self.assertEqual(
+            inputs["input_ids"].tolist(),
+            [
+                # This should be compared with the same conversation on ParlAI `safe_interactive` demo.
+                [
+                    1710,  # hello
+                    86,
+                    228,  # Double space
+                    228,
+                    946,
+                    304,
+                    398,
+                    6881,
+                    558,
+                    964,
+                    38,
+                    452,
+                    315,
+                    265,
+                    6252,
+                    452,
+                    322,
+                    968,
+                    6884,
+                    3146,
+                    278,
+                    306,
+                    265,
+                    617,
+                    87,
+                    388,
+                    75,
+                    341,
+                    286,
+                    521,
+                    21,
+                    228,  # Double space
+                    228,
+                    281,  # I like lasagne.
+                    398,
+                    6881,
+                    558,
+                    964,
+                    21,
+                    2,  # EOS
+                ],
+            ],
+        )
+
+    @require_torch
+    @slow
+    def test_integration_torch_conversation_blenderbot_400M(self):
+        tokenizer = AutoTokenizer.from_pretrained("facebook/blenderbot-400M-distill")
+        model = AutoModelForSeq2SeqLM.from_pretrained("facebook/blenderbot-400M-distill")
+        conversation_agent = ConversationalPipeline(model=model, tokenizer=tokenizer)
+
+        conversation_1 = Conversation("hello")
+        result = conversation_agent(
+            conversation_1,
+        )
+        self.assertEqual(
+            result.generated_responses[0],
+            # ParlAI implementation output, we have a different one, but it's our
+            # second best, you can check by using num_return_sequences=10
+            # " Hello! How are you? I'm just getting ready to go to work, how about you?",
+            " Hello! How are you doing today? I just got back from a walk with my dog.",
+        )
+
+        conversation_1 = Conversation("Lasagne   hello")
+        result = conversation_agent(conversation_1, encoder_no_repeat_ngram_size=3)
+        self.assertEqual(
+            result.generated_responses[0],
+            " Do you like lasagne? It is a traditional Italian dish consisting of a shepherd's pie.",
+        )
+
+        conversation_1 = Conversation(
+            "Lasagne   hello   Lasagne is my favorite Italian dish. Do you like lasagne?   I like lasagne."
+        )
+        result = conversation_agent(
+            conversation_1,
+            encoder_no_repeat_ngram_size=3,
+        )
+        self.assertEqual(
+            result.generated_responses[0],
+            " Me too. I like how it can be topped with vegetables, meats, and condiments.",
+        )
+
+    @require_torch
+    @slow
+    def test_integration_torch_conversation_encoder_decoder(self):
+        # When
+        tokenizer = AutoTokenizer.from_pretrained("facebook/blenderbot_small-90M")
+        model = AutoModelForSeq2SeqLM.from_pretrained("facebook/blenderbot_small-90M")
+        conversation_agent = ConversationalPipeline(model=model, tokenizer=tokenizer, device=DEFAULT_DEVICE_NUM)
+
+        conversation_1 = Conversation("My name is Sarah and I live in London")
+        conversation_2 = Conversation("Going to the movies tonight, What movie would you recommend? ")
+        # Then
+        self.assertEqual(len(conversation_1.past_user_inputs), 0)
+        self.assertEqual(len(conversation_2.past_user_inputs), 0)
+        # When
+        result = conversation_agent([conversation_1, conversation_2], do_sample=False, max_length=1000)
+        # Then
+        self.assertEqual(result, [conversation_1, conversation_2])
+        self.assertEqual(len(result[0].past_user_inputs), 1)
+        self.assertEqual(len(result[1].past_user_inputs), 1)
+        self.assertEqual(len(result[0].generated_responses), 1)
+        self.assertEqual(len(result[1].generated_responses), 1)
+        self.assertEqual(result[0].past_user_inputs[0], "My name is Sarah and I live in London")
+        self.assertEqual(
+            result[0].generated_responses[0],
+            "hi sarah, i live in london as well. do you have any plans for the weekend?",
+        )
+        self.assertEqual(
+            result[1].past_user_inputs[0], "Going to the movies tonight, What movie would you recommend? "
+        )
+        self.assertEqual(
+            result[1].generated_responses[0], "i don't know... i'm not really sure. what movie are you going to see?"
+        )
+        # When
+        conversation_1.add_user_input("Not yet, what about you?")
+        conversation_2.add_user_input("What's your name?")
+        result = conversation_agent([conversation_1, conversation_2], do_sample=False, max_length=1000)
+        # Then
+        self.assertEqual(result, [conversation_1, conversation_2])
+        self.assertEqual(len(result[0].past_user_inputs), 2)
+        self.assertEqual(len(result[1].past_user_inputs), 2)
+        self.assertEqual(len(result[0].generated_responses), 2)
+        self.assertEqual(len(result[1].generated_responses), 2)
+        self.assertEqual(result[0].past_user_inputs[1], "Not yet, what about you?")
+        self.assertEqual(result[0].generated_responses[1], "i don't have any plans yet. i'm not sure what to do yet.")
+        self.assertEqual(result[1].past_user_inputs[1], "What's your name?")
+        self.assertEqual(result[1].generated_responses[1], "i don't have a name, but i'm going to see a horror movie.")
+
+    @require_torch
+    @slow
+    def test_from_pipeline_conversation(self):
+        model_id = "facebook/blenderbot_small-90M"
+
+        # from model id
+        conversation_agent_from_model_id = pipeline("conversational", model=model_id, tokenizer=model_id)
+
+        # from model object
+        model = BlenderbotSmallForConditionalGeneration.from_pretrained(model_id)
+        tokenizer = BlenderbotSmallTokenizer.from_pretrained(model_id)
+        conversation_agent_from_model = pipeline("conversational", model=model, tokenizer=tokenizer)
+
+        conversation = Conversation("My name is Sarah and I live in London")
+        conversation_copy = Conversation("My name is Sarah and I live in London")
+
+        result_model_id = conversation_agent_from_model_id([conversation])
+        result_model = conversation_agent_from_model([conversation_copy])
+
+        # check for equality
+        self.assertEqual(
+            result_model_id.generated_responses[0],
+            "hi sarah, i live in london as well. do you have any plans for the weekend?",
+        )
+        self.assertEqual(
+            result_model_id.generated_responses[0],
+            result_model.generated_responses[0],
+        )
--- a/tests/pipelines/test_pipelines_feature_extraction.py
+++ b/tests/pipelines/test_pipelines_feature_extraction.py
@@ -0,0 +1,107 @@
+# Copyright 2020 The HuggingFace Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import unittest
+
+from transformers import (
+    FEATURE_EXTRACTOR_MAPPING,
+    MODEL_MAPPING,
+    TF_MODEL_MAPPING,
+    FeatureExtractionPipeline,
+    LxmertConfig,
+    pipeline,
+)
+from transformers.testing_utils import is_pipeline_test, nested_simplify, require_tf, require_torch
+
+from .test_pipelines_common import PipelineTestCaseMeta
+
+
+@is_pipeline_test
+class FeatureExtractionPipelineTests(unittest.TestCase, metaclass=PipelineTestCaseMeta):
+    model_mapping = MODEL_MAPPING
+    tf_model_mapping = TF_MODEL_MAPPING
+
+    @require_torch
+    def test_small_model_pt(self):
+        feature_extractor = pipeline(
+            task="feature-extraction", model="hf-internal-testing/tiny-random-distilbert", framework="pt"
+        )
+        outputs = feature_extractor("This is a test")
+        self.assertEqual(
+            nested_simplify(outputs),
+            [[[2.287, 1.234, 0.042, 1.53, 1.306, 0.879, -0.526, -1.71, -1.276, 0.756, -0.775, -1.048, -0.25, -0.595, -0.137, -0.598, 2.022, -0.812, 0.284, -0.488, -0.391, -0.403, -0.525, -0.061, -0.228, 1.086, 0.378, -0.14, 0.599, -0.087, -2.259, -0.098], [1.676, 0.232, -1.508, -0.145, 1.798, -1.388, 1.331, -0.37, -0.939, 0.043, 0.06, -0.414, -1.408, 0.24, 0.622, -0.55, -0.569, 1.873, -0.706, 1.924, -0.254, 1.927, -0.423, 0.152, -0.952, 0.509, -0.496, -0.968, 0.093, -1.049, -0.65, 0.312], [0.207, -0.775, -1.822, 0.321, -0.71, -0.201, 0.3, 1.146, -0.233, -0.753, -0.305, 1.309, -1.47, -0.21, 1.802, -1.555, -1.175, 1.323, -0.303, 0.722, -0.076, 0.103, -1.406, 1.931, 0.091, 0.237, 1.172, 1.607, 0.253, -0.9, -1.068, 0.438], [0.615, 1.077, 0.171, -0.175, 1.3, 0.901, -0.653, -0.138, 0.341, -0.654, -0.184, -0.441, -0.424, 0.356, -0.075, 0.26, -1.023, 0.814, 0.524, -0.904, -0.204, -0.623, 1.234, -1.03, 2.594, 0.56, 1.831, -0.199, -1.508, -0.492, -1.687, -2.165], [0.129, 0.008, -1.279, -0.412, -0.004, 1.663, 0.196, 0.104, 0.123, 0.119, 0.635, 1.757, 2.334, -0.799, -1.626, -1.26, 0.595, -0.316, -1.399, 0.232, 0.264, 1.386, -1.171, -0.256, -0.256, -1.944, 1.168, -0.368, -0.714, -0.51, 0.454, 1.148], [-0.32, 0.29, -1.309, -0.177, 0.453, 0.636, -0.024, 0.509, 0.931, -1.754, -1.575, 0.786, 0.046, -1.165, -1.416, 1.373, 1.293, -0.285, -1.541, -1.186, -0.106, -0.994, 2.001, 0.972, -0.02, 1.654, -0.236, 0.643, 1.02, 0.572, -0.914, -0.154], [0.7, -0.937, 0.441, 0.25, 0.78, -0.022, 0.282, -0.095, 1.558, -0.336, 1.706, 0.884, 1.28, 0.198, -0.796, 1.218, -1.769, 1.197, -0.342, -0.177, -0.645, 1.364, 0.008, -0.597, -0.484, -2.772, -0.696, -0.632, -0.34, -1.527, -0.562, 0.862], [2.504, 0.831, -1.271, -0.033, 0.298, -0.735, 1.339, 1.74, 0.233, -1.424, -0.819, -0.761, 0.291, 0.853, -0.092, -0.885, 0.164, 1.025, 0.907, 0.749, -1.515, -0.545, -1.365, 0.271, 0.034, -2.005, 0.031, 0.244, 0.621, 0.176, 0.336, -1.196], [-0.711, 0.591, -1.001, -0.946, 0.784, -1.66, 1.545, 0.799, -0.857, 1.148, 0.213, -0.285, 0.464, -0.139, 0.79, -1.663, -1.121, 0.575, -0.178, -0.508, 1.565, -0.242, -0.346, 1.024, -1.135, -0.158, -2.101, 0.275, 2.009, -0.425, 0.716, 0.981], [0.912, -1.186, -0.846, -0.421, -1.315, -0.827, 0.309, 0.533, 1.029, -2.343, 1.513, -1.238, 1.487, -0.849, 0.896, -0.927, -0.459, 0.159, 0.177, 0.873, 0.935, 1.433, -0.485, 0.737, 1.327, -0.338, 1.608, -0.47, -0.445, -1.118, -0.213, -0.446], [-0.434, -1.362, -1.098, -1.068, 1.507, 0.003, 0.413, -0.395, 0.897, -0.237, 1.405, -0.344, 1.693, 0.677, 0.097, -0.257, -0.602, 1.026, -1.229, 0.855, -0.713, 1.014, 0.443, 0.238, 0.425, -2.184, 1.933, -1.157, -1.132, -0.597, -0.785, 0.967], [0.58, -0.971, 0.789, -0.468, -0.576, 1.779, 1.747, 1.715, -1.939, 0.125, 0.656, -0.042, -1.024, -1.767, 0.107, -0.408, -0.866, -1.774, 1.248, 0.939, -0.033, 1.523, 1.168, -0.744, 0.209, -0.168, -0.316, 0.207, -0.432, 0.047, -0.646, -0.664], [-0.185, -0.613, -1.695, 1.602, -0.32, -0.277, 0.967, 0.728, -0.965, -0.234, 1.069, -0.63, -1.631, 0.711, 0.426, 1.298, -0.191, -0.467, -0.771, 0.971, -0.118, -1.577, -2.064, -0.055, -0.59, 0.642, -0.997, 1.251, 0.538, 1.367, 0.106, 1.704]]])  # fmt: skip
+
+    @require_tf
+    def test_small_model_tf(self):
+        feature_extractor = pipeline(
+            task="feature-extraction", model="hf-internal-testing/tiny-random-distilbert", framework="tf"
+        )
+        outputs = feature_extractor("This is a test")
+        self.assertEqual(
+            nested_simplify(outputs),
+            [[[2.287, 1.234, 0.042, 1.53, 1.306, 0.879, -0.526, -1.71, -1.276, 0.756, -0.775, -1.048, -0.25, -0.595, -0.137, -0.598, 2.022, -0.812, 0.284, -0.488, -0.391, -0.403, -0.525, -0.061, -0.228, 1.086, 0.378, -0.14, 0.599, -0.087, -2.259, -0.098], [1.676, 0.232, -1.508, -0.145, 1.798, -1.388, 1.331, -0.37, -0.939, 0.043, 0.06, -0.414, -1.408, 0.24, 0.622, -0.55, -0.569, 1.873, -0.706, 1.924, -0.254, 1.927, -0.423, 0.152, -0.952, 0.509, -0.496, -0.968, 0.093, -1.049, -0.65, 0.312], [0.207, -0.775, -1.822, 0.321, -0.71, -0.201, 0.3, 1.146, -0.233, -0.753, -0.305, 1.309, -1.47, -0.21, 1.802, -1.555, -1.175, 1.323, -0.303, 0.722, -0.076, 0.103, -1.406, 1.931, 0.091, 0.237, 1.172, 1.607, 0.253, -0.9, -1.068, 0.438], [0.615, 1.077, 0.171, -0.175, 1.3, 0.901, -0.653, -0.138, 0.341, -0.654, -0.184, -0.441, -0.424, 0.356, -0.075, 0.26, -1.023, 0.814, 0.524, -0.904, -0.204, -0.623, 1.234, -1.03, 2.594, 0.56, 1.831, -0.199, -1.508, -0.492, -1.687, -2.165], [0.129, 0.008, -1.279, -0.412, -0.004, 1.663, 0.196, 0.104, 0.123, 0.119, 0.635, 1.757, 2.334, -0.799, -1.626, -1.26, 0.595, -0.316, -1.399, 0.232, 0.264, 1.386, -1.171, -0.256, -0.256, -1.944, 1.168, -0.368, -0.714, -0.51, 0.454, 1.148], [-0.32, 0.29, -1.309, -0.177, 0.453, 0.636, -0.024, 0.509, 0.931, -1.754, -1.575, 0.786, 0.046, -1.165, -1.416, 1.373, 1.293, -0.285, -1.541, -1.186, -0.106, -0.994, 2.001, 0.972, -0.02, 1.654, -0.236, 0.643, 1.02, 0.572, -0.914, -0.154], [0.7, -0.937, 0.441, 0.25, 0.78, -0.022, 0.282, -0.095, 1.558, -0.336, 1.706, 0.884, 1.28, 0.198, -0.796, 1.218, -1.769, 1.197, -0.342, -0.177, -0.645, 1.364, 0.008, -0.597, -0.484, -2.772, -0.696, -0.632, -0.34, -1.527, -0.562, 0.862], [2.504, 0.831, -1.271, -0.033, 0.298, -0.735, 1.339, 1.74, 0.233, -1.424, -0.819, -0.761, 0.291, 0.853, -0.092, -0.885, 0.164, 1.025, 0.907, 0.749, -1.515, -0.545, -1.365, 0.271, 0.034, -2.005, 0.031, 0.244, 0.621, 0.176, 0.336, -1.196], [-0.711, 0.591, -1.001, -0.946, 0.784, -1.66, 1.545, 0.799, -0.857, 1.148, 0.213, -0.285, 0.464, -0.139, 0.79, -1.663, -1.121, 0.575, -0.178, -0.508, 1.565, -0.242, -0.346, 1.024, -1.135, -0.158, -2.101, 0.275, 2.009, -0.425, 0.716, 0.981], [0.912, -1.186, -0.846, -0.421, -1.315, -0.827, 0.309, 0.533, 1.029, -2.343, 1.513, -1.238, 1.487, -0.849, 0.896, -0.927, -0.459, 0.159, 0.177, 0.873, 0.935, 1.433, -0.485, 0.737, 1.327, -0.338, 1.608, -0.47, -0.445, -1.118, -0.213, -0.446], [-0.434, -1.362, -1.098, -1.068, 1.507, 0.003, 0.413, -0.395, 0.897, -0.237, 1.405, -0.344, 1.693, 0.677, 0.097, -0.257, -0.602, 1.026, -1.229, 0.855, -0.713, 1.014, 0.443, 0.238, 0.425, -2.184, 1.933, -1.157, -1.132, -0.597, -0.785, 0.967], [0.58, -0.971, 0.789, -0.468, -0.576, 1.779, 1.747, 1.715, -1.939, 0.125, 0.656, -0.042, -1.024, -1.767, 0.107, -0.408, -0.866, -1.774, 1.248, 0.939, -0.033, 1.523, 1.168, -0.744, 0.209, -0.168, -0.316, 0.207, -0.432, 0.047, -0.646, -0.664], [-0.185, -0.613, -1.695, 1.602, -0.32, -0.277, 0.967, 0.728, -0.965, -0.234, 1.069, -0.63, -1.631, 0.711, 0.426, 1.298, -0.191, -0.467, -0.771, 0.971, -0.118, -1.577, -2.064, -0.055, -0.59, 0.642, -0.997, 1.251, 0.538, 1.367, 0.106, 1.704]]])  # fmt: skip
+
+    def get_shape(self, input_, shape=None):
+        if shape is None:
+            shape = []
+        if isinstance(input_, list):
+            subshapes = [self.get_shape(in_, shape) for in_ in input_]
+            if all(s == 0 for s in subshapes):
+                shape.append(len(input_))
+            else:
+                subshape = subshapes[0]
+                shape = [len(input_), *subshape]
+        elif isinstance(input_, float):
+            return 0
+        else:
+            raise ValueError("We expect lists of floats, nothing else")
+        return shape
+
+    def get_test_pipeline(self, model, tokenizer, feature_extractor):
+        if tokenizer is None:
+            self.skipTest("No tokenizer")
+            return
+        elif type(model.config) in FEATURE_EXTRACTOR_MAPPING or isinstance(model.config, LxmertConfig):
+            self.skipTest("This is a bimodal model, we need to find a more consistent way to switch on those models.")
+            return
+        elif model.config.is_encoder_decoder:
+            self.skipTest(
+                """encoder_decoder models are trickier for this pipeline.
+                Do we want encoder + decoder inputs to get some featues?
+                Do we want encoder only features ?
+                For now ignore those.
+                """
+            )
+
+            return
+        feature_extractor = FeatureExtractionPipeline(
+            model=model, tokenizer=tokenizer, feature_extractor=feature_extractor
+        )
+        return feature_extractor, ["This is a test", "This is another test"]
+
+    def run_pipeline_test(self, feature_extractor, examples):
+        outputs = feature_extractor("This is a test")
+
+        shape = self.get_shape(outputs)
+        self.assertEqual(shape[0], 1)
+
+        # If we send too small input
+        # there's a bug within FunnelModel (output with shape [1, 4, 2, 1] doesn't match the broadcast shape [1, 4, 2, 2])
+        outputs = feature_extractor(["This is a test", "Another longer test"])
+        shape = self.get_shape(outputs)
+        self.assertEqual(shape[0], 2)
+
+        outputs = feature_extractor("This is a test" * 100, truncation=True)
+        shape = self.get_shape(outputs)
+        self.assertEqual(shape[0], 1)
--- a/tests/pipelines/test_pipelines_fill_mask.py
+++ b/tests/pipelines/test_pipelines_fill_mask.py
@@ -0,0 +1,390 @@
+# Copyright 2020 The HuggingFace Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import unittest
+
+from transformers import MODEL_FOR_MASKED_LM_MAPPING, TF_MODEL_FOR_MASKED_LM_MAPPING, FillMaskPipeline, pipeline
+from transformers.pipelines import PipelineException
+from transformers.testing_utils import is_pipeline_test, nested_simplify, require_tf, require_torch, slow
+
+from .test_pipelines_common import ANY, PipelineTestCaseMeta
+
+
+@is_pipeline_test
+class FillMaskPipelineTests(unittest.TestCase, metaclass=PipelineTestCaseMeta):
+    model_mapping = MODEL_FOR_MASKED_LM_MAPPING
+    tf_model_mapping = TF_MODEL_FOR_MASKED_LM_MAPPING
+
+    @require_tf
+    def test_small_model_tf(self):
+        unmasker = pipeline(task="fill-mask", model="sshleifer/tiny-distilroberta-base", top_k=2, framework="tf")
+        outputs = unmasker("My name is <mask>")
+        self.assertEqual(
+            nested_simplify(outputs, decimals=6),
+            [
+                {"sequence": "My name is grouped", "score": 2.1e-05, "token": 38015, "token_str": " grouped"},
+                {"sequence": "My name is accuser", "score": 2.1e-05, "token": 25506, "token_str": " accuser"},
+            ],
+        )
+
+        outputs = unmasker("The largest city in France is <mask>")
+        self.assertEqual(
+            nested_simplify(outputs, decimals=6),
+            [
+                {
+                    "sequence": "The largest city in France is grouped",
+                    "score": 2.1e-05,
+                    "token": 38015,
+                    "token_str": " grouped",
+                },
+                {
+                    "sequence": "The largest city in France is accuser",
+                    "score": 2.1e-05,
+                    "token": 25506,
+                    "token_str": " accuser",
+                },
+            ],
+        )
+
+        outputs = unmasker("My name is <mask>", targets=[" Patrick", " Clara", " Teven"], top_k=3)
+        self.assertEqual(
+            nested_simplify(outputs, decimals=6),
+            [
+                {"sequence": "My name is Clara", "score": 2e-05, "token": 13606, "token_str": " Clara"},
+                {"sequence": "My name is Patrick", "score": 2e-05, "token": 3499, "token_str": " Patrick"},
+                {"sequence": "My name is Te", "score": 1.9e-05, "token": 2941, "token_str": " Te"},
+            ],
+        )
+
+    @require_torch
+    def test_small_model_pt(self):
+        unmasker = pipeline(task="fill-mask", model="sshleifer/tiny-distilroberta-base", top_k=2, framework="pt")
+
+        outputs = unmasker("My name is <mask>")
+        self.assertEqual(
+            nested_simplify(outputs, decimals=6),
+            [
+                {"sequence": "My name is Maul", "score": 2.2e-05, "token": 35676, "token_str": " Maul"},
+                {"sequence": "My name isELS", "score": 2.2e-05, "token": 16416, "token_str": "ELS"},
+            ],
+        )
+
+        outputs = unmasker("The largest city in France is <mask>")
+        self.assertEqual(
+            nested_simplify(outputs, decimals=6),
+            [
+                {
+                    "sequence": "The largest city in France is Maul",
+                    "score": 2.2e-05,
+                    "token": 35676,
+                    "token_str": " Maul",
+                },
+                {"sequence": "The largest city in France isELS", "score": 2.2e-05, "token": 16416, "token_str": "ELS"},
+            ],
+        )
+
+        outputs = unmasker("My name is <mask>", targets=[" Patrick", " Clara", " Teven"], top_k=3)
+        self.assertEqual(
+            nested_simplify(outputs, decimals=6),
+            [
+                {"sequence": "My name is Patrick", "score": 2.1e-05, "token": 3499, "token_str": " Patrick"},
+                {"sequence": "My name is Te", "score": 2e-05, "token": 2941, "token_str": " Te"},
+                {"sequence": "My name is Clara", "score": 2e-05, "token": 13606, "token_str": " Clara"},
+            ],
+        )
+
+        outputs = unmasker("My name is <mask> <mask>", top_k=2)
+
+        self.assertEqual(
+            nested_simplify(outputs, decimals=6),
+            [
+                [
+                    {
+                        "score": 2.2e-05,
+                        "token": 35676,
+                        "token_str": " Maul",
+                        "sequence": "<s>My name is Maul<mask></s>",
+                    },
+                    {"score": 2.2e-05, "token": 16416, "token_str": "ELS", "sequence": "<s>My name isELS<mask></s>"},
+                ],
+                [
+                    {
+                        "score": 2.2e-05,
+                        "token": 35676,
+                        "token_str": " Maul",
+                        "sequence": "<s>My name is<mask> Maul</s>",
+                    },
+                    {"score": 2.2e-05, "token": 16416, "token_str": "ELS", "sequence": "<s>My name is<mask>ELS</s>"},
+                ],
+            ],
+        )
+
+    @slow
+    @require_torch
+    def test_large_model_pt(self):
+        unmasker = pipeline(task="fill-mask", model="distilroberta-base", top_k=2, framework="pt")
+        self.run_large_test(unmasker)
+
+    @slow
+    @require_tf
+    def test_large_model_tf(self):
+        unmasker = pipeline(task="fill-mask", model="distilroberta-base", top_k=2, framework="tf")
+        self.run_large_test(unmasker)
+
+    def run_large_test(self, unmasker):
+        outputs = unmasker("My name is <mask>")
+        self.assertEqual(
+            nested_simplify(outputs),
+            [
+                {"sequence": "My name is John", "score": 0.008, "token": 610, "token_str": " John"},
+                {"sequence": "My name is Chris", "score": 0.007, "token": 1573, "token_str": " Chris"},
+            ],
+        )
+        outputs = unmasker("The largest city in France is <mask>")
+        self.assertEqual(
+            nested_simplify(outputs),
+            [
+                {
+                    "sequence": "The largest city in France is Paris",
+                    "score": 0.251,
+                    "token": 2201,
+                    "token_str": " Paris",
+                },
+                {
+                    "sequence": "The largest city in France is Lyon",
+                    "score": 0.214,
+                    "token": 12790,
+                    "token_str": " Lyon",
+                },
+            ],
+        )
+
+        outputs = unmasker("My name is <mask>", targets=[" Patrick", " Clara", " Teven"], top_k=3)
+        self.assertEqual(
+            nested_simplify(outputs),
+            [
+                {"sequence": "My name is Patrick", "score": 0.005, "token": 3499, "token_str": " Patrick"},
+                {"sequence": "My name is Clara", "score": 0.000, "token": 13606, "token_str": " Clara"},
+                {"sequence": "My name is Te", "score": 0.000, "token": 2941, "token_str": " Te"},
+            ],
+        )
+
+    @require_torch
+    def test_model_no_pad_pt(self):
+        unmasker = pipeline(task="fill-mask", model="sshleifer/tiny-distilroberta-base", framework="pt")
+        unmasker.tokenizer.pad_token_id = None
+        unmasker.tokenizer.pad_token = None
+        self.run_pipeline_test(unmasker, [])
+
+    @require_tf
+    def test_model_no_pad_tf(self):
+        unmasker = pipeline(task="fill-mask", model="sshleifer/tiny-distilroberta-base", framework="tf")
+        unmasker.tokenizer.pad_token_id = None
+        unmasker.tokenizer.pad_token = None
+        self.run_pipeline_test(unmasker, [])
+
+    def get_test_pipeline(self, model, tokenizer, feature_extractor):
+        if tokenizer is None or tokenizer.mask_token_id is None:
+            self.skipTest("The provided tokenizer has no mask token, (probably reformer or wav2vec2)")
+
+        fill_masker = FillMaskPipeline(model=model, tokenizer=tokenizer)
+        examples = [
+            f"This is another {tokenizer.mask_token} test",
+        ]
+        return fill_masker, examples
+
+    def run_pipeline_test(self, fill_masker, examples):
+        tokenizer = fill_masker.tokenizer
+        model = fill_masker.model
+
+        outputs = fill_masker(
+            f"This is a {tokenizer.mask_token}",
+        )
+        self.assertEqual(
+            outputs,
+            [
+                {"sequence": ANY(str), "score": ANY(float), "token": ANY(int), "token_str": ANY(str)},
+                {"sequence": ANY(str), "score": ANY(float), "token": ANY(int), "token_str": ANY(str)},
+                {"sequence": ANY(str), "score": ANY(float), "token": ANY(int), "token_str": ANY(str)},
+                {"sequence": ANY(str), "score": ANY(float), "token": ANY(int), "token_str": ANY(str)},
+                {"sequence": ANY(str), "score": ANY(float), "token": ANY(int), "token_str": ANY(str)},
+            ],
+        )
+
+        outputs = fill_masker([f"This is a {tokenizer.mask_token}"])
+        self.assertEqual(
+            outputs,
+            [
+                {"sequence": ANY(str), "score": ANY(float), "token": ANY(int), "token_str": ANY(str)},
+                {"sequence": ANY(str), "score": ANY(float), "token": ANY(int), "token_str": ANY(str)},
+                {"sequence": ANY(str), "score": ANY(float), "token": ANY(int), "token_str": ANY(str)},
+                {"sequence": ANY(str), "score": ANY(float), "token": ANY(int), "token_str": ANY(str)},
+                {"sequence": ANY(str), "score": ANY(float), "token": ANY(int), "token_str": ANY(str)},
+            ],
+        )
+
+        outputs = fill_masker([f"This is a {tokenizer.mask_token}", f"Another {tokenizer.mask_token} great test."])
+        self.assertEqual(
+            outputs,
+            [
+                [
+                    {"sequence": ANY(str), "score": ANY(float), "token": ANY(int), "token_str": ANY(str)},
+                    {"sequence": ANY(str), "score": ANY(float), "token": ANY(int), "token_str": ANY(str)},
+                    {"sequence": ANY(str), "score": ANY(float), "token": ANY(int), "token_str": ANY(str)},
+                    {"sequence": ANY(str), "score": ANY(float), "token": ANY(int), "token_str": ANY(str)},
+                    {"sequence": ANY(str), "score": ANY(float), "token": ANY(int), "token_str": ANY(str)},
+                ],
+                [
+                    {"sequence": ANY(str), "score": ANY(float), "token": ANY(int), "token_str": ANY(str)},
+                    {"sequence": ANY(str), "score": ANY(float), "token": ANY(int), "token_str": ANY(str)},
+                    {"sequence": ANY(str), "score": ANY(float), "token": ANY(int), "token_str": ANY(str)},
+                    {"sequence": ANY(str), "score": ANY(float), "token": ANY(int), "token_str": ANY(str)},
+                    {"sequence": ANY(str), "score": ANY(float), "token": ANY(int), "token_str": ANY(str)},
+                ],
+            ],
+        )
+
+        with self.assertRaises(ValueError):
+            fill_masker([None])
+        # No mask_token is not supported
+        with self.assertRaises(PipelineException):
+            fill_masker("This is")
+
+        self.run_test_top_k(model, tokenizer)
+        self.run_test_targets(model, tokenizer)
+        self.run_test_top_k_targets(model, tokenizer)
+        self.fill_mask_with_duplicate_targets_and_top_k(model, tokenizer)
+        self.fill_mask_with_multiple_masks(model, tokenizer)
+
+    def run_test_targets(self, model, tokenizer):
+        vocab = tokenizer.get_vocab()
+        targets = list(sorted(vocab.keys()))[:2]
+        # Pipeline argument
+        fill_masker = FillMaskPipeline(model=model, tokenizer=tokenizer, targets=targets)
+        outputs = fill_masker(f"This is a {tokenizer.mask_token}")
+        self.assertEqual(
+            outputs,
+            [
+                {"sequence": ANY(str), "score": ANY(float), "token": ANY(int), "token_str": ANY(str)},
+                {"sequence": ANY(str), "score": ANY(float), "token": ANY(int), "token_str": ANY(str)},
+            ],
+        )
+        target_ids = {vocab[el] for el in targets}
+        self.assertEqual(set(el["token"] for el in outputs), target_ids)
+        self.assertEqual(set(el["token_str"] for el in outputs), set(targets))
+
+        # Call argument
+        fill_masker = FillMaskPipeline(model=model, tokenizer=tokenizer)
+        outputs = fill_masker(f"This is a {tokenizer.mask_token}", targets=targets)
+        self.assertEqual(
+            outputs,
+            [
+                {"sequence": ANY(str), "score": ANY(float), "token": ANY(int), "token_str": ANY(str)},
+                {"sequence": ANY(str), "score": ANY(float), "token": ANY(int), "token_str": ANY(str)},
+            ],
+        )
+        target_ids = {vocab[el] for el in targets}
+        self.assertEqual(set(el["token"] for el in outputs), target_ids)
+        self.assertEqual(set(el["token_str"] for el in outputs), set(targets))
+
+        # Score equivalence
+        outputs = fill_masker(f"This is a {tokenizer.mask_token}", targets=targets)
+        tokens = [top_mask["token_str"] for top_mask in outputs]
+        scores = [top_mask["score"] for top_mask in outputs]
+
+        unmasked_targets = fill_masker(f"This is a {tokenizer.mask_token}", targets=tokens)
+        target_scores = [top_mask["score"] for top_mask in unmasked_targets]
+        self.assertEqual(nested_simplify(scores), nested_simplify(target_scores))
+
+        # Raises with invalid
+        with self.assertRaises(ValueError):
+            outputs = fill_masker(f"This is a {tokenizer.mask_token}", targets=[""])
+        with self.assertRaises(ValueError):
+            outputs = fill_masker(f"This is a {tokenizer.mask_token}", targets=[])
+        with self.assertRaises(ValueError):
+            outputs = fill_masker(f"This is a {tokenizer.mask_token}", targets="")
+
+    def run_test_top_k(self, model, tokenizer):
+        fill_masker = FillMaskPipeline(model=model, tokenizer=tokenizer, top_k=2)
+        outputs = fill_masker(f"This is a {tokenizer.mask_token}")
+        self.assertEqual(
+            outputs,
+            [
+                {"sequence": ANY(str), "score": ANY(float), "token": ANY(int), "token_str": ANY(str)},
+                {"sequence": ANY(str), "score": ANY(float), "token": ANY(int), "token_str": ANY(str)},
+            ],
+        )
+
+        fill_masker = FillMaskPipeline(model=model, tokenizer=tokenizer)
+        outputs2 = fill_masker(f"This is a {tokenizer.mask_token}", top_k=2)
+        self.assertEqual(
+            outputs2,
+            [
+                {"sequence": ANY(str), "score": ANY(float), "token": ANY(int), "token_str": ANY(str)},
+                {"sequence": ANY(str), "score": ANY(float), "token": ANY(int), "token_str": ANY(str)},
+            ],
+        )
+        self.assertEqual(nested_simplify(outputs), nested_simplify(outputs2))
+
+    def run_test_top_k_targets(self, model, tokenizer):
+        vocab = tokenizer.get_vocab()
+        fill_masker = FillMaskPipeline(model=model, tokenizer=tokenizer)
+
+        # top_k=2, ntargets=3
+        targets = list(sorted(vocab.keys()))[:3]
+        outputs = fill_masker(f"This is a {tokenizer.mask_token}", top_k=2, targets=targets)
+
+        # If we use the most probably targets, and filter differently, we should still
+        # have the same results
+        targets2 = [el["token_str"] for el in sorted(outputs, key=lambda x: x["score"], reverse=True)]
+        outputs2 = fill_masker(f"This is a {tokenizer.mask_token}", top_k=3, targets=targets2)
+
+        # They should yield exactly the same result
+        self.assertEqual(nested_simplify(outputs), nested_simplify(outputs2))
+
+    def fill_mask_with_duplicate_targets_and_top_k(self, model, tokenizer):
+        fill_masker = FillMaskPipeline(model=model, tokenizer=tokenizer)
+        vocab = tokenizer.get_vocab()
+        # String duplicates + id duplicates
+        targets = list(sorted(vocab.keys()))[:3]
+        targets = [targets[0], targets[1], targets[0], targets[2], targets[1]]
+        outputs = fill_masker(f"My name is {tokenizer.mask_token}", targets=targets, top_k=10)
+
+        # The target list contains duplicates, so we can't output more
+        # than them
+        self.assertEqual(len(outputs), 3)
+
+    def fill_mask_with_multiple_masks(self, model, tokenizer):
+        fill_masker = FillMaskPipeline(model=model, tokenizer=tokenizer)
+
+        outputs = fill_masker(
+            f"This is a {tokenizer.mask_token} {tokenizer.mask_token} {tokenizer.mask_token}", top_k=2
+        )
+        self.assertEqual(
+            outputs,
+            [
+                [
+                    {"sequence": ANY(str), "score": ANY(float), "token": ANY(int), "token_str": ANY(str)},
+                    {"sequence": ANY(str), "score": ANY(float), "token": ANY(int), "token_str": ANY(str)},
+                ],
+                [
+                    {"sequence": ANY(str), "score": ANY(float), "token": ANY(int), "token_str": ANY(str)},
+                    {"sequence": ANY(str), "score": ANY(float), "token": ANY(int), "token_str": ANY(str)},
+                ],
+                [
+                    {"sequence": ANY(str), "score": ANY(float), "token": ANY(int), "token_str": ANY(str)},
+                    {"sequence": ANY(str), "score": ANY(float), "token": ANY(int), "token_str": ANY(str)},
+                ],
+            ],
+        )
--- a/tests/pipelines/test_pipelines_image_classification.py
+++ b/tests/pipelines/test_pipelines_image_classification.py
@@ -0,0 +1,219 @@
+# Copyright 2021 The HuggingFace Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import unittest
+
+from transformers import (
+    MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING,
+    TF_MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING,
+    PreTrainedTokenizer,
+    is_vision_available,
+)
+from transformers.pipelines import ImageClassificationPipeline, pipeline
+from transformers.testing_utils import (
+    is_pipeline_test,
+    nested_simplify,
+    require_tf,
+    require_torch,
+    require_vision,
+    slow,
+)
+
+from .test_pipelines_common import ANY, PipelineTestCaseMeta
+
+
+if is_vision_available():
+    from PIL import Image
+else:
+
+    class Image:
+        @staticmethod
+        def open(*args, **kwargs):
+            pass
+
+
+@is_pipeline_test
+@require_vision
+class ImageClassificationPipelineTests(unittest.TestCase, metaclass=PipelineTestCaseMeta):
+    model_mapping = MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING
+    tf_model_mapping = TF_MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING
+
+    def get_test_pipeline(self, model, tokenizer, feature_extractor):
+        image_classifier = ImageClassificationPipeline(model=model, feature_extractor=feature_extractor, top_k=2)
+        examples = [
+            Image.open("./tests/fixtures/tests_samples/COCO/000000039769.png"),
+            "http://images.cocodataset.org/val2017/000000039769.jpg",
+        ]
+        return image_classifier, examples
+
+    def run_pipeline_test(self, image_classifier, examples):
+        outputs = image_classifier("./tests/fixtures/tests_samples/COCO/000000039769.png")
+
+        self.assertEqual(
+            outputs,
+            [
+                {"score": ANY(float), "label": ANY(str)},
+                {"score": ANY(float), "label": ANY(str)},
+            ],
+        )
+
+        import datasets
+
+        dataset = datasets.load_dataset("hf-internal-testing/fixtures_image_utils", "image", split="test")
+
+        # Accepts URL + PIL.Image + lists
+        outputs = image_classifier(
+            [
+                Image.open("./tests/fixtures/tests_samples/COCO/000000039769.png"),
+                "http://images.cocodataset.org/val2017/000000039769.jpg",
+                # RGBA
+                dataset[0]["file"],
+                # LA
+                dataset[1]["file"],
+                # L
+                dataset[2]["file"],
+            ]
+        )
+        self.assertEqual(
+            outputs,
+            [
+                [
+                    {"score": ANY(float), "label": ANY(str)},
+                    {"score": ANY(float), "label": ANY(str)},
+                ],
+                [
+                    {"score": ANY(float), "label": ANY(str)},
+                    {"score": ANY(float), "label": ANY(str)},
+                ],
+                [
+                    {"score": ANY(float), "label": ANY(str)},
+                    {"score": ANY(float), "label": ANY(str)},
+                ],
+                [
+                    {"score": ANY(float), "label": ANY(str)},
+                    {"score": ANY(float), "label": ANY(str)},
+                ],
+                [
+                    {"score": ANY(float), "label": ANY(str)},
+                    {"score": ANY(float), "label": ANY(str)},
+                ],
+            ],
+        )
+
+    @require_torch
+    def test_small_model_pt(self):
+        small_model = "hf-internal-testing/tiny-random-vit"
+        image_classifier = pipeline("image-classification", model=small_model)
+
+        outputs = image_classifier("http://images.cocodataset.org/val2017/000000039769.jpg")
+        self.assertEqual(
+            nested_simplify(outputs, decimals=4),
+            [{"label": "LABEL_1", "score": 0.574}, {"label": "LABEL_0", "score": 0.426}],
+        )
+
+        outputs = image_classifier(
+            [
+                "http://images.cocodataset.org/val2017/000000039769.jpg",
+                "http://images.cocodataset.org/val2017/000000039769.jpg",
+            ],
+            top_k=2,
+        )
+        self.assertEqual(
+            nested_simplify(outputs, decimals=4),
+            [
+                [{"label": "LABEL_1", "score": 0.574}, {"label": "LABEL_0", "score": 0.426}],
+                [{"label": "LABEL_1", "score": 0.574}, {"label": "LABEL_0", "score": 0.426}],
+            ],
+        )
+
+    @require_tf
+    def test_small_model_tf(self):
+        small_model = "hf-internal-testing/tiny-random-vit"
+        image_classifier = pipeline("image-classification", model=small_model)
+
+        outputs = image_classifier("http://images.cocodataset.org/val2017/000000039769.jpg")
+        self.assertEqual(
+            nested_simplify(outputs, decimals=4),
+            [{"label": "LABEL_1", "score": 0.574}, {"label": "LABEL_0", "score": 0.426}],
+        )
+
+        outputs = image_classifier(
+            [
+                "http://images.cocodataset.org/val2017/000000039769.jpg",
+                "http://images.cocodataset.org/val2017/000000039769.jpg",
+            ],
+            top_k=2,
+        )
+        self.assertEqual(
+            nested_simplify(outputs, decimals=4),
+            [
+                [{"label": "LABEL_1", "score": 0.574}, {"label": "LABEL_0", "score": 0.426}],
+                [{"label": "LABEL_1", "score": 0.574}, {"label": "LABEL_0", "score": 0.426}],
+            ],
+        )
+
+    def test_custom_tokenizer(self):
+        tokenizer = PreTrainedTokenizer()
+
+        # Assert that the pipeline can be initialized with a feature extractor that is not in any mapping
+        image_classifier = pipeline(
+            "image-classification", model="hf-internal-testing/tiny-random-vit", tokenizer=tokenizer
+        )
+
+        self.assertIs(image_classifier.tokenizer, tokenizer)
+
+    @slow
+    @require_torch
+    def test_perceiver(self):
+        # Perceiver is not tested by `run_pipeline_test` properly.
+        # That is because the type of feature_extractor and model preprocessor need to be kept
+        # in sync, which is not the case in the current design
+        image_classifier = pipeline("image-classification", model="deepmind/vision-perceiver-conv")
+        outputs = image_classifier("http://images.cocodataset.org/val2017/000000039769.jpg")
+        self.assertEqual(
+            nested_simplify(outputs, decimals=4),
+            [
+                {"score": 0.4385, "label": "tabby, tabby cat"},
+                {"score": 0.321, "label": "tiger cat"},
+                {"score": 0.0502, "label": "Egyptian cat"},
+                {"score": 0.0137, "label": "crib, cot"},
+                {"score": 0.007, "label": "radiator"},
+            ],
+        )
+
+        image_classifier = pipeline("image-classification", model="deepmind/vision-perceiver-fourier")
+        outputs = image_classifier("http://images.cocodataset.org/val2017/000000039769.jpg")
+        self.assertEqual(
+            nested_simplify(outputs, decimals=4),
+            [
+                {"score": 0.5658, "label": "tabby, tabby cat"},
+                {"score": 0.1309, "label": "tiger cat"},
+                {"score": 0.0722, "label": "Egyptian cat"},
+                {"score": 0.0707, "label": "remote control, remote"},
+                {"score": 0.0082, "label": "computer keyboard, keypad"},
+            ],
+        )
+
+        image_classifier = pipeline("image-classification", model="deepmind/vision-perceiver-learned")
+        outputs = image_classifier("http://images.cocodataset.org/val2017/000000039769.jpg")
+        self.assertEqual(
+            nested_simplify(outputs, decimals=4),
+            [
+                {"score": 0.3022, "label": "tabby, tabby cat"},
+                {"score": 0.2362, "label": "Egyptian cat"},
+                {"score": 0.1856, "label": "tiger cat"},
+                {"score": 0.0324, "label": "remote control, remote"},
+                {"score": 0.0096, "label": "quilt, comforter, comfort, puff"},
+            ],
+        )
--- a/tests/pipelines/test_pipelines_image_segmentation.py
+++ b/tests/pipelines/test_pipelines_image_segmentation.py
@@ -0,0 +1,310 @@
+# Copyright 2021 The HuggingFace Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import hashlib
+import unittest
+
+import datasets
+
+from transformers import (
+    MODEL_FOR_IMAGE_SEGMENTATION_MAPPING,
+    MODEL_FOR_SEMANTIC_SEGMENTATION_MAPPING,
+    AutoFeatureExtractor,
+    AutoModelForImageSegmentation,
+    DetrForSegmentation,
+    ImageSegmentationPipeline,
+    is_vision_available,
+    pipeline,
+)
+from transformers.testing_utils import (
+    is_pipeline_test,
+    nested_simplify,
+    require_tf,
+    require_timm,
+    require_torch,
+    require_vision,
+    slow,
+)
+
+from .test_pipelines_common import ANY, PipelineTestCaseMeta
+
+
+if is_vision_available():
+    from PIL import Image
+else:
+
+    class Image:
+        @staticmethod
+        def open(*args, **kwargs):
+            pass
+
+
+def hashimage(image: Image) -> str:
+    m = hashlib.md5(image.tobytes())
+    return m.hexdigest()
+
+
+@require_vision
+@require_timm
+@require_torch
+@is_pipeline_test
+class ImageSegmentationPipelineTests(unittest.TestCase, metaclass=PipelineTestCaseMeta):
+    model_mapping = {
+        k: v
+        for k, v in (
+            list(MODEL_FOR_IMAGE_SEGMENTATION_MAPPING.items()) if MODEL_FOR_IMAGE_SEGMENTATION_MAPPING else []
+        )
+        + (MODEL_FOR_SEMANTIC_SEGMENTATION_MAPPING.items() if MODEL_FOR_SEMANTIC_SEGMENTATION_MAPPING else [])
+    }
+
+    def get_test_pipeline(self, model, tokenizer, feature_extractor):
+        image_segmenter = ImageSegmentationPipeline(model=model, feature_extractor=feature_extractor)
+        return image_segmenter, [
+            "./tests/fixtures/tests_samples/COCO/000000039769.png",
+            "./tests/fixtures/tests_samples/COCO/000000039769.png",
+        ]
+
+    def run_pipeline_test(self, image_segmenter, examples):
+        outputs = image_segmenter("./tests/fixtures/tests_samples/COCO/000000039769.png", threshold=0.0)
+        self.assertIsInstance(outputs, list)
+        n = len(outputs)
+        self.assertGreater(n, 1)
+        # XXX: PIL.Image implements __eq__ which bypasses ANY, so we inverse the comparison
+        # to make it work
+        self.assertEqual([{"score": ANY(float, type(None)), "label": ANY(str), "mask": ANY(Image.Image)}] * n, outputs)
+
+        dataset = datasets.load_dataset("hf-internal-testing/fixtures_image_utils", "image", split="test")
+
+        # RGBA
+        outputs = image_segmenter(dataset[0]["file"])
+        m = len(outputs)
+        self.assertEqual([{"score": ANY(float, type(None)), "label": ANY(str), "mask": ANY(Image.Image)}] * m, outputs)
+        # LA
+        outputs = image_segmenter(dataset[1]["file"])
+        m = len(outputs)
+        self.assertEqual([{"score": ANY(float, type(None)), "label": ANY(str), "mask": ANY(Image.Image)}] * m, outputs)
+        # L
+        outputs = image_segmenter(dataset[2]["file"])
+        m = len(outputs)
+        self.assertEqual([{"score": ANY(float, type(None)), "label": ANY(str), "mask": ANY(Image.Image)}] * m, outputs)
+
+        if isinstance(image_segmenter.model, DetrForSegmentation):
+            # We need to test batch_size with images with the same size.
+            # Detr doesn't normalize the size of the images, meaning we can have
+            # 800x800 or 800x1200, meaning we cannot batch simply.
+            # We simply bail on this
+            batch_size = 1
+        else:
+            batch_size = 2
+
+        # 5 times the same image so the output shape is predictable
+        batch = [
+            "./tests/fixtures/tests_samples/COCO/000000039769.png",
+            "./tests/fixtures/tests_samples/COCO/000000039769.png",
+            "./tests/fixtures/tests_samples/COCO/000000039769.png",
+            "./tests/fixtures/tests_samples/COCO/000000039769.png",
+            "./tests/fixtures/tests_samples/COCO/000000039769.png",
+        ]
+        outputs = image_segmenter(batch, threshold=0.0, batch_size=batch_size)
+        self.assertEqual(len(batch), len(outputs))
+        self.assertEqual({"score": ANY(float, type(None)), "label": ANY(str), "mask": ANY(Image.Image)}, outputs[0][0])
+        self.assertEqual(len(outputs[0]), n)
+        self.assertEqual(
+            [
+                [{"score": ANY(float, type(None)), "label": ANY(str), "mask": ANY(Image.Image)}] * n,
+                [{"score": ANY(float, type(None)), "label": ANY(str), "mask": ANY(Image.Image)}] * n,
+                [{"score": ANY(float, type(None)), "label": ANY(str), "mask": ANY(Image.Image)}] * n,
+                [{"score": ANY(float, type(None)), "label": ANY(str), "mask": ANY(Image.Image)}] * n,
+                [{"score": ANY(float, type(None)), "label": ANY(str), "mask": ANY(Image.Image)}] * n,
+            ],
+            outputs,
+            f"Expected [{n}, {n}, {n}, {n}, {n}], got {[len(item) for item in outputs]}",
+        )
+
+    @require_tf
+    @unittest.skip("Image segmentation not implemented in TF")
+    def test_small_model_tf(self):
+        pass
+
+    @require_torch
+    def test_small_model_pt(self):
+        model_id = "mishig/tiny-detr-mobilenetsv3-panoptic"
+
+        model = AutoModelForImageSegmentation.from_pretrained(model_id)
+        feature_extractor = AutoFeatureExtractor.from_pretrained(model_id)
+        image_segmenter = ImageSegmentationPipeline(model=model, feature_extractor=feature_extractor)
+
+        outputs = image_segmenter("http://images.cocodataset.org/val2017/000000039769.jpg", threshold=0.0)
+        for o in outputs:
+            # shortening by hashing
+            o["mask"] = hashimage(o["mask"])
+
+        self.assertEqual(
+            nested_simplify(outputs, decimals=4),
+            [
+                {
+                    "score": 0.004,
+                    "label": "LABEL_0",
+                    "mask": "34eecd16bbfb0f476083ef947d81bf66",
+                },
+                {
+                    "score": 0.004,
+                    "label": "LABEL_0",
+                    "mask": "34eecd16bbfb0f476083ef947d81bf66",
+                },
+            ],
+        )
+
+        outputs = image_segmenter(
+            [
+                "http://images.cocodataset.org/val2017/000000039769.jpg",
+                "http://images.cocodataset.org/val2017/000000039769.jpg",
+            ],
+            threshold=0.0,
+        )
+        for output in outputs:
+            for o in output:
+                o["mask"] = hashimage(o["mask"])
+
+        self.assertEqual(
+            nested_simplify(outputs, decimals=4),
+            [
+                [
+                    {
+                        "score": 0.004,
+                        "label": "LABEL_0",
+                        "mask": "34eecd16bbfb0f476083ef947d81bf66",
+                    },
+                    {
+                        "score": 0.004,
+                        "label": "LABEL_0",
+                        "mask": "34eecd16bbfb0f476083ef947d81bf66",
+                    },
+                ],
+                [
+                    {
+                        "score": 0.004,
+                        "label": "LABEL_0",
+                        "mask": "34eecd16bbfb0f476083ef947d81bf66",
+                    },
+                    {
+                        "score": 0.004,
+                        "label": "LABEL_0",
+                        "mask": "34eecd16bbfb0f476083ef947d81bf66",
+                    },
+                ],
+            ],
+        )
+
+    @require_torch
+    def test_small_model_pt_semantic(self):
+        model_id = "hf-internal-testing/tiny-random-beit-pipeline"
+        image_segmenter = pipeline(model=model_id)
+        outputs = image_segmenter("http://images.cocodataset.org/val2017/000000039769.jpg")
+        for o in outputs:
+            # shortening by hashing
+            o["mask"] = hashimage(o["mask"])
+
+        self.assertEqual(
+            nested_simplify(outputs, decimals=4),
+            [
+                {
+                    "score": None,
+                    "label": "LABEL_0",
+                    "mask": "01245d8ad25d03f09493ca97965788ae",
+                },
+                {
+                    "score": None,
+                    "label": "LABEL_1",
+                    "mask": "f741516de8d5196a2c830739b9ac1c8c",
+                },
+            ],
+        )
+
+    @require_torch
+    @slow
+    def test_integration_torch_image_segmentation(self):
+        model_id = "facebook/detr-resnet-50-panoptic"
+
+        image_segmenter = pipeline("image-segmentation", model=model_id)
+
+        outputs = image_segmenter("http://images.cocodataset.org/val2017/000000039769.jpg")
+        for o in outputs:
+            o["mask"] = hashimage(o["mask"])
+
+        self.assertEqual(
+            nested_simplify(outputs, decimals=4),
+            [
+                {"score": 0.9094, "label": "blanket", "mask": "36517c16f4356f7af4b298f4eae387f9fe37eaf8"},
+                {"score": 0.9941, "label": "cat", "mask": "d63196cbe08c7655c158dbabbc5e6b413cbb3b2d"},
+                {"score": 0.9987, "label": "remote", "mask": "4e190e0c3934ad852aaa51aa2c54e314b9a1152e"},
+                {"score": 0.9995, "label": "remote", "mask": "39dc07a07238048a06b0c2474de01ba3c09cc44f"},
+                {"score": 0.9722, "label": "couch", "mask": "df5815755b6bcf328f6b6811f8794cad26f79b35"},
+                {"score": 0.9994, "label": "cat", "mask": "88b37bd2202c750cc9dd191518050a9b0ca5228c"},
+            ],
+        )
+
+        outputs = image_segmenter(
+            [
+                "http://images.cocodataset.org/val2017/000000039769.jpg",
+                "http://images.cocodataset.org/val2017/000000039769.jpg",
+            ],
+            threshold=0.0,
+        )
+        for output in outputs:
+            for o in output:
+                o["mask"] = hashlib.sha1(o["mask"].encode("UTF-8")).hexdigest()
+
+        self.assertEqual(
+            nested_simplify(outputs, decimals=4),
+            [
+                [
+                    {"score": 0.9094, "label": "blanket", "mask": "36517c16f4356f7af4b298f4eae387f9fe37eaf8"},
+                    {"score": 0.9941, "label": "cat", "mask": "d63196cbe08c7655c158dbabbc5e6b413cbb3b2d"},
+                    {"score": 0.9987, "label": "remote", "mask": "4e190e0c3934ad852aaa51aa2c54e314b9a1152e"},
+                    {"score": 0.9995, "label": "remote", "mask": "39dc07a07238048a06b0c2474de01ba3c09cc44f"},
+                    {"score": 0.9722, "label": "couch", "mask": "df5815755b6bcf328f6b6811f8794cad26f79b35"},
+                    {"score": 0.9994, "label": "cat", "mask": "88b37bd2202c750cc9dd191518050a9b0ca5228c"},
+                ],
+                [
+                    {"score": 0.9094, "label": "blanket", "mask": "36517c16f4356f7af4b298f4eae387f9fe37eaf8"},
+                    {"score": 0.9941, "label": "cat", "mask": "d63196cbe08c7655c158dbabbc5e6b413cbb3b2d"},
+                    {"score": 0.9987, "label": "remote", "mask": "4e190e0c3934ad852aaa51aa2c54e314b9a1152e"},
+                    {"score": 0.9995, "label": "remote", "mask": "39dc07a07238048a06b0c2474de01ba3c09cc44f"},
+                    {"score": 0.9722, "label": "couch", "mask": "df5815755b6bcf328f6b6811f8794cad26f79b35"},
+                    {"score": 0.9994, "label": "cat", "mask": "88b37bd2202c750cc9dd191518050a9b0ca5228c"},
+                ],
+            ],
+        )
+
+    @require_torch
+    @slow
+    def test_threshold(self):
+        threshold = 0.999
+        model_id = "facebook/detr-resnet-50-panoptic"
+
+        image_segmenter = pipeline("image-segmentation", model=model_id)
+
+        outputs = image_segmenter("http://images.cocodataset.org/val2017/000000039769.jpg", threshold=threshold)
+
+        for o in outputs:
+            o["mask"] = hashimage(o["mask"])
+
+        self.assertEqual(
+            nested_simplify(outputs, decimals=4),
+            [
+                {"score": 0.9995, "label": "remote", "mask": "39dc07a07238048a06b0c2474de01ba3c09cc44f"},
+                {"score": 0.9994, "label": "cat", "mask": "88b37bd2202c750cc9dd191518050a9b0ca5228c"},
+            ],
+        )
--- a/tests/pipelines/test_pipelines_object_detection.py
+++ b/tests/pipelines/test_pipelines_object_detection.py
@@ -0,0 +1,254 @@
+# Copyright 2021 The HuggingFace Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import unittest
+
+from transformers import (
+    MODEL_FOR_OBJECT_DETECTION_MAPPING,
+    AutoFeatureExtractor,
+    AutoModelForObjectDetection,
+    ObjectDetectionPipeline,
+    is_vision_available,
+    pipeline,
+)
+from transformers.testing_utils import (
+    is_pipeline_test,
+    nested_simplify,
+    require_tf,
+    require_timm,
+    require_torch,
+    require_vision,
+    slow,
+)
+
+from .test_pipelines_common import ANY, PipelineTestCaseMeta
+
+
+if is_vision_available():
+    from PIL import Image
+else:
+
+    class Image:
+        @staticmethod
+        def open(*args, **kwargs):
+            pass
+
+
+@require_vision
+@require_timm
+@require_torch
+@is_pipeline_test
+class ObjectDetectionPipelineTests(unittest.TestCase, metaclass=PipelineTestCaseMeta):
+    model_mapping = MODEL_FOR_OBJECT_DETECTION_MAPPING
+
+    def get_test_pipeline(self, model, tokenizer, feature_extractor):
+        object_detector = ObjectDetectionPipeline(model=model, feature_extractor=feature_extractor)
+        return object_detector, ["./tests/fixtures/tests_samples/COCO/000000039769.png"]
+
+    def run_pipeline_test(self, object_detector, examples):
+        outputs = object_detector("./tests/fixtures/tests_samples/COCO/000000039769.png", threshold=0.0)
+
+        self.assertGreater(len(outputs), 0)
+        for detected_object in outputs:
+            self.assertEqual(
+                detected_object,
+                {
+                    "score": ANY(float),
+                    "label": ANY(str),
+                    "box": {"xmin": ANY(int), "ymin": ANY(int), "xmax": ANY(int), "ymax": ANY(int)},
+                },
+            )
+
+        import datasets
+
+        dataset = datasets.load_dataset("hf-internal-testing/fixtures_image_utils", "image", split="test")
+
+        batch = [
+            Image.open("./tests/fixtures/tests_samples/COCO/000000039769.png"),
+            "http://images.cocodataset.org/val2017/000000039769.jpg",
+            # RGBA
+            dataset[0]["file"],
+            # LA
+            dataset[1]["file"],
+            # L
+            dataset[2]["file"],
+        ]
+        batch_outputs = object_detector(batch, threshold=0.0)
+
+        self.assertEqual(len(batch), len(batch_outputs))
+        for outputs in batch_outputs:
+            self.assertGreater(len(outputs), 0)
+            for detected_object in outputs:
+                self.assertEqual(
+                    detected_object,
+                    {
+                        "score": ANY(float),
+                        "label": ANY(str),
+                        "box": {"xmin": ANY(int), "ymin": ANY(int), "xmax": ANY(int), "ymax": ANY(int)},
+                    },
+                )
+
+    @require_tf
+    @unittest.skip("Object detection not implemented in TF")
+    def test_small_model_tf(self):
+        pass
+
+    @require_torch
+    def test_small_model_pt(self):
+        model_id = "mishig/tiny-detr-mobilenetsv3"
+
+        model = AutoModelForObjectDetection.from_pretrained(model_id)
+        feature_extractor = AutoFeatureExtractor.from_pretrained(model_id)
+        object_detector = ObjectDetectionPipeline(model=model, feature_extractor=feature_extractor)
+
+        outputs = object_detector("http://images.cocodataset.org/val2017/000000039769.jpg", threshold=0.0)
+
+        self.assertEqual(
+            nested_simplify(outputs, decimals=4),
+            [
+                {"score": 0.3432, "label": "LABEL_0", "box": {"xmin": 160, "ymin": 120, "xmax": 480, "ymax": 359}},
+                {"score": 0.3432, "label": "LABEL_0", "box": {"xmin": 160, "ymin": 120, "xmax": 480, "ymax": 359}},
+            ],
+        )
+
+        outputs = object_detector(
+            [
+                "http://images.cocodataset.org/val2017/000000039769.jpg",
+                "http://images.cocodataset.org/val2017/000000039769.jpg",
+            ],
+            threshold=0.0,
+        )
+
+        self.assertEqual(
+            nested_simplify(outputs, decimals=4),
+            [
+                [
+                    {"score": 0.3432, "label": "LABEL_0", "box": {"xmin": 160, "ymin": 120, "xmax": 480, "ymax": 359}},
+                    {"score": 0.3432, "label": "LABEL_0", "box": {"xmin": 160, "ymin": 120, "xmax": 480, "ymax": 359}},
+                ],
+                [
+                    {"score": 0.3432, "label": "LABEL_0", "box": {"xmin": 160, "ymin": 120, "xmax": 480, "ymax": 359}},
+                    {"score": 0.3432, "label": "LABEL_0", "box": {"xmin": 160, "ymin": 120, "xmax": 480, "ymax": 359}},
+                ],
+            ],
+        )
+
+    @require_torch
+    @slow
+    def test_large_model_pt(self):
+        model_id = "facebook/detr-resnet-50"
+
+        model = AutoModelForObjectDetection.from_pretrained(model_id)
+        feature_extractor = AutoFeatureExtractor.from_pretrained(model_id)
+        object_detector = ObjectDetectionPipeline(model=model, feature_extractor=feature_extractor)
+
+        outputs = object_detector("http://images.cocodataset.org/val2017/000000039769.jpg")
+        self.assertEqual(
+            nested_simplify(outputs, decimals=4),
+            [
+                {"score": 0.9982, "label": "remote", "box": {"xmin": 40, "ymin": 70, "xmax": 175, "ymax": 117}},
+                {"score": 0.9960, "label": "remote", "box": {"xmin": 333, "ymin": 72, "xmax": 368, "ymax": 187}},
+                {"score": 0.9955, "label": "couch", "box": {"xmin": 0, "ymin": 1, "xmax": 639, "ymax": 473}},
+                {"score": 0.9988, "label": "cat", "box": {"xmin": 13, "ymin": 52, "xmax": 314, "ymax": 470}},
+                {"score": 0.9987, "label": "cat", "box": {"xmin": 345, "ymin": 23, "xmax": 640, "ymax": 368}},
+            ],
+        )
+
+        outputs = object_detector(
+            [
+                "http://images.cocodataset.org/val2017/000000039769.jpg",
+                "http://images.cocodataset.org/val2017/000000039769.jpg",
+            ]
+        )
+        self.assertEqual(
+            nested_simplify(outputs, decimals=4),
+            [
+                [
+                    {"score": 0.9982, "label": "remote", "box": {"xmin": 40, "ymin": 70, "xmax": 175, "ymax": 117}},
+                    {"score": 0.9960, "label": "remote", "box": {"xmin": 333, "ymin": 72, "xmax": 368, "ymax": 187}},
+                    {"score": 0.9955, "label": "couch", "box": {"xmin": 0, "ymin": 1, "xmax": 639, "ymax": 473}},
+                    {"score": 0.9988, "label": "cat", "box": {"xmin": 13, "ymin": 52, "xmax": 314, "ymax": 470}},
+                    {"score": 0.9987, "label": "cat", "box": {"xmin": 345, "ymin": 23, "xmax": 640, "ymax": 368}},
+                ],
+                [
+                    {"score": 0.9982, "label": "remote", "box": {"xmin": 40, "ymin": 70, "xmax": 175, "ymax": 117}},
+                    {"score": 0.9960, "label": "remote", "box": {"xmin": 333, "ymin": 72, "xmax": 368, "ymax": 187}},
+                    {"score": 0.9955, "label": "couch", "box": {"xmin": 0, "ymin": 1, "xmax": 639, "ymax": 473}},
+                    {"score": 0.9988, "label": "cat", "box": {"xmin": 13, "ymin": 52, "xmax": 314, "ymax": 470}},
+                    {"score": 0.9987, "label": "cat", "box": {"xmin": 345, "ymin": 23, "xmax": 640, "ymax": 368}},
+                ],
+            ],
+        )
+
+    @require_torch
+    @slow
+    def test_integration_torch_object_detection(self):
+        model_id = "facebook/detr-resnet-50"
+
+        object_detector = pipeline("object-detection", model=model_id)
+
+        outputs = object_detector("http://images.cocodataset.org/val2017/000000039769.jpg")
+        self.assertEqual(
+            nested_simplify(outputs, decimals=4),
+            [
+                {"score": 0.9982, "label": "remote", "box": {"xmin": 40, "ymin": 70, "xmax": 175, "ymax": 117}},
+                {"score": 0.9960, "label": "remote", "box": {"xmin": 333, "ymin": 72, "xmax": 368, "ymax": 187}},
+                {"score": 0.9955, "label": "couch", "box": {"xmin": 0, "ymin": 1, "xmax": 639, "ymax": 473}},
+                {"score": 0.9988, "label": "cat", "box": {"xmin": 13, "ymin": 52, "xmax": 314, "ymax": 470}},
+                {"score": 0.9987, "label": "cat", "box": {"xmin": 345, "ymin": 23, "xmax": 640, "ymax": 368}},
+            ],
+        )
+
+        outputs = object_detector(
+            [
+                "http://images.cocodataset.org/val2017/000000039769.jpg",
+                "http://images.cocodataset.org/val2017/000000039769.jpg",
+            ]
+        )
+        self.assertEqual(
+            nested_simplify(outputs, decimals=4),
+            [
+                [
+                    {"score": 0.9982, "label": "remote", "box": {"xmin": 40, "ymin": 70, "xmax": 175, "ymax": 117}},
+                    {"score": 0.9960, "label": "remote", "box": {"xmin": 333, "ymin": 72, "xmax": 368, "ymax": 187}},
+                    {"score": 0.9955, "label": "couch", "box": {"xmin": 0, "ymin": 1, "xmax": 639, "ymax": 473}},
+                    {"score": 0.9988, "label": "cat", "box": {"xmin": 13, "ymin": 52, "xmax": 314, "ymax": 470}},
+                    {"score": 0.9987, "label": "cat", "box": {"xmin": 345, "ymin": 23, "xmax": 640, "ymax": 368}},
+                ],
+                [
+                    {"score": 0.9982, "label": "remote", "box": {"xmin": 40, "ymin": 70, "xmax": 175, "ymax": 117}},
+                    {"score": 0.9960, "label": "remote", "box": {"xmin": 333, "ymin": 72, "xmax": 368, "ymax": 187}},
+                    {"score": 0.9955, "label": "couch", "box": {"xmin": 0, "ymin": 1, "xmax": 639, "ymax": 473}},
+                    {"score": 0.9988, "label": "cat", "box": {"xmin": 13, "ymin": 52, "xmax": 314, "ymax": 470}},
+                    {"score": 0.9987, "label": "cat", "box": {"xmin": 345, "ymin": 23, "xmax": 640, "ymax": 368}},
+                ],
+            ],
+        )
+
+    @require_torch
+    @slow
+    def test_threshold(self):
+        threshold = 0.9985
+        model_id = "facebook/detr-resnet-50"
+
+        object_detector = pipeline("object-detection", model=model_id)
+
+        outputs = object_detector("http://images.cocodataset.org/val2017/000000039769.jpg", threshold=threshold)
+        self.assertEqual(
+            nested_simplify(outputs, decimals=4),
+            [
+                {"score": 0.9988, "label": "cat", "box": {"xmin": 13, "ymin": 52, "xmax": 314, "ymax": 470}},
+                {"score": 0.9987, "label": "cat", "box": {"xmin": 345, "ymin": 23, "xmax": 640, "ymax": 368}},
+            ],
+        )
--- a/tests/pipelines/test_pipelines_question_answering.py
+++ b/tests/pipelines/test_pipelines_question_answering.py
@@ -0,0 +1,357 @@
+# Copyright 2020 The HuggingFace Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import unittest
+
+from transformers import (
+    MODEL_FOR_QUESTION_ANSWERING_MAPPING,
+    TF_MODEL_FOR_QUESTION_ANSWERING_MAPPING,
+    LxmertConfig,
+    QuestionAnsweringPipeline,
+)
+from transformers.data.processors.squad import SquadExample
+from transformers.pipelines import QuestionAnsweringArgumentHandler, pipeline
+from transformers.testing_utils import is_pipeline_test, nested_simplify, require_tf, require_torch, slow
+
+from .test_pipelines_common import ANY, PipelineTestCaseMeta
+
+
+@is_pipeline_test
+class QAPipelineTests(unittest.TestCase, metaclass=PipelineTestCaseMeta):
+    model_mapping = MODEL_FOR_QUESTION_ANSWERING_MAPPING
+    tf_model_mapping = TF_MODEL_FOR_QUESTION_ANSWERING_MAPPING
+
+    def get_test_pipeline(self, model, tokenizer, feature_extractor):
+        if isinstance(model.config, LxmertConfig):
+            # This is an bimodal model, we need to find a more consistent way
+            # to switch on those models.
+            return None, None
+        question_answerer = QuestionAnsweringPipeline(model, tokenizer)
+
+        examples = [
+            {"question": "Where was HuggingFace founded ?", "context": "HuggingFace was founded in Paris."},
+            {"question": "In what field is HuggingFace ?", "context": "HuggingFace is  an AI startup."},
+        ]
+        return question_answerer, examples
+
+    def run_pipeline_test(self, question_answerer, _):
+        outputs = question_answerer(
+            question="Where was HuggingFace founded ?", context="HuggingFace was founded in Paris."
+        )
+        self.assertEqual(outputs, {"answer": ANY(str), "start": ANY(int), "end": ANY(int), "score": ANY(float)})
+        outputs = question_answerer(
+            question="Where was HuggingFace founded ?",
+            context="HuggingFace was founded in Paris.",
+            handle_impossible_answer=True,
+        )
+        self.assertEqual(outputs, {"answer": ANY(str), "start": ANY(int), "end": ANY(int), "score": ANY(float)})
+
+        outputs = question_answerer(
+            question=["In what field is HuggingFace working ?", "In what field is HuggingFace working ?"],
+            context="HuggingFace was founded in Paris.",
+        )
+        self.assertEqual(
+            outputs,
+            [
+                {"answer": ANY(str), "start": ANY(int), "end": ANY(int), "score": ANY(float)},
+                {"answer": ANY(str), "start": ANY(int), "end": ANY(int), "score": ANY(float)},
+            ],
+        )
+
+        outputs = question_answerer(
+            question=["What field is HuggingFace working ?", "In what field is HuggingFace ?"],
+            context=[
+                "HuggingFace is a startup based in New-York",
+                "HuggingFace is a startup founded in Paris",
+            ],
+        )
+        self.assertEqual(
+            outputs,
+            [
+                {"answer": ANY(str), "start": ANY(int), "end": ANY(int), "score": ANY(float)},
+                {"answer": ANY(str), "start": ANY(int), "end": ANY(int), "score": ANY(float)},
+            ],
+        )
+
+        with self.assertRaises(ValueError):
+            question_answerer(question="", context="HuggingFace was founded in Paris.")
+        with self.assertRaises(ValueError):
+            question_answerer(question=None, context="HuggingFace was founded in Paris.")
+        with self.assertRaises(ValueError):
+            question_answerer(question="In what field is HuggingFace working ?", context="")
+        with self.assertRaises(ValueError):
+            question_answerer(question="In what field is HuggingFace working ?", context=None)
+
+        outputs = question_answerer(
+            question="Where was HuggingFace founded ?", context="HuggingFace was founded in Paris.", topk=20
+        )
+        self.assertEqual(
+            outputs, [{"answer": ANY(str), "start": ANY(int), "end": ANY(int), "score": ANY(float)} for i in range(20)]
+        )
+
+        # Very long context require multiple features
+        outputs = question_answerer(
+            question="Where was HuggingFace founded ?", context="HuggingFace was founded in Paris." * 20
+        )
+        self.assertEqual(outputs, {"answer": ANY(str), "start": ANY(int), "end": ANY(int), "score": ANY(float)})
+
+    @require_torch
+    def test_small_model_pt(self):
+        question_answerer = pipeline(
+            "question-answering", model="sshleifer/tiny-distilbert-base-cased-distilled-squad"
+        )
+        outputs = question_answerer(
+            question="Where was HuggingFace founded ?", context="HuggingFace was founded in Paris."
+        )
+
+        self.assertEqual(nested_simplify(outputs), {"score": 0.01, "start": 0, "end": 11, "answer": "HuggingFace"})
+
+    @require_tf
+    def test_small_model_tf(self):
+        question_answerer = pipeline(
+            "question-answering", model="sshleifer/tiny-distilbert-base-cased-distilled-squad", framework="tf"
+        )
+        outputs = question_answerer(
+            question="Where was HuggingFace founded ?", context="HuggingFace was founded in Paris."
+        )
+
+        self.assertEqual(nested_simplify(outputs), {"score": 0.011, "start": 0, "end": 11, "answer": "HuggingFace"})
+
+    @slow
+    @require_torch
+    def test_large_model_pt(self):
+        question_answerer = pipeline(
+            "question-answering",
+        )
+        outputs = question_answerer(
+            question="Where was HuggingFace founded ?", context="HuggingFace was founded in Paris."
+        )
+
+        self.assertEqual(nested_simplify(outputs), {"score": 0.979, "start": 27, "end": 32, "answer": "Paris"})
+
+    @slow
+    @require_torch
+    def test_large_model_issue(self):
+        qa_pipeline = pipeline(
+            "question-answering",
+            model="mrm8488/bert-multi-cased-finetuned-xquadv1",
+        )
+        outputs = qa_pipeline(
+            {
+                "context": "Yes Bank founder Rana Kapoor has approached the Bombay High Court, challenging a special court's order from August this year that had remanded him in police custody for a week in a multi-crore loan fraud case. Kapoor, who is currently lodged in Taloja Jail, is an accused in the loan fraud case and some related matters being probed by the CBI and Enforcement Directorate. A single bench presided over by Justice S K Shinde on Tuesday posted the plea for further hearing on October 14. In his plea filed through advocate Vijay Agarwal, Kapoor claimed that the special court's order permitting the CBI's request for police custody on August 14 was illegal and in breach of the due process of law. Therefore, his police custody and subsequent judicial custody in the case were all illegal. Kapoor has urged the High Court to quash and set aside the special court's order dated August 14. As per his plea, in August this year, the CBI had moved two applications before the special court, one seeking permission to arrest Kapoor, who was already in judicial custody at the time in another case, and the other, seeking his police custody. While the special court refused to grant permission to the CBI to arrest Kapoor, it granted the central agency's plea for his custody. Kapoor, however, said in his plea that before filing an application for his arrest, the CBI had not followed the process of issuing him a notice under Section 41 of the CrPC for appearance before it. He further said that the CBI had not taken prior sanction as mandated under section 17 A of the Prevention of Corruption Act for prosecuting him. The special court, however, had said in its order at the time that as Kapoor was already in judicial custody in another case and was not a free man the procedure mandated under Section 41 of the CrPC need not have been adhered to as far as issuing a prior notice of appearance was concerned. ADVERTISING It had also said that case records showed that the investigating officer had taken an approval from a managing director of Yes Bank before beginning the proceedings against Kapoor and such a permission was a valid sanction. However, Kapoor in his plea said that the above order was bad in law and sought that it be quashed and set aside. The law mandated that if initial action was not in consonance with legal procedures, then all subsequent actions must be held as illegal, he said, urging the High Court to declare the CBI remand and custody and all subsequent proceedings including the further custody as illegal and void ab-initio. In a separate plea before the High Court, Kapoor's daughter Rakhee Kapoor-Tandon has sought exemption from in-person appearance before a special PMLA court. Rakhee has stated that she is a resident of the United Kingdom and is unable to travel to India owing to restrictions imposed due to the COVID-19 pandemic. According to the CBI, in the present case, Kapoor had obtained a gratification or pecuniary advantage of ₹ 307 crore, and thereby caused Yes Bank a loss of ₹ 1,800 crore by extending credit facilities to Avantha Group, when it was not eligible for the same",
+                "question": "Is this person invovled in fraud?",
+            }
+        )
+        self.assertEqual(
+            nested_simplify(outputs),
+            {"answer": "an accused in the loan fraud case", "end": 294, "score": 0.001, "start": 261},
+        )
+
+    @slow
+    @require_torch
+    def test_large_model_course(self):
+        question_answerer = pipeline("question-answering")
+        long_context = """
+🤗 Transformers: State of the Art NLP
+
+🤗 Transformers provides thousands of pretrained models to perform tasks on texts such as classification, information extraction,
+question answering, summarization, translation, text generation and more in over 100 languages.
+Its aim is to make cutting-edge NLP easier to use for everyone.
+
+🤗 Transformers provides APIs to quickly download and use those pretrained models on a given text, fine-tune them on your own datasets and
+then share them with the community on our model hub. At the same time, each python module defining an architecture is fully standalone and
+can be modified to enable quick research experiments.
+
+Why should I use transformers?
+
+1. Easy-to-use state-of-the-art models:
+  - High performance on NLU and NLG tasks.
+  - Low barrier to entry for educators and practitioners.
+  - Few user-facing abstractions with just three classes to learn.
+  - A unified API for using all our pretrained models.
+  - Lower compute costs, smaller carbon footprint:
+
+2. Researchers can share trained models instead of always retraining.
+  - Practitioners can reduce compute time and production costs.
+  - Dozens of architectures with over 10,000 pretrained models, some in more than 100 languages.
+
+3. Choose the right framework for every part of a model's lifetime:
+  - Train state-of-the-art models in 3 lines of code.
+  - Move a single model between TF2.0/PyTorch frameworks at will.
+  - Seamlessly pick the right framework for training, evaluation and production.
+
+4. Easily customize a model or an example to your needs:
+  - We provide examples for each architecture to reproduce the results published by its original authors.
+  - Model internals are exposed as consistently as possible.
+  - Model files can be used independently of the library for quick experiments.
+
+🤗 Transformers is backed by the three most popular deep learning libraries — Jax, PyTorch and TensorFlow — with a seamless integration
+between them. It's straightforward to train your models with one before loading them for inference with the other.
+"""
+        question = "Which deep learning libraries back 🤗 Transformers?"
+        outputs = question_answerer(question=question, context=long_context)
+
+        self.assertEqual(
+            nested_simplify(outputs),
+            {"answer": "Jax, PyTorch and TensorFlow", "end": 1919, "score": 0.971, "start": 1892},
+        )
+
+    @slow
+    @require_tf
+    def test_large_model_tf(self):
+        question_answerer = pipeline("question-answering", framework="tf")
+        outputs = question_answerer(
+            question="Where was HuggingFace founded ?", context="HuggingFace was founded in Paris."
+        )
+
+        self.assertEqual(nested_simplify(outputs), {"score": 0.979, "start": 27, "end": 32, "answer": "Paris"})
+
+
+@is_pipeline_test
+class QuestionAnsweringArgumentHandlerTests(unittest.TestCase):
+    def test_argument_handler(self):
+        qa = QuestionAnsweringArgumentHandler()
+
+        Q = "Where was HuggingFace founded ?"
+        C = "HuggingFace was founded in Paris"
+
+        normalized = qa(Q, C)
+        self.assertEqual(type(normalized), list)
+        self.assertEqual(len(normalized), 1)
+        self.assertEqual({type(el) for el in normalized}, {SquadExample})
+
+        normalized = qa(question=Q, context=C)
+        self.assertEqual(type(normalized), list)
+        self.assertEqual(len(normalized), 1)
+        self.assertEqual({type(el) for el in normalized}, {SquadExample})
+
+        normalized = qa(question=Q, context=C)
+        self.assertEqual(type(normalized), list)
+        self.assertEqual(len(normalized), 1)
+        self.assertEqual({type(el) for el in normalized}, {SquadExample})
+
+        normalized = qa(question=[Q, Q], context=C)
+        self.assertEqual(type(normalized), list)
+        self.assertEqual(len(normalized), 2)
+        self.assertEqual({type(el) for el in normalized}, {SquadExample})
+
+        normalized = qa({"question": Q, "context": C})
+        self.assertEqual(type(normalized), list)
+        self.assertEqual(len(normalized), 1)
+        self.assertEqual({type(el) for el in normalized}, {SquadExample})
+
+        normalized = qa([{"question": Q, "context": C}])
+        self.assertEqual(type(normalized), list)
+        self.assertEqual(len(normalized), 1)
+        self.assertEqual({type(el) for el in normalized}, {SquadExample})
+
+        normalized = qa([{"question": Q, "context": C}, {"question": Q, "context": C}])
+        self.assertEqual(type(normalized), list)
+        self.assertEqual(len(normalized), 2)
+        self.assertEqual({type(el) for el in normalized}, {SquadExample})
+
+        normalized = qa(X={"question": Q, "context": C})
+        self.assertEqual(type(normalized), list)
+        self.assertEqual(len(normalized), 1)
+        self.assertEqual({type(el) for el in normalized}, {SquadExample})
+
+        normalized = qa(X=[{"question": Q, "context": C}])
+        self.assertEqual(type(normalized), list)
+        self.assertEqual(len(normalized), 1)
+        self.assertEqual({type(el) for el in normalized}, {SquadExample})
+
+        normalized = qa(data={"question": Q, "context": C})
+        self.assertEqual(type(normalized), list)
+        self.assertEqual(len(normalized), 1)
+        self.assertEqual({type(el) for el in normalized}, {SquadExample})
+
+    def test_argument_handler_error_handling(self):
+        qa = QuestionAnsweringArgumentHandler()
+
+        Q = "Where was HuggingFace founded ?"
+        C = "HuggingFace was founded in Paris"
+
+        with self.assertRaises(KeyError):
+            qa({"context": C})
+        with self.assertRaises(KeyError):
+            qa({"question": Q})
+        with self.assertRaises(KeyError):
+            qa([{"context": C}])
+        with self.assertRaises(ValueError):
+            qa(None, C)
+        with self.assertRaises(ValueError):
+            qa("", C)
+        with self.assertRaises(ValueError):
+            qa(Q, None)
+        with self.assertRaises(ValueError):
+            qa(Q, "")
+
+        with self.assertRaises(ValueError):
+            qa(question=None, context=C)
+        with self.assertRaises(ValueError):
+            qa(question="", context=C)
+        with self.assertRaises(ValueError):
+            qa(question=Q, context=None)
+        with self.assertRaises(ValueError):
+            qa(question=Q, context="")
+
+        with self.assertRaises(ValueError):
+            qa({"question": None, "context": C})
+        with self.assertRaises(ValueError):
+            qa({"question": "", "context": C})
+        with self.assertRaises(ValueError):
+            qa({"question": Q, "context": None})
+        with self.assertRaises(ValueError):
+            qa({"question": Q, "context": ""})
+
+        with self.assertRaises(ValueError):
+            qa([{"question": Q, "context": C}, {"question": None, "context": C}])
+        with self.assertRaises(ValueError):
+            qa([{"question": Q, "context": C}, {"question": "", "context": C}])
+
+        with self.assertRaises(ValueError):
+            qa([{"question": Q, "context": C}, {"question": Q, "context": None}])
+        with self.assertRaises(ValueError):
+            qa([{"question": Q, "context": C}, {"question": Q, "context": ""}])
+
+        with self.assertRaises(ValueError):
+            qa(question={"This": "Is weird"}, context="This is a context")
+
+        with self.assertRaises(ValueError):
+            qa(question=[Q, Q], context=[C, C, C])
+
+        with self.assertRaises(ValueError):
+            qa(question=[Q, Q, Q], context=[C, C])
+
+    def test_argument_handler_old_format(self):
+        qa = QuestionAnsweringArgumentHandler()
+
+        Q = "Where was HuggingFace founded ?"
+        C = "HuggingFace was founded in Paris"
+        # Backward compatibility for this
+        normalized = qa(question=[Q, Q], context=[C, C])
+        self.assertEqual(type(normalized), list)
+        self.assertEqual(len(normalized), 2)
+        self.assertEqual({type(el) for el in normalized}, {SquadExample})
+
+    def test_argument_handler_error_handling_odd(self):
+        qa = QuestionAnsweringArgumentHandler()
+        with self.assertRaises(ValueError):
+            qa(None)
+
+        with self.assertRaises(ValueError):
+            qa(Y=None)
+
+        with self.assertRaises(ValueError):
+            qa(1)
--- a/tests/pipelines/test_pipelines_summarization.py
+++ b/tests/pipelines/test_pipelines_summarization.py
@@ -0,0 +1,97 @@
+# Copyright 2020 The HuggingFace Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import unittest
+
+from transformers import (
+    MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING,
+    TF_MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING,
+    LEDConfig,
+    SummarizationPipeline,
+    T5Config,
+    pipeline,
+)
+from transformers.testing_utils import is_pipeline_test, require_tf, require_torch, slow, torch_device
+from transformers.tokenization_utils import TruncationStrategy
+
+from .test_pipelines_common import ANY, PipelineTestCaseMeta
+
+
+DEFAULT_DEVICE_NUM = -1 if torch_device == "cpu" else 0
+
+
+@is_pipeline_test
+class SummarizationPipelineTests(unittest.TestCase, metaclass=PipelineTestCaseMeta):
+    model_mapping = MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING
+    tf_model_mapping = TF_MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING
+
+    def get_test_pipeline(self, model, tokenizer, feature_extractor):
+        summarizer = SummarizationPipeline(model=model, tokenizer=tokenizer)
+        return summarizer, ["(CNN)The Palestinian Authority officially became", "Some other text"]
+
+    def run_pipeline_test(self, summarizer, _):
+        model = summarizer.model
+
+        outputs = summarizer("(CNN)The Palestinian Authority officially became")
+        self.assertEqual(outputs, [{"summary_text": ANY(str)}])
+
+        outputs = summarizer(
+            "(CNN)The Palestinian Authority officially became ",
+            num_beams=2,
+            min_length=2,
+            max_length=5,
+        )
+        self.assertEqual(outputs, [{"summary_text": ANY(str)}])
+
+        if not isinstance(model.config, (T5Config, LEDConfig)):
+            # LED, T5 can handle it.
+            # Too long.
+            with self.assertRaises(Exception):
+                outputs = summarizer("This " * 1000)
+        outputs = summarizer("This " * 1000, truncation=TruncationStrategy.ONLY_FIRST)
+
+    @require_torch
+    def test_small_model_pt(self):
+        summarizer = pipeline(task="summarization", model="sshleifer/tiny-mbart", framework="pt")
+        outputs = summarizer("This is a small test")
+        self.assertEqual(
+            outputs,
+            [
+                {
+                    "summary_text": "เข้าไปเข้าไปเข้าไปเข้าไปเข้าไปเข้าไปเข้าไปเข้าไปเข้าไปเข้าไปเข้าไปเข้าไปเข้าไปเข้าไปเข้าไปเข้าไปเข้าไปเข้าไป"
+                }
+            ],
+        )
+
+    @require_tf
+    def test_small_model_tf(self):
+        summarizer = pipeline(task="summarization", model="sshleifer/tiny-mbart", framework="tf")
+        outputs = summarizer("This is a small test")
+        self.assertEqual(
+            outputs,
+            [
+                {
+                    "summary_text": "เข้าไปเข้าไปเข้าไปเข้าไปเข้าไปเข้าไปเข้าไปเข้าไปเข้าไปเข้าไปเข้าไปเข้าไปเข้าไปเข้าไปเข้าไปเข้าไปเข้าไปเข้าไป"
+                }
+            ],
+        )
+
+    @require_torch
+    @slow
+    def test_integration_torch_summarization(self):
+        summarizer = pipeline(task="summarization", device=DEFAULT_DEVICE_NUM)
+        cnn_article = ' (CNN)The Palestinian Authority officially became the 123rd member of the International Criminal Court on Wednesday, a step that gives the court jurisdiction over alleged crimes in Palestinian territories. The formal accession was marked with a ceremony at The Hague, in the Netherlands, where the court is based. The Palestinians signed the ICC\'s founding Rome Statute in January, when they also accepted its jurisdiction over alleged crimes committed "in the occupied Palestinian territory, including East Jerusalem, since June 13, 2014." Later that month, the ICC opened a preliminary examination into the situation in Palestinian territories, paving the way for possible war crimes investigations against Israelis. As members of the court, Palestinians may be subject to counter-charges as well. Israel and the United States, neither of which is an ICC member, opposed the Palestinians\' efforts to join the body. But Palestinian Foreign Minister Riad al-Malki, speaking at Wednesday\'s ceremony, said it was a move toward greater justice. "As Palestine formally becomes a State Party to the Rome Statute today, the world is also a step closer to ending a long era of impunity and injustice," he said, according to an ICC news release. "Indeed, today brings us closer to our shared goals of justice and peace." Judge Kuniko Ozaki, a vice president of the ICC, said acceding to the treaty was just the first step for the Palestinians. "As the Rome Statute today enters into force for the State of Palestine, Palestine acquires all the rights as well as responsibilities that come with being a State Party to the Statute. These are substantive commitments, which cannot be taken lightly," she said. Rights group Human Rights Watch welcomed the development. "Governments seeking to penalize Palestine for joining the ICC should immediately end their pressure, and countries that support universal acceptance of the court\'s treaty should speak out to welcome its membership," said Balkees Jarrah, international justice counsel for the group. "What\'s objectionable is the attempts to undermine international justice, not Palestine\'s decision to join a treaty to which over 100 countries around the world are members." In January, when the preliminary ICC examination was opened, Israeli Prime Minister Benjamin Netanyahu described it as an outrage, saying the court was overstepping its boundaries. The United States also said it "strongly" disagreed with the court\'s decision. "As we have said repeatedly, we do not believe that Palestine is a state and therefore we do not believe that it is eligible to join the ICC," the State Department said in a statement. It urged the warring sides to resolve their differences through direct negotiations. "We will continue to oppose actions against Israel at the ICC as counterproductive to the cause of peace," it said. But the ICC begs to differ with the definition of a state for its purposes and refers to the territories as "Palestine." While a preliminary examination is not a formal investigation, it allows the court to review evidence and determine whether to investigate suspects on both sides. Prosecutor Fatou Bensouda said her office would "conduct its analysis in full independence and impartiality." The war between Israel and Hamas militants in Gaza last summer left more than 2,000 people dead. The inquiry will include alleged war crimes committed since June. The International Criminal Court was set up in 2002 to prosecute genocide, crimes against humanity and war crimes. CNN\'s Vasco Cotovio, Kareem Khadder and Faith Karimi contributed to this report.'
+        expected_cnn_summary = " The Palestinian Authority becomes the 123rd member of the International Criminal Court . The move gives the court jurisdiction over alleged crimes in Palestinian territories . Israel and the United States opposed the Palestinians' efforts to join the court . Rights group Human Rights Watch welcomes the move, says governments seeking to penalize Palestine should end pressure ."
+        result = summarizer(cnn_article)
+        self.assertEqual(result[0]["summary_text"], expected_cnn_summary)
--- a/tests/pipelines/test_pipelines_table_question_answering.py
+++ b/tests/pipelines/test_pipelines_table_question_answering.py
@@ -0,0 +1,634 @@
+# Copyright 2020 The HuggingFace Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import unittest
+
+from transformers import (
+    MODEL_FOR_TABLE_QUESTION_ANSWERING_MAPPING,
+    AutoModelForTableQuestionAnswering,
+    AutoTokenizer,
+    TableQuestionAnsweringPipeline,
+    TFAutoModelForTableQuestionAnswering,
+    pipeline,
+)
+from transformers.testing_utils import (
+    is_pipeline_test,
+    require_pandas,
+    require_tensorflow_probability,
+    require_tf,
+    require_torch,
+    require_torch_scatter,
+    slow,
+)
+
+from .test_pipelines_common import PipelineTestCaseMeta
+
+
+@is_pipeline_test
+class TQAPipelineTests(unittest.TestCase, metaclass=PipelineTestCaseMeta):
+    # Putting it there for consistency, but TQA do not have fast tokenizer
+    # which are needed to generate automatic tests
+    model_mapping = MODEL_FOR_TABLE_QUESTION_ANSWERING_MAPPING
+
+    @require_tensorflow_probability
+    @require_pandas
+    @require_tf
+    @require_torch
+    def test_small_model_tf(self):
+        model_id = "lysandre/tiny-tapas-random-wtq"
+        model = TFAutoModelForTableQuestionAnswering.from_pretrained(model_id, from_pt=True)
+        tokenizer = AutoTokenizer.from_pretrained(model_id)
+        self.assertIsInstance(model.config.aggregation_labels, dict)
+        self.assertIsInstance(model.config.no_aggregation_label_index, int)
+
+        table_querier = TableQuestionAnsweringPipeline(model=model, tokenizer=tokenizer)
+        outputs = table_querier(
+            table={
+                "actors": ["brad pitt", "leonardo di caprio", "george clooney"],
+                "age": ["56", "45", "59"],
+                "number of movies": ["87", "53", "69"],
+                "date of birth": ["7 february 1967", "10 june 1996", "28 november 1967"],
+            },
+            query="how many movies has george clooney played in?",
+        )
+        self.assertEqual(
+            outputs,
+            {"answer": "AVERAGE > ", "coordinates": [], "cells": [], "aggregator": "AVERAGE"},
+        )
+        outputs = table_querier(
+            table={
+                "actors": ["brad pitt", "leonardo di caprio", "george clooney"],
+                "age": ["56", "45", "59"],
+                "number of movies": ["87", "53", "69"],
+                "date of birth": ["7 february 1967", "10 june 1996", "28 november 1967"],
+            },
+            query=["how many movies has george clooney played in?", "how old is he?", "what's his date of birth?"],
+        )
+        self.assertEqual(
+            outputs,
+            [
+                {"answer": "AVERAGE > ", "coordinates": [], "cells": [], "aggregator": "AVERAGE"},
+                {"answer": "AVERAGE > ", "coordinates": [], "cells": [], "aggregator": "AVERAGE"},
+                {"answer": "AVERAGE > ", "coordinates": [], "cells": [], "aggregator": "AVERAGE"},
+            ],
+        )
+        outputs = table_querier(
+            table={
+                "Repository": ["Transformers", "Datasets", "Tokenizers"],
+                "Stars": ["36542", "4512", "3934"],
+                "Contributors": ["651", "77", "34"],
+                "Programming language": ["Python", "Python", "Rust, Python and NodeJS"],
+            },
+            query=[
+                "What repository has the largest number of stars?",
+                "Given that the numbers of stars defines if a repository is active, what repository is the most active?",
+                "What is the number of repositories?",
+                "What is the average number of stars?",
+                "What is the total amount of stars?",
+            ],
+        )
+        self.assertEqual(
+            outputs,
+            [
+                {"answer": "AVERAGE > ", "coordinates": [], "cells": [], "aggregator": "AVERAGE"},
+                {"answer": "AVERAGE > ", "coordinates": [], "cells": [], "aggregator": "AVERAGE"},
+                {"answer": "AVERAGE > ", "coordinates": [], "cells": [], "aggregator": "AVERAGE"},
+                {"answer": "AVERAGE > ", "coordinates": [], "cells": [], "aggregator": "AVERAGE"},
+                {"answer": "AVERAGE > ", "coordinates": [], "cells": [], "aggregator": "AVERAGE"},
+            ],
+        )
+
+        with self.assertRaises(ValueError):
+            table_querier(query="What does it do with empty context ?", table=None)
+        with self.assertRaises(ValueError):
+            table_querier(query="What does it do with empty context ?", table="")
+        with self.assertRaises(ValueError):
+            table_querier(query="What does it do with empty context ?", table={})
+        with self.assertRaises(ValueError):
+            table_querier(
+                table={
+                    "Repository": ["Transformers", "Datasets", "Tokenizers"],
+                    "Stars": ["36542", "4512", "3934"],
+                    "Contributors": ["651", "77", "34"],
+                    "Programming language": ["Python", "Python", "Rust, Python and NodeJS"],
+                }
+            )
+        with self.assertRaises(ValueError):
+            table_querier(
+                query="",
+                table={
+                    "Repository": ["Transformers", "Datasets", "Tokenizers"],
+                    "Stars": ["36542", "4512", "3934"],
+                    "Contributors": ["651", "77", "34"],
+                    "Programming language": ["Python", "Python", "Rust, Python and NodeJS"],
+                },
+            )
+        with self.assertRaises(ValueError):
+            table_querier(
+                query=None,
+                table={
+                    "Repository": ["Transformers", "Datasets", "Tokenizers"],
+                    "Stars": ["36542", "4512", "3934"],
+                    "Contributors": ["651", "77", "34"],
+                    "Programming language": ["Python", "Python", "Rust, Python and NodeJS"],
+                },
+            )
+
+    @require_torch
+    @require_torch_scatter
+    def test_small_model_pt(self):
+        model_id = "lysandre/tiny-tapas-random-wtq"
+        model = AutoModelForTableQuestionAnswering.from_pretrained(model_id)
+        tokenizer = AutoTokenizer.from_pretrained(model_id)
+        self.assertIsInstance(model.config.aggregation_labels, dict)
+        self.assertIsInstance(model.config.no_aggregation_label_index, int)
+
+        table_querier = TableQuestionAnsweringPipeline(model=model, tokenizer=tokenizer)
+        outputs = table_querier(
+            table={
+                "actors": ["brad pitt", "leonardo di caprio", "george clooney"],
+                "age": ["56", "45", "59"],
+                "number of movies": ["87", "53", "69"],
+                "date of birth": ["7 february 1967", "10 june 1996", "28 november 1967"],
+            },
+            query="how many movies has george clooney played in?",
+        )
+        self.assertEqual(
+            outputs,
+            {"answer": "AVERAGE > ", "coordinates": [], "cells": [], "aggregator": "AVERAGE"},
+        )
+        outputs = table_querier(
+            table={
+                "actors": ["brad pitt", "leonardo di caprio", "george clooney"],
+                "age": ["56", "45", "59"],
+                "number of movies": ["87", "53", "69"],
+                "date of birth": ["7 february 1967", "10 june 1996", "28 november 1967"],
+            },
+            query=["how many movies has george clooney played in?", "how old is he?", "what's his date of birth?"],
+        )
+        self.assertEqual(
+            outputs,
+            [
+                {"answer": "AVERAGE > ", "coordinates": [], "cells": [], "aggregator": "AVERAGE"},
+                {"answer": "AVERAGE > ", "coordinates": [], "cells": [], "aggregator": "AVERAGE"},
+                {"answer": "AVERAGE > ", "coordinates": [], "cells": [], "aggregator": "AVERAGE"},
+            ],
+        )
+        outputs = table_querier(
+            table={
+                "Repository": ["Transformers", "Datasets", "Tokenizers"],
+                "Stars": ["36542", "4512", "3934"],
+                "Contributors": ["651", "77", "34"],
+                "Programming language": ["Python", "Python", "Rust, Python and NodeJS"],
+            },
+            query=[
+                "What repository has the largest number of stars?",
+                "Given that the numbers of stars defines if a repository is active, what repository is the most active?",
+                "What is the number of repositories?",
+                "What is the average number of stars?",
+                "What is the total amount of stars?",
+            ],
+        )
+        self.assertEqual(
+            outputs,
+            [
+                {"answer": "AVERAGE > ", "coordinates": [], "cells": [], "aggregator": "AVERAGE"},
+                {"answer": "AVERAGE > ", "coordinates": [], "cells": [], "aggregator": "AVERAGE"},
+                {"answer": "AVERAGE > ", "coordinates": [], "cells": [], "aggregator": "AVERAGE"},
+                {"answer": "AVERAGE > ", "coordinates": [], "cells": [], "aggregator": "AVERAGE"},
+                {"answer": "AVERAGE > ", "coordinates": [], "cells": [], "aggregator": "AVERAGE"},
+            ],
+        )
+
+        with self.assertRaises(ValueError):
+            table_querier(query="What does it do with empty context ?", table=None)
+        with self.assertRaises(ValueError):
+            table_querier(query="What does it do with empty context ?", table="")
+        with self.assertRaises(ValueError):
+            table_querier(query="What does it do with empty context ?", table={})
+        with self.assertRaises(ValueError):
+            table_querier(
+                table={
+                    "Repository": ["Transformers", "Datasets", "Tokenizers"],
+                    "Stars": ["36542", "4512", "3934"],
+                    "Contributors": ["651", "77", "34"],
+                    "Programming language": ["Python", "Python", "Rust, Python and NodeJS"],
+                }
+            )
+        with self.assertRaises(ValueError):
+            table_querier(
+                query="",
+                table={
+                    "Repository": ["Transformers", "Datasets", "Tokenizers"],
+                    "Stars": ["36542", "4512", "3934"],
+                    "Contributors": ["651", "77", "34"],
+                    "Programming language": ["Python", "Python", "Rust, Python and NodeJS"],
+                },
+            )
+        with self.assertRaises(ValueError):
+            table_querier(
+                query=None,
+                table={
+                    "Repository": ["Transformers", "Datasets", "Tokenizers"],
+                    "Stars": ["36542", "4512", "3934"],
+                    "Contributors": ["651", "77", "34"],
+                    "Programming language": ["Python", "Python", "Rust, Python and NodeJS"],
+                },
+            )
+
+    @require_torch
+    @require_torch_scatter
+    def test_slow_tokenizer_sqa_pt(self):
+        model_id = "lysandre/tiny-tapas-random-sqa"
+        model = AutoModelForTableQuestionAnswering.from_pretrained(model_id)
+        tokenizer = AutoTokenizer.from_pretrained(model_id)
+        table_querier = TableQuestionAnsweringPipeline(model=model, tokenizer=tokenizer)
+
+        inputs = {
+            "table": {
+                "actors": ["brad pitt", "leonardo di caprio", "george clooney"],
+                "age": ["56", "45", "59"],
+                "number of movies": ["87", "53", "69"],
+                "date of birth": ["7 february 1967", "10 june 1996", "28 november 1967"],
+            },
+            "query": ["how many movies has george clooney played in?", "how old is he?", "what's his date of birth?"],
+        }
+        sequential_outputs = table_querier(**inputs, sequential=True)
+        batch_outputs = table_querier(**inputs, sequential=False)
+
+        self.assertEqual(len(sequential_outputs), 3)
+        self.assertEqual(len(batch_outputs), 3)
+        self.assertEqual(sequential_outputs[0], batch_outputs[0])
+        self.assertNotEqual(sequential_outputs[1], batch_outputs[1])
+        # self.assertNotEqual(sequential_outputs[2], batch_outputs[2])
+
+        table_querier = TableQuestionAnsweringPipeline(model=model, tokenizer=tokenizer)
+        outputs = table_querier(
+            table={
+                "actors": ["brad pitt", "leonardo di caprio", "george clooney"],
+                "age": ["56", "45", "59"],
+                "number of movies": ["87", "53", "69"],
+                "date of birth": ["7 february 1967", "10 june 1996", "28 november 1967"],
+            },
+            query="how many movies has george clooney played in?",
+        )
+        self.assertEqual(
+            outputs,
+            {"answer": "7 february 1967", "coordinates": [(0, 3)], "cells": ["7 february 1967"]},
+        )
+        outputs = table_querier(
+            table={
+                "actors": ["brad pitt", "leonardo di caprio", "george clooney"],
+                "age": ["56", "45", "59"],
+                "number of movies": ["87", "53", "69"],
+                "date of birth": ["7 february 1967", "10 june 1996", "28 november 1967"],
+            },
+            query=["how many movies has george clooney played in?", "how old is he?", "what's his date of birth?"],
+        )
+        self.assertEqual(
+            outputs,
+            [
+                {"answer": "7 february 1967", "coordinates": [(0, 3)], "cells": ["7 february 1967"]},
+                {"answer": "7 february 1967", "coordinates": [(0, 3)], "cells": ["7 february 1967"]},
+                {"answer": "7 february 1967", "coordinates": [(0, 3)], "cells": ["7 february 1967"]},
+            ],
+        )
+        outputs = table_querier(
+            table={
+                "Repository": ["Transformers", "Datasets", "Tokenizers"],
+                "Stars": ["36542", "4512", "3934"],
+                "Contributors": ["651", "77", "34"],
+                "Programming language": ["Python", "Python", "Rust, Python and NodeJS"],
+            },
+            query=[
+                "What repository has the largest number of stars?",
+                "Given that the numbers of stars defines if a repository is active, what repository is the most active?",
+                "What is the number of repositories?",
+                "What is the average number of stars?",
+                "What is the total amount of stars?",
+            ],
+        )
+        self.assertEqual(
+            outputs,
+            [
+                {"answer": "Python, Python", "coordinates": [(0, 3), (1, 3)], "cells": ["Python", "Python"]},
+                {"answer": "Python, Python", "coordinates": [(0, 3), (1, 3)], "cells": ["Python", "Python"]},
+                {"answer": "Python, Python", "coordinates": [(0, 3), (1, 3)], "cells": ["Python", "Python"]},
+                {"answer": "Python, Python", "coordinates": [(0, 3), (1, 3)], "cells": ["Python", "Python"]},
+                {"answer": "Python, Python", "coordinates": [(0, 3), (1, 3)], "cells": ["Python", "Python"]},
+            ],
+        )
+
+        with self.assertRaises(ValueError):
+            table_querier(query="What does it do with empty context ?", table=None)
+        with self.assertRaises(ValueError):
+            table_querier(query="What does it do with empty context ?", table="")
+        with self.assertRaises(ValueError):
+            table_querier(query="What does it do with empty context ?", table={})
+        with self.assertRaises(ValueError):
+            table_querier(
+                table={
+                    "Repository": ["Transformers", "Datasets", "Tokenizers"],
+                    "Stars": ["36542", "4512", "3934"],
+                    "Contributors": ["651", "77", "34"],
+                    "Programming language": ["Python", "Python", "Rust, Python and NodeJS"],
+                }
+            )
+        with self.assertRaises(ValueError):
+            table_querier(
+                query="",
+                table={
+                    "Repository": ["Transformers", "Datasets", "Tokenizers"],
+                    "Stars": ["36542", "4512", "3934"],
+                    "Contributors": ["651", "77", "34"],
+                    "Programming language": ["Python", "Python", "Rust, Python and NodeJS"],
+                },
+            )
+        with self.assertRaises(ValueError):
+            table_querier(
+                query=None,
+                table={
+                    "Repository": ["Transformers", "Datasets", "Tokenizers"],
+                    "Stars": ["36542", "4512", "3934"],
+                    "Contributors": ["651", "77", "34"],
+                    "Programming language": ["Python", "Python", "Rust, Python and NodeJS"],
+                },
+            )
+
+    @require_tf
+    @require_tensorflow_probability
+    @require_pandas
+    @require_torch
+    def test_slow_tokenizer_sqa_tf(self):
+        model_id = "lysandre/tiny-tapas-random-sqa"
+        model = TFAutoModelForTableQuestionAnswering.from_pretrained(model_id, from_pt=True)
+        tokenizer = AutoTokenizer.from_pretrained(model_id)
+        table_querier = TableQuestionAnsweringPipeline(model=model, tokenizer=tokenizer)
+
+        inputs = {
+            "table": {
+                "actors": ["brad pitt", "leonardo di caprio", "george clooney"],
+                "age": ["56", "45", "59"],
+                "number of movies": ["87", "53", "69"],
+                "date of birth": ["7 february 1967", "10 june 1996", "28 november 1967"],
+            },
+            "query": ["how many movies has george clooney played in?", "how old is he?", "what's his date of birth?"],
+        }
+        sequential_outputs = table_querier(**inputs, sequential=True)
+        batch_outputs = table_querier(**inputs, sequential=False)
+
+        self.assertEqual(len(sequential_outputs), 3)
+        self.assertEqual(len(batch_outputs), 3)
+        self.assertEqual(sequential_outputs[0], batch_outputs[0])
+        self.assertNotEqual(sequential_outputs[1], batch_outputs[1])
+        # self.assertNotEqual(sequential_outputs[2], batch_outputs[2])
+
+        table_querier = TableQuestionAnsweringPipeline(model=model, tokenizer=tokenizer)
+        outputs = table_querier(
+            table={
+                "actors": ["brad pitt", "leonardo di caprio", "george clooney"],
+                "age": ["56", "45", "59"],
+                "number of movies": ["87", "53", "69"],
+                "date of birth": ["7 february 1967", "10 june 1996", "28 november 1967"],
+            },
+            query="how many movies has george clooney played in?",
+        )
+        self.assertEqual(
+            outputs,
+            {"answer": "7 february 1967", "coordinates": [(0, 3)], "cells": ["7 february 1967"]},
+        )
+        outputs = table_querier(
+            table={
+                "actors": ["brad pitt", "leonardo di caprio", "george clooney"],
+                "age": ["56", "45", "59"],
+                "number of movies": ["87", "53", "69"],
+                "date of birth": ["7 february 1967", "10 june 1996", "28 november 1967"],
+            },
+            query=["how many movies has george clooney played in?", "how old is he?", "what's his date of birth?"],
+        )
+        self.assertEqual(
+            outputs,
+            [
+                {"answer": "7 february 1967", "coordinates": [(0, 3)], "cells": ["7 february 1967"]},
+                {"answer": "7 february 1967", "coordinates": [(0, 3)], "cells": ["7 february 1967"]},
+                {"answer": "7 february 1967", "coordinates": [(0, 3)], "cells": ["7 february 1967"]},
+            ],
+        )
+        outputs = table_querier(
+            table={
+                "Repository": ["Transformers", "Datasets", "Tokenizers"],
+                "Stars": ["36542", "4512", "3934"],
+                "Contributors": ["651", "77", "34"],
+                "Programming language": ["Python", "Python", "Rust, Python and NodeJS"],
+            },
+            query=[
+                "What repository has the largest number of stars?",
+                "Given that the numbers of stars defines if a repository is active, what repository is the most active?",
+                "What is the number of repositories?",
+                "What is the average number of stars?",
+                "What is the total amount of stars?",
+            ],
+        )
+        self.assertEqual(
+            outputs,
+            [
+                {"answer": "Python, Python", "coordinates": [(0, 3), (1, 3)], "cells": ["Python", "Python"]},
+                {"answer": "Python, Python", "coordinates": [(0, 3), (1, 3)], "cells": ["Python", "Python"]},
+                {"answer": "Python, Python", "coordinates": [(0, 3), (1, 3)], "cells": ["Python", "Python"]},
+                {"answer": "Python, Python", "coordinates": [(0, 3), (1, 3)], "cells": ["Python", "Python"]},
+                {"answer": "Python, Python", "coordinates": [(0, 3), (1, 3)], "cells": ["Python", "Python"]},
+            ],
+        )
+
+        with self.assertRaises(ValueError):
+            table_querier(query="What does it do with empty context ?", table=None)
+        with self.assertRaises(ValueError):
+            table_querier(query="What does it do with empty context ?", table="")
+        with self.assertRaises(ValueError):
+            table_querier(query="What does it do with empty context ?", table={})
+        with self.assertRaises(ValueError):
+            table_querier(
+                table={
+                    "Repository": ["Transformers", "Datasets", "Tokenizers"],
+                    "Stars": ["36542", "4512", "3934"],
+                    "Contributors": ["651", "77", "34"],
+                    "Programming language": ["Python", "Python", "Rust, Python and NodeJS"],
+                }
+            )
+        with self.assertRaises(ValueError):
+            table_querier(
+                query="",
+                table={
+                    "Repository": ["Transformers", "Datasets", "Tokenizers"],
+                    "Stars": ["36542", "4512", "3934"],
+                    "Contributors": ["651", "77", "34"],
+                    "Programming language": ["Python", "Python", "Rust, Python and NodeJS"],
+                },
+            )
+        with self.assertRaises(ValueError):
+            table_querier(
+                query=None,
+                table={
+                    "Repository": ["Transformers", "Datasets", "Tokenizers"],
+                    "Stars": ["36542", "4512", "3934"],
+                    "Contributors": ["651", "77", "34"],
+                    "Programming language": ["Python", "Python", "Rust, Python and NodeJS"],
+                },
+            )
+
+    @slow
+    @require_torch_scatter
+    def test_integration_wtq_pt(self):
+        table_querier = pipeline("table-question-answering")
+
+        data = {
+            "Repository": ["Transformers", "Datasets", "Tokenizers"],
+            "Stars": ["36542", "4512", "3934"],
+            "Contributors": ["651", "77", "34"],
+            "Programming language": ["Python", "Python", "Rust, Python and NodeJS"],
+        }
+        queries = [
+            "What repository has the largest number of stars?",
+            "Given that the numbers of stars defines if a repository is active, what repository is the most active?",
+            "What is the number of repositories?",
+            "What is the average number of stars?",
+            "What is the total amount of stars?",
+        ]
+
+        results = table_querier(data, queries)
+
+        expected_results = [
+            {"answer": "Transformers", "coordinates": [(0, 0)], "cells": ["Transformers"], "aggregator": "NONE"},
+            {"answer": "Transformers", "coordinates": [(0, 0)], "cells": ["Transformers"], "aggregator": "NONE"},
+            {
+                "answer": "COUNT > Transformers, Datasets, Tokenizers",
+                "coordinates": [(0, 0), (1, 0), (2, 0)],
+                "cells": ["Transformers", "Datasets", "Tokenizers"],
+                "aggregator": "COUNT",
+            },
+            {
+                "answer": "AVERAGE > 36542, 4512, 3934",
+                "coordinates": [(0, 1), (1, 1), (2, 1)],
+                "cells": ["36542", "4512", "3934"],
+                "aggregator": "AVERAGE",
+            },
+            {
+                "answer": "SUM > 36542, 4512, 3934",
+                "coordinates": [(0, 1), (1, 1), (2, 1)],
+                "cells": ["36542", "4512", "3934"],
+                "aggregator": "SUM",
+            },
+        ]
+        self.assertListEqual(results, expected_results)
+
+    @slow
+    @require_tensorflow_probability
+    @require_pandas
+    def test_integration_wtq_tf(self):
+        model_id = "google/tapas-base-finetuned-wtq"
+        model = TFAutoModelForTableQuestionAnswering.from_pretrained(model_id)
+        tokenizer = AutoTokenizer.from_pretrained(model_id)
+        table_querier = pipeline("table-question-answering", model=model, tokenizer=tokenizer)
+
+        data = {
+            "Repository": ["Transformers", "Datasets", "Tokenizers"],
+            "Stars": ["36542", "4512", "3934"],
+            "Contributors": ["651", "77", "34"],
+            "Programming language": ["Python", "Python", "Rust, Python and NodeJS"],
+        }
+        queries = [
+            "What repository has the largest number of stars?",
+            "Given that the numbers of stars defines if a repository is active, what repository is the most active?",
+            "What is the number of repositories?",
+            "What is the average number of stars?",
+            "What is the total amount of stars?",
+        ]
+
+        results = table_querier(data, queries)
+
+        expected_results = [
+            {"answer": "Transformers", "coordinates": [(0, 0)], "cells": ["Transformers"], "aggregator": "NONE"},
+            {"answer": "Transformers", "coordinates": [(0, 0)], "cells": ["Transformers"], "aggregator": "NONE"},
+            {
+                "answer": "COUNT > Transformers, Datasets, Tokenizers",
+                "coordinates": [(0, 0), (1, 0), (2, 0)],
+                "cells": ["Transformers", "Datasets", "Tokenizers"],
+                "aggregator": "COUNT",
+            },
+            {
+                "answer": "AVERAGE > 36542, 4512, 3934",
+                "coordinates": [(0, 1), (1, 1), (2, 1)],
+                "cells": ["36542", "4512", "3934"],
+                "aggregator": "AVERAGE",
+            },
+            {
+                "answer": "SUM > 36542, 4512, 3934",
+                "coordinates": [(0, 1), (1, 1), (2, 1)],
+                "cells": ["36542", "4512", "3934"],
+                "aggregator": "SUM",
+            },
+        ]
+        self.assertListEqual(results, expected_results)
+
+    @slow
+    @require_torch_scatter
+    def test_integration_sqa_pt(self):
+        table_querier = pipeline(
+            "table-question-answering",
+            model="google/tapas-base-finetuned-sqa",
+            tokenizer="google/tapas-base-finetuned-sqa",
+        )
+        data = {
+            "Actors": ["Brad Pitt", "Leonardo Di Caprio", "George Clooney"],
+            "Age": ["56", "45", "59"],
+            "Number of movies": ["87", "53", "69"],
+            "Date of birth": ["7 february 1967", "10 june 1996", "28 november 1967"],
+        }
+        queries = ["How many movies has George Clooney played in?", "How old is he?", "What's his date of birth?"]
+        results = table_querier(data, queries, sequential=True)
+
+        expected_results = [
+            {"answer": "69", "coordinates": [(2, 2)], "cells": ["69"]},
+            {"answer": "59", "coordinates": [(2, 1)], "cells": ["59"]},
+            {"answer": "28 november 1967", "coordinates": [(2, 3)], "cells": ["28 november 1967"]},
+        ]
+        self.assertListEqual(results, expected_results)
+
+    @slow
+    @require_tensorflow_probability
+    @require_pandas
+    def test_integration_sqa_tf(self):
+        model_id = "google/tapas-base-finetuned-sqa"
+        model = TFAutoModelForTableQuestionAnswering.from_pretrained(model_id)
+        tokenizer = AutoTokenizer.from_pretrained(model_id)
+        table_querier = pipeline(
+            "table-question-answering",
+            model=model,
+            tokenizer=tokenizer,
+        )
+        data = {
+            "Actors": ["Brad Pitt", "Leonardo Di Caprio", "George Clooney"],
+            "Age": ["56", "45", "59"],
+            "Number of movies": ["87", "53", "69"],
+            "Date of birth": ["7 february 1967", "10 june 1996", "28 november 1967"],
+        }
+        queries = ["How many movies has George Clooney played in?", "How old is he?", "What's his date of birth?"]
+        results = table_querier(data, queries, sequential=True)
+
+        expected_results = [
+            {"answer": "69", "coordinates": [(2, 2)], "cells": ["69"]},
+            {"answer": "59", "coordinates": [(2, 1)], "cells": ["59"]},
+            {"answer": "28 november 1967", "coordinates": [(2, 3)], "cells": ["28 november 1967"]},
+        ]
+        self.assertListEqual(results, expected_results)
--- a/tests/pipelines/test_pipelines_text2text_generation.py
+++ b/tests/pipelines/test_pipelines_text2text_generation.py
@@ -0,0 +1,91 @@
+# Copyright 2020 The HuggingFace Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import unittest
+
+from transformers import (
+    MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING,
+    TF_MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING,
+    Text2TextGenerationPipeline,
+    pipeline,
+)
+from transformers.testing_utils import is_pipeline_test, require_tf, require_torch
+
+from .test_pipelines_common import ANY, PipelineTestCaseMeta
+
+
+@is_pipeline_test
+class Text2TextGenerationPipelineTests(unittest.TestCase, metaclass=PipelineTestCaseMeta):
+    model_mapping = MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING
+    tf_model_mapping = TF_MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING
+
+    def get_test_pipeline(self, model, tokenizer, feature_extractor):
+        generator = Text2TextGenerationPipeline(model=model, tokenizer=tokenizer)
+        return generator, ["Something to write", "Something else"]
+
+    def run_pipeline_test(self, generator, _):
+        outputs = generator("Something there")
+        self.assertEqual(outputs, [{"generated_text": ANY(str)}])
+        # These are encoder decoder, they don't just append to incoming string
+        self.assertFalse(outputs[0]["generated_text"].startswith("Something there"))
+
+        outputs = generator(["This is great !", "Something else"], num_return_sequences=2, do_sample=True)
+        self.assertEqual(
+            outputs,
+            [
+                [{"generated_text": ANY(str)}, {"generated_text": ANY(str)}],
+                [{"generated_text": ANY(str)}, {"generated_text": ANY(str)}],
+            ],
+        )
+
+        outputs = generator(
+            ["This is great !", "Something else"], num_return_sequences=2, batch_size=2, do_sample=True
+        )
+        self.assertEqual(
+            outputs,
+            [
+                [{"generated_text": ANY(str)}, {"generated_text": ANY(str)}],
+                [{"generated_text": ANY(str)}, {"generated_text": ANY(str)}],
+            ],
+        )
+
+        with self.assertRaises(ValueError):
+            generator(4)
+
+    @require_torch
+    def test_small_model_pt(self):
+        generator = pipeline("text2text-generation", model="patrickvonplaten/t5-tiny-random", framework="pt")
+        # do_sample=False necessary for reproducibility
+        outputs = generator("Something there", do_sample=False)
+        self.assertEqual(outputs, [{"generated_text": ""}])
+
+        num_return_sequences = 3
+        outputs = generator(
+            "Something there",
+            num_return_sequences=num_return_sequences,
+            num_beams=num_return_sequences,
+        )
+        target_outputs = [
+            {"generated_text": "Beide Beide Beide Beide Beide Beide Beide Beide Beide"},
+            {"generated_text": "Beide Beide Beide Beide Beide Beide Beide Beide"},
+            {"generated_text": ""},
+        ]
+        self.assertEqual(outputs, target_outputs)
+
+    @require_tf
+    def test_small_model_tf(self):
+        generator = pipeline("text2text-generation", model="patrickvonplaten/t5-tiny-random", framework="tf")
+        # do_sample=False necessary for reproducibility
+        outputs = generator("Something there", do_sample=False)
+        self.assertEqual(outputs, [{"generated_text": ""}])
--- a/tests/pipelines/test_pipelines_text_classification.py
+++ b/tests/pipelines/test_pipelines_text_classification.py
@@ -0,0 +1,95 @@
+# Copyright 2020 The HuggingFace Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import unittest
+
+from transformers import (
+    MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING,
+    TF_MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING,
+    TextClassificationPipeline,
+    pipeline,
+)
+from transformers.testing_utils import is_pipeline_test, nested_simplify, require_tf, require_torch, slow
+
+from .test_pipelines_common import ANY, PipelineTestCaseMeta
+
+
+@is_pipeline_test
+class TextClassificationPipelineTests(unittest.TestCase, metaclass=PipelineTestCaseMeta):
+    model_mapping = MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING
+    tf_model_mapping = TF_MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING
+
+    @require_torch
+    def test_small_model_pt(self):
+        text_classifier = pipeline(
+            task="text-classification", model="hf-internal-testing/tiny-random-distilbert", framework="pt"
+        )
+
+        outputs = text_classifier("This is great !")
+        self.assertEqual(nested_simplify(outputs), [{"label": "LABEL_0", "score": 0.504}])
+
+    @require_tf
+    def test_small_model_tf(self):
+        text_classifier = pipeline(
+            task="text-classification", model="hf-internal-testing/tiny-random-distilbert", framework="tf"
+        )
+
+        outputs = text_classifier("This is great !")
+        self.assertEqual(nested_simplify(outputs), [{"label": "LABEL_0", "score": 0.504}])
+
+    @slow
+    @require_torch
+    def test_pt_bert(self):
+        text_classifier = pipeline("text-classification")
+
+        outputs = text_classifier("This is great !")
+        self.assertEqual(nested_simplify(outputs), [{"label": "POSITIVE", "score": 1.0}])
+        outputs = text_classifier("This is bad !")
+        self.assertEqual(nested_simplify(outputs), [{"label": "NEGATIVE", "score": 1.0}])
+        outputs = text_classifier("Birds are a type of animal")
+        self.assertEqual(nested_simplify(outputs), [{"label": "POSITIVE", "score": 0.988}])
+
+    @slow
+    @require_tf
+    def test_tf_bert(self):
+        text_classifier = pipeline("text-classification", framework="tf")
+
+        outputs = text_classifier("This is great !")
+        self.assertEqual(nested_simplify(outputs), [{"label": "POSITIVE", "score": 1.0}])
+        outputs = text_classifier("This is bad !")
+        self.assertEqual(nested_simplify(outputs), [{"label": "NEGATIVE", "score": 1.0}])
+        outputs = text_classifier("Birds are a type of animal")
+        self.assertEqual(nested_simplify(outputs), [{"label": "POSITIVE", "score": 0.988}])
+
+    def get_test_pipeline(self, model, tokenizer, feature_extractor):
+        text_classifier = TextClassificationPipeline(model=model, tokenizer=tokenizer)
+        return text_classifier, ["HuggingFace is in", "This is another test"]
+
+    def run_pipeline_test(self, text_classifier, _):
+        model = text_classifier.model
+        # Small inputs because BartTokenizer tiny has maximum position embeddings = 22
+        valid_inputs = "HuggingFace is in"
+        outputs = text_classifier(valid_inputs)
+
+        self.assertEqual(nested_simplify(outputs), [{"label": ANY(str), "score": ANY(float)}])
+        self.assertTrue(outputs[0]["label"] in model.config.id2label.values())
+
+        valid_inputs = ["HuggingFace is in ", "Paris is in France"]
+        outputs = text_classifier(valid_inputs)
+        self.assertEqual(
+            nested_simplify(outputs),
+            [{"label": ANY(str), "score": ANY(float)}, {"label": ANY(str), "score": ANY(float)}],
+        )
+        self.assertTrue(outputs[0]["label"] in model.config.id2label.values())
+        self.assertTrue(outputs[1]["label"] in model.config.id2label.values())
--- a/tests/pipelines/test_pipelines_text_generation.py
+++ b/tests/pipelines/test_pipelines_text_generation.py
@@ -0,0 +1,168 @@
+# Copyright 2020 The HuggingFace Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import unittest
+
+from transformers import MODEL_FOR_CAUSAL_LM_MAPPING, TF_MODEL_FOR_CAUSAL_LM_MAPPING, TextGenerationPipeline, pipeline
+from transformers.testing_utils import is_pipeline_test, require_tf, require_torch
+
+from .test_pipelines_common import ANY, PipelineTestCaseMeta
+
+
+@is_pipeline_test
+class TextGenerationPipelineTests(unittest.TestCase, metaclass=PipelineTestCaseMeta):
+    model_mapping = MODEL_FOR_CAUSAL_LM_MAPPING
+    tf_model_mapping = TF_MODEL_FOR_CAUSAL_LM_MAPPING
+
+    @require_torch
+    def test_small_model_pt(self):
+        text_generator = pipeline(task="text-generation", model="sshleifer/tiny-ctrl", framework="pt")
+        # Using `do_sample=False` to force deterministic output
+        outputs = text_generator("This is a test", do_sample=False)
+        self.assertEqual(
+            outputs,
+            [
+                {
+                    "generated_text": "This is a test ☃ ☃ segmental segmental segmental 议议eski eski flutter flutter Lacy oscope. oscope. FiliFili@@"
+                }
+            ],
+        )
+
+        outputs = text_generator(["This is a test", "This is a second test"])
+        self.assertEqual(
+            outputs,
+            [
+                [
+                    {
+                        "generated_text": "This is a test ☃ ☃ segmental segmental segmental 议议eski eski flutter flutter Lacy oscope. oscope. FiliFili@@"
+                    }
+                ],
+                [
+                    {
+                        "generated_text": "This is a second test ☃ segmental segmental segmental 议议eski eski flutter flutter Lacy oscope. oscope. FiliFili@@"
+                    }
+                ],
+            ],
+        )
+
+    @require_tf
+    def test_small_model_tf(self):
+        text_generator = pipeline(task="text-generation", model="sshleifer/tiny-ctrl", framework="tf")
+
+        # Using `do_sample=False` to force deterministic output
+        outputs = text_generator("This is a test", do_sample=False)
+        self.assertEqual(
+            outputs,
+            [
+                {
+                    "generated_text": "This is a test FeyFeyFey(Croatis.), s.), Cannes Cannes Cannes 閲閲Cannes Cannes Cannes 攵 please,"
+                }
+            ],
+        )
+
+        outputs = text_generator(["This is a test", "This is a second test"], do_sample=False)
+        self.assertEqual(
+            outputs,
+            [
+                [
+                    {
+                        "generated_text": "This is a test FeyFeyFey(Croatis.), s.), Cannes Cannes Cannes 閲閲Cannes Cannes Cannes 攵 please,"
+                    }
+                ],
+                [
+                    {
+                        "generated_text": "This is a second test Chieftain Chieftain prefecture prefecture prefecture Cannes Cannes Cannes 閲閲Cannes Cannes Cannes 攵 please,"
+                    }
+                ],
+            ],
+        )
+
+    def get_test_pipeline(self, model, tokenizer, feature_extractor):
+        text_generator = TextGenerationPipeline(model=model, tokenizer=tokenizer)
+        return text_generator, ["This is a test", "Another test"]
+
+    def run_pipeline_test(self, text_generator, _):
+        model = text_generator.model
+        tokenizer = text_generator.tokenizer
+
+        outputs = text_generator("This is a test")
+        self.assertEqual(outputs, [{"generated_text": ANY(str)}])
+        self.assertTrue(outputs[0]["generated_text"].startswith("This is a test"))
+
+        outputs = text_generator("This is a test", return_full_text=False)
+        self.assertEqual(outputs, [{"generated_text": ANY(str)}])
+        self.assertNotIn("This is a test", outputs[0]["generated_text"])
+
+        text_generator = pipeline(task="text-generation", model=model, tokenizer=tokenizer, return_full_text=False)
+        outputs = text_generator("This is a test")
+        self.assertEqual(outputs, [{"generated_text": ANY(str)}])
+        self.assertNotIn("This is a test", outputs[0]["generated_text"])
+
+        outputs = text_generator("This is a test", return_full_text=True)
+        self.assertEqual(outputs, [{"generated_text": ANY(str)}])
+        self.assertTrue(outputs[0]["generated_text"].startswith("This is a test"))
+
+        outputs = text_generator(["This is great !", "Something else"], num_return_sequences=2, do_sample=True)
+        self.assertEqual(
+            outputs,
+            [
+                [{"generated_text": ANY(str)}, {"generated_text": ANY(str)}],
+                [{"generated_text": ANY(str)}, {"generated_text": ANY(str)}],
+            ],
+        )
+
+        if text_generator.tokenizer.pad_token is not None:
+            outputs = text_generator(
+                ["This is great !", "Something else"], num_return_sequences=2, batch_size=2, do_sample=True
+            )
+            self.assertEqual(
+                outputs,
+                [
+                    [{"generated_text": ANY(str)}, {"generated_text": ANY(str)}],
+                    [{"generated_text": ANY(str)}, {"generated_text": ANY(str)}],
+                ],
+            )
+
+        # Empty prompt is slighly special
+        # it requires BOS token to exist.
+        # Special case for Pegasus which will always append EOS so will
+        # work even without BOS.
+        if text_generator.tokenizer.bos_token_id is not None or "Pegasus" in tokenizer.__class__.__name__:
+            outputs = text_generator("")
+            self.assertEqual(outputs, [{"generated_text": ANY(str)}])
+        else:
+            with self.assertRaises((ValueError, AssertionError)):
+                outputs = text_generator("")
+
+        if text_generator.framework == "tf":
+            # TF generation does not support max_new_tokens, and it's impossible
+            # to control long generation with only max_length without
+            # fancy calculation, dismissing tests for now.
+            return
+        # We don't care about infinite range models.
+        # They already work.
+        # Skip this test for XGLM, since it uses sinusoidal positional embeddings which are resized on-the-fly.
+        if tokenizer.model_max_length < 10000 and "XGLM" not in tokenizer.__class__.__name__:
+            # Handling of large generations
+            with self.assertRaises((RuntimeError, IndexError, ValueError, AssertionError)):
+                text_generator("This is a test" * 500, max_new_tokens=20)
+
+            outputs = text_generator("This is a test" * 500, handle_long_generation="hole", max_new_tokens=20)
+            # Hole strategy cannot work
+            with self.assertRaises(ValueError):
+                text_generator(
+                    "This is a test" * 500,
+                    handle_long_generation="hole",
+                    max_new_tokens=tokenizer.model_max_length + 10,
+                )
--- a/tests/pipelines/test_pipelines_token_classification.py
+++ b/tests/pipelines/test_pipelines_token_classification.py
@@ -0,0 +1,758 @@
+# Copyright 2020 The HuggingFace Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import unittest
+
+import numpy as np
+
+from transformers import (
+    MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING,
+    TF_MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING,
+    AutoModelForTokenClassification,
+    AutoTokenizer,
+    TokenClassificationPipeline,
+    pipeline,
+)
+from transformers.pipelines import AggregationStrategy, TokenClassificationArgumentHandler
+from transformers.testing_utils import (
+    is_pipeline_test,
+    nested_simplify,
+    require_tf,
+    require_torch,
+    require_torch_gpu,
+    slow,
+)
+
+from .test_pipelines_common import ANY, PipelineTestCaseMeta
+
+
+VALID_INPUTS = ["A simple string", ["list of strings", "A simple string that is quite a bit longer"]]
+
+
+@is_pipeline_test
+class TokenClassificationPipelineTests(unittest.TestCase, metaclass=PipelineTestCaseMeta):
+    model_mapping = MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING
+    tf_model_mapping = TF_MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING
+
+    def get_test_pipeline(self, model, tokenizer, feature_extractor):
+        token_classifier = TokenClassificationPipeline(model=model, tokenizer=tokenizer)
+        return token_classifier, ["A simple string", "A simple string that is quite a bit longer"]
+
+    def run_pipeline_test(self, token_classifier, _):
+        model = token_classifier.model
+        tokenizer = token_classifier.tokenizer
+
+        outputs = token_classifier("A simple string")
+        self.assertIsInstance(outputs, list)
+        n = len(outputs)
+        self.assertEqual(
+            nested_simplify(outputs),
+            [
+                {
+                    "entity": ANY(str),
+                    "score": ANY(float),
+                    "start": ANY(int),
+                    "end": ANY(int),
+                    "index": ANY(int),
+                    "word": ANY(str),
+                }
+                for i in range(n)
+            ],
+        )
+        outputs = token_classifier(["list of strings", "A simple string that is quite a bit longer"])
+        self.assertIsInstance(outputs, list)
+        self.assertEqual(len(outputs), 2)
+        n = len(outputs[0])
+        m = len(outputs[1])
+
+        self.assertEqual(
+            nested_simplify(outputs),
+            [
+                [
+                    {
+                        "entity": ANY(str),
+                        "score": ANY(float),
+                        "start": ANY(int),
+                        "end": ANY(int),
+                        "index": ANY(int),
+                        "word": ANY(str),
+                    }
+                    for i in range(n)
+                ],
+                [
+                    {
+                        "entity": ANY(str),
+                        "score": ANY(float),
+                        "start": ANY(int),
+                        "end": ANY(int),
+                        "index": ANY(int),
+                        "word": ANY(str),
+                    }
+                    for i in range(m)
+                ],
+            ],
+        )
+
+        self.run_aggregation_strategy(model, tokenizer)
+
+    def run_aggregation_strategy(self, model, tokenizer):
+        token_classifier = TokenClassificationPipeline(model=model, tokenizer=tokenizer, aggregation_strategy="simple")
+        self.assertEqual(token_classifier._postprocess_params["aggregation_strategy"], AggregationStrategy.SIMPLE)
+        outputs = token_classifier("A simple string")
+        self.assertIsInstance(outputs, list)
+        n = len(outputs)
+        self.assertEqual(
+            nested_simplify(outputs),
+            [
+                {
+                    "entity_group": ANY(str),
+                    "score": ANY(float),
+                    "start": ANY(int),
+                    "end": ANY(int),
+                    "word": ANY(str),
+                }
+                for i in range(n)
+            ],
+        )
+
+        token_classifier = TokenClassificationPipeline(model=model, tokenizer=tokenizer, aggregation_strategy="first")
+        self.assertEqual(token_classifier._postprocess_params["aggregation_strategy"], AggregationStrategy.FIRST)
+        outputs = token_classifier("A simple string")
+        self.assertIsInstance(outputs, list)
+        n = len(outputs)
+        self.assertEqual(
+            nested_simplify(outputs),
+            [
+                {
+                    "entity_group": ANY(str),
+                    "score": ANY(float),
+                    "start": ANY(int),
+                    "end": ANY(int),
+                    "word": ANY(str),
+                }
+                for i in range(n)
+            ],
+        )
+
+        token_classifier = TokenClassificationPipeline(model=model, tokenizer=tokenizer, aggregation_strategy="max")
+        self.assertEqual(token_classifier._postprocess_params["aggregation_strategy"], AggregationStrategy.MAX)
+        outputs = token_classifier("A simple string")
+        self.assertIsInstance(outputs, list)
+        n = len(outputs)
+        self.assertEqual(
+            nested_simplify(outputs),
+            [
+                {
+                    "entity_group": ANY(str),
+                    "score": ANY(float),
+                    "start": ANY(int),
+                    "end": ANY(int),
+                    "word": ANY(str),
+                }
+                for i in range(n)
+            ],
+        )
+
+        token_classifier = TokenClassificationPipeline(
+            model=model, tokenizer=tokenizer, aggregation_strategy="average"
+        )
+        self.assertEqual(token_classifier._postprocess_params["aggregation_strategy"], AggregationStrategy.AVERAGE)
+        outputs = token_classifier("A simple string")
+        self.assertIsInstance(outputs, list)
+        n = len(outputs)
+        self.assertEqual(
+            nested_simplify(outputs),
+            [
+                {
+                    "entity_group": ANY(str),
+                    "score": ANY(float),
+                    "start": ANY(int),
+                    "end": ANY(int),
+                    "word": ANY(str),
+                }
+                for i in range(n)
+            ],
+        )
+
+        with self.assertWarns(UserWarning):
+            token_classifier = pipeline(task="ner", model=model, tokenizer=tokenizer, grouped_entities=True)
+        self.assertEqual(token_classifier._postprocess_params["aggregation_strategy"], AggregationStrategy.SIMPLE)
+        with self.assertWarns(UserWarning):
+            token_classifier = pipeline(
+                task="ner", model=model, tokenizer=tokenizer, grouped_entities=True, ignore_subwords=True
+            )
+        self.assertEqual(token_classifier._postprocess_params["aggregation_strategy"], AggregationStrategy.FIRST)
+
+    @require_torch
+    @slow
+    def test_spanish_bert(self):
+        # https://github.com/huggingface/transformers/pull/4987
+        NER_MODEL = "mrm8488/bert-spanish-cased-finetuned-ner"
+        model = AutoModelForTokenClassification.from_pretrained(NER_MODEL)
+        tokenizer = AutoTokenizer.from_pretrained(NER_MODEL, use_fast=True)
+        sentence = """Consuelo Araújo Noguera, ministra de cultura del presidente Andrés Pastrana (1998.2002) fue asesinada por las Farc luego de haber permanecido secuestrada por algunos meses."""
+
+        token_classifier = pipeline("ner", model=model, tokenizer=tokenizer)
+        output = token_classifier(sentence)
+        self.assertEqual(
+            nested_simplify(output[:3]),
+            [
+                {"entity": "B-PER", "score": 0.999, "word": "Cons", "start": 0, "end": 4, "index": 1},
+                {"entity": "B-PER", "score": 0.803, "word": "##uelo", "start": 4, "end": 8, "index": 2},
+                {"entity": "I-PER", "score": 0.999, "word": "Ara", "start": 9, "end": 12, "index": 3},
+            ],
+        )
+
+        token_classifier = pipeline("ner", model=model, tokenizer=tokenizer, aggregation_strategy="simple")
+        output = token_classifier(sentence)
+        self.assertEqual(
+            nested_simplify(output[:3]),
+            [
+                {"entity_group": "PER", "score": 0.999, "word": "Cons", "start": 0, "end": 4},
+                {"entity_group": "PER", "score": 0.966, "word": "##uelo Araújo Noguera", "start": 4, "end": 23},
+                {"entity_group": "PER", "score": 1.0, "word": "Andrés Pastrana", "start": 60, "end": 75},
+            ],
+        )
+
+        token_classifier = pipeline("ner", model=model, tokenizer=tokenizer, aggregation_strategy="first")
+        output = token_classifier(sentence)
+        self.assertEqual(
+            nested_simplify(output[:3]),
+            [
+                {"entity_group": "PER", "score": 0.999, "word": "Consuelo Araújo Noguera", "start": 0, "end": 23},
+                {"entity_group": "PER", "score": 1.0, "word": "Andrés Pastrana", "start": 60, "end": 75},
+                {"entity_group": "ORG", "score": 0.999, "word": "Farc", "start": 110, "end": 114},
+            ],
+        )
+
+        token_classifier = pipeline("ner", model=model, tokenizer=tokenizer, aggregation_strategy="max")
+        output = token_classifier(sentence)
+        self.assertEqual(
+            nested_simplify(output[:3]),
+            [
+                {"entity_group": "PER", "score": 0.999, "word": "Consuelo Araújo Noguera", "start": 0, "end": 23},
+                {"entity_group": "PER", "score": 1.0, "word": "Andrés Pastrana", "start": 60, "end": 75},
+                {"entity_group": "ORG", "score": 0.999, "word": "Farc", "start": 110, "end": 114},
+            ],
+        )
+
+        token_classifier = pipeline("ner", model=model, tokenizer=tokenizer, aggregation_strategy="average")
+        output = token_classifier(sentence)
+        self.assertEqual(
+            nested_simplify(output[:3]),
+            [
+                {"entity_group": "PER", "score": 0.966, "word": "Consuelo Araújo Noguera", "start": 0, "end": 23},
+                {"entity_group": "PER", "score": 1.0, "word": "Andrés Pastrana", "start": 60, "end": 75},
+                {"entity_group": "ORG", "score": 0.542, "word": "Farc", "start": 110, "end": 114},
+            ],
+        )
+
+    @require_torch_gpu
+    @slow
+    def test_gpu(self):
+        sentence = "This is dummy sentence"
+        ner = pipeline(
+            "token-classification",
+            device=0,
+            aggregation_strategy=AggregationStrategy.SIMPLE,
+        )
+
+        output = ner(sentence)
+        self.assertEqual(nested_simplify(output), [])
+
+    @require_torch
+    @slow
+    def test_dbmdz_english(self):
+        # Other sentence
+        NER_MODEL = "dbmdz/bert-large-cased-finetuned-conll03-english"
+        model = AutoModelForTokenClassification.from_pretrained(NER_MODEL)
+        tokenizer = AutoTokenizer.from_pretrained(NER_MODEL, use_fast=True)
+        sentence = """Enzo works at the the UN"""
+        token_classifier = pipeline("ner", model=model, tokenizer=tokenizer)
+        output = token_classifier(sentence)
+        self.assertEqual(
+            nested_simplify(output),
+            [
+                {"entity": "I-PER", "score": 0.997, "word": "En", "start": 0, "end": 2, "index": 1},
+                {"entity": "I-PER", "score": 0.996, "word": "##zo", "start": 2, "end": 4, "index": 2},
+                {"entity": "I-ORG", "score": 0.999, "word": "UN", "start": 22, "end": 24, "index": 7},
+            ],
+        )
+
+        token_classifier = pipeline("ner", model=model, tokenizer=tokenizer, aggregation_strategy="simple")
+        output = token_classifier(sentence)
+        self.assertEqual(
+            nested_simplify(output),
+            [
+                {"entity_group": "PER", "score": 0.996, "word": "Enzo", "start": 0, "end": 4},
+                {"entity_group": "ORG", "score": 0.999, "word": "UN", "start": 22, "end": 24},
+            ],
+        )
+
+        token_classifier = pipeline("ner", model=model, tokenizer=tokenizer, aggregation_strategy="first")
+        output = token_classifier(sentence)
+        self.assertEqual(
+            nested_simplify(output[:3]),
+            [
+                {"entity_group": "PER", "score": 0.997, "word": "Enzo", "start": 0, "end": 4},
+                {"entity_group": "ORG", "score": 0.999, "word": "UN", "start": 22, "end": 24},
+            ],
+        )
+
+        token_classifier = pipeline("ner", model=model, tokenizer=tokenizer, aggregation_strategy="max")
+        output = token_classifier(sentence)
+        self.assertEqual(
+            nested_simplify(output[:3]),
+            [
+                {"entity_group": "PER", "score": 0.997, "word": "Enzo", "start": 0, "end": 4},
+                {"entity_group": "ORG", "score": 0.999, "word": "UN", "start": 22, "end": 24},
+            ],
+        )
+
+        token_classifier = pipeline("ner", model=model, tokenizer=tokenizer, aggregation_strategy="average")
+        output = token_classifier(sentence)
+        self.assertEqual(
+            nested_simplify(output),
+            [
+                {"entity_group": "PER", "score": 0.996, "word": "Enzo", "start": 0, "end": 4},
+                {"entity_group": "ORG", "score": 0.999, "word": "UN", "start": 22, "end": 24},
+            ],
+        )
+
+    @require_torch
+    @slow
+    def test_aggregation_strategy_byte_level_tokenizer(self):
+        sentence = "Groenlinks praat over Schiphol."
+        ner = pipeline("ner", model="xlm-roberta-large-finetuned-conll02-dutch", aggregation_strategy="max")
+        self.assertEqual(
+            nested_simplify(ner(sentence)),
+            [
+                {"end": 10, "entity_group": "ORG", "score": 0.994, "start": 0, "word": "Groenlinks"},
+                {"entity_group": "LOC", "score": 1.0, "word": "Schiphol.", "start": 22, "end": 31},
+            ],
+        )
+
+    @require_torch
+    def test_aggregation_strategy_no_b_i_prefix(self):
+        model_name = "sshleifer/tiny-dbmdz-bert-large-cased-finetuned-conll03-english"
+        tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=True)
+        token_classifier = pipeline(task="ner", model=model_name, tokenizer=tokenizer, framework="pt")
+        # Just to understand scores indexes in this test
+        token_classifier.model.config.id2label = {0: "O", 1: "MISC", 2: "PER", 3: "ORG", 4: "LOC"}
+        example = [
+            {
+                # fmt : off
+                "scores": np.array([0, 0, 0, 0, 0.9968166351318359]),
+                "index": 1,
+                "is_subword": False,
+                "word": "En",
+                "start": 0,
+                "end": 2,
+            },
+            {
+                # fmt : off
+                "scores": np.array([0, 0, 0, 0, 0.9957635998725891]),
+                "index": 2,
+                "is_subword": True,
+                "word": "##zo",
+                "start": 2,
+                "end": 4,
+            },
+            {
+                # fmt: off
+                "scores": np.array([0, 0, 0, 0.9986497163772583, 0]),
+                # fmt: on
+                "index": 7,
+                "word": "UN",
+                "is_subword": False,
+                "start": 11,
+                "end": 13,
+            },
+        ]
+        self.assertEqual(
+            nested_simplify(token_classifier.aggregate(example, AggregationStrategy.NONE)),
+            [
+                {"end": 2, "entity": "LOC", "score": 0.997, "start": 0, "word": "En", "index": 1},
+                {"end": 4, "entity": "LOC", "score": 0.996, "start": 2, "word": "##zo", "index": 2},
+                {"end": 13, "entity": "ORG", "score": 0.999, "start": 11, "word": "UN", "index": 7},
+            ],
+        )
+        self.assertEqual(
+            nested_simplify(token_classifier.aggregate(example, AggregationStrategy.SIMPLE)),
+            [
+                {"entity_group": "LOC", "score": 0.996, "word": "Enzo", "start": 0, "end": 4},
+                {"entity_group": "ORG", "score": 0.999, "word": "UN", "start": 11, "end": 13},
+            ],
+        )
+
+    @require_torch
+    def test_aggregation_strategy(self):
+        model_name = "sshleifer/tiny-dbmdz-bert-large-cased-finetuned-conll03-english"
+        tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=True)
+        token_classifier = pipeline(task="ner", model=model_name, tokenizer=tokenizer, framework="pt")
+        # Just to understand scores indexes in this test
+        self.assertEqual(
+            token_classifier.model.config.id2label,
+            {0: "O", 1: "B-MISC", 2: "I-MISC", 3: "B-PER", 4: "I-PER", 5: "B-ORG", 6: "I-ORG", 7: "B-LOC", 8: "I-LOC"},
+        )
+        example = [
+            {
+                # fmt : off
+                "scores": np.array([0, 0, 0, 0, 0.9968166351318359, 0, 0, 0]),
+                "index": 1,
+                "is_subword": False,
+                "word": "En",
+                "start": 0,
+                "end": 2,
+            },
+            {
+                # fmt : off
+                "scores": np.array([0, 0, 0, 0, 0.9957635998725891, 0, 0, 0]),
+                "index": 2,
+                "is_subword": True,
+                "word": "##zo",
+                "start": 2,
+                "end": 4,
+            },
+            {
+                # fmt: off
+                "scores": np.array([0, 0, 0, 0, 0, 0.9986497163772583, 0, 0, ]),
+                # fmt: on
+                "index": 7,
+                "word": "UN",
+                "is_subword": False,
+                "start": 11,
+                "end": 13,
+            },
+        ]
+        self.assertEqual(
+            nested_simplify(token_classifier.aggregate(example, AggregationStrategy.NONE)),
+            [
+                {"end": 2, "entity": "I-PER", "score": 0.997, "start": 0, "word": "En", "index": 1},
+                {"end": 4, "entity": "I-PER", "score": 0.996, "start": 2, "word": "##zo", "index": 2},
+                {"end": 13, "entity": "B-ORG", "score": 0.999, "start": 11, "word": "UN", "index": 7},
+            ],
+        )
+        self.assertEqual(
+            nested_simplify(token_classifier.aggregate(example, AggregationStrategy.SIMPLE)),
+            [
+                {"entity_group": "PER", "score": 0.996, "word": "Enzo", "start": 0, "end": 4},
+                {"entity_group": "ORG", "score": 0.999, "word": "UN", "start": 11, "end": 13},
+            ],
+        )
+        self.assertEqual(
+            nested_simplify(token_classifier.aggregate(example, AggregationStrategy.FIRST)),
+            [
+                {"entity_group": "PER", "score": 0.997, "word": "Enzo", "start": 0, "end": 4},
+                {"entity_group": "ORG", "score": 0.999, "word": "UN", "start": 11, "end": 13},
+            ],
+        )
+        self.assertEqual(
+            nested_simplify(token_classifier.aggregate(example, AggregationStrategy.MAX)),
+            [
+                {"entity_group": "PER", "score": 0.997, "word": "Enzo", "start": 0, "end": 4},
+                {"entity_group": "ORG", "score": 0.999, "word": "UN", "start": 11, "end": 13},
+            ],
+        )
+        self.assertEqual(
+            nested_simplify(token_classifier.aggregate(example, AggregationStrategy.AVERAGE)),
+            [
+                {"entity_group": "PER", "score": 0.996, "word": "Enzo", "start": 0, "end": 4},
+                {"entity_group": "ORG", "score": 0.999, "word": "UN", "start": 11, "end": 13},
+            ],
+        )
+
+    @require_torch
+    def test_aggregation_strategy_example2(self):
+        model_name = "sshleifer/tiny-dbmdz-bert-large-cased-finetuned-conll03-english"
+        tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=True)
+        token_classifier = pipeline(task="ner", model=model_name, tokenizer=tokenizer, framework="pt")
+        # Just to understand scores indexes in this test
+        self.assertEqual(
+            token_classifier.model.config.id2label,
+            {0: "O", 1: "B-MISC", 2: "I-MISC", 3: "B-PER", 4: "I-PER", 5: "B-ORG", 6: "I-ORG", 7: "B-LOC", 8: "I-LOC"},
+        )
+        example = [
+            {
+                # Necessary for AVERAGE
+                "scores": np.array([0, 0.55, 0, 0.45, 0, 0, 0, 0, 0, 0]),
+                "is_subword": False,
+                "index": 1,
+                "word": "Ra",
+                "start": 0,
+                "end": 2,
+            },
+            {
+                "scores": np.array([0, 0, 0, 0.2, 0, 0, 0, 0.8, 0, 0]),
+                "is_subword": True,
+                "word": "##ma",
+                "start": 2,
+                "end": 4,
+                "index": 2,
+            },
+            {
+                # 4th score will have the higher average
+                # 4th score is B-PER for this model
+                # It's does not correspond to any of the subtokens.
+                "scores": np.array([0, 0, 0, 0.4, 0, 0, 0.6, 0, 0, 0]),
+                "is_subword": True,
+                "word": "##zotti",
+                "start": 11,
+                "end": 13,
+                "index": 3,
+            },
+        ]
+        self.assertEqual(
+            token_classifier.aggregate(example, AggregationStrategy.NONE),
+            [
+                {"end": 2, "entity": "B-MISC", "score": 0.55, "start": 0, "word": "Ra", "index": 1},
+                {"end": 4, "entity": "B-LOC", "score": 0.8, "start": 2, "word": "##ma", "index": 2},
+                {"end": 13, "entity": "I-ORG", "score": 0.6, "start": 11, "word": "##zotti", "index": 3},
+            ],
+        )
+
+        self.assertEqual(
+            token_classifier.aggregate(example, AggregationStrategy.FIRST),
+            [{"entity_group": "MISC", "score": 0.55, "word": "Ramazotti", "start": 0, "end": 13}],
+        )
+        self.assertEqual(
+            token_classifier.aggregate(example, AggregationStrategy.MAX),
+            [{"entity_group": "LOC", "score": 0.8, "word": "Ramazotti", "start": 0, "end": 13}],
+        )
+        self.assertEqual(
+            nested_simplify(token_classifier.aggregate(example, AggregationStrategy.AVERAGE)),
+            [{"entity_group": "PER", "score": 0.35, "word": "Ramazotti", "start": 0, "end": 13}],
+        )
+
+    @require_torch
+    def test_gather_pre_entities(self):
+        model_name = "sshleifer/tiny-dbmdz-bert-large-cased-finetuned-conll03-english"
+        tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=True)
+        token_classifier = pipeline(task="ner", model=model_name, tokenizer=tokenizer, framework="pt")
+
+        sentence = "Hello there"
+
+        tokens = tokenizer(
+            sentence,
+            return_attention_mask=False,
+            return_tensors="pt",
+            truncation=True,
+            return_special_tokens_mask=True,
+            return_offsets_mapping=True,
+        )
+        offset_mapping = tokens.pop("offset_mapping").cpu().numpy()[0]
+        special_tokens_mask = tokens.pop("special_tokens_mask").cpu().numpy()[0]
+        input_ids = tokens["input_ids"].numpy()[0]
+        # First element in [CLS]
+        scores = np.array([[1, 0, 0], [0.1, 0.3, 0.6], [0.8, 0.1, 0.1]])
+
+        pre_entities = token_classifier.gather_pre_entities(
+            sentence,
+            input_ids,
+            scores,
+            offset_mapping,
+            special_tokens_mask,
+            aggregation_strategy=AggregationStrategy.NONE,
+        )
+        self.assertEqual(
+            nested_simplify(pre_entities),
+            [
+                {"word": "Hello", "scores": [0.1, 0.3, 0.6], "start": 0, "end": 5, "is_subword": False, "index": 1},
+                {
+                    "word": "there",
+                    "scores": [0.8, 0.1, 0.1],
+                    "index": 2,
+                    "start": 6,
+                    "end": 11,
+                    "is_subword": False,
+                },
+            ],
+        )
+
+    @require_tf
+    def test_tf_only(self):
+        model_name = "hf-internal-testing/tiny-random-bert-tf-only"  # This model only has a TensorFlow version
+        # We test that if we don't specificy framework='tf', it gets detected automatically
+        token_classifier = pipeline(task="ner", model=model_name)
+        self.assertEqual(token_classifier.framework, "tf")
+
+    @require_tf
+    def test_small_model_tf(self):
+        model_name = "hf-internal-testing/tiny-bert-for-token-classification"
+        token_classifier = pipeline(task="token-classification", model=model_name, framework="tf")
+        outputs = token_classifier("This is a test !")
+        self.assertEqual(
+            nested_simplify(outputs),
+            [
+                {"entity": "I-MISC", "score": 0.115, "index": 1, "word": "this", "start": 0, "end": 4},
+                {"entity": "I-MISC", "score": 0.115, "index": 2, "word": "is", "start": 5, "end": 7},
+            ],
+        )
+
+    @require_torch
+    def test_no_offset_tokenizer(self):
+        model_name = "hf-internal-testing/tiny-bert-for-token-classification"
+        tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=False)
+        token_classifier = pipeline(task="token-classification", model=model_name, tokenizer=tokenizer, framework="pt")
+        outputs = token_classifier("This is a test !")
+        self.assertEqual(
+            nested_simplify(outputs),
+            [
+                {"entity": "I-MISC", "score": 0.115, "index": 1, "word": "this", "start": None, "end": None},
+                {"entity": "I-MISC", "score": 0.115, "index": 2, "word": "is", "start": None, "end": None},
+            ],
+        )
+
+    @require_torch
+    def test_small_model_pt(self):
+        model_name = "hf-internal-testing/tiny-bert-for-token-classification"
+        token_classifier = pipeline(task="token-classification", model=model_name, framework="pt")
+        outputs = token_classifier("This is a test !")
+        self.assertEqual(
+            nested_simplify(outputs),
+            [
+                {"entity": "I-MISC", "score": 0.115, "index": 1, "word": "this", "start": 0, "end": 4},
+                {"entity": "I-MISC", "score": 0.115, "index": 2, "word": "is", "start": 5, "end": 7},
+            ],
+        )
+
+        token_classifier = pipeline(
+            task="token-classification", model=model_name, framework="pt", ignore_labels=["O", "I-MISC"]
+        )
+        outputs = token_classifier("This is a test !")
+        self.assertEqual(
+            nested_simplify(outputs),
+            [],
+        )
+
+        token_classifier = pipeline(task="token-classification", model=model_name, framework="pt")
+        # Overload offset_mapping
+        outputs = token_classifier(
+            "This is a test !", offset_mapping=[(0, 0), (0, 1), (0, 2), (0, 0), (0, 0), (0, 0), (0, 0)]
+        )
+        self.assertEqual(
+            nested_simplify(outputs),
+            [
+                {"entity": "I-MISC", "score": 0.115, "index": 1, "word": "this", "start": 0, "end": 1},
+                {"entity": "I-MISC", "score": 0.115, "index": 2, "word": "is", "start": 0, "end": 2},
+            ],
+        )
+
+    @require_torch
+    def test_pt_ignore_subwords_slow_tokenizer_raises(self):
+        model_name = "sshleifer/tiny-dbmdz-bert-large-cased-finetuned-conll03-english"
+        tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=False)
+
+        with self.assertRaises(ValueError):
+            pipeline(task="ner", model=model_name, tokenizer=tokenizer, aggregation_strategy=AggregationStrategy.FIRST)
+        with self.assertRaises(ValueError):
+            pipeline(
+                task="ner", model=model_name, tokenizer=tokenizer, aggregation_strategy=AggregationStrategy.AVERAGE
+            )
+        with self.assertRaises(ValueError):
+            pipeline(task="ner", model=model_name, tokenizer=tokenizer, aggregation_strategy=AggregationStrategy.MAX)
+
+    @slow
+    @require_torch
+    def test_simple(self):
+        token_classifier = pipeline(task="ner", model="dslim/bert-base-NER", grouped_entities=True)
+        sentence = "Hello Sarah Jessica Parker who Jessica lives in New York"
+        sentence2 = "This is a simple test"
+        output = token_classifier(sentence)
+
+        output_ = nested_simplify(output)
+
+        self.assertEqual(
+            output_,
+            [
+                {
+                    "entity_group": "PER",
+                    "score": 0.996,
+                    "word": "Sarah Jessica Parker",
+                    "start": 6,
+                    "end": 26,
+                },
+                {"entity_group": "PER", "score": 0.977, "word": "Jessica", "start": 31, "end": 38},
+                {"entity_group": "LOC", "score": 0.999, "word": "New York", "start": 48, "end": 56},
+            ],
+        )
+
+        output = token_classifier([sentence, sentence2])
+        output_ = nested_simplify(output)
+
+        self.assertEqual(
+            output_,
+            [
+                [
+                    {"entity_group": "PER", "score": 0.996, "word": "Sarah Jessica Parker", "start": 6, "end": 26},
+                    {"entity_group": "PER", "score": 0.977, "word": "Jessica", "start": 31, "end": 38},
+                    {"entity_group": "LOC", "score": 0.999, "word": "New York", "start": 48, "end": 56},
+                ],
+                [],
+            ],
+        )
+
+
+@is_pipeline_test
+class TokenClassificationArgumentHandlerTestCase(unittest.TestCase):
+    def setUp(self):
+        self.args_parser = TokenClassificationArgumentHandler()
+
+    def test_simple(self):
+        string = "This is a simple input"
+
+        inputs, offset_mapping = self.args_parser(string)
+        self.assertEqual(inputs, [string])
+        self.assertEqual(offset_mapping, None)
+
+        inputs, offset_mapping = self.args_parser([string, string])
+        self.assertEqual(inputs, [string, string])
+        self.assertEqual(offset_mapping, None)
+
+        inputs, offset_mapping = self.args_parser(string, offset_mapping=[(0, 1), (1, 2)])
+        self.assertEqual(inputs, [string])
+        self.assertEqual(offset_mapping, [[(0, 1), (1, 2)]])
+
+        inputs, offset_mapping = self.args_parser(
+            [string, string], offset_mapping=[[(0, 1), (1, 2)], [(0, 2), (2, 3)]]
+        )
+        self.assertEqual(inputs, [string, string])
+        self.assertEqual(offset_mapping, [[(0, 1), (1, 2)], [(0, 2), (2, 3)]])
+
+    def test_errors(self):
+        string = "This is a simple input"
+
+        # 2 sentences, 1 offset_mapping, args
+        with self.assertRaises(TypeError):
+            self.args_parser(string, string, offset_mapping=[[(0, 1), (1, 2)]])
+
+        # 2 sentences, 1 offset_mapping, args
+        with self.assertRaises(TypeError):
+            self.args_parser(string, string, offset_mapping=[(0, 1), (1, 2)])
+
+        # 2 sentences, 1 offset_mapping, input_list
+        with self.assertRaises(ValueError):
+            self.args_parser([string, string], offset_mapping=[[(0, 1), (1, 2)]])
+
+        # 2 sentences, 1 offset_mapping, input_list
+        with self.assertRaises(ValueError):
+            self.args_parser([string, string], offset_mapping=[(0, 1), (1, 2)])
+
+        # 1 sentences, 2 offset_mapping
+        with self.assertRaises(ValueError):
+            self.args_parser(string, offset_mapping=[[(0, 1), (1, 2)], [(0, 2), (2, 3)]])
+
+        # 0 sentences, 1 offset_mapping
+        with self.assertRaises(TypeError):
+            self.args_parser(offset_mapping=[[(0, 1), (1, 2)]])
--- a/tests/pipelines/test_pipelines_translation.py
+++ b/tests/pipelines/test_pipelines_translation.py
@@ -0,0 +1,164 @@
+# Copyright 2020 The HuggingFace Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import unittest
+
+import pytest
+
+from transformers import (
+    MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING,
+    TF_MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING,
+    MBart50TokenizerFast,
+    MBartConfig,
+    MBartForConditionalGeneration,
+    TranslationPipeline,
+    pipeline,
+)
+from transformers.testing_utils import is_pipeline_test, require_tf, require_torch, slow
+
+from .test_pipelines_common import ANY, PipelineTestCaseMeta
+
+
+@is_pipeline_test
+class TranslationPipelineTests(unittest.TestCase, metaclass=PipelineTestCaseMeta):
+    model_mapping = MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING
+    tf_model_mapping = TF_MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING
+
+    def get_test_pipeline(self, model, tokenizer, feature_extractor):
+        if isinstance(model.config, MBartConfig):
+            src_lang, tgt_lang = list(tokenizer.lang_code_to_id.keys())[:2]
+            translator = TranslationPipeline(model=model, tokenizer=tokenizer, src_lang=src_lang, tgt_lang=tgt_lang)
+        else:
+            translator = TranslationPipeline(model=model, tokenizer=tokenizer)
+        return translator, ["Some string", "Some other text"]
+
+    def run_pipeline_test(self, translator, _):
+        outputs = translator("Some string")
+        self.assertEqual(outputs, [{"translation_text": ANY(str)}])
+
+        outputs = translator(["Some string"])
+        self.assertEqual(outputs, [{"translation_text": ANY(str)}])
+
+        outputs = translator(["Some string", "other string"])
+        self.assertEqual(outputs, [{"translation_text": ANY(str)}, {"translation_text": ANY(str)}])
+
+    @require_torch
+    def test_small_model_pt(self):
+        translator = pipeline("translation_en_to_ro", model="patrickvonplaten/t5-tiny-random", framework="pt")
+        outputs = translator("This is a test string", max_length=20)
+        self.assertEqual(
+            outputs,
+            [
+                {
+                    "translation_text": "Beide Beide Beide Beide Beide Beide Beide Beide Beide Beide Beide Beide Beide Beide Beide Beide Beide"
+                }
+            ],
+        )
+
+    @require_tf
+    def test_small_model_tf(self):
+        translator = pipeline("translation_en_to_ro", model="patrickvonplaten/t5-tiny-random", framework="tf")
+        outputs = translator("This is a test string", max_length=20)
+        self.assertEqual(
+            outputs,
+            [
+                {
+                    "translation_text": "Beide Beide Beide Beide Beide Beide Beide Beide Beide Beide Beide Beide Beide Beide Beide Beide Beide"
+                }
+            ],
+        )
+
+    @require_torch
+    def test_en_to_de_pt(self):
+        translator = pipeline("translation_en_to_de", model="patrickvonplaten/t5-tiny-random", framework="pt")
+        outputs = translator("This is a test string", max_length=20)
+        self.assertEqual(
+            outputs,
+            [
+                {
+                    "translation_text": "monoton monoton monoton monoton monoton monoton monoton monoton monoton monoton urine urine urine urine urine urine urine urine urine"
+                }
+            ],
+        )
+
+    @require_tf
+    def test_en_to_de_tf(self):
+        translator = pipeline("translation_en_to_de", model="patrickvonplaten/t5-tiny-random", framework="tf")
+        outputs = translator("This is a test string", max_length=20)
+        self.assertEqual(
+            outputs,
+            [
+                {
+                    "translation_text": "monoton monoton monoton monoton monoton monoton monoton monoton monoton monoton urine urine urine urine urine urine urine urine urine"
+                }
+            ],
+        )
+
+
+@is_pipeline_test
+class TranslationNewFormatPipelineTests(unittest.TestCase):
+    @require_torch
+    @slow
+    def test_default_translations(self):
+        # We don't provide a default for this pair
+        with self.assertRaises(ValueError):
+            pipeline(task="translation_cn_to_ar")
+
+        # but we do for this one
+        translator = pipeline(task="translation_en_to_de")
+        self.assertEqual(translator._preprocess_params["src_lang"], "en")
+        self.assertEqual(translator._preprocess_params["tgt_lang"], "de")
+
+    @require_torch
+    @slow
+    def test_multilingual_translation(self):
+        model = MBartForConditionalGeneration.from_pretrained("facebook/mbart-large-50-many-to-many-mmt")
+        tokenizer = MBart50TokenizerFast.from_pretrained("facebook/mbart-large-50-many-to-many-mmt")
+
+        translator = pipeline(task="translation", model=model, tokenizer=tokenizer)
+        # Missing src_lang, tgt_lang
+        with self.assertRaises(ValueError):
+            translator("This is a test")
+
+        outputs = translator("This is a test", src_lang="en_XX", tgt_lang="ar_AR")
+        self.assertEqual(outputs, [{"translation_text": "هذا إختبار"}])
+
+        outputs = translator("This is a test", src_lang="en_XX", tgt_lang="hi_IN")
+        self.assertEqual(outputs, [{"translation_text": "यह एक परीक्षण है"}])
+
+        # src_lang, tgt_lang can be defined at pipeline call time
+        translator = pipeline(task="translation", model=model, tokenizer=tokenizer, src_lang="en_XX", tgt_lang="ar_AR")
+        outputs = translator("This is a test")
+        self.assertEqual(outputs, [{"translation_text": "هذا إختبار"}])
+
+    @require_torch
+    def test_translation_on_odd_language(self):
+        model = "patrickvonplaten/t5-tiny-random"
+        translator = pipeline(task="translation_cn_to_ar", model=model)
+        self.assertEqual(translator._preprocess_params["src_lang"], "cn")
+        self.assertEqual(translator._preprocess_params["tgt_lang"], "ar")
+
+    @require_torch
+    def test_translation_default_language_selection(self):
+        model = "patrickvonplaten/t5-tiny-random"
+        with pytest.warns(UserWarning, match=r".*translation_en_to_de.*"):
+            translator = pipeline(task="translation", model=model)
+        self.assertEqual(translator.task, "translation_en_to_de")
+        self.assertEqual(translator._preprocess_params["src_lang"], "en")
+        self.assertEqual(translator._preprocess_params["tgt_lang"], "de")
+
+    @require_torch
+    def test_translation_with_no_language_no_model_fails(self):
+        with self.assertRaises(ValueError):
+            pipeline(task="translation")
--- a/tests/pipelines/test_pipelines_zero_shot.py
+++ b/tests/pipelines/test_pipelines_zero_shot.py
@@ -0,0 +1,246 @@
+# Copyright 2020 The HuggingFace Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import unittest
+
+from transformers import (
+    MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING,
+    TF_MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING,
+    Pipeline,
+    ZeroShotClassificationPipeline,
+    pipeline,
+)
+from transformers.testing_utils import is_pipeline_test, nested_simplify, require_tf, require_torch, slow
+
+from .test_pipelines_common import ANY, PipelineTestCaseMeta
+
+
+@is_pipeline_test
+class ZeroShotClassificationPipelineTests(unittest.TestCase, metaclass=PipelineTestCaseMeta):
+    model_mapping = MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING
+    tf_model_mapping = TF_MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING
+
+    def get_test_pipeline(self, model, tokenizer, feature_extractor):
+        classifier = ZeroShotClassificationPipeline(
+            model=model, tokenizer=tokenizer, candidate_labels=["polics", "health"]
+        )
+        return classifier, ["Who are you voting for in 2020?", "My stomach hurts."]
+
+    def run_pipeline_test(self, classifier, _):
+        outputs = classifier("Who are you voting for in 2020?", candidate_labels="politics")
+        self.assertEqual(outputs, {"sequence": ANY(str), "labels": [ANY(str)], "scores": [ANY(float)]})
+
+        # No kwarg
+        outputs = classifier("Who are you voting for in 2020?", ["politics"])
+        self.assertEqual(outputs, {"sequence": ANY(str), "labels": [ANY(str)], "scores": [ANY(float)]})
+
+        outputs = classifier("Who are you voting for in 2020?", candidate_labels=["politics"])
+        self.assertEqual(outputs, {"sequence": ANY(str), "labels": [ANY(str)], "scores": [ANY(float)]})
+
+        outputs = classifier("Who are you voting for in 2020?", candidate_labels="politics, public health")
+        self.assertEqual(
+            outputs, {"sequence": ANY(str), "labels": [ANY(str), ANY(str)], "scores": [ANY(float), ANY(float)]}
+        )
+        self.assertAlmostEqual(sum(nested_simplify(outputs["scores"])), 1.0)
+
+        outputs = classifier("Who are you voting for in 2020?", candidate_labels=["politics", "public health"])
+        self.assertEqual(
+            outputs, {"sequence": ANY(str), "labels": [ANY(str), ANY(str)], "scores": [ANY(float), ANY(float)]}
+        )
+        self.assertAlmostEqual(sum(nested_simplify(outputs["scores"])), 1.0)
+
+        outputs = classifier(
+            "Who are you voting for in 2020?", candidate_labels="politics", hypothesis_template="This text is about {}"
+        )
+        self.assertEqual(outputs, {"sequence": ANY(str), "labels": [ANY(str)], "scores": [ANY(float)]})
+
+        # https://github.com/huggingface/transformers/issues/13846
+        outputs = classifier(["I am happy"], ["positive", "negative"])
+        self.assertEqual(
+            outputs,
+            [
+                {"sequence": ANY(str), "labels": [ANY(str), ANY(str)], "scores": [ANY(float), ANY(float)]}
+                for i in range(1)
+            ],
+        )
+        outputs = classifier(["I am happy", "I am sad"], ["positive", "negative"])
+        self.assertEqual(
+            outputs,
+            [
+                {"sequence": ANY(str), "labels": [ANY(str), ANY(str)], "scores": [ANY(float), ANY(float)]}
+                for i in range(2)
+            ],
+        )
+
+        with self.assertRaises(ValueError):
+            classifier("", candidate_labels="politics")
+
+        with self.assertRaises(TypeError):
+            classifier(None, candidate_labels="politics")
+
+        with self.assertRaises(ValueError):
+            classifier("Who are you voting for in 2020?", candidate_labels="")
+
+        with self.assertRaises(TypeError):
+            classifier("Who are you voting for in 2020?", candidate_labels=None)
+
+        with self.assertRaises(ValueError):
+            classifier(
+                "Who are you voting for in 2020?",
+                candidate_labels="politics",
+                hypothesis_template="Not formatting template",
+            )
+
+        with self.assertRaises(AttributeError):
+            classifier(
+                "Who are you voting for in 2020?",
+                candidate_labels="politics",
+                hypothesis_template=None,
+            )
+
+        self.run_entailment_id(classifier)
+
+    def run_entailment_id(self, zero_shot_classifier: Pipeline):
+        config = zero_shot_classifier.model.config
+        original_label2id = config.label2id
+        original_entailment = zero_shot_classifier.entailment_id
+
+        config.label2id = {"LABEL_0": 0, "LABEL_1": 1, "LABEL_2": 2}
+        self.assertEqual(zero_shot_classifier.entailment_id, -1)
+
+        config.label2id = {"entailment": 0, "neutral": 1, "contradiction": 2}
+        self.assertEqual(zero_shot_classifier.entailment_id, 0)
+
+        config.label2id = {"ENTAIL": 0, "NON-ENTAIL": 1}
+        self.assertEqual(zero_shot_classifier.entailment_id, 0)
+
+        config.label2id = {"ENTAIL": 2, "NEUTRAL": 1, "CONTR": 0}
+        self.assertEqual(zero_shot_classifier.entailment_id, 2)
+
+        zero_shot_classifier.model.config.label2id = original_label2id
+        self.assertEqual(original_entailment, zero_shot_classifier.entailment_id)
+
+    @require_torch
+    def test_truncation(self):
+        zero_shot_classifier = pipeline(
+            "zero-shot-classification",
+            model="sshleifer/tiny-distilbert-base-cased-distilled-squad",
+            framework="pt",
+        )
+        # There was a regression in 4.10 for this
+        # Adding a test so we don't make the mistake again.
+        # https://github.com/huggingface/transformers/issues/13381#issuecomment-912343499
+        zero_shot_classifier(
+            "Who are you voting for in 2020?" * 100, candidate_labels=["politics", "public health", "science"]
+        )
+
+    @require_torch
+    def test_small_model_pt(self):
+        zero_shot_classifier = pipeline(
+            "zero-shot-classification",
+            model="sshleifer/tiny-distilbert-base-cased-distilled-squad",
+            framework="pt",
+        )
+        outputs = zero_shot_classifier(
+            "Who are you voting for in 2020?", candidate_labels=["politics", "public health", "science"]
+        )
+
+        self.assertEqual(
+            nested_simplify(outputs),
+            {
+                "sequence": "Who are you voting for in 2020?",
+                "labels": ["science", "public health", "politics"],
+                "scores": [0.333, 0.333, 0.333],
+            },
+        )
+
+    @require_tf
+    def test_small_model_tf(self):
+        zero_shot_classifier = pipeline(
+            "zero-shot-classification",
+            model="sshleifer/tiny-distilbert-base-cased-distilled-squad",
+            framework="tf",
+        )
+        outputs = zero_shot_classifier(
+            "Who are you voting for in 2020?", candidate_labels=["politics", "public health", "science"]
+        )
+
+        self.assertEqual(
+            nested_simplify(outputs),
+            {
+                "sequence": "Who are you voting for in 2020?",
+                "labels": ["science", "public health", "politics"],
+                "scores": [0.333, 0.333, 0.333],
+            },
+        )
+
+    @slow
+    @require_torch
+    def test_large_model_pt(self):
+        zero_shot_classifier = pipeline("zero-shot-classification", model="roberta-large-mnli", framework="pt")
+        outputs = zero_shot_classifier(
+            "Who are you voting for in 2020?", candidate_labels=["politics", "public health", "science"]
+        )
+
+        self.assertEqual(
+            nested_simplify(outputs),
+            {
+                "sequence": "Who are you voting for in 2020?",
+                "labels": ["politics", "public health", "science"],
+                "scores": [0.976, 0.015, 0.009],
+            },
+        )
+        outputs = zero_shot_classifier(
+            "The dominant sequence transduction models are based on complex recurrent or convolutional neural networks in an encoder-decoder configuration. The best performing models also connect the encoder and decoder through an attention mechanism. We propose a new simple network architecture, the Transformer, based solely on attention mechanisms, dispensing with recurrence and convolutions entirely. Experiments on two machine translation tasks show these models to be superior in quality while being more parallelizable and requiring significantly less time to train. Our model achieves 28.4 BLEU on the WMT 2014 English-to-German translation task, improving over the existing best results, including ensembles by over 2 BLEU. On the WMT 2014 English-to-French translation task, our model establishes a new single-model state-of-the-art BLEU score of 41.8 after training for 3.5 days on eight GPUs, a small fraction of the training costs of the best models from the literature. We show that the Transformer generalizes well to other tasks by applying it successfully to English constituency parsing both with large and limited training data.",
+            candidate_labels=["machine learning", "statistics", "translation", "vision"],
+            multi_label=True,
+        )
+        self.assertEqual(
+            nested_simplify(outputs),
+            {
+                "sequence": "The dominant sequence transduction models are based on complex recurrent or convolutional neural networks in an encoder-decoder configuration. The best performing models also connect the encoder and decoder through an attention mechanism. We propose a new simple network architecture, the Transformer, based solely on attention mechanisms, dispensing with recurrence and convolutions entirely. Experiments on two machine translation tasks show these models to be superior in quality while being more parallelizable and requiring significantly less time to train. Our model achieves 28.4 BLEU on the WMT 2014 English-to-German translation task, improving over the existing best results, including ensembles by over 2 BLEU. On the WMT 2014 English-to-French translation task, our model establishes a new single-model state-of-the-art BLEU score of 41.8 after training for 3.5 days on eight GPUs, a small fraction of the training costs of the best models from the literature. We show that the Transformer generalizes well to other tasks by applying it successfully to English constituency parsing both with large and limited training data.",
+                "labels": ["translation", "machine learning", "vision", "statistics"],
+                "scores": [0.817, 0.713, 0.018, 0.018],
+            },
+        )
+
+    @slow
+    @require_tf
+    def test_large_model_tf(self):
+        zero_shot_classifier = pipeline("zero-shot-classification", model="roberta-large-mnli", framework="tf")
+        outputs = zero_shot_classifier(
+            "Who are you voting for in 2020?", candidate_labels=["politics", "public health", "science"]
+        )
+
+        self.assertEqual(
+            nested_simplify(outputs),
+            {
+                "sequence": "Who are you voting for in 2020?",
+                "labels": ["politics", "public health", "science"],
+                "scores": [0.976, 0.015, 0.009],
+            },
+        )
+        outputs = zero_shot_classifier(
+            "The dominant sequence transduction models are based on complex recurrent or convolutional neural networks in an encoder-decoder configuration. The best performing models also connect the encoder and decoder through an attention mechanism. We propose a new simple network architecture, the Transformer, based solely on attention mechanisms, dispensing with recurrence and convolutions entirely. Experiments on two machine translation tasks show these models to be superior in quality while being more parallelizable and requiring significantly less time to train. Our model achieves 28.4 BLEU on the WMT 2014 English-to-German translation task, improving over the existing best results, including ensembles by over 2 BLEU. On the WMT 2014 English-to-French translation task, our model establishes a new single-model state-of-the-art BLEU score of 41.8 after training for 3.5 days on eight GPUs, a small fraction of the training costs of the best models from the literature. We show that the Transformer generalizes well to other tasks by applying it successfully to English constituency parsing both with large and limited training data.",
+            candidate_labels=["machine learning", "statistics", "translation", "vision"],
+            multi_label=True,
+        )
+        self.assertEqual(
+            nested_simplify(outputs),
+            {
+                "sequence": "The dominant sequence transduction models are based on complex recurrent or convolutional neural networks in an encoder-decoder configuration. The best performing models also connect the encoder and decoder through an attention mechanism. We propose a new simple network architecture, the Transformer, based solely on attention mechanisms, dispensing with recurrence and convolutions entirely. Experiments on two machine translation tasks show these models to be superior in quality while being more parallelizable and requiring significantly less time to train. Our model achieves 28.4 BLEU on the WMT 2014 English-to-German translation task, improving over the existing best results, including ensembles by over 2 BLEU. On the WMT 2014 English-to-French translation task, our model establishes a new single-model state-of-the-art BLEU score of 41.8 after training for 3.5 days on eight GPUs, a small fraction of the training costs of the best models from the literature. We show that the Transformer generalizes well to other tasks by applying it successfully to English constituency parsing both with large and limited training data.",
+                "labels": ["translation", "machine learning", "vision", "statistics"],
+                "scores": [0.817, 0.713, 0.018, 0.018],
+            },
+        )
--- a/tests/pipelines/test_pipelines_zero_shot_image_classification.py
+++ b/tests/pipelines/test_pipelines_zero_shot_image_classification.py
@@ -0,0 +1,238 @@
+# Copyright 2021 The HuggingFace Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import unittest
+
+from transformers import is_vision_available
+from transformers.pipelines import pipeline
+from transformers.testing_utils import (
+    is_pipeline_test,
+    nested_simplify,
+    require_tf,
+    require_torch,
+    require_vision,
+    slow,
+)
+
+from .test_pipelines_common import ANY, PipelineTestCaseMeta
+
+
+if is_vision_available():
+    from PIL import Image
+else:
+
+    class Image:
+        @staticmethod
+        def open(*args, **kwargs):
+            pass
+
+
+@require_vision
+@is_pipeline_test
+class ZeroShotImageClassificationPipelineTests(unittest.TestCase, metaclass=PipelineTestCaseMeta):
+    # Deactivating auto tests since we don't have a good MODEL_FOR_XX mapping,
+    # and only CLIP would be there for now.
+    # model_mapping = {CLIPConfig: CLIPModel}
+
+    # def get_test_pipeline(self, model, tokenizer, feature_extractor):
+    #     if tokenizer is None:
+    #         # Side effect of no Fast Tokenizer class for these model, so skipping
+    #         # But the slow tokenizer test should still run as they're quite small
+    #         self.skipTest("No tokenizer available")
+    #         return
+    #         # return None, None
+
+    #     image_classifier = ZeroShotImageClassificationPipeline(
+    #         model=model, tokenizer=tokenizer, feature_extractor=feature_extractor
+    #     )
+
+    #     # test with a raw waveform
+    #     image = Image.open("./tests/fixtures/tests_samples/COCO/000000039769.png")
+    #     image2 = Image.open("./tests/fixtures/tests_samples/COCO/000000039769.png")
+    #     return image_classifier, [image, image2]
+
+    # def run_pipeline_test(self, pipe, examples):
+    #     image = Image.open("./tests/fixtures/tests_samples/COCO/000000039769.png")
+    #     outputs = pipe(image, candidate_labels=["A", "B"])
+    #     self.assertEqual(outputs, {"text": ANY(str)})
+
+    #     # Batching
+    #     outputs = pipe([image] * 3, batch_size=2, candidate_labels=["A", "B"])
+
+    @require_torch
+    def test_small_model_pt(self):
+        image_classifier = pipeline(
+            model="hf-internal-testing/tiny-random-clip-zero-shot-image-classification",
+        )
+        image = Image.open("./tests/fixtures/tests_samples/COCO/000000039769.png")
+        output = image_classifier(image, candidate_labels=["a", "b", "c"])
+
+        self.assertEqual(
+            nested_simplify(output),
+            [{"score": 0.333, "label": "a"}, {"score": 0.333, "label": "b"}, {"score": 0.333, "label": "c"}],
+        )
+
+        output = image_classifier([image] * 5, candidate_labels=["A", "B", "C"], batch_size=2)
+        self.assertEqual(
+            nested_simplify(output),
+            # Pipeline outputs are supposed to be deterministic and
+            # So we could in theory have real values "A", "B", "C" instead
+            # of ANY(str).
+            # However it seems that in this particular case, the floating
+            # scores are so close, we enter floating error approximation
+            # and the order is not guaranteed anymore with batching.
+            [
+                [
+                    {"score": 0.333, "label": ANY(str)},
+                    {"score": 0.333, "label": ANY(str)},
+                    {"score": 0.333, "label": ANY(str)},
+                ],
+                [
+                    {"score": 0.333, "label": ANY(str)},
+                    {"score": 0.333, "label": ANY(str)},
+                    {"score": 0.333, "label": ANY(str)},
+                ],
+                [
+                    {"score": 0.333, "label": ANY(str)},
+                    {"score": 0.333, "label": ANY(str)},
+                    {"score": 0.333, "label": ANY(str)},
+                ],
+                [
+                    {"score": 0.333, "label": ANY(str)},
+                    {"score": 0.333, "label": ANY(str)},
+                    {"score": 0.333, "label": ANY(str)},
+                ],
+                [
+                    {"score": 0.333, "label": ANY(str)},
+                    {"score": 0.333, "label": ANY(str)},
+                    {"score": 0.333, "label": ANY(str)},
+                ],
+            ],
+        )
+
+    @require_tf
+    def test_small_model_tf(self):
+        image_classifier = pipeline(
+            model="hf-internal-testing/tiny-random-clip-zero-shot-image-classification", framework="tf"
+        )
+        image = Image.open("./tests/fixtures/tests_samples/COCO/000000039769.png")
+        output = image_classifier(image, candidate_labels=["a", "b", "c"])
+
+        self.assertEqual(
+            nested_simplify(output),
+            [{"score": 0.333, "label": "a"}, {"score": 0.333, "label": "b"}, {"score": 0.333, "label": "c"}],
+        )
+
+        output = image_classifier([image] * 5, candidate_labels=["A", "B", "C"], batch_size=2)
+        self.assertEqual(
+            nested_simplify(output),
+            # Pipeline outputs are supposed to be deterministic and
+            # So we could in theory have real values "A", "B", "C" instead
+            # of ANY(str).
+            # However it seems that in this particular case, the floating
+            # scores are so close, we enter floating error approximation
+            # and the order is not guaranteed anymore with batching.
+            [
+                [
+                    {"score": 0.333, "label": ANY(str)},
+                    {"score": 0.333, "label": ANY(str)},
+                    {"score": 0.333, "label": ANY(str)},
+                ],
+                [
+                    {"score": 0.333, "label": ANY(str)},
+                    {"score": 0.333, "label": ANY(str)},
+                    {"score": 0.333, "label": ANY(str)},
+                ],
+                [
+                    {"score": 0.333, "label": ANY(str)},
+                    {"score": 0.333, "label": ANY(str)},
+                    {"score": 0.333, "label": ANY(str)},
+                ],
+                [
+                    {"score": 0.333, "label": ANY(str)},
+                    {"score": 0.333, "label": ANY(str)},
+                    {"score": 0.333, "label": ANY(str)},
+                ],
+                [
+                    {"score": 0.333, "label": ANY(str)},
+                    {"score": 0.333, "label": ANY(str)},
+                    {"score": 0.333, "label": ANY(str)},
+                ],
+            ],
+        )
+
+    @slow
+    @require_torch
+    def test_large_model_pt(self):
+        image_classifier = pipeline(
+            task="zero-shot-image-classification",
+            model="openai/clip-vit-base-patch32",
+        )
+        # This is an image of 2 cats with remotes and no planes
+        image = Image.open("./tests/fixtures/tests_samples/COCO/000000039769.png")
+        output = image_classifier(image, candidate_labels=["cat", "plane", "remote"])
+
+        self.assertEqual(
+            nested_simplify(output),
+            [
+                {"score": 0.941, "label": "cat"},
+                {"score": 0.055, "label": "remote"},
+                {"score": 0.003, "label": "plane"},
+            ],
+        )
+
+        output = image_classifier([image] * 5, candidate_labels=["cat", "plane", "remote"], batch_size=2)
+        self.assertEqual(
+            nested_simplify(output),
+            [
+                [
+                    {"score": 0.941, "label": "cat"},
+                    {"score": 0.055, "label": "remote"},
+                    {"score": 0.003, "label": "plane"},
+                ],
+            ]
+            * 5,
+        )
+
+    @slow
+    @require_tf
+    def test_large_model_tf(self):
+        image_classifier = pipeline(
+            task="zero-shot-image-classification", model="openai/clip-vit-base-patch32", framework="tf"
+        )
+        # This is an image of 2 cats with remotes and no planes
+        image = Image.open("./tests/fixtures/tests_samples/COCO/000000039769.png")
+        output = image_classifier(image, candidate_labels=["cat", "plane", "remote"])
+
+        self.assertEqual(
+            nested_simplify(output),
+            [
+                {"score": 0.941, "label": "cat"},
+                {"score": 0.055, "label": "remote"},
+                {"score": 0.003, "label": "plane"},
+            ],
+        )
+
+        output = image_classifier([image] * 5, candidate_labels=["cat", "plane", "remote"], batch_size=2)
+        self.assertEqual(
+            nested_simplify(output),
+            [
+                [
+                    {"score": 0.941, "label": "cat"},
+                    {"score": 0.055, "label": "remote"},
+                    {"score": 0.003, "label": "plane"},
+                ],
+            ]
+            * 5,
+        )