Allow FP16 or other precision inference for Pipelines (#31342)

* cast image features to model.dtype where needed to support FP16 or other precision in pipelines * Update src/transformers/pipelines/image_feature_extraction.py Co-authored-by: amyeroberts <22614925+amyeroberts@users.noreply.github.com> * Use .to instead * Add FP16 pipeline support for zeroshot audio classification * Remove unused torch imports * Add docs on FP16 pipeline * Remove unused import * Add FP16 tests to pipeline mixin * Add fp16 placeholder for mask_generation pipeline test * Add FP16 tests for all pipelines * Fix formatting * Remove torch_dtype arg from is_pipeline_test_to_skip* * Fix format * trigger ci --------- Co-authored-by: amyeroberts <22614925+amyeroberts@users.noreply.github.com>
2024-07-06 00:21:50 +08:00
parent e786844425
commit ac26260436
45 changed files with 354 additions and 79 deletions
--- a/docs/source/en/main_classes/pipelines.md
+++ b/docs/source/en/main_classes/pipelines.md
@@ -270,6 +270,11 @@ This is a simplified view, since the pipeline can handle automatically the batch
 about how many forward passes you inputs are actually going to trigger, you can optimize the `batch_size`
 independently of the inputs. The caveats from the previous section still apply.

+## Pipeline FP16 inference
+Models can be run in FP16, which can be significantly faster on GPU while saving memory. Most models will not suffer a noticeable loss in output quality from this, and the larger the model, the less likely it is to be affected.
+
+To enable FP16 inference, you can simply pass `torch_dtype=torch.float16` or `torch_dtype='float16'` to the pipeline constructor. Note that this only works for models with a PyTorch backend. Your inputs will be converted to FP16 internally.
+
 ## Pipeline custom code

 If you want to override a specific pipeline.
--- a/docs/source/en/pipeline_tutorial.md
+++ b/docs/source/en/pipeline_tutorial.md
@@ -113,7 +113,9 @@ This will work regardless of whether you are using PyTorch or Tensorflow.
 transcriber = pipeline(model="openai/whisper-large-v2", device=0)
 ```

-If the model is too large for a single GPU and you are using PyTorch, you can set `device_map="auto"` to automatically 
+If the model is too large for a single GPU and you are using PyTorch, you can set `torch_dtype='float16'` to enable FP16 precision inference and reduce the model's memory footprint. This usually does not cause a significant drop in output quality, but make sure you evaluate it on your models!
+
+Alternatively, you can set `device_map="auto"` to automatically 
 determine how to load and store the model weights. Using the `device_map` argument requires the 🤗 [Accelerate](https://huggingface.co/docs/accelerate)
 package:

@@ -342,4 +344,3 @@ gr.Interface.from_pipeline(pipe).launch()

 By default, the web demo runs on a local server. If you'd like to share it with others, you can generate a temporary public
 link by setting `share=True` in `launch()`. You can also host your demo on [Hugging Face Spaces](https://huggingface.co/spaces) for a permanent link. 
-
--- a/src/transformers/pipelines/depth_estimation.py
+++ b/src/transformers/pipelines/depth_estimation.py
@@ -91,6 +91,8 @@ class DepthEstimationPipeline(Pipeline):
        image = load_image(image, timeout)
        self.image_size = image.size
        model_inputs = self.image_processor(images=image, return_tensors=self.framework)
+        if self.framework == "pt":
+            model_inputs = model_inputs.to(self.torch_dtype)
        return model_inputs

    def _forward(self, model_inputs):
--- a/src/transformers/pipelines/document_question_answering.py
+++ b/src/transformers/pipelines/document_question_answering.py
@@ -294,7 +294,10 @@ class DocumentQuestionAnsweringPipeline(ChunkPipeline):
        if input.get("image", None) is not None:
            image = load_image(input["image"], timeout=timeout)
            if self.image_processor is not None:
-                image_features.update(self.image_processor(images=image, return_tensors=self.framework))
+                image_inputs = self.image_processor(images=image, return_tensors=self.framework)
+                if self.framework == "pt":
+                    image_inputs = image_inputs.to(self.torch_dtype)
+                image_features.update(image_inputs)
            elif self.feature_extractor is not None:
                image_features.update(self.feature_extractor(images=image, return_tensors=self.framework))
            elif self.model_type == ModelType.VisionEncoderDecoder:
--- a/src/transformers/pipelines/image_classification.py
+++ b/src/transformers/pipelines/image_classification.py
@@ -161,6 +161,8 @@ class ImageClassificationPipeline(Pipeline):
    def preprocess(self, image, timeout=None):
        image = load_image(image, timeout=timeout)
        model_inputs = self.image_processor(images=image, return_tensors=self.framework)
+        if self.framework == "pt":
+            model_inputs = model_inputs.to(self.torch_dtype)
        return model_inputs

    def _forward(self, model_inputs):
--- a/src/transformers/pipelines/image_feature_extraction.py
+++ b/src/transformers/pipelines/image_feature_extraction.py
@@ -60,6 +60,8 @@ class ImageFeatureExtractionPipeline(Pipeline):
    def preprocess(self, image, timeout=None, **image_processor_kwargs) -> Dict[str, GenericTensor]:
        image = load_image(image, timeout=timeout)
        model_inputs = self.image_processor(image, return_tensors=self.framework, **image_processor_kwargs)
+        if self.framework == "pt":
+            model_inputs = model_inputs.to(self.torch_dtype)
        return model_inputs

    def _forward(self, model_inputs):
--- a/src/transformers/pipelines/image_segmentation.py
+++ b/src/transformers/pipelines/image_segmentation.py
@@ -147,6 +147,8 @@ class ImageSegmentationPipeline(Pipeline):
            else:
                kwargs = {"task_inputs": [subtask]}
            inputs = self.image_processor(images=[image], return_tensors="pt", **kwargs)
+            if self.framework == "pt":
+                inputs = inputs.to(self.torch_dtype)
            inputs["task_inputs"] = self.tokenizer(
                inputs["task_inputs"],
                padding="max_length",
@@ -155,6 +157,8 @@ class ImageSegmentationPipeline(Pipeline):
            )["input_ids"]
        else:
            inputs = self.image_processor(images=[image], return_tensors="pt")
+            if self.framework == "pt":
+                inputs = inputs.to(self.torch_dtype)
        inputs["target_size"] = target_size
        return inputs

--- a/src/transformers/pipelines/image_to_image.py
+++ b/src/transformers/pipelines/image_to_image.py
@@ -119,6 +119,8 @@ class ImageToImagePipeline(Pipeline):
    def preprocess(self, image, timeout=None):
        image = load_image(image, timeout=timeout)
        inputs = self.image_processor(images=[image], return_tensors="pt")
+        if self.framework == "pt":
+            inputs = inputs.to(self.torch_dtype)
        return inputs

    def postprocess(self, model_outputs):
--- a/src/transformers/pipelines/image_to_text.py
+++ b/src/transformers/pipelines/image_to_text.py
@@ -138,6 +138,8 @@ class ImageToTextPipeline(Pipeline):

            if model_type == "git":
                model_inputs = self.image_processor(images=image, return_tensors=self.framework)
+                if self.framework == "pt":
+                    model_inputs = model_inputs.to(self.torch_dtype)
                input_ids = self.tokenizer(text=prompt, add_special_tokens=False).input_ids
                input_ids = [self.tokenizer.cls_token_id] + input_ids
                input_ids = torch.tensor(input_ids).unsqueeze(0)
@@ -145,10 +147,14 @@ class ImageToTextPipeline(Pipeline):

            elif model_type == "pix2struct":
                model_inputs = self.image_processor(images=image, header_text=prompt, return_tensors=self.framework)
+                if self.framework == "pt":
+                    model_inputs = model_inputs.to(self.torch_dtype)

            elif model_type != "vision-encoder-decoder":
                # vision-encoder-decoder does not support conditional generation
                model_inputs = self.image_processor(images=image, return_tensors=self.framework)
+                if self.framework == "pt":
+                    model_inputs = model_inputs.to(self.torch_dtype)
                text_inputs = self.tokenizer(prompt, return_tensors=self.framework)
                model_inputs.update(text_inputs)

@@ -157,6 +163,8 @@ class ImageToTextPipeline(Pipeline):

        else:
            model_inputs = self.image_processor(images=image, return_tensors=self.framework)
+            if self.framework == "pt":
+                model_inputs = model_inputs.to(self.torch_dtype)

        if self.model.config.model_type == "git" and prompt is None:
            model_inputs["input_ids"] = None
--- a/src/transformers/pipelines/mask_generation.py
+++ b/src/transformers/pipelines/mask_generation.py
@@ -181,6 +181,8 @@ class MaskGenerationPipeline(ChunkPipeline):
            image, target_size, crops_n_layers, crop_overlap_ratio, points_per_crop, crop_n_points_downscale_factor
        )
        model_inputs = self.image_processor(images=cropped_images, return_tensors="pt")
+        if self.framework == "pt":
+            model_inputs = model_inputs.to(self.torch_dtype)

        with self.device_placement():
            if self.framework == "pt":
--- a/src/transformers/pipelines/object_detection.py
+++ b/src/transformers/pipelines/object_detection.py
@@ -107,6 +107,8 @@ class ObjectDetectionPipeline(Pipeline):
        image = load_image(image, timeout=timeout)
        target_size = torch.IntTensor([[image.height, image.width]])
        inputs = self.image_processor(images=[image], return_tensors="pt")
+        if self.framework == "pt":
+            inputs = inputs.to(self.torch_dtype)
        if self.tokenizer is not None:
            inputs = self.tokenizer(text=inputs["words"], boxes=inputs["boxes"], return_tensors="pt")
        inputs["target_size"] = target_size
--- a/src/transformers/pipelines/video_classification.py
+++ b/src/transformers/pipelines/video_classification.py
@@ -106,6 +106,8 @@ class VideoClassificationPipeline(Pipeline):
        video = list(video)

        model_inputs = self.image_processor(video, return_tensors=self.framework)
+        if self.framework == "pt":
+            model_inputs = model_inputs.to(self.torch_dtype)
        return model_inputs

    def _forward(self, model_inputs):
--- a/src/transformers/pipelines/visual_question_answering.py
+++ b/src/transformers/pipelines/visual_question_answering.py
@@ -155,6 +155,8 @@ class VisualQuestionAnsweringPipeline(Pipeline):
            truncation=truncation,
        )
        image_features = self.image_processor(images=image, return_tensors=self.framework)
+        if self.framework == "pt":
+            image_features = image_features.to(self.torch_dtype)
        model_inputs.update(image_features)
        return model_inputs

--- a/src/transformers/pipelines/zero_shot_audio_classification.py
+++ b/src/transformers/pipelines/zero_shot_audio_classification.py
@@ -121,6 +121,8 @@ class ZeroShotAudioClassificationPipeline(Pipeline):
        inputs = self.feature_extractor(
            [audio], sampling_rate=self.feature_extractor.sampling_rate, return_tensors="pt"
        )
+        if self.framework == "pt":
+            inputs = inputs.to(self.torch_dtype)
        inputs["candidate_labels"] = candidate_labels
        sequences = [hypothesis_template.format(x) for x in candidate_labels]
        text_inputs = self.tokenizer(sequences, return_tensors=self.framework, padding=True)
--- a/src/transformers/pipelines/zero_shot_image_classification.py
+++ b/src/transformers/pipelines/zero_shot_image_classification.py
@@ -120,6 +120,8 @@ class ZeroShotImageClassificationPipeline(Pipeline):
    def preprocess(self, image, candidate_labels=None, hypothesis_template="This is a photo of {}.", timeout=None):
        image = load_image(image, timeout=timeout)
        inputs = self.image_processor(images=[image], return_tensors=self.framework)
+        if self.framework == "pt":
+            inputs = inputs.to(self.torch_dtype)
        inputs["candidate_labels"] = candidate_labels
        sequences = [hypothesis_template.format(x) for x in candidate_labels]
        padding = "max_length" if self.model.config.model_type == "siglip" else True
--- a/src/transformers/pipelines/zero_shot_object_detection.py
+++ b/src/transformers/pipelines/zero_shot_object_detection.py
@@ -156,6 +156,8 @@ class ZeroShotObjectDetectionPipeline(ChunkPipeline):
        for i, candidate_label in enumerate(candidate_labels):
            text_inputs = self.tokenizer(candidate_label, return_tensors=self.framework)
            image_features = self.image_processor(image, return_tensors=self.framework)
+            if self.framework == "pt":
+                image_features = image_features.to(self.torch_dtype)
            yield {
                "is_last": i == len(candidate_labels) - 1,
                "target_size": target_size,
--- a/tests/pipelines/test_pipelines_audio_classification.py
+++ b/tests/pipelines/test_pipelines_audio_classification.py
@@ -35,8 +35,10 @@ class AudioClassificationPipelineTests(unittest.TestCase):
    model_mapping = MODEL_FOR_AUDIO_CLASSIFICATION_MAPPING
    tf_model_mapping = TF_MODEL_FOR_AUDIO_CLASSIFICATION_MAPPING

-    def get_test_pipeline(self, model, tokenizer, processor):
-        audio_classifier = AudioClassificationPipeline(model=model, feature_extractor=processor)
+    def get_test_pipeline(self, model, tokenizer, processor, torch_dtype="float32"):
+        audio_classifier = AudioClassificationPipeline(
+            model=model, feature_extractor=processor, torch_dtype=torch_dtype
+        )

        # test with a raw waveform
        audio = np.zeros((34000,))
--- a/tests/pipelines/test_pipelines_automatic_speech_recognition.py
+++ b/tests/pipelines/test_pipelines_automatic_speech_recognition.py
@@ -66,14 +66,14 @@ class AutomaticSpeechRecognitionPipelineTests(unittest.TestCase):
        + (MODEL_FOR_CTC_MAPPING.items() if MODEL_FOR_CTC_MAPPING else [])
    )

-    def get_test_pipeline(self, model, tokenizer, processor):
+    def get_test_pipeline(self, model, tokenizer, processor, torch_dtype="float32"):
        if tokenizer is None:
            # Side effect of no Fast Tokenizer class for these model, so skipping
            # But the slow tokenizer test should still run as they're quite small
            self.skipTest(reason="No tokenizer available")

        speech_recognizer = AutomaticSpeechRecognitionPipeline(
-            model=model, tokenizer=tokenizer, feature_extractor=processor
+            model=model, tokenizer=tokenizer, feature_extractor=processor, torch_dtype=torch_dtype
        )

        # test with a raw waveform
--- a/tests/pipelines/test_pipelines_depth_estimation.py
+++ b/tests/pipelines/test_pipelines_depth_estimation.py
@@ -56,8 +56,8 @@ def hashimage(image: Image) -> str:
 class DepthEstimationPipelineTests(unittest.TestCase):
    model_mapping = MODEL_FOR_DEPTH_ESTIMATION_MAPPING

-    def get_test_pipeline(self, model, tokenizer, processor):
-        depth_estimator = DepthEstimationPipeline(model=model, image_processor=processor)
+    def get_test_pipeline(self, model, tokenizer, processor, torch_dtype="float32"):
+        depth_estimator = DepthEstimationPipeline(model=model, image_processor=processor, torch_dtype=torch_dtype)
        return depth_estimator, [
            "./tests/fixtures/tests_samples/COCO/000000039769.png",
            "./tests/fixtures/tests_samples/COCO/000000039769.png",
--- a/tests/pipelines/test_pipelines_document_question_answering.py
+++ b/tests/pipelines/test_pipelines_document_question_answering.py
@@ -61,9 +61,13 @@ class DocumentQuestionAnsweringPipelineTests(unittest.TestCase):

    @require_pytesseract
    @require_vision
-    def get_test_pipeline(self, model, tokenizer, processor):
+    def get_test_pipeline(self, model, tokenizer, processor, torch_dtype="float32"):
        dqa_pipeline = pipeline(
-            "document-question-answering", model=model, tokenizer=tokenizer, image_processor=processor
+            "document-question-answering",
+            model=model,
+            tokenizer=tokenizer,
+            image_processor=processor,
+            torch_dtype=torch_dtype,
        )

        image = INVOICE_URL
--- a/tests/pipelines/test_pipelines_feature_extraction.py
+++ b/tests/pipelines/test_pipelines_feature_extraction.py
@@ -174,7 +174,7 @@ class FeatureExtractionPipelineTests(unittest.TestCase):
            raise ValueError("We expect lists of floats, nothing else")
        return shape

-    def get_test_pipeline(self, model, tokenizer, processor):
+    def get_test_pipeline(self, model, tokenizer, processor, torch_dtype="float32"):
        if tokenizer is None:
            self.skipTest(reason="No tokenizer")
        elif (
@@ -193,7 +193,9 @@ class FeatureExtractionPipelineTests(unittest.TestCase):
                For now ignore those.
                """
            )
-        feature_extractor = FeatureExtractionPipeline(model=model, tokenizer=tokenizer, feature_extractor=processor)
+        feature_extractor = FeatureExtractionPipeline(
+            model=model, tokenizer=tokenizer, feature_extractor=processor, torch_dtype=torch_dtype
+        )
        return feature_extractor, ["This is a test", "This is another test"]

    def run_pipeline_test(self, feature_extractor, examples):
--- a/tests/pipelines/test_pipelines_fill_mask.py
+++ b/tests/pipelines/test_pipelines_fill_mask.py
@@ -251,11 +251,11 @@ class FillMaskPipelineTests(unittest.TestCase):
        unmasker.tokenizer.pad_token = None
        self.run_pipeline_test(unmasker, [])

-    def get_test_pipeline(self, model, tokenizer, processor):
+    def get_test_pipeline(self, model, tokenizer, processor, torch_dtype="float32"):
        if tokenizer is None or tokenizer.mask_token_id is None:
            self.skipTest(reason="The provided tokenizer has no mask token, (probably reformer or wav2vec2)")

-        fill_masker = FillMaskPipeline(model=model, tokenizer=tokenizer)
+        fill_masker = FillMaskPipeline(model=model, tokenizer=tokenizer, torch_dtype=torch_dtype)
        examples = [
            f"This is another {tokenizer.mask_token} test",
        ]
--- a/tests/pipelines/test_pipelines_image_classification.py
+++ b/tests/pipelines/test_pipelines_image_classification.py
@@ -55,8 +55,10 @@ class ImageClassificationPipelineTests(unittest.TestCase):
    model_mapping = MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING
    tf_model_mapping = TF_MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING

-    def get_test_pipeline(self, model, tokenizer, processor):
-        image_classifier = ImageClassificationPipeline(model=model, image_processor=processor, top_k=2)
+    def get_test_pipeline(self, model, tokenizer, processor, torch_dtype="float32"):
+        image_classifier = ImageClassificationPipeline(
+            model=model, image_processor=processor, top_k=2, torch_dtype=torch_dtype
+        )
        examples = [
            Image.open("./tests/fixtures/tests_samples/COCO/000000039769.png"),
            "http://images.cocodataset.org/val2017/000000039769.jpg",
--- a/tests/pipelines/test_pipelines_image_feature_extraction.py
+++ b/tests/pipelines/test_pipelines_image_feature_extraction.py
@@ -157,7 +157,7 @@ class ImageFeatureExtractionPipelineTests(unittest.TestCase):
        outputs = feature_extractor(img, return_tensors=True)
        self.assertTrue(tf.is_tensor(outputs))

-    def get_test_pipeline(self, model, tokenizer, processor):
+    def get_test_pipeline(self, model, tokenizer, processor, torch_dtype="float32"):
        if processor is None:
            self.skipTest(reason="No image processor")

@@ -175,7 +175,9 @@ class ImageFeatureExtractionPipelineTests(unittest.TestCase):
                """
            )

-        feature_extractor = ImageFeatureExtractionPipeline(model=model, image_processor=processor)
+        feature_extractor = ImageFeatureExtractionPipeline(
+            model=model, image_processor=processor, torch_dtype=torch_dtype
+        )
        img = prepare_img()
        return feature_extractor, [img, img]

--- a/tests/pipelines/test_pipelines_image_segmentation.py
+++ b/tests/pipelines/test_pipelines_image_segmentation.py
@@ -87,8 +87,8 @@ class ImageSegmentationPipelineTests(unittest.TestCase):
        + (MODEL_FOR_INSTANCE_SEGMENTATION_MAPPING.items() if MODEL_FOR_INSTANCE_SEGMENTATION_MAPPING else [])
    )

-    def get_test_pipeline(self, model, tokenizer, processor):
-        image_segmenter = ImageSegmentationPipeline(model=model, image_processor=processor)
+    def get_test_pipeline(self, model, tokenizer, processor, torch_dtype="float32"):
+        image_segmenter = ImageSegmentationPipeline(model=model, image_processor=processor, torch_dtype=torch_dtype)
        return image_segmenter, [
            "./tests/fixtures/tests_samples/COCO/000000039769.png",
            "./tests/fixtures/tests_samples/COCO/000000039769.png",
--- a/tests/pipelines/test_pipelines_image_to_image.py
+++ b/tests/pipelines/test_pipelines_image_to_image.py
@@ -54,9 +54,9 @@ class ImageToImagePipelineTests(unittest.TestCase):
    @require_torch
    @require_vision
    @slow
-    def test_pipeline(self):
+    def test_pipeline(self, torch_dtype="float32"):
        model_id = "caidas/swin2SR-classical-sr-x2-64"
-        upscaler = pipeline("image-to-image", model=model_id)
+        upscaler = pipeline("image-to-image", model=model_id, torch_dtype=torch_dtype)
        upscaled_list = upscaler(self.examples)

        self.assertEqual(len(upscaled_list), len(self.examples))
@@ -66,6 +66,12 @@ class ImageToImagePipelineTests(unittest.TestCase):
        self.assertEqual(upscaled_list[0].size, (1296, 976))
        self.assertEqual(upscaled_list[1].size, (1296, 976))

+    @require_torch
+    @require_vision
+    @slow
+    def test_pipeline_fp16(self):
+        self.test_pipeline(torch_dtype="float16")
+
    @require_torch
    @require_vision
    @slow
--- a/tests/pipelines/test_pipelines_image_to_text.py
+++ b/tests/pipelines/test_pipelines_image_to_text.py
@@ -45,8 +45,10 @@ class ImageToTextPipelineTests(unittest.TestCase):
    model_mapping = MODEL_FOR_VISION_2_SEQ_MAPPING
    tf_model_mapping = TF_MODEL_FOR_VISION_2_SEQ_MAPPING

-    def get_test_pipeline(self, model, tokenizer, processor):
-        pipe = pipeline("image-to-text", model=model, tokenizer=tokenizer, image_processor=processor)
+    def get_test_pipeline(self, model, tokenizer, processor, torch_dtype="float32"):
+        pipe = pipeline(
+            "image-to-text", model=model, tokenizer=tokenizer, image_processor=processor, torch_dtype=torch_dtype
+        )
        examples = [
            Image.open("./tests/fixtures/tests_samples/COCO/000000039769.png"),
            "./tests/fixtures/tests_samples/COCO/000000039769.png",
--- a/tests/pipelines/test_pipelines_mask_generation.py
+++ b/tests/pipelines/test_pipelines_mask_generation.py
@@ -67,8 +67,8 @@ class MaskGenerationPipelineTests(unittest.TestCase):
        (list(TF_MODEL_FOR_MASK_GENERATION_MAPPING.items()) if TF_MODEL_FOR_MASK_GENERATION_MAPPING else [])
    )

-    def get_test_pipeline(self, model, tokenizer, processor):
-        image_segmenter = MaskGenerationPipeline(model=model, image_processor=processor)
+    def get_test_pipeline(self, model, tokenizer, processor, torch_dtype="float32"):
+        image_segmenter = MaskGenerationPipeline(model=model, image_processor=processor, torch_dtype=torch_dtype)
        return image_segmenter, [
            "./tests/fixtures/tests_samples/COCO/000000039769.png",
            "./tests/fixtures/tests_samples/COCO/000000039769.png",
--- a/tests/pipelines/test_pipelines_object_detection.py
+++ b/tests/pipelines/test_pipelines_object_detection.py
@@ -53,8 +53,8 @@ else:
 class ObjectDetectionPipelineTests(unittest.TestCase):
    model_mapping = MODEL_FOR_OBJECT_DETECTION_MAPPING

-    def get_test_pipeline(self, model, tokenizer, processor):
-        object_detector = ObjectDetectionPipeline(model=model, image_processor=processor)
+    def get_test_pipeline(self, model, tokenizer, processor, torch_dtype="float32"):
+        object_detector = ObjectDetectionPipeline(model=model, image_processor=processor, torch_dtype=torch_dtype)
        return object_detector, ["./tests/fixtures/tests_samples/COCO/000000039769.png"]

    def run_pipeline_test(self, object_detector, examples):
--- a/tests/pipelines/test_pipelines_question_answering.py
+++ b/tests/pipelines/test_pipelines_question_answering.py
@@ -50,12 +50,12 @@ class QAPipelineTests(unittest.TestCase):
            config: model for config, model in tf_model_mapping.items() if config.__name__ not in _TO_SKIP
        }

-    def get_test_pipeline(self, model, tokenizer, processor):
+    def get_test_pipeline(self, model, tokenizer, processor, torch_dtype="float32"):
        if isinstance(model.config, LxmertConfig):
            # This is an bimodal model, we need to find a more consistent way
            # to switch on those models.
            return None, None
-        question_answerer = QuestionAnsweringPipeline(model, tokenizer)
+        question_answerer = QuestionAnsweringPipeline(model, tokenizer, torch_dtype=torch_dtype)

        examples = [
            {"question": "Where was HuggingFace founded ?", "context": "HuggingFace was founded in Paris."},
--- a/tests/pipelines/test_pipelines_summarization.py
+++ b/tests/pipelines/test_pipelines_summarization.py
@@ -32,8 +32,8 @@ class SummarizationPipelineTests(unittest.TestCase):
    model_mapping = MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING
    tf_model_mapping = TF_MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING

-    def get_test_pipeline(self, model, tokenizer, processor):
-        summarizer = SummarizationPipeline(model=model, tokenizer=tokenizer)
+    def get_test_pipeline(self, model, tokenizer, processor, torch_dtype="float32"):
+        summarizer = SummarizationPipeline(model=model, tokenizer=tokenizer, torch_dtype=torch_dtype)
        return summarizer, ["(CNN)The Palestinian Authority officially became", "Some other text"]

    def run_pipeline_test(self, summarizer, _):
--- a/tests/pipelines/test_pipelines_table_question_answering.py
+++ b/tests/pipelines/test_pipelines_table_question_answering.py
@@ -152,9 +152,9 @@ class TQAPipelineTests(unittest.TestCase):

    @unittest.skipIf(not is_torch_greater_or_equal_than_1_12, reason="Tapas is only available in torch v1.12+")
    @require_torch
-    def test_small_model_pt(self):
+    def test_small_model_pt(self, torch_dtype="float32"):
        model_id = "lysandre/tiny-tapas-random-wtq"
-        model = AutoModelForTableQuestionAnswering.from_pretrained(model_id)
+        model = AutoModelForTableQuestionAnswering.from_pretrained(model_id, torch_dtype=torch_dtype)
        tokenizer = AutoTokenizer.from_pretrained(model_id)
        self.assertIsInstance(model.config.aggregation_labels, dict)
        self.assertIsInstance(model.config.no_aggregation_label_index, int)
@@ -255,9 +255,14 @@ class TQAPipelineTests(unittest.TestCase):

    @unittest.skipIf(not is_torch_greater_or_equal_than_1_12, reason="Tapas is only available in torch v1.12+")
    @require_torch
-    def test_slow_tokenizer_sqa_pt(self):
+    def test_small_model_pt_fp16(self):
+        self.test_small_model_pt(torch_dtype="float16")
+
+    @unittest.skipIf(not is_torch_greater_or_equal_than_1_12, reason="Tapas is only available in torch v1.12+")
+    @require_torch
+    def test_slow_tokenizer_sqa_pt(self, torch_dtype="float32"):
        model_id = "lysandre/tiny-tapas-random-sqa"
-        model = AutoModelForTableQuestionAnswering.from_pretrained(model_id)
+        model = AutoModelForTableQuestionAnswering.from_pretrained(model_id, torch_dtype=torch_dtype)
        tokenizer = AutoTokenizer.from_pretrained(model_id)
        table_querier = TableQuestionAnsweringPipeline(model=model, tokenizer=tokenizer)

@@ -373,6 +378,11 @@ class TQAPipelineTests(unittest.TestCase):
                },
            )

+    @unittest.skipIf(not is_torch_greater_or_equal_than_1_12, reason="Tapas is only available in torch v1.12+")
+    @require_torch
+    def test_slow_tokenizer_sqa_pt_fp16(self):
+        self.test_slow_tokenizer_sqa_pt(torch_dtype="float16")
+
    @require_tf
    @require_tensorflow_probability
    @require_pandas
@@ -498,8 +508,8 @@ class TQAPipelineTests(unittest.TestCase):
    @unittest.skipIf(not is_torch_greater_or_equal_than_1_12, reason="Tapas is only available in torch v1.12+")
    @slow
    @require_torch
-    def test_integration_wtq_pt(self):
-        table_querier = pipeline("table-question-answering")
+    def test_integration_wtq_pt(self, torch_dtype="float32"):
+        table_querier = pipeline("table-question-answering", torch_dtype=torch_dtype)

        data = {
            "Repository": ["Transformers", "Datasets", "Tokenizers"],
@@ -541,6 +551,12 @@ class TQAPipelineTests(unittest.TestCase):
        ]
        self.assertListEqual(results, expected_results)

+    @unittest.skipIf(not is_torch_greater_or_equal_than_1_12, reason="Tapas is only available in torch v1.12+")
+    @slow
+    @require_torch
+    def test_integration_wtq_pt_fp16(self):
+        self.test_integration_wtq_pt(torch_dtype="float16")
+
    @slow
    @require_tensorflow_probability
    @require_pandas
@@ -593,11 +609,12 @@ class TQAPipelineTests(unittest.TestCase):
    @unittest.skipIf(not is_torch_greater_or_equal_than_1_12, reason="Tapas is only available in torch v1.12+")
    @slow
    @require_torch
-    def test_integration_sqa_pt(self):
+    def test_integration_sqa_pt(self, torch_dtype="float32"):
        table_querier = pipeline(
            "table-question-answering",
            model="google/tapas-base-finetuned-sqa",
            tokenizer="google/tapas-base-finetuned-sqa",
+            torch_dtype=torch_dtype,
        )
        data = {
            "Actors": ["Brad Pitt", "Leonardo Di Caprio", "George Clooney"],
@@ -615,6 +632,12 @@ class TQAPipelineTests(unittest.TestCase):
        ]
        self.assertListEqual(results, expected_results)

+    @unittest.skipIf(not is_torch_greater_or_equal_than_1_12, reason="Tapas is only available in torch v1.12+")
+    @slow
+    @require_torch
+    def test_integration_sqa_pt_fp16(self):
+        self.test_integration_sqa_pt(torch_dtype="float16")
+
    @slow
    @require_tensorflow_probability
    @require_pandas
@@ -645,11 +668,12 @@ class TQAPipelineTests(unittest.TestCase):

    @slow
    @require_torch
-    def test_large_model_pt_tapex(self):
+    def test_large_model_pt_tapex(self, torch_dtype="float32"):
        model_id = "microsoft/tapex-large-finetuned-wtq"
        table_querier = pipeline(
            "table-question-answering",
            model=model_id,
+            torch_dtype=torch_dtype,
        )
        data = {
            "Actors": ["Brad Pitt", "Leonardo Di Caprio", "George Clooney"],
--- a/tests/pipelines/test_pipelines_text2text_generation.py
+++ b/tests/pipelines/test_pipelines_text2text_generation.py
@@ -35,8 +35,8 @@ class Text2TextGenerationPipelineTests(unittest.TestCase):
    model_mapping = MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING
    tf_model_mapping = TF_MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING

-    def get_test_pipeline(self, model, tokenizer, processor):
-        generator = Text2TextGenerationPipeline(model=model, tokenizer=tokenizer)
+    def get_test_pipeline(self, model, tokenizer, processor, torch_dtype="float32"):
+        generator = Text2TextGenerationPipeline(model=model, tokenizer=tokenizer, torch_dtype=torch_dtype)
        return generator, ["Something to write", "Something else"]

    def run_pipeline_test(self, generator, _):
--- a/tests/pipelines/test_pipelines_text_classification.py
+++ b/tests/pipelines/test_pipelines_text_classification.py
@@ -179,8 +179,8 @@ class TextClassificationPipelineTests(unittest.TestCase):
        outputs = text_classifier("Birds are a type of animal")
        self.assertEqual(nested_simplify(outputs), [{"label": "POSITIVE", "score": 0.988}])

-    def get_test_pipeline(self, model, tokenizer, processor):
-        text_classifier = TextClassificationPipeline(model=model, tokenizer=tokenizer)
+    def get_test_pipeline(self, model, tokenizer, processor, torch_dtype="float32"):
+        text_classifier = TextClassificationPipeline(model=model, tokenizer=tokenizer, torch_dtype=torch_dtype)
        return text_classifier, ["HuggingFace is in", "This is another test"]

    def run_pipeline_test(self, text_classifier, _):
--- a/tests/pipelines/test_pipelines_text_generation.py
+++ b/tests/pipelines/test_pipelines_text_generation.py
@@ -320,8 +320,8 @@ class TextGenerationPipelineTests(unittest.TestCase):
            ],
        )

-    def get_test_pipeline(self, model, tokenizer, processor):
-        text_generator = TextGenerationPipeline(model=model, tokenizer=tokenizer)
+    def get_test_pipeline(self, model, tokenizer, processor, torch_dtype="float32"):
+        text_generator = TextGenerationPipeline(model=model, tokenizer=tokenizer, torch_dtype=torch_dtype)
        return text_generator, ["This is a test", "Another test"]

    def test_stop_sequence_stopping_criteria(self):
--- a/tests/pipelines/test_pipelines_text_to_audio.py
+++ b/tests/pipelines/test_pipelines_text_to_audio.py
@@ -250,8 +250,8 @@ class TextToAudioPipelineTests(unittest.TestCase):
        outputs = music_generator("This is a test", forward_params=forward_params, generate_kwargs=generate_kwargs)
        self.assertListEqual(outputs["audio"].tolist(), audio.tolist())

-    def get_test_pipeline(self, model, tokenizer, processor):
-        speech_generator = TextToAudioPipeline(model=model, tokenizer=tokenizer)
+    def get_test_pipeline(self, model, tokenizer, processor, torch_dtype="float32"):
+        speech_generator = TextToAudioPipeline(model=model, tokenizer=tokenizer, torch_dtype=torch_dtype)
        return speech_generator, ["This is a test", "Another test"]

    def run_pipeline_test(self, speech_generator, _):
--- a/tests/pipelines/test_pipelines_token_classification.py
+++ b/tests/pipelines/test_pipelines_token_classification.py
@@ -56,8 +56,8 @@ class TokenClassificationPipelineTests(unittest.TestCase):
            config: model for config, model in tf_model_mapping.items() if config.__name__ not in _TO_SKIP
        }

-    def get_test_pipeline(self, model, tokenizer, processor):
-        token_classifier = TokenClassificationPipeline(model=model, tokenizer=tokenizer)
+    def get_test_pipeline(self, model, tokenizer, processor, torch_dtype="float32"):
+        token_classifier = TokenClassificationPipeline(model=model, tokenizer=tokenizer, torch_dtype=torch_dtype)
        return token_classifier, ["A simple string", "A simple string that is quite a bit longer"]

    def run_pipeline_test(self, token_classifier, _):
--- a/tests/pipelines/test_pipelines_translation.py
+++ b/tests/pipelines/test_pipelines_translation.py
@@ -35,12 +35,14 @@ class TranslationPipelineTests(unittest.TestCase):
    model_mapping = MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING
    tf_model_mapping = TF_MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING

-    def get_test_pipeline(self, model, tokenizer, processor):
+    def get_test_pipeline(self, model, tokenizer, processor, torch_dtype="float32"):
        if isinstance(model.config, MBartConfig):
            src_lang, tgt_lang = list(tokenizer.lang_code_to_id.keys())[:2]
-            translator = TranslationPipeline(model=model, tokenizer=tokenizer, src_lang=src_lang, tgt_lang=tgt_lang)
+            translator = TranslationPipeline(
+                model=model, tokenizer=tokenizer, src_lang=src_lang, tgt_lang=tgt_lang, torch_dtype=torch_dtype
+            )
        else:
-            translator = TranslationPipeline(model=model, tokenizer=tokenizer)
+            translator = TranslationPipeline(model=model, tokenizer=tokenizer, torch_dtype=torch_dtype)
        return translator, ["Some string", "Some other text"]

    def run_pipeline_test(self, translator, _):
--- a/tests/pipelines/test_pipelines_video_classification.py
+++ b/tests/pipelines/test_pipelines_video_classification.py
@@ -38,11 +38,13 @@ from .test_pipelines_common import ANY
 class VideoClassificationPipelineTests(unittest.TestCase):
    model_mapping = MODEL_FOR_VIDEO_CLASSIFICATION_MAPPING

-    def get_test_pipeline(self, model, tokenizer, processor):
+    def get_test_pipeline(self, model, tokenizer, processor, torch_dtype="float32"):
        example_video_filepath = hf_hub_download(
            repo_id="nateraw/video-demo", filename="archery.mp4", repo_type="dataset"
        )
-        video_classifier = VideoClassificationPipeline(model=model, image_processor=processor, top_k=2)
+        video_classifier = VideoClassificationPipeline(
+            model=model, image_processor=processor, top_k=2, torch_dtype=torch_dtype
+        )
        examples = [
            example_video_filepath,
            "https://huggingface.co/datasets/nateraw/video-demo/resolve/main/archery.mp4",
--- a/tests/pipelines/test_pipelines_visual_question_answering.py
+++ b/tests/pipelines/test_pipelines_visual_question_answering.py
@@ -55,8 +55,10 @@ else:
 class VisualQuestionAnsweringPipelineTests(unittest.TestCase):
    model_mapping = MODEL_FOR_VISUAL_QUESTION_ANSWERING_MAPPING

-    def get_test_pipeline(self, model, tokenizer, processor):
-        vqa_pipeline = pipeline("visual-question-answering", model="hf-internal-testing/tiny-vilt-random-vqa")
+    def get_test_pipeline(self, model, tokenizer, processor, torch_dtype="float32"):
+        vqa_pipeline = pipeline(
+            "visual-question-answering", model="hf-internal-testing/tiny-vilt-random-vqa", torch_dtype=torch_dtype
+        )
        examples = [
            {
                "image": Image.open("./tests/fixtures/tests_samples/COCO/000000039769.png"),
--- a/tests/pipelines/test_pipelines_zero_shot.py
+++ b/tests/pipelines/test_pipelines_zero_shot.py
@@ -42,9 +42,9 @@ class ZeroShotClassificationPipelineTests(unittest.TestCase):
            config: model for config, model in tf_model_mapping.items() if config.__name__ not in _TO_SKIP
        }

-    def get_test_pipeline(self, model, tokenizer, processor):
+    def get_test_pipeline(self, model, tokenizer, processor, torch_dtype="float32"):
        classifier = ZeroShotClassificationPipeline(
-            model=model, tokenizer=tokenizer, candidate_labels=["polics", "health"]
+            model=model, tokenizer=tokenizer, candidate_labels=["polics", "health"], torch_dtype=torch_dtype
        )
        return classifier, ["Who are you voting for in 2020?", "My stomach hurts."]

--- a/tests/pipelines/test_pipelines_zero_shot_audio_classification.py
+++ b/tests/pipelines/test_pipelines_zero_shot_audio_classification.py
@@ -28,9 +28,11 @@ class ZeroShotAudioClassificationPipelineTests(unittest.TestCase):
    # model_mapping = {CLAPConfig: CLAPModel}

    @require_torch
-    def test_small_model_pt(self):
+    def test_small_model_pt(self, torch_dtype="float32"):
        audio_classifier = pipeline(
-            task="zero-shot-audio-classification", model="hf-internal-testing/tiny-clap-htsat-unfused"
+            task="zero-shot-audio-classification",
+            model="hf-internal-testing/tiny-clap-htsat-unfused",
+            torch_dtype=torch_dtype,
        )
        dataset = load_dataset("hf-internal-testing/ashraq-esc50-1-dog-example")
        audio = dataset["train"]["audio"][-1]["array"]
@@ -40,6 +42,10 @@ class ZeroShotAudioClassificationPipelineTests(unittest.TestCase):
            [{"score": 0.501, "label": "Sound of a dog"}, {"score": 0.499, "label": "Sound of vaccum cleaner"}],
        )

+    @require_torch
+    def test_small_model_pt_fp16(self):
+        self.test_small_model_pt(torch_dtype="float16")
+
    @unittest.skip(reason="No models are available in TF")
    def test_small_model_tf(self):
        pass
--- a/tests/pipelines/test_pipelines_zero_shot_image_classification.py
+++ b/tests/pipelines/test_pipelines_zero_shot_image_classification.py
@@ -71,9 +71,9 @@ class ZeroShotImageClassificationPipelineTests(unittest.TestCase):
    #     outputs = pipe([image] * 3, batch_size=2, candidate_labels=["A", "B"])

    @require_torch
-    def test_small_model_pt(self):
+    def test_small_model_pt(self, torch_dtype="float32"):
        image_classifier = pipeline(
-            model="hf-internal-testing/tiny-random-clip-zero-shot-image-classification",
+            model="hf-internal-testing/tiny-random-clip-zero-shot-image-classification", torch_dtype=torch_dtype
        )
        image = Image.open("./tests/fixtures/tests_samples/COCO/000000039769.png")
        output = image_classifier(image, candidate_labels=["a", "b", "c"])
@@ -127,6 +127,10 @@ class ZeroShotImageClassificationPipelineTests(unittest.TestCase):
            ],
        )

+    @require_torch
+    def test_small_model_pt_fp16(self):
+        self.test_small_model_pt(torch_dtype="float16")
+
    @require_tf
    def test_small_model_tf(self):
        image_classifier = pipeline(
--- a/tests/pipelines/test_pipelines_zero_shot_object_detection.py
+++ b/tests/pipelines/test_pipelines_zero_shot_object_detection.py
@@ -43,9 +43,11 @@ else:
 class ZeroShotObjectDetectionPipelineTests(unittest.TestCase):
    model_mapping = MODEL_FOR_ZERO_SHOT_OBJECT_DETECTION_MAPPING

-    def get_test_pipeline(self, model, tokenizer, processor):
+    def get_test_pipeline(self, model, tokenizer, processor, torch_dtype="float32"):
        object_detector = pipeline(
-            "zero-shot-object-detection", model="hf-internal-testing/tiny-random-owlvit-object-detection"
+            "zero-shot-object-detection",
+            model="hf-internal-testing/tiny-random-owlvit-object-detection",
+            torch_dtype=torch_dtype,
        )

        examples = [
--- a/tests/test_pipeline_mixin.py
+++ b/tests/test_pipeline_mixin.py
@@ -126,16 +126,18 @@ class PipelineTesterMixin:
    pipeline_model_mapping = None
    supported_frameworks = ["pt", "tf"]

-    def run_task_tests(self, task):
+    def run_task_tests(self, task, torch_dtype="float32"):
        """Run pipeline tests for a specific `task`

        Args:
            task (`str`):
                A task name. This should be a key in the mapping `pipeline_test_mapping`.
+            torch_dtype (`str`, `optional`, defaults to `'float32'`):
+                The torch dtype to use for the model. Can be used for FP16/other precision inference.
        """
        if task not in self.pipeline_model_mapping:
            self.skipTest(
-                f"{self.__class__.__name__}::test_pipeline_{task.replace('-', '_')} is skipped: `{task}` is not in "
+                f"{self.__class__.__name__}::test_pipeline_{task.replace('-', '_')}_{torch_dtype} is skipped: `{task}` is not in "
                f"`self.pipeline_model_mapping` for `{self.__class__.__name__}`."
            )

@@ -171,10 +173,12 @@ class PipelineTesterMixin:
                repo_name = model_arch_name

            self.run_model_pipeline_tests(
-                task, repo_name, model_architecture, tokenizer_names, processor_names, commit
+                task, repo_name, model_architecture, tokenizer_names, processor_names, commit, torch_dtype
            )

-    def run_model_pipeline_tests(self, task, repo_name, model_architecture, tokenizer_names, processor_names, commit):
+    def run_model_pipeline_tests(
+        self, task, repo_name, model_architecture, tokenizer_names, processor_names, commit, torch_dtype="float32"
+    ):
        """Run pipeline tests for a specific `task` with the give model class and tokenizer/processor class names

        Args:
@@ -188,6 +192,10 @@ class PipelineTesterMixin:
                A list of names of a subclasses of `PreTrainedTokenizerFast` or `PreTrainedTokenizer`.
            processor_names (`List[str]`):
                A list of names of subclasses of `BaseImageProcessor` or `FeatureExtractionMixin`.
+            commit (`str`):
+                The commit hash of the model repository on the Hub.
+            torch_dtype (`str`, `optional`, defaults to `'float32'`):
+                The torch dtype to use for the model. Can be used for FP16/other precision inference.
        """
        # Get an instance of the corresponding class `XXXPipelineTests` in order to use `get_test_pipeline` and
        # `run_pipeline_test`.
@@ -203,14 +211,18 @@ class PipelineTesterMixin:
                    processor_name,
                ):
                    logger.warning(
-                        f"{self.__class__.__name__}::test_pipeline_{task.replace('-', '_')} is skipped: test is "
+                        f"{self.__class__.__name__}::test_pipeline_{task.replace('-', '_')}_{torch_dtype} is skipped: test is "
                        f"currently known to fail for: model `{model_architecture.__name__}` | tokenizer "
                        f"`{tokenizer_name}` | processor `{processor_name}`."
                    )
                    continue
-                self.run_pipeline_test(task, repo_name, model_architecture, tokenizer_name, processor_name, commit)
+                self.run_pipeline_test(
+                    task, repo_name, model_architecture, tokenizer_name, processor_name, commit, torch_dtype
+                )

-    def run_pipeline_test(self, task, repo_name, model_architecture, tokenizer_name, processor_name, commit):
+    def run_pipeline_test(
+        self, task, repo_name, model_architecture, tokenizer_name, processor_name, commit, torch_dtype="float32"
+    ):
        """Run pipeline tests for a specific `task` with the give model class and tokenizer/processor class name

        The model will be loaded from a model repository on the Hub.
@@ -226,6 +238,10 @@ class PipelineTesterMixin:
                The name of a subclass of `PreTrainedTokenizerFast` or `PreTrainedTokenizer`.
            processor_name (`str`):
                The name of a subclass of `BaseImageProcessor` or `FeatureExtractionMixin`.
+            commit (`str`):
+                The commit hash of the model repository on the Hub.
+            torch_dtype (`str`, `optional`, defaults to `'float32'`):
+                The torch dtype to use for the model. Can be used for FP16/other precision inference.
        """
        repo_id = f"{TRANSFORMERS_TINY_MODEL_PATH}/{repo_name}"
        if TRANSFORMERS_TINY_MODEL_PATH != "hf-internal-testing":
@@ -245,7 +261,7 @@ class PipelineTesterMixin:
                processor = processor_class.from_pretrained(repo_id, revision=commit)
            except Exception:
                logger.warning(
-                    f"{self.__class__.__name__}::test_pipeline_{task.replace('-', '_')} is skipped: Could not load the "
+                    f"{self.__class__.__name__}::test_pipeline_{task.replace('-', '_')}_{torch_dtype} is skipped: Could not load the "
                    f"processor from `{repo_id}` with `{processor_name}`."
                )
                self.skipTest(f"Could not load the processor from {repo_id} with {processor_name}.")
@@ -253,7 +269,7 @@ class PipelineTesterMixin:
        # TODO: Maybe not upload such problematic tiny models to Hub.
        if tokenizer is None and processor is None:
            logger.warning(
-                f"{self.__class__.__name__}::test_pipeline_{task.replace('-', '_')} is skipped: Could not find or load "
+                f"{self.__class__.__name__}::test_pipeline_{task.replace('-', '_')}_{torch_dtype} is skipped: Could not find or load "
                f"any tokenizer / processor from `{repo_id}`."
            )
            self.skipTest(f"Could not find or load any tokenizer / processor from {repo_id}.")
@@ -263,7 +279,7 @@ class PipelineTesterMixin:
            model = model_architecture.from_pretrained(repo_id, revision=commit)
        except Exception:
            logger.warning(
-                f"{self.__class__.__name__}::test_pipeline_{task.replace('-', '_')} is skipped: Could not find or load "
+                f"{self.__class__.__name__}::test_pipeline_{task.replace('-', '_')}_{torch_dtype} is skipped: Could not find or load "
                f"the model from `{repo_id}` with `{model_architecture}`."
            )
            self.skipTest(f"Could not find or load the model from {repo_id} with {model_architecture}.")
@@ -271,7 +287,7 @@ class PipelineTesterMixin:
        pipeline_test_class_name = pipeline_test_mapping[task]["test"].__name__
        if self.is_pipeline_test_to_skip_more(pipeline_test_class_name, model.config, model, tokenizer, processor):
            logger.warning(
-                f"{self.__class__.__name__}::test_pipeline_{task.replace('-', '_')} is skipped: test is "
+                f"{self.__class__.__name__}::test_pipeline_{task.replace('-', '_')}_{torch_dtype} is skipped: test is "
                f"currently known to fail for: model `{model_architecture.__name__}` | tokenizer "
                f"`{tokenizer_name}` | processor `{processor_name}`."
            )
@@ -289,12 +305,12 @@ class PipelineTesterMixin:
        # `run_pipeline_test`.
        task_test = pipeline_test_mapping[task]["test"]()

-        pipeline, examples = task_test.get_test_pipeline(model, tokenizer, processor)
+        pipeline, examples = task_test.get_test_pipeline(model, tokenizer, processor, torch_dtype=torch_dtype)
        if pipeline is None:
            # The test can disable itself, but it should be very marginal
            # Concerns: Wav2Vec2ForCTC without tokenizer test (FastTokenizer don't exist)
            logger.warning(
-                f"{self.__class__.__name__}::test_pipeline_{task.replace('-', '_')} is skipped: Could not get the "
+                f"{self.__class__.__name__}::test_pipeline_{task.replace('-', '_')}_{torch_dtype} is skipped: Could not get the "
                "pipeline for testing."
            )
            self.skipTest(reason="Could not get the pipeline for testing.")
@@ -324,10 +340,20 @@ class PipelineTesterMixin:
    def test_pipeline_audio_classification(self):
        self.run_task_tests(task="audio-classification")

+    @is_pipeline_test
+    @require_torch
+    def test_pipeline_audio_classification_fp16(self):
+        self.run_task_tests(task="audio-classification", torch_dtype="float16")
+
    @is_pipeline_test
    def test_pipeline_automatic_speech_recognition(self):
        self.run_task_tests(task="automatic-speech-recognition")

+    @is_pipeline_test
+    @require_torch
+    def test_pipeline_automatic_speech_recognition_fp16(self):
+        self.run_task_tests(task="automatic-speech-recognition", torch_dtype="float16")
+
    @is_pipeline_test
    @require_vision
    @require_timm
@@ -335,6 +361,13 @@ class PipelineTesterMixin:
    def test_pipeline_depth_estimation(self):
        self.run_task_tests(task="depth-estimation")

+    @is_pipeline_test
+    @require_vision
+    @require_timm
+    @require_torch
+    def test_pipeline_depth_estimation_fp16(self):
+        self.run_task_tests(task="depth-estimation", torch_dtype="float16")
+
    @is_pipeline_test
    @require_pytesseract
    @require_torch
@@ -342,20 +375,43 @@ class PipelineTesterMixin:
    def test_pipeline_document_question_answering(self):
        self.run_task_tests(task="document-question-answering")

+    @is_pipeline_test
+    @require_pytesseract
+    @require_torch
+    @require_vision
+    def test_pipeline_document_question_answering_fp16(self):
+        self.run_task_tests(task="document-question-answering", torch_dtype="float16")
+
    @is_pipeline_test
    def test_pipeline_feature_extraction(self):
        self.run_task_tests(task="feature-extraction")

+    @is_pipeline_test
+    @require_torch
+    def test_pipeline_feature_extraction_fp16(self):
+        self.run_task_tests(task="feature-extraction", torch_dtype="float16")
+
    @is_pipeline_test
    def test_pipeline_fill_mask(self):
        self.run_task_tests(task="fill-mask")

+    @is_pipeline_test
+    @require_torch
+    def test_pipeline_fill_mask_fp16(self):
+        self.run_task_tests(task="fill-mask", torch_dtype="float16")
+
    @is_pipeline_test
    @require_torch_or_tf
    @require_vision
    def test_pipeline_image_classification(self):
        self.run_task_tests(task="image-classification")

+    @is_pipeline_test
+    @require_vision
+    @require_torch
+    def test_pipeline_image_classification_fp16(self):
+        self.run_task_tests(task="image-classification", torch_dtype="float16")
+
    @is_pipeline_test
    @require_vision
    @require_timm
@@ -363,11 +419,24 @@ class PipelineTesterMixin:
    def test_pipeline_image_segmentation(self):
        self.run_task_tests(task="image-segmentation")

+    @is_pipeline_test
+    @require_vision
+    @require_timm
+    @require_torch
+    def test_pipeline_image_segmentation_fp16(self):
+        self.run_task_tests(task="image-segmentation", torch_dtype="float16")
+
    @is_pipeline_test
    @require_vision
    def test_pipeline_image_to_text(self):
        self.run_task_tests(task="image-to-text")

+    @is_pipeline_test
+    @require_vision
+    @require_torch
+    def test_pipeline_image_to_text_fp16(self):
+        self.run_task_tests(task="image-to-text", torch_dtype="float16")
+
    @is_pipeline_test
    @require_timm
    @require_vision
@@ -375,6 +444,13 @@ class PipelineTesterMixin:
    def test_pipeline_image_feature_extraction(self):
        self.run_task_tests(task="image-feature-extraction")

+    @is_pipeline_test
+    @require_timm
+    @require_vision
+    @require_torch
+    def test_pipeline_image_feature_extraction_fp16(self):
+        self.run_task_tests(task="image-feature-extraction", torch_dtype="float16")
+
    @unittest.skip(reason="`run_pipeline_test` is currently not implemented.")
    @is_pipeline_test
    @require_vision
@@ -382,6 +458,13 @@ class PipelineTesterMixin:
    def test_pipeline_mask_generation(self):
        self.run_task_tests(task="mask-generation")

+    @unittest.skip(reason="`run_pipeline_test` is currently not implemented.")
+    @is_pipeline_test
+    @require_vision
+    @require_torch
+    def test_pipeline_mask_generation_fp16(self):
+        self.run_task_tests(task="mask-generation", torch_dtype="float16")
+
    @is_pipeline_test
    @require_vision
    @require_timm
@@ -389,44 +472,96 @@ class PipelineTesterMixin:
    def test_pipeline_object_detection(self):
        self.run_task_tests(task="object-detection")

+    @is_pipeline_test
+    @require_vision
+    @require_timm
+    @require_torch
+    def test_pipeline_object_detection_fp16(self):
+        self.run_task_tests(task="object-detection", torch_dtype="float16")
+
    @is_pipeline_test
    def test_pipeline_question_answering(self):
        self.run_task_tests(task="question-answering")

+    @is_pipeline_test
+    @require_torch
+    def test_pipeline_question_answering_fp16(self):
+        self.run_task_tests(task="question-answering", torch_dtype="float16")
+
    @is_pipeline_test
    def test_pipeline_summarization(self):
        self.run_task_tests(task="summarization")

+    @is_pipeline_test
+    @require_torch
+    def test_pipeline_summarization_fp16(self):
+        self.run_task_tests(task="summarization", torch_dtype="float16")
+
    @is_pipeline_test
    def test_pipeline_table_question_answering(self):
        self.run_task_tests(task="table-question-answering")

+    @is_pipeline_test
+    @require_torch
+    def test_pipeline_table_question_answering_fp16(self):
+        self.run_task_tests(task="table-question-answering", torch_dtype="float16")
+
    @is_pipeline_test
    def test_pipeline_text2text_generation(self):
        self.run_task_tests(task="text2text-generation")

+    @is_pipeline_test
+    @require_torch
+    def test_pipeline_text2text_generation_fp16(self):
+        self.run_task_tests(task="text2text-generation", torch_dtype="float16")
+
    @is_pipeline_test
    def test_pipeline_text_classification(self):
        self.run_task_tests(task="text-classification")

+    @is_pipeline_test
+    @require_torch
+    def test_pipeline_text_classification_fp16(self):
+        self.run_task_tests(task="text-classification", torch_dtype="float16")
+
    @is_pipeline_test
    @require_torch_or_tf
    def test_pipeline_text_generation(self):
        self.run_task_tests(task="text-generation")

+    @is_pipeline_test
+    @require_torch
+    def test_pipeline_text_generation_fp16(self):
+        self.run_task_tests(task="text-generation", torch_dtype="float16")
+
    @is_pipeline_test
    @require_torch
    def test_pipeline_text_to_audio(self):
        self.run_task_tests(task="text-to-audio")

+    @is_pipeline_test
+    @require_torch
+    def test_pipeline_text_to_audio_fp16(self):
+        self.run_task_tests(task="text-to-audio", torch_dtype="float16")
+
    @is_pipeline_test
    def test_pipeline_token_classification(self):
        self.run_task_tests(task="token-classification")

+    @is_pipeline_test
+    @require_torch
+    def test_pipeline_token_classification_fp16(self):
+        self.run_task_tests(task="token-classification", torch_dtype="float16")
+
    @is_pipeline_test
    def test_pipeline_translation(self):
        self.run_task_tests(task="translation")

+    @is_pipeline_test
+    @require_torch
+    def test_pipeline_translation_fp16(self):
+        self.run_task_tests(task="translation", torch_dtype="float16")
+
    @is_pipeline_test
    @require_torch_or_tf
    @require_vision
@@ -434,32 +569,67 @@ class PipelineTesterMixin:
    def test_pipeline_video_classification(self):
        self.run_task_tests(task="video-classification")

+    @is_pipeline_test
+    @require_vision
+    @require_decord
+    @require_torch
+    def test_pipeline_video_classification_fp16(self):
+        self.run_task_tests(task="video-classification", torch_dtype="float16")
+
    @is_pipeline_test
    @require_torch
    @require_vision
    def test_pipeline_visual_question_answering(self):
        self.run_task_tests(task="visual-question-answering")

+    @is_pipeline_test
+    @require_torch
+    @require_vision
+    def test_pipeline_visual_question_answering_fp16(self):
+        self.run_task_tests(task="visual-question-answering", torch_dtype="float16")
+
    @is_pipeline_test
    def test_pipeline_zero_shot(self):
        self.run_task_tests(task="zero-shot")

+    @is_pipeline_test
+    @require_torch
+    def test_pipeline_zero_shot_fp16(self):
+        self.run_task_tests(task="zero-shot", torch_dtype="float16")
+
    @is_pipeline_test
    @require_torch
    def test_pipeline_zero_shot_audio_classification(self):
        self.run_task_tests(task="zero-shot-audio-classification")

+    @is_pipeline_test
+    @require_torch
+    def test_pipeline_zero_shot_audio_classification_fp16(self):
+        self.run_task_tests(task="zero-shot-audio-classification", torch_dtype="float16")
+
    @is_pipeline_test
    @require_vision
    def test_pipeline_zero_shot_image_classification(self):
        self.run_task_tests(task="zero-shot-image-classification")

+    @is_pipeline_test
+    @require_vision
+    @require_torch
+    def test_pipeline_zero_shot_image_classification_fp16(self):
+        self.run_task_tests(task="zero-shot-image-classification", torch_dtype="float16")
+
    @is_pipeline_test
    @require_vision
    @require_torch
    def test_pipeline_zero_shot_object_detection(self):
        self.run_task_tests(task="zero-shot-object-detection")

+    @is_pipeline_test
+    @require_vision
+    @require_torch
+    def test_pipeline_zero_shot_object_detection_fp16(self):
+        self.run_task_tests(task="zero-shot-object-detection", torch_dtype="float16")
+
    # This contains the test cases to be skipped without model architecture being involved.
    def is_pipeline_test_to_skip(
        self, pipeline_test_casse_name, config_class, model_architecture, tokenizer_name, processor_name