Allow FP16 or other precision inference for Pipelines (#31342)

* cast image features to model.dtype where needed to support FP16 or other precision in pipelines
* Update src/transformers/pipelines/image_feature_extraction.py
  Co-authored-by: amyeroberts <22614925+amyeroberts@users.noreply.github.com>
* Use .to instead
* Add FP16 pipeline support for zeroshot audio classification
* Remove unused torch imports
* Add docs on FP16 pipeline
* Remove unused import
* Add FP16 tests to pipeline mixin
* Add fp16 placeholder for mask_generation pipeline test
* Add FP16 tests for all pipelines
* Fix formatting
* Remove torch_dtype arg from is_pipeline_test_to_skip*
* Fix format
* trigger ci

---------

Co-authored-by: amyeroberts <22614925+amyeroberts@users.noreply.github.com>
2024-07-06 00:21:50 +08:00
parent e786844425
commit ac26260436
45 changed files with 354 additions and 79 deletions
--- a/tests/pipelines/test_pipelines_audio_classification.py
+++ b/tests/pipelines/test_pipelines_audio_classification.py
@@ -35,8 +35,10 @@ class AudioClassificationPipelineTests(unittest.TestCase):
    model_mapping = MODEL_FOR_AUDIO_CLASSIFICATION_MAPPING
    tf_model_mapping = TF_MODEL_FOR_AUDIO_CLASSIFICATION_MAPPING

-    def get_test_pipeline(self, model, tokenizer, processor):
-        audio_classifier = AudioClassificationPipeline(model=model, feature_extractor=processor)
+    def get_test_pipeline(self, model, tokenizer, processor, torch_dtype="float32"):
+        audio_classifier = AudioClassificationPipeline(
+            model=model, feature_extractor=processor, torch_dtype=torch_dtype
+        )

        # test with a raw waveform
        audio = np.zeros((34000,))
--- a/tests/pipelines/test_pipelines_automatic_speech_recognition.py
+++ b/tests/pipelines/test_pipelines_automatic_speech_recognition.py
@@ -66,14 +66,14 @@ class AutomaticSpeechRecognitionPipelineTests(unittest.TestCase):
        + (MODEL_FOR_CTC_MAPPING.items() if MODEL_FOR_CTC_MAPPING else [])
    )

-    def get_test_pipeline(self, model, tokenizer, processor):
+    def get_test_pipeline(self, model, tokenizer, processor, torch_dtype="float32"):
        if tokenizer is None:
            # Side effect of no Fast Tokenizer class for these model, so skipping
            # But the slow tokenizer test should still run as they're quite small
            self.skipTest(reason="No tokenizer available")

        speech_recognizer = AutomaticSpeechRecognitionPipeline(
-            model=model, tokenizer=tokenizer, feature_extractor=processor
+            model=model, tokenizer=tokenizer, feature_extractor=processor, torch_dtype=torch_dtype
        )

        # test with a raw waveform
--- a/tests/pipelines/test_pipelines_depth_estimation.py
+++ b/tests/pipelines/test_pipelines_depth_estimation.py
@@ -56,8 +56,8 @@ def hashimage(image: Image) -> str:
 class DepthEstimationPipelineTests(unittest.TestCase):
    model_mapping = MODEL_FOR_DEPTH_ESTIMATION_MAPPING

-    def get_test_pipeline(self, model, tokenizer, processor):
-        depth_estimator = DepthEstimationPipeline(model=model, image_processor=processor)
+    def get_test_pipeline(self, model, tokenizer, processor, torch_dtype="float32"):
+        depth_estimator = DepthEstimationPipeline(model=model, image_processor=processor, torch_dtype=torch_dtype)
        return depth_estimator, [
            "./tests/fixtures/tests_samples/COCO/000000039769.png",
            "./tests/fixtures/tests_samples/COCO/000000039769.png",
--- a/tests/pipelines/test_pipelines_document_question_answering.py
+++ b/tests/pipelines/test_pipelines_document_question_answering.py
@@ -61,9 +61,13 @@ class DocumentQuestionAnsweringPipelineTests(unittest.TestCase):

    @require_pytesseract
    @require_vision
-    def get_test_pipeline(self, model, tokenizer, processor):
+    def get_test_pipeline(self, model, tokenizer, processor, torch_dtype="float32"):
        dqa_pipeline = pipeline(
-            "document-question-answering", model=model, tokenizer=tokenizer, image_processor=processor
+            "document-question-answering",
+            model=model,
+            tokenizer=tokenizer,
+            image_processor=processor,
+            torch_dtype=torch_dtype,
        )

        image = INVOICE_URL
--- a/tests/pipelines/test_pipelines_feature_extraction.py
+++ b/tests/pipelines/test_pipelines_feature_extraction.py
@@ -174,7 +174,7 @@ class FeatureExtractionPipelineTests(unittest.TestCase):
            raise ValueError("We expect lists of floats, nothing else")
        return shape

-    def get_test_pipeline(self, model, tokenizer, processor):
+    def get_test_pipeline(self, model, tokenizer, processor, torch_dtype="float32"):
        if tokenizer is None:
            self.skipTest(reason="No tokenizer")
        elif (
@@ -193,7 +193,9 @@ class FeatureExtractionPipelineTests(unittest.TestCase):
                For now ignore those.
                """
            )
-        feature_extractor = FeatureExtractionPipeline(model=model, tokenizer=tokenizer, feature_extractor=processor)
+        feature_extractor = FeatureExtractionPipeline(
+            model=model, tokenizer=tokenizer, feature_extractor=processor, torch_dtype=torch_dtype
+        )
        return feature_extractor, ["This is a test", "This is another test"]

    def run_pipeline_test(self, feature_extractor, examples):
--- a/tests/pipelines/test_pipelines_fill_mask.py
+++ b/tests/pipelines/test_pipelines_fill_mask.py
@@ -251,11 +251,11 @@ class FillMaskPipelineTests(unittest.TestCase):
        unmasker.tokenizer.pad_token = None
        self.run_pipeline_test(unmasker, [])

-    def get_test_pipeline(self, model, tokenizer, processor):
+    def get_test_pipeline(self, model, tokenizer, processor, torch_dtype="float32"):
        if tokenizer is None or tokenizer.mask_token_id is None:
            self.skipTest(reason="The provided tokenizer has no mask token, (probably reformer or wav2vec2)")

-        fill_masker = FillMaskPipeline(model=model, tokenizer=tokenizer)
+        fill_masker = FillMaskPipeline(model=model, tokenizer=tokenizer, torch_dtype=torch_dtype)
        examples = [
            f"This is another {tokenizer.mask_token} test",
        ]
--- a/tests/pipelines/test_pipelines_image_classification.py
+++ b/tests/pipelines/test_pipelines_image_classification.py
@@ -55,8 +55,10 @@ class ImageClassificationPipelineTests(unittest.TestCase):
    model_mapping = MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING
    tf_model_mapping = TF_MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING

-    def get_test_pipeline(self, model, tokenizer, processor):
-        image_classifier = ImageClassificationPipeline(model=model, image_processor=processor, top_k=2)
+    def get_test_pipeline(self, model, tokenizer, processor, torch_dtype="float32"):
+        image_classifier = ImageClassificationPipeline(
+            model=model, image_processor=processor, top_k=2, torch_dtype=torch_dtype
+        )
        examples = [
            Image.open("./tests/fixtures/tests_samples/COCO/000000039769.png"),
            "http://images.cocodataset.org/val2017/000000039769.jpg",
--- a/tests/pipelines/test_pipelines_image_feature_extraction.py
+++ b/tests/pipelines/test_pipelines_image_feature_extraction.py
@@ -157,7 +157,7 @@ class ImageFeatureExtractionPipelineTests(unittest.TestCase):
        outputs = feature_extractor(img, return_tensors=True)
        self.assertTrue(tf.is_tensor(outputs))

-    def get_test_pipeline(self, model, tokenizer, processor):
+    def get_test_pipeline(self, model, tokenizer, processor, torch_dtype="float32"):
        if processor is None:
            self.skipTest(reason="No image processor")

@@ -175,7 +175,9 @@ class ImageFeatureExtractionPipelineTests(unittest.TestCase):
                """
            )

-        feature_extractor = ImageFeatureExtractionPipeline(model=model, image_processor=processor)
+        feature_extractor = ImageFeatureExtractionPipeline(
+            model=model, image_processor=processor, torch_dtype=torch_dtype
+        )
        img = prepare_img()
        return feature_extractor, [img, img]

--- a/tests/pipelines/test_pipelines_image_segmentation.py
+++ b/tests/pipelines/test_pipelines_image_segmentation.py
@@ -87,8 +87,8 @@ class ImageSegmentationPipelineTests(unittest.TestCase):
        + (MODEL_FOR_INSTANCE_SEGMENTATION_MAPPING.items() if MODEL_FOR_INSTANCE_SEGMENTATION_MAPPING else [])
    )

-    def get_test_pipeline(self, model, tokenizer, processor):
-        image_segmenter = ImageSegmentationPipeline(model=model, image_processor=processor)
+    def get_test_pipeline(self, model, tokenizer, processor, torch_dtype="float32"):
+        image_segmenter = ImageSegmentationPipeline(model=model, image_processor=processor, torch_dtype=torch_dtype)
        return image_segmenter, [
            "./tests/fixtures/tests_samples/COCO/000000039769.png",
            "./tests/fixtures/tests_samples/COCO/000000039769.png",
--- a/tests/pipelines/test_pipelines_image_to_image.py
+++ b/tests/pipelines/test_pipelines_image_to_image.py
@@ -54,9 +54,9 @@ class ImageToImagePipelineTests(unittest.TestCase):
    @require_torch
    @require_vision
    @slow
-    def test_pipeline(self):
+    def test_pipeline(self, torch_dtype="float32"):
        model_id = "caidas/swin2SR-classical-sr-x2-64"
-        upscaler = pipeline("image-to-image", model=model_id)
+        upscaler = pipeline("image-to-image", model=model_id, torch_dtype=torch_dtype)
        upscaled_list = upscaler(self.examples)

        self.assertEqual(len(upscaled_list), len(self.examples))
@@ -66,6 +66,12 @@ class ImageToImagePipelineTests(unittest.TestCase):
        self.assertEqual(upscaled_list[0].size, (1296, 976))
        self.assertEqual(upscaled_list[1].size, (1296, 976))

+    @require_torch
+    @require_vision
+    @slow
+    def test_pipeline_fp16(self):
+        self.test_pipeline(torch_dtype="float16")
+
    @require_torch
    @require_vision
    @slow
--- a/tests/pipelines/test_pipelines_image_to_text.py
+++ b/tests/pipelines/test_pipelines_image_to_text.py
@@ -45,8 +45,10 @@ class ImageToTextPipelineTests(unittest.TestCase):
    model_mapping = MODEL_FOR_VISION_2_SEQ_MAPPING
    tf_model_mapping = TF_MODEL_FOR_VISION_2_SEQ_MAPPING

-    def get_test_pipeline(self, model, tokenizer, processor):
-        pipe = pipeline("image-to-text", model=model, tokenizer=tokenizer, image_processor=processor)
+    def get_test_pipeline(self, model, tokenizer, processor, torch_dtype="float32"):
+        pipe = pipeline(
+            "image-to-text", model=model, tokenizer=tokenizer, image_processor=processor, torch_dtype=torch_dtype
+        )
        examples = [
            Image.open("./tests/fixtures/tests_samples/COCO/000000039769.png"),
            "./tests/fixtures/tests_samples/COCO/000000039769.png",
--- a/tests/pipelines/test_pipelines_mask_generation.py
+++ b/tests/pipelines/test_pipelines_mask_generation.py
@@ -67,8 +67,8 @@ class MaskGenerationPipelineTests(unittest.TestCase):
        (list(TF_MODEL_FOR_MASK_GENERATION_MAPPING.items()) if TF_MODEL_FOR_MASK_GENERATION_MAPPING else [])
    )

-    def get_test_pipeline(self, model, tokenizer, processor):
-        image_segmenter = MaskGenerationPipeline(model=model, image_processor=processor)
+    def get_test_pipeline(self, model, tokenizer, processor, torch_dtype="float32"):
+        image_segmenter = MaskGenerationPipeline(model=model, image_processor=processor, torch_dtype=torch_dtype)
        return image_segmenter, [
            "./tests/fixtures/tests_samples/COCO/000000039769.png",
            "./tests/fixtures/tests_samples/COCO/000000039769.png",
--- a/tests/pipelines/test_pipelines_object_detection.py
+++ b/tests/pipelines/test_pipelines_object_detection.py
@@ -53,8 +53,8 @@ else:
 class ObjectDetectionPipelineTests(unittest.TestCase):
    model_mapping = MODEL_FOR_OBJECT_DETECTION_MAPPING

-    def get_test_pipeline(self, model, tokenizer, processor):
-        object_detector = ObjectDetectionPipeline(model=model, image_processor=processor)
+    def get_test_pipeline(self, model, tokenizer, processor, torch_dtype="float32"):
+        object_detector = ObjectDetectionPipeline(model=model, image_processor=processor, torch_dtype=torch_dtype)
        return object_detector, ["./tests/fixtures/tests_samples/COCO/000000039769.png"]

    def run_pipeline_test(self, object_detector, examples):
--- a/tests/pipelines/test_pipelines_question_answering.py
+++ b/tests/pipelines/test_pipelines_question_answering.py
@@ -50,12 +50,12 @@ class QAPipelineTests(unittest.TestCase):
            config: model for config, model in tf_model_mapping.items() if config.__name__ not in _TO_SKIP
        }

-    def get_test_pipeline(self, model, tokenizer, processor):
+    def get_test_pipeline(self, model, tokenizer, processor, torch_dtype="float32"):
        if isinstance(model.config, LxmertConfig):
            # This is an bimodal model, we need to find a more consistent way
            # to switch on those models.
            return None, None
-        question_answerer = QuestionAnsweringPipeline(model, tokenizer)
+        question_answerer = QuestionAnsweringPipeline(model, tokenizer, torch_dtype=torch_dtype)

        examples = [
            {"question": "Where was HuggingFace founded ?", "context": "HuggingFace was founded in Paris."},
--- a/tests/pipelines/test_pipelines_summarization.py
+++ b/tests/pipelines/test_pipelines_summarization.py
@@ -32,8 +32,8 @@ class SummarizationPipelineTests(unittest.TestCase):
    model_mapping = MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING
    tf_model_mapping = TF_MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING

-    def get_test_pipeline(self, model, tokenizer, processor):
-        summarizer = SummarizationPipeline(model=model, tokenizer=tokenizer)
+    def get_test_pipeline(self, model, tokenizer, processor, torch_dtype="float32"):
+        summarizer = SummarizationPipeline(model=model, tokenizer=tokenizer, torch_dtype=torch_dtype)
        return summarizer, ["(CNN)The Palestinian Authority officially became", "Some other text"]

    def run_pipeline_test(self, summarizer, _):
--- a/tests/pipelines/test_pipelines_table_question_answering.py
+++ b/tests/pipelines/test_pipelines_table_question_answering.py
@@ -152,9 +152,9 @@ class TQAPipelineTests(unittest.TestCase):

    @unittest.skipIf(not is_torch_greater_or_equal_than_1_12, reason="Tapas is only available in torch v1.12+")
    @require_torch
-    def test_small_model_pt(self):
+    def test_small_model_pt(self, torch_dtype="float32"):
        model_id = "lysandre/tiny-tapas-random-wtq"
-        model = AutoModelForTableQuestionAnswering.from_pretrained(model_id)
+        model = AutoModelForTableQuestionAnswering.from_pretrained(model_id, torch_dtype=torch_dtype)
        tokenizer = AutoTokenizer.from_pretrained(model_id)
        self.assertIsInstance(model.config.aggregation_labels, dict)
        self.assertIsInstance(model.config.no_aggregation_label_index, int)
@@ -255,9 +255,14 @@ class TQAPipelineTests(unittest.TestCase):

    @unittest.skipIf(not is_torch_greater_or_equal_than_1_12, reason="Tapas is only available in torch v1.12+")
    @require_torch
-    def test_slow_tokenizer_sqa_pt(self):
+    def test_small_model_pt_fp16(self):
+        self.test_small_model_pt(torch_dtype="float16")
+
+    @unittest.skipIf(not is_torch_greater_or_equal_than_1_12, reason="Tapas is only available in torch v1.12+")
+    @require_torch
+    def test_slow_tokenizer_sqa_pt(self, torch_dtype="float32"):
        model_id = "lysandre/tiny-tapas-random-sqa"
-        model = AutoModelForTableQuestionAnswering.from_pretrained(model_id)
+        model = AutoModelForTableQuestionAnswering.from_pretrained(model_id, torch_dtype=torch_dtype)
        tokenizer = AutoTokenizer.from_pretrained(model_id)
        table_querier = TableQuestionAnsweringPipeline(model=model, tokenizer=tokenizer)

@@ -373,6 +378,11 @@ class TQAPipelineTests(unittest.TestCase):
                },
            )

+    @unittest.skipIf(not is_torch_greater_or_equal_than_1_12, reason="Tapas is only available in torch v1.12+")
+    @require_torch
+    def test_slow_tokenizer_sqa_pt_fp16(self):
+        self.test_slow_tokenizer_sqa_pt(torch_dtype="float16")
+
    @require_tf
    @require_tensorflow_probability
    @require_pandas
@@ -498,8 +508,8 @@ class TQAPipelineTests(unittest.TestCase):
    @unittest.skipIf(not is_torch_greater_or_equal_than_1_12, reason="Tapas is only available in torch v1.12+")
    @slow
    @require_torch
-    def test_integration_wtq_pt(self):
-        table_querier = pipeline("table-question-answering")
+    def test_integration_wtq_pt(self, torch_dtype="float32"):
+        table_querier = pipeline("table-question-answering", torch_dtype=torch_dtype)

        data = {
            "Repository": ["Transformers", "Datasets", "Tokenizers"],
@@ -541,6 +551,12 @@ class TQAPipelineTests(unittest.TestCase):
        ]
        self.assertListEqual(results, expected_results)

+    @unittest.skipIf(not is_torch_greater_or_equal_than_1_12, reason="Tapas is only available in torch v1.12+")
+    @slow
+    @require_torch
+    def test_integration_wtq_pt_fp16(self):
+        self.test_integration_wtq_pt(torch_dtype="float16")
+
    @slow
    @require_tensorflow_probability
    @require_pandas
@@ -593,11 +609,12 @@ class TQAPipelineTests(unittest.TestCase):
    @unittest.skipIf(not is_torch_greater_or_equal_than_1_12, reason="Tapas is only available in torch v1.12+")
    @slow
    @require_torch
-    def test_integration_sqa_pt(self):
+    def test_integration_sqa_pt(self, torch_dtype="float32"):
        table_querier = pipeline(
            "table-question-answering",
            model="google/tapas-base-finetuned-sqa",
            tokenizer="google/tapas-base-finetuned-sqa",
+            torch_dtype=torch_dtype,
        )
        data = {
            "Actors": ["Brad Pitt", "Leonardo Di Caprio", "George Clooney"],
@@ -615,6 +632,12 @@ class TQAPipelineTests(unittest.TestCase):
        ]
        self.assertListEqual(results, expected_results)

+    @unittest.skipIf(not is_torch_greater_or_equal_than_1_12, reason="Tapas is only available in torch v1.12+")
+    @slow
+    @require_torch
+    def test_integration_sqa_pt_fp16(self):
+        self.test_integration_sqa_pt(torch_dtype="float16")
+
    @slow
    @require_tensorflow_probability
    @require_pandas
@@ -645,11 +668,12 @@ class TQAPipelineTests(unittest.TestCase):

    @slow
    @require_torch
-    def test_large_model_pt_tapex(self):
+    def test_large_model_pt_tapex(self, torch_dtype="float32"):
        model_id = "microsoft/tapex-large-finetuned-wtq"
        table_querier = pipeline(
            "table-question-answering",
            model=model_id,
+            torch_dtype=torch_dtype,
        )
        data = {
            "Actors": ["Brad Pitt", "Leonardo Di Caprio", "George Clooney"],
--- a/tests/pipelines/test_pipelines_text2text_generation.py
+++ b/tests/pipelines/test_pipelines_text2text_generation.py
@@ -35,8 +35,8 @@ class Text2TextGenerationPipelineTests(unittest.TestCase):
    model_mapping = MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING
    tf_model_mapping = TF_MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING

-    def get_test_pipeline(self, model, tokenizer, processor):
-        generator = Text2TextGenerationPipeline(model=model, tokenizer=tokenizer)
+    def get_test_pipeline(self, model, tokenizer, processor, torch_dtype="float32"):
+        generator = Text2TextGenerationPipeline(model=model, tokenizer=tokenizer, torch_dtype=torch_dtype)
        return generator, ["Something to write", "Something else"]

    def run_pipeline_test(self, generator, _):
--- a/tests/pipelines/test_pipelines_text_classification.py
+++ b/tests/pipelines/test_pipelines_text_classification.py
@@ -179,8 +179,8 @@ class TextClassificationPipelineTests(unittest.TestCase):
        outputs = text_classifier("Birds are a type of animal")
        self.assertEqual(nested_simplify(outputs), [{"label": "POSITIVE", "score": 0.988}])

-    def get_test_pipeline(self, model, tokenizer, processor):
-        text_classifier = TextClassificationPipeline(model=model, tokenizer=tokenizer)
+    def get_test_pipeline(self, model, tokenizer, processor, torch_dtype="float32"):
+        text_classifier = TextClassificationPipeline(model=model, tokenizer=tokenizer, torch_dtype=torch_dtype)
        return text_classifier, ["HuggingFace is in", "This is another test"]

    def run_pipeline_test(self, text_classifier, _):
--- a/tests/pipelines/test_pipelines_text_generation.py
+++ b/tests/pipelines/test_pipelines_text_generation.py
@@ -320,8 +320,8 @@ class TextGenerationPipelineTests(unittest.TestCase):
            ],
        )

-    def get_test_pipeline(self, model, tokenizer, processor):
-        text_generator = TextGenerationPipeline(model=model, tokenizer=tokenizer)
+    def get_test_pipeline(self, model, tokenizer, processor, torch_dtype="float32"):
+        text_generator = TextGenerationPipeline(model=model, tokenizer=tokenizer, torch_dtype=torch_dtype)
        return text_generator, ["This is a test", "Another test"]

    def test_stop_sequence_stopping_criteria(self):
--- a/tests/pipelines/test_pipelines_text_to_audio.py
+++ b/tests/pipelines/test_pipelines_text_to_audio.py
@@ -250,8 +250,8 @@ class TextToAudioPipelineTests(unittest.TestCase):
        outputs = music_generator("This is a test", forward_params=forward_params, generate_kwargs=generate_kwargs)
        self.assertListEqual(outputs["audio"].tolist(), audio.tolist())

-    def get_test_pipeline(self, model, tokenizer, processor):
-        speech_generator = TextToAudioPipeline(model=model, tokenizer=tokenizer)
+    def get_test_pipeline(self, model, tokenizer, processor, torch_dtype="float32"):
+        speech_generator = TextToAudioPipeline(model=model, tokenizer=tokenizer, torch_dtype=torch_dtype)
        return speech_generator, ["This is a test", "Another test"]

    def run_pipeline_test(self, speech_generator, _):
--- a/tests/pipelines/test_pipelines_token_classification.py
+++ b/tests/pipelines/test_pipelines_token_classification.py
@@ -56,8 +56,8 @@ class TokenClassificationPipelineTests(unittest.TestCase):
            config: model for config, model in tf_model_mapping.items() if config.__name__ not in _TO_SKIP
        }

-    def get_test_pipeline(self, model, tokenizer, processor):
-        token_classifier = TokenClassificationPipeline(model=model, tokenizer=tokenizer)
+    def get_test_pipeline(self, model, tokenizer, processor, torch_dtype="float32"):
+        token_classifier = TokenClassificationPipeline(model=model, tokenizer=tokenizer, torch_dtype=torch_dtype)
        return token_classifier, ["A simple string", "A simple string that is quite a bit longer"]

    def run_pipeline_test(self, token_classifier, _):
--- a/tests/pipelines/test_pipelines_translation.py
+++ b/tests/pipelines/test_pipelines_translation.py
@@ -35,12 +35,14 @@ class TranslationPipelineTests(unittest.TestCase):
    model_mapping = MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING
    tf_model_mapping = TF_MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING

-    def get_test_pipeline(self, model, tokenizer, processor):
+    def get_test_pipeline(self, model, tokenizer, processor, torch_dtype="float32"):
        if isinstance(model.config, MBartConfig):
            src_lang, tgt_lang = list(tokenizer.lang_code_to_id.keys())[:2]
-            translator = TranslationPipeline(model=model, tokenizer=tokenizer, src_lang=src_lang, tgt_lang=tgt_lang)
+            translator = TranslationPipeline(
+                model=model, tokenizer=tokenizer, src_lang=src_lang, tgt_lang=tgt_lang, torch_dtype=torch_dtype
+            )
        else:
-            translator = TranslationPipeline(model=model, tokenizer=tokenizer)
+            translator = TranslationPipeline(model=model, tokenizer=tokenizer, torch_dtype=torch_dtype)
        return translator, ["Some string", "Some other text"]

    def run_pipeline_test(self, translator, _):
--- a/tests/pipelines/test_pipelines_video_classification.py
+++ b/tests/pipelines/test_pipelines_video_classification.py
@@ -38,11 +38,13 @@ from .test_pipelines_common import ANY
 class VideoClassificationPipelineTests(unittest.TestCase):
    model_mapping = MODEL_FOR_VIDEO_CLASSIFICATION_MAPPING

-    def get_test_pipeline(self, model, tokenizer, processor):
+    def get_test_pipeline(self, model, tokenizer, processor, torch_dtype="float32"):
        example_video_filepath = hf_hub_download(
            repo_id="nateraw/video-demo", filename="archery.mp4", repo_type="dataset"
        )
-        video_classifier = VideoClassificationPipeline(model=model, image_processor=processor, top_k=2)
+        video_classifier = VideoClassificationPipeline(
+            model=model, image_processor=processor, top_k=2, torch_dtype=torch_dtype
+        )
        examples = [
            example_video_filepath,
            "https://huggingface.co/datasets/nateraw/video-demo/resolve/main/archery.mp4",
--- a/tests/pipelines/test_pipelines_visual_question_answering.py
+++ b/tests/pipelines/test_pipelines_visual_question_answering.py
@@ -55,8 +55,10 @@ else:
 class VisualQuestionAnsweringPipelineTests(unittest.TestCase):
    model_mapping = MODEL_FOR_VISUAL_QUESTION_ANSWERING_MAPPING

-    def get_test_pipeline(self, model, tokenizer, processor):
-        vqa_pipeline = pipeline("visual-question-answering", model="hf-internal-testing/tiny-vilt-random-vqa")
+    def get_test_pipeline(self, model, tokenizer, processor, torch_dtype="float32"):
+        vqa_pipeline = pipeline(
+            "visual-question-answering", model="hf-internal-testing/tiny-vilt-random-vqa", torch_dtype=torch_dtype
+        )
        examples = [
            {
                "image": Image.open("./tests/fixtures/tests_samples/COCO/000000039769.png"),
--- a/tests/pipelines/test_pipelines_zero_shot.py
+++ b/tests/pipelines/test_pipelines_zero_shot.py
@@ -42,9 +42,9 @@ class ZeroShotClassificationPipelineTests(unittest.TestCase):
            config: model for config, model in tf_model_mapping.items() if config.__name__ not in _TO_SKIP
        }

-    def get_test_pipeline(self, model, tokenizer, processor):
+    def get_test_pipeline(self, model, tokenizer, processor, torch_dtype="float32"):
        classifier = ZeroShotClassificationPipeline(
-            model=model, tokenizer=tokenizer, candidate_labels=["polics", "health"]
+            model=model, tokenizer=tokenizer, candidate_labels=["polics", "health"], torch_dtype=torch_dtype
        )
        return classifier, ["Who are you voting for in 2020?", "My stomach hurts."]

--- a/tests/pipelines/test_pipelines_zero_shot_audio_classification.py
+++ b/tests/pipelines/test_pipelines_zero_shot_audio_classification.py
@@ -28,9 +28,11 @@ class ZeroShotAudioClassificationPipelineTests(unittest.TestCase):
    # model_mapping = {CLAPConfig: CLAPModel}

    @require_torch
-    def test_small_model_pt(self):
+    def test_small_model_pt(self, torch_dtype="float32"):
        audio_classifier = pipeline(
-            task="zero-shot-audio-classification", model="hf-internal-testing/tiny-clap-htsat-unfused"
+            task="zero-shot-audio-classification",
+            model="hf-internal-testing/tiny-clap-htsat-unfused",
+            torch_dtype=torch_dtype,
        )
        dataset = load_dataset("hf-internal-testing/ashraq-esc50-1-dog-example")
        audio = dataset["train"]["audio"][-1]["array"]
@@ -40,6 +42,10 @@ class ZeroShotAudioClassificationPipelineTests(unittest.TestCase):
            [{"score": 0.501, "label": "Sound of a dog"}, {"score": 0.499, "label": "Sound of vaccum cleaner"}],
        )

+    @require_torch
+    def test_small_model_pt_fp16(self):
+        self.test_small_model_pt(torch_dtype="float16")
+
    @unittest.skip(reason="No models are available in TF")
    def test_small_model_tf(self):
        pass
--- a/tests/pipelines/test_pipelines_zero_shot_image_classification.py
+++ b/tests/pipelines/test_pipelines_zero_shot_image_classification.py
@@ -71,9 +71,9 @@ class ZeroShotImageClassificationPipelineTests(unittest.TestCase):
    #     outputs = pipe([image] * 3, batch_size=2, candidate_labels=["A", "B"])

    @require_torch
-    def test_small_model_pt(self):
+    def test_small_model_pt(self, torch_dtype="float32"):
        image_classifier = pipeline(
-            model="hf-internal-testing/tiny-random-clip-zero-shot-image-classification",
+            model="hf-internal-testing/tiny-random-clip-zero-shot-image-classification", torch_dtype=torch_dtype
        )
        image = Image.open("./tests/fixtures/tests_samples/COCO/000000039769.png")
        output = image_classifier(image, candidate_labels=["a", "b", "c"])
@@ -127,6 +127,10 @@ class ZeroShotImageClassificationPipelineTests(unittest.TestCase):
            ],
        )

+    @require_torch
+    def test_small_model_pt_fp16(self):
+        self.test_small_model_pt(torch_dtype="float16")
+
    @require_tf
    def test_small_model_tf(self):
        image_classifier = pipeline(
--- a/tests/pipelines/test_pipelines_zero_shot_object_detection.py
+++ b/tests/pipelines/test_pipelines_zero_shot_object_detection.py
@@ -43,9 +43,11 @@ else:
 class ZeroShotObjectDetectionPipelineTests(unittest.TestCase):
    model_mapping = MODEL_FOR_ZERO_SHOT_OBJECT_DETECTION_MAPPING

-    def get_test_pipeline(self, model, tokenizer, processor):
+    def get_test_pipeline(self, model, tokenizer, processor, torch_dtype="float32"):
        object_detector = pipeline(
-            "zero-shot-object-detection", model="hf-internal-testing/tiny-random-owlvit-object-detection"
+            "zero-shot-object-detection",
+            model="hf-internal-testing/tiny-random-owlvit-object-detection",
+            torch_dtype=torch_dtype,
        )

        examples = [