Allow FP16 or other precision inference for Pipelines (#31342)

* cast image features to model.dtype where needed to support FP16 or other precision in pipelines
* Update src/transformers/pipelines/image_feature_extraction.py
  Co-authored-by: amyeroberts <22614925+amyeroberts@users.noreply.github.com>
* Use .to instead
* Add FP16 pipeline support for zeroshot audio classification
* Remove unused torch imports
* Add docs on FP16 pipeline
* Remove unused import
* Add FP16 tests to pipeline mixin
* Add fp16 placeholder for mask_generation pipeline test
* Add FP16 tests for all pipelines
* Fix formatting
* Remove torch_dtype arg from is_pipeline_test_to_skip*
* Fix format
* trigger ci
---------
Co-authored-by: amyeroberts <22614925+amyeroberts@users.noreply.github.com>
2024-07-06 00:21:50 +08:00
parent e786844425
commit ac26260436
45 changed files with 354 additions and 79 deletions
--- a/tests/pipelines/test_pipelines_video_classification.py
+++ b/tests/pipelines/test_pipelines_video_classification.py
@@ -38,11 +38,13 @@ from .test_pipelines_common import ANY
 class VideoClassificationPipelineTests(unittest.TestCase):
    model_mapping = MODEL_FOR_VIDEO_CLASSIFICATION_MAPPING

-    def get_test_pipeline(self, model, tokenizer, processor):
+    def get_test_pipeline(self, model, tokenizer, processor, torch_dtype="float32"):
        example_video_filepath = hf_hub_download(
            repo_id="nateraw/video-demo", filename="archery.mp4", repo_type="dataset"
        )
-        video_classifier = VideoClassificationPipeline(model=model, image_processor=processor, top_k=2)
+        video_classifier = VideoClassificationPipeline(
+            model=model, image_processor=processor, top_k=2, torch_dtype=torch_dtype
+        )
        examples = [
            example_video_filepath,
            "https://huggingface.co/datasets/nateraw/video-demo/resolve/main/archery.mp4",