Image pipelines spec compliance (#33899)

* Update many similar visual pipelines

* Add input tests

* Add ImageToText as well

* Add output tests

* Add output tests

* Add output tests

* OutputElement -> Output

* Correctly test elements

* make fixup

* Fix typo in the task list

* Fix VQA testing

* Add copyright to image_classification.py

* Revert changes to the VQA pipeline because its outputs differ; these changes will move to another PR

* make fixup

* Remove deprecation warnings
This commit is contained in:
Matt
2024-10-08 13:34:28 +01:00
committed by GitHub
parent e2001c3413
commit 3b44d2f042
13 changed files with 152 additions and 43 deletions

View File

@@ -14,11 +14,13 @@
import unittest
from huggingface_hub import DepthEstimationOutput
from huggingface_hub.utils import insecure_hashlib
from transformers import MODEL_FOR_DEPTH_ESTIMATION_MAPPING, is_torch_available, is_vision_available
from transformers.pipelines import DepthEstimationPipeline, pipeline
from transformers.testing_utils import (
compare_pipeline_output_to_hub_spec,
is_pipeline_test,
nested_simplify,
require_tf,
@@ -94,6 +96,9 @@ class DepthEstimationPipelineTests(unittest.TestCase):
outputs,
)
for single_output in outputs:
compare_pipeline_output_to_hub_spec(single_output, DepthEstimationOutput)
@require_tf
@unittest.skip(reason="Depth estimation is not implemented in TF")
def test_small_model_tf(self):

View File

@@ -14,6 +14,8 @@
import unittest
from huggingface_hub import ImageClassificationOutputElement
from transformers import (
MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING,
TF_MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING,
@@ -23,6 +25,7 @@ from transformers import (
)
from transformers.pipelines import ImageClassificationPipeline, pipeline
from transformers.testing_utils import (
compare_pipeline_output_to_hub_spec,
is_pipeline_test,
nested_simplify,
require_tf,
@@ -121,6 +124,10 @@ class ImageClassificationPipelineTests(unittest.TestCase):
],
)
for single_output in outputs:
for output_element in single_output:
compare_pipeline_output_to_hub_spec(output_element, ImageClassificationOutputElement)
@require_torch
def test_small_model_pt(self):
small_model = "hf-internal-testing/tiny-random-vit"

View File

@@ -20,6 +20,7 @@ import datasets
import numpy as np
import requests
from datasets import load_dataset
from huggingface_hub import ImageSegmentationOutputElement
from huggingface_hub.utils import insecure_hashlib
from transformers import (
@@ -36,6 +37,7 @@ from transformers import (
pipeline,
)
from transformers.testing_utils import (
compare_pipeline_output_to_hub_spec,
is_pipeline_test,
nested_simplify,
require_tf,
@@ -168,6 +170,10 @@ class ImageSegmentationPipelineTests(unittest.TestCase):
f"Expected [{n}, {n}, {n}, {n}, {n}], got {[len(item) for item in outputs]}",
)
for single_output in outputs:
for output_element in single_output:
compare_pipeline_output_to_hub_spec(output_element, ImageSegmentationOutputElement)
@require_tf
@unittest.skip(reason="Image segmentation not implemented in TF")
def test_small_model_tf(self):

View File

@@ -15,10 +15,12 @@
import unittest
import requests
from huggingface_hub import ImageToTextOutput
from transformers import MODEL_FOR_VISION_2_SEQ_MAPPING, TF_MODEL_FOR_VISION_2_SEQ_MAPPING, is_vision_available
from transformers.pipelines import ImageToTextPipeline, pipeline
from transformers.testing_utils import (
compare_pipeline_output_to_hub_spec,
is_pipeline_test,
require_tf,
require_torch,
@@ -103,6 +105,9 @@ class ImageToTextPipelineTests(unittest.TestCase):
[{"generated_text": "growth"}],
)
for single_output in outputs:
compare_pipeline_output_to_hub_spec(single_output, ImageToTextOutput)
@require_torch
def test_small_model_pt(self):
pipe = pipeline("image-to-text", model="hf-internal-testing/tiny-random-vit-gpt2")

View File

@@ -14,6 +14,8 @@
import unittest
from huggingface_hub import ObjectDetectionOutputElement
from transformers import (
MODEL_FOR_OBJECT_DETECTION_MAPPING,
AutoFeatureExtractor,
@@ -22,7 +24,8 @@ from transformers import (
is_vision_available,
pipeline,
)
from transformers.testing_utils import (
from transformers.testing_utils import ( #
compare_pipeline_output_to_hub_spec,
is_pipeline_test,
nested_simplify,
require_pytesseract,
@@ -101,6 +104,7 @@ class ObjectDetectionPipelineTests(unittest.TestCase):
"box": {"xmin": ANY(int), "ymin": ANY(int), "xmax": ANY(int), "ymax": ANY(int)},
},
)
compare_pipeline_output_to_hub_spec(detected_object, ObjectDetectionOutputElement)
@require_tf
@unittest.skip(reason="Object detection not implemented in TF")

View File

@@ -14,9 +14,12 @@
import unittest
from huggingface_hub import ZeroShotImageClassificationOutputElement
from transformers import is_vision_available
from transformers.pipelines import pipeline
from transformers.testing_utils import (
compare_pipeline_output_to_hub_spec,
is_pipeline_test,
nested_simplify,
require_tf,
@@ -127,6 +130,9 @@ class ZeroShotImageClassificationPipelineTests(unittest.TestCase):
],
)
for single_output in output:
compare_pipeline_output_to_hub_spec(single_output, ZeroShotImageClassificationOutputElement)
@require_torch
def test_small_model_pt_fp16(self):
self.test_small_model_pt(torch_dtype="float16")

View File

@@ -25,9 +25,27 @@ from pathlib import Path
from textwrap import dedent
from typing import get_args
from huggingface_hub import AudioClassificationInput, AutomaticSpeechRecognitionInput
from huggingface_hub import (
AudioClassificationInput,
AutomaticSpeechRecognitionInput,
DepthEstimationInput,
ImageClassificationInput,
ImageSegmentationInput,
ImageToTextInput,
ObjectDetectionInput,
ZeroShotImageClassificationInput,
)
from transformers.pipelines import AudioClassificationPipeline, AutomaticSpeechRecognitionPipeline
from transformers.pipelines import (
AudioClassificationPipeline,
AutomaticSpeechRecognitionPipeline,
DepthEstimationPipeline,
ImageClassificationPipeline,
ImageSegmentationPipeline,
ImageToTextPipeline,
ObjectDetectionPipeline,
ZeroShotImageClassificationPipeline,
)
from transformers.testing_utils import (
is_pipeline_test,
require_decord,
@@ -105,6 +123,12 @@ task_to_pipeline_and_spec_mapping = {
# task spec in the HF Hub
"audio-classification": (AudioClassificationPipeline, AudioClassificationInput),
"automatic-speech-recognition": (AutomaticSpeechRecognitionPipeline, AutomaticSpeechRecognitionInput),
"depth-estimation": (DepthEstimationPipeline, DepthEstimationInput),
"image-classification": (ImageClassificationPipeline, ImageClassificationInput),
"image-segmentation": (ImageSegmentationPipeline, ImageSegmentationInput),
"image-to-text": (ImageToTextPipeline, ImageToTextInput),
"object-detection": (ObjectDetectionPipeline, ObjectDetectionInput),
"zero-shot-image-classification": (ZeroShotImageClassificationPipeline, ZeroShotImageClassificationInput),
}
for task, task_info in pipeline_test_mapping.items():