Image pipelines spec compliance (#33899)
* Update many similar visual pipelines
* Add input tests
* Add ImageToText as well
* Add output tests
* Add output tests
* Add output tests
* OutputElement -> Output
* Correctly test elements
* make fixup
* fix typo in the task list
* Fix VQA testing
* Add copyright to image_classification.py
* Revert changes to VQA pipeline because outputs have differences - will move to another PR
* make fixup
* Remove deprecation warnings
This commit is contained in:
@@ -14,11 +14,13 @@
|
||||
|
||||
import unittest
|
||||
|
||||
from huggingface_hub import DepthEstimationOutput
|
||||
from huggingface_hub.utils import insecure_hashlib
|
||||
|
||||
from transformers import MODEL_FOR_DEPTH_ESTIMATION_MAPPING, is_torch_available, is_vision_available
|
||||
from transformers.pipelines import DepthEstimationPipeline, pipeline
|
||||
from transformers.testing_utils import (
|
||||
compare_pipeline_output_to_hub_spec,
|
||||
is_pipeline_test,
|
||||
nested_simplify,
|
||||
require_tf,
|
||||
@@ -94,6 +96,9 @@ class DepthEstimationPipelineTests(unittest.TestCase):
|
||||
outputs,
|
||||
)
|
||||
|
||||
for single_output in outputs:
|
||||
compare_pipeline_output_to_hub_spec(single_output, DepthEstimationOutput)
|
||||
|
||||
@require_tf
|
||||
@unittest.skip(reason="Depth estimation is not implemented in TF")
|
||||
def test_small_model_tf(self):
|
||||
|
||||
@@ -14,6 +14,8 @@
|
||||
|
||||
import unittest
|
||||
|
||||
from huggingface_hub import ImageClassificationOutputElement
|
||||
|
||||
from transformers import (
|
||||
MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING,
|
||||
TF_MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING,
|
||||
@@ -23,6 +25,7 @@ from transformers import (
|
||||
)
|
||||
from transformers.pipelines import ImageClassificationPipeline, pipeline
|
||||
from transformers.testing_utils import (
|
||||
compare_pipeline_output_to_hub_spec,
|
||||
is_pipeline_test,
|
||||
nested_simplify,
|
||||
require_tf,
|
||||
@@ -121,6 +124,10 @@ class ImageClassificationPipelineTests(unittest.TestCase):
|
||||
],
|
||||
)
|
||||
|
||||
for single_output in outputs:
|
||||
for output_element in single_output:
|
||||
compare_pipeline_output_to_hub_spec(output_element, ImageClassificationOutputElement)
|
||||
|
||||
@require_torch
|
||||
def test_small_model_pt(self):
|
||||
small_model = "hf-internal-testing/tiny-random-vit"
|
||||
|
||||
@@ -20,6 +20,7 @@ import datasets
|
||||
import numpy as np
|
||||
import requests
|
||||
from datasets import load_dataset
|
||||
from huggingface_hub import ImageSegmentationOutputElement
|
||||
from huggingface_hub.utils import insecure_hashlib
|
||||
|
||||
from transformers import (
|
||||
@@ -36,6 +37,7 @@ from transformers import (
|
||||
pipeline,
|
||||
)
|
||||
from transformers.testing_utils import (
|
||||
compare_pipeline_output_to_hub_spec,
|
||||
is_pipeline_test,
|
||||
nested_simplify,
|
||||
require_tf,
|
||||
@@ -168,6 +170,10 @@ class ImageSegmentationPipelineTests(unittest.TestCase):
|
||||
f"Expected [{n}, {n}, {n}, {n}, {n}], got {[len(item) for item in outputs]}",
|
||||
)
|
||||
|
||||
for single_output in outputs:
|
||||
for output_element in single_output:
|
||||
compare_pipeline_output_to_hub_spec(output_element, ImageSegmentationOutputElement)
|
||||
|
||||
@require_tf
|
||||
@unittest.skip(reason="Image segmentation not implemented in TF")
|
||||
def test_small_model_tf(self):
|
||||
|
||||
@@ -15,10 +15,12 @@
|
||||
import unittest
|
||||
|
||||
import requests
|
||||
from huggingface_hub import ImageToTextOutput
|
||||
|
||||
from transformers import MODEL_FOR_VISION_2_SEQ_MAPPING, TF_MODEL_FOR_VISION_2_SEQ_MAPPING, is_vision_available
|
||||
from transformers.pipelines import ImageToTextPipeline, pipeline
|
||||
from transformers.testing_utils import (
|
||||
compare_pipeline_output_to_hub_spec,
|
||||
is_pipeline_test,
|
||||
require_tf,
|
||||
require_torch,
|
||||
@@ -103,6 +105,9 @@ class ImageToTextPipelineTests(unittest.TestCase):
|
||||
[{"generated_text": "growth"}],
|
||||
)
|
||||
|
||||
for single_output in outputs:
|
||||
compare_pipeline_output_to_hub_spec(single_output, ImageToTextOutput)
|
||||
|
||||
@require_torch
|
||||
def test_small_model_pt(self):
|
||||
pipe = pipeline("image-to-text", model="hf-internal-testing/tiny-random-vit-gpt2")
|
||||
|
||||
@@ -14,6 +14,8 @@
|
||||
|
||||
import unittest
|
||||
|
||||
from huggingface_hub import ObjectDetectionOutputElement
|
||||
|
||||
from transformers import (
|
||||
MODEL_FOR_OBJECT_DETECTION_MAPPING,
|
||||
AutoFeatureExtractor,
|
||||
@@ -22,7 +24,8 @@ from transformers import (
|
||||
is_vision_available,
|
||||
pipeline,
|
||||
)
|
||||
from transformers.testing_utils import (
|
||||
from transformers.testing_utils import ( #
|
||||
compare_pipeline_output_to_hub_spec,
|
||||
is_pipeline_test,
|
||||
nested_simplify,
|
||||
require_pytesseract,
|
||||
@@ -101,6 +104,7 @@ class ObjectDetectionPipelineTests(unittest.TestCase):
|
||||
"box": {"xmin": ANY(int), "ymin": ANY(int), "xmax": ANY(int), "ymax": ANY(int)},
|
||||
},
|
||||
)
|
||||
compare_pipeline_output_to_hub_spec(detected_object, ObjectDetectionOutputElement)
|
||||
|
||||
@require_tf
|
||||
@unittest.skip(reason="Object detection not implemented in TF")
|
||||
|
||||
@@ -14,9 +14,12 @@
|
||||
|
||||
import unittest
|
||||
|
||||
from huggingface_hub import ZeroShotImageClassificationOutputElement
|
||||
|
||||
from transformers import is_vision_available
|
||||
from transformers.pipelines import pipeline
|
||||
from transformers.testing_utils import (
|
||||
compare_pipeline_output_to_hub_spec,
|
||||
is_pipeline_test,
|
||||
nested_simplify,
|
||||
require_tf,
|
||||
@@ -127,6 +130,9 @@ class ZeroShotImageClassificationPipelineTests(unittest.TestCase):
|
||||
],
|
||||
)
|
||||
|
||||
for single_output in output:
|
||||
compare_pipeline_output_to_hub_spec(single_output, ZeroShotImageClassificationOutputElement)
|
||||
|
||||
@require_torch
|
||||
def test_small_model_pt_fp16(self):
|
||||
self.test_small_model_pt(torch_dtype="float16")
|
||||
|
||||
@@ -25,9 +25,27 @@ from pathlib import Path
|
||||
from textwrap import dedent
|
||||
from typing import get_args
|
||||
|
||||
from huggingface_hub import AudioClassificationInput, AutomaticSpeechRecognitionInput
|
||||
from huggingface_hub import (
|
||||
AudioClassificationInput,
|
||||
AutomaticSpeechRecognitionInput,
|
||||
DepthEstimationInput,
|
||||
ImageClassificationInput,
|
||||
ImageSegmentationInput,
|
||||
ImageToTextInput,
|
||||
ObjectDetectionInput,
|
||||
ZeroShotImageClassificationInput,
|
||||
)
|
||||
|
||||
from transformers.pipelines import AudioClassificationPipeline, AutomaticSpeechRecognitionPipeline
|
||||
from transformers.pipelines import (
|
||||
AudioClassificationPipeline,
|
||||
AutomaticSpeechRecognitionPipeline,
|
||||
DepthEstimationPipeline,
|
||||
ImageClassificationPipeline,
|
||||
ImageSegmentationPipeline,
|
||||
ImageToTextPipeline,
|
||||
ObjectDetectionPipeline,
|
||||
ZeroShotImageClassificationPipeline,
|
||||
)
|
||||
from transformers.testing_utils import (
|
||||
is_pipeline_test,
|
||||
require_decord,
|
||||
@@ -105,6 +123,12 @@ task_to_pipeline_and_spec_mapping = {
|
||||
# task spec in the HF Hub
|
||||
"audio-classification": (AudioClassificationPipeline, AudioClassificationInput),
|
||||
"automatic-speech-recognition": (AutomaticSpeechRecognitionPipeline, AutomaticSpeechRecognitionInput),
|
||||
"depth-estimation": (DepthEstimationPipeline, DepthEstimationInput),
|
||||
"image-classification": (ImageClassificationPipeline, ImageClassificationInput),
|
||||
"image-segmentation": (ImageSegmentationPipeline, ImageSegmentationInput),
|
||||
"image-to-text": (ImageToTextPipeline, ImageToTextInput),
|
||||
"object-detection": (ObjectDetectionPipeline, ObjectDetectionInput),
|
||||
"zero-shot-image-classification": (ZeroShotImageClassificationPipeline, ZeroShotImageClassificationInput),
|
||||
}
|
||||
|
||||
for task, task_info in pipeline_test_mapping.items():
|
||||
|
||||
Reference in New Issue
Block a user