Supporting ImageProcessor in place of FeatureExtractor for pipelines (#20851)
* Fixing the pipeline with image processor. * Update the slow test. * Using only the first image processor. * Include exclusion mechanism for Image processor. * Do not handle Gitconfig, deemed as a bug. * Apply suggestions from code review Co-authored-by: amyeroberts <22614925+amyeroberts@users.noreply.github.com> * Remove `conversational` changes. They are not supposed to be here. * Address first row of comments. * Remove OneFormer modifications. Co-authored-by: amyeroberts <22614925+amyeroberts@users.noreply.github.com>
This commit is contained in:
@@ -31,8 +31,10 @@ from huggingface_hub import model_info
|
||||
from ..configuration_utils import PretrainedConfig
|
||||
from ..dynamic_module_utils import get_class_from_dynamic_module
|
||||
from ..feature_extraction_utils import PreTrainedFeatureExtractor
|
||||
from ..image_processing_utils import BaseImageProcessor
|
||||
from ..models.auto.configuration_auto import AutoConfig
|
||||
from ..models.auto.feature_extraction_auto import FEATURE_EXTRACTOR_MAPPING, AutoFeatureExtractor
|
||||
from ..models.auto.image_processing_auto import IMAGE_PROCESSOR_MAPPING, AutoImageProcessor
|
||||
from ..models.auto.modeling_auto import AutoModelForDepthEstimation
|
||||
from ..models.auto.tokenization_auto import TOKENIZER_MAPPING, AutoTokenizer
|
||||
from ..tokenization_utils import PreTrainedTokenizer
|
||||
@@ -374,6 +376,7 @@ SUPPORTED_TASKS = {
|
||||
}
|
||||
|
||||
NO_FEATURE_EXTRACTOR_TASKS = set()
|
||||
NO_IMAGE_PROCESSOR_TASKS = set()
|
||||
NO_TOKENIZER_TASKS = set()
|
||||
# Those model configs are special, they are generic over their task, meaning
|
||||
# any tokenizer/feature_extractor might be used for a given model so we cannot
|
||||
@@ -383,6 +386,7 @@ MULTI_MODEL_CONFIGS = {"SpeechEncoderDecoderConfig", "VisionEncoderDecoderConfig
|
||||
for task, values in SUPPORTED_TASKS.items():
|
||||
if values["type"] == "text":
|
||||
NO_FEATURE_EXTRACTOR_TASKS.add(task)
|
||||
NO_IMAGE_PROCESSOR_TASKS.add(task)
|
||||
elif values["type"] in {"audio", "image", "video"}:
|
||||
NO_TOKENIZER_TASKS.add(task)
|
||||
elif values["type"] != "multimodal":
|
||||
@@ -482,6 +486,7 @@ def pipeline(
|
||||
config: Optional[Union[str, PretrainedConfig]] = None,
|
||||
tokenizer: Optional[Union[str, PreTrainedTokenizer, PreTrainedTokenizerFast]] = None,
|
||||
feature_extractor: Optional[Union[str, PreTrainedFeatureExtractor]] = None,
|
||||
image_processor: Optional[Union[str, BaseImageProcessor]] = None,
|
||||
framework: Optional[str] = None,
|
||||
revision: Optional[str] = None,
|
||||
use_fast: bool = True,
|
||||
@@ -766,6 +771,7 @@ def pipeline(
|
||||
|
||||
load_tokenizer = type(model_config) in TOKENIZER_MAPPING or model_config.tokenizer_class is not None
|
||||
load_feature_extractor = type(model_config) in FEATURE_EXTRACTOR_MAPPING or feature_extractor is not None
|
||||
load_image_processor = type(model_config) in IMAGE_PROCESSOR_MAPPING or image_processor is not None
|
||||
|
||||
if (
|
||||
tokenizer is None
|
||||
@@ -799,6 +805,8 @@ def pipeline(
|
||||
|
||||
if task in NO_FEATURE_EXTRACTOR_TASKS:
|
||||
load_feature_extractor = False
|
||||
if task in NO_IMAGE_PROCESSOR_TASKS:
|
||||
load_image_processor = False
|
||||
|
||||
if load_tokenizer:
|
||||
# Try to infer tokenizer from model or config name (if provided as str)
|
||||
@@ -829,6 +837,27 @@ def pipeline(
|
||||
tokenizer_identifier, use_fast=use_fast, _from_pipeline=task, **hub_kwargs, **tokenizer_kwargs
|
||||
)
|
||||
|
||||
if load_image_processor:
|
||||
# Try to infer image processor from model or config name (if provided as str)
|
||||
if image_processor is None:
|
||||
if isinstance(model_name, str):
|
||||
image_processor = model_name
|
||||
elif isinstance(config, str):
|
||||
image_processor = config
|
||||
else:
|
||||
# Impossible to guess what is the right image_processor here
|
||||
raise Exception(
|
||||
"Impossible to guess which image processor to use. "
|
||||
"Please provide a PreTrainedImageProcessor class or a path/identifier "
|
||||
"to a pretrained image processor."
|
||||
)
|
||||
|
||||
# Instantiate image_processor if needed
|
||||
if isinstance(image_processor, (str, tuple)):
|
||||
image_processor = AutoImageProcessor.from_pretrained(
|
||||
image_processor, _from_pipeline=task, **hub_kwargs, **model_kwargs
|
||||
)
|
||||
|
||||
if load_feature_extractor:
|
||||
# Try to infer feature extractor from model or config name (if provided as str)
|
||||
if feature_extractor is None:
|
||||
@@ -897,6 +926,9 @@ def pipeline(
|
||||
if torch_dtype is not None:
|
||||
kwargs["torch_dtype"] = torch_dtype
|
||||
|
||||
if image_processor is not None:
|
||||
kwargs["image_processor"] = image_processor
|
||||
|
||||
if device is not None:
|
||||
kwargs["device"] = device
|
||||
|
||||
|
||||
@@ -31,6 +31,7 @@ from packaging import version
|
||||
|
||||
from ..dynamic_module_utils import custom_object_save
|
||||
from ..feature_extraction_utils import PreTrainedFeatureExtractor
|
||||
from ..image_processing_utils import BaseImageProcessor
|
||||
from ..modelcard import ModelCard
|
||||
from ..models.auto.configuration_auto import AutoConfig
|
||||
from ..tokenization_utils import PreTrainedTokenizer
|
||||
@@ -743,6 +744,7 @@ class Pipeline(_ScikitCompat):
|
||||
model: Union["PreTrainedModel", "TFPreTrainedModel"],
|
||||
tokenizer: Optional[PreTrainedTokenizer] = None,
|
||||
feature_extractor: Optional[PreTrainedFeatureExtractor] = None,
|
||||
image_processor: Optional[BaseImageProcessor] = None,
|
||||
modelcard: Optional[ModelCard] = None,
|
||||
framework: Optional[str] = None,
|
||||
task: str = "",
|
||||
@@ -759,6 +761,7 @@ class Pipeline(_ScikitCompat):
|
||||
self.model = model
|
||||
self.tokenizer = tokenizer
|
||||
self.feature_extractor = feature_extractor
|
||||
self.image_processor = image_processor
|
||||
self.modelcard = modelcard
|
||||
self.framework = framework
|
||||
if is_torch_available() and self.framework == "pt":
|
||||
@@ -1012,7 +1015,9 @@ class Pipeline(_ScikitCompat):
|
||||
if "TOKENIZERS_PARALLELISM" not in os.environ:
|
||||
logger.info("Disabling tokenizer parallelism, we're using DataLoader multithreading already")
|
||||
os.environ["TOKENIZERS_PARALLELISM"] = "false"
|
||||
collate_fn = no_collate_fn if batch_size == 1 else pad_collate_fn(self.tokenizer, self.feature_extractor)
|
||||
# TODO hack by collating feature_extractor and image_processor
|
||||
feature_extractor = self.feature_extractor if self.feature_extractor is not None else self.image_processor
|
||||
collate_fn = no_collate_fn if batch_size == 1 else pad_collate_fn(self.tokenizer, feature_extractor)
|
||||
dataloader = DataLoader(dataset, num_workers=num_workers, batch_size=batch_size, collate_fn=collate_fn)
|
||||
model_iterator = PipelineIterator(dataloader, self.forward, forward_params, loader_batch_size=batch_size)
|
||||
final_iterator = PipelineIterator(model_iterator, self.postprocess, postprocess_params)
|
||||
@@ -1121,7 +1126,10 @@ class ChunkPipeline(Pipeline):
|
||||
)
|
||||
num_workers = 1
|
||||
dataset = PipelineChunkIterator(inputs, self.preprocess, preprocess_params)
|
||||
collate_fn = no_collate_fn if batch_size == 1 else pad_collate_fn(self.tokenizer, self.feature_extractor)
|
||||
|
||||
# TODO hack by collating feature_extractor and image_processor
|
||||
feature_extractor = self.feature_extractor if self.feature_extractor is not None else self.image_processor
|
||||
collate_fn = no_collate_fn if batch_size == 1 else pad_collate_fn(self.tokenizer, feature_extractor)
|
||||
dataloader = DataLoader(dataset, num_workers=num_workers, batch_size=batch_size, collate_fn=collate_fn)
|
||||
model_iterator = PipelinePackIterator(dataloader, self.forward, forward_params, loader_batch_size=batch_size)
|
||||
final_iterator = PipelineIterator(model_iterator, self.postprocess, postprocess_params)
|
||||
|
||||
@@ -67,6 +67,12 @@ class ImageSegmentationPipeline(Pipeline):
|
||||
def __init__(self, *args, **kwargs):
|
||||
super().__init__(*args, **kwargs)
|
||||
|
||||
if self.image_processor is None and self.feature_extractor is not None:
|
||||
# Backward compatible change, if users called
|
||||
# ImageSegmentationPipeline(.., feature_extractor=MyFeatureExtractor())
|
||||
# then we should keep working
|
||||
self.image_processor = self.feature_extractor
|
||||
|
||||
if self.framework == "tf":
|
||||
raise ValueError(f"The {self.__class__} is only available in PyTorch.")
|
||||
|
||||
@@ -137,7 +143,7 @@ class ImageSegmentationPipeline(Pipeline):
|
||||
def preprocess(self, image):
|
||||
image = load_image(image)
|
||||
target_size = [(image.height, image.width)]
|
||||
inputs = self.feature_extractor(images=[image], return_tensors="pt")
|
||||
inputs = self.image_processor(images=[image], return_tensors="pt")
|
||||
inputs["target_size"] = target_size
|
||||
return inputs
|
||||
|
||||
@@ -152,10 +158,10 @@ class ImageSegmentationPipeline(Pipeline):
|
||||
):
|
||||
|
||||
fn = None
|
||||
if subtask in {"panoptic", None} and hasattr(self.feature_extractor, "post_process_panoptic_segmentation"):
|
||||
fn = self.feature_extractor.post_process_panoptic_segmentation
|
||||
elif subtask in {"instance", None} and hasattr(self.feature_extractor, "post_process_instance_segmentation"):
|
||||
fn = self.feature_extractor.post_process_instance_segmentation
|
||||
if subtask in {"panoptic", None} and hasattr(self.image_processor, "post_process_panoptic_segmentation"):
|
||||
fn = self.image_processor.post_process_panoptic_segmentation
|
||||
elif subtask in {"instance", None} and hasattr(self.image_processor, "post_process_instance_segmentation"):
|
||||
fn = self.image_processor.post_process_instance_segmentation
|
||||
|
||||
if fn is not None:
|
||||
outputs = fn(
|
||||
@@ -176,8 +182,8 @@ class ImageSegmentationPipeline(Pipeline):
|
||||
score = segment["score"]
|
||||
annotation.append({"score": score, "label": label, "mask": mask})
|
||||
|
||||
elif subtask in {"semantic", None} and hasattr(self.feature_extractor, "post_process_semantic_segmentation"):
|
||||
outputs = self.feature_extractor.post_process_semantic_segmentation(
|
||||
elif subtask in {"semantic", None} and hasattr(self.image_processor, "post_process_semantic_segmentation"):
|
||||
outputs = self.image_processor.post_process_semantic_segmentation(
|
||||
model_outputs, target_sizes=model_outputs["target_size"]
|
||||
)[0]
|
||||
|
||||
|
||||
@@ -29,9 +29,6 @@ import numpy as np
|
||||
from .import_utils import is_flax_available, is_tf_available, is_torch_available, is_torch_fx_proxy
|
||||
|
||||
|
||||
if is_tf_available():
|
||||
import tensorflow as tf
|
||||
|
||||
if is_flax_available():
|
||||
import jax.numpy as jnp
|
||||
|
||||
@@ -437,6 +434,8 @@ def transpose(array, axes=None):
|
||||
elif is_torch_tensor(array):
|
||||
return array.T if axes is None else array.permute(*axes)
|
||||
elif is_tf_tensor(array):
|
||||
import tensorflow as tf
|
||||
|
||||
return tf.transpose(array, perm=axes)
|
||||
elif is_jax_tensor(array):
|
||||
return jnp.transpose(array, axes=axes)
|
||||
@@ -454,6 +453,8 @@ def reshape(array, newshape):
|
||||
elif is_torch_tensor(array):
|
||||
return array.reshape(*newshape)
|
||||
elif is_tf_tensor(array):
|
||||
import tensorflow as tf
|
||||
|
||||
return tf.reshape(array, newshape)
|
||||
elif is_jax_tensor(array):
|
||||
return jnp.reshape(array, newshape)
|
||||
@@ -471,6 +472,8 @@ def squeeze(array, axis=None):
|
||||
elif is_torch_tensor(array):
|
||||
return array.squeeze() if axis is None else array.squeeze(dim=axis)
|
||||
elif is_tf_tensor(array):
|
||||
import tensorflow as tf
|
||||
|
||||
return tf.squeeze(array, axis=axis)
|
||||
elif is_jax_tensor(array):
|
||||
return jnp.squeeze(array, axis=axis)
|
||||
@@ -488,6 +491,8 @@ def expand_dims(array, axis):
|
||||
elif is_torch_tensor(array):
|
||||
return array.unsqueeze(dim=axis)
|
||||
elif is_tf_tensor(array):
|
||||
import tensorflow as tf
|
||||
|
||||
return tf.expand_dims(array, axis=axis)
|
||||
elif is_jax_tensor(array):
|
||||
return jnp.expand_dims(array, axis=axis)
|
||||
@@ -504,6 +509,8 @@ def tensor_size(array):
|
||||
elif is_torch_tensor(array):
|
||||
return array.numel()
|
||||
elif is_tf_tensor(array):
|
||||
import tensorflow as tf
|
||||
|
||||
return tf.size(array)
|
||||
elif is_jax_tensor(array):
|
||||
return array.size
|
||||
|
||||
@@ -27,7 +27,7 @@ from .test_pipelines_common import ANY, PipelineTestCaseMeta
|
||||
class AudioClassificationPipelineTests(unittest.TestCase, metaclass=PipelineTestCaseMeta):
|
||||
model_mapping = MODEL_FOR_AUDIO_CLASSIFICATION_MAPPING
|
||||
|
||||
def get_test_pipeline(self, model, tokenizer, feature_extractor):
|
||||
def get_test_pipeline(self, model, tokenizer, feature_extractor, image_processor):
|
||||
audio_classifier = AudioClassificationPipeline(model=model, feature_extractor=feature_extractor)
|
||||
|
||||
# test with a raw waveform
|
||||
|
||||
@@ -61,7 +61,7 @@ class AutomaticSpeechRecognitionPipelineTests(unittest.TestCase, metaclass=Pipel
|
||||
+ (MODEL_FOR_CTC_MAPPING.items() if MODEL_FOR_CTC_MAPPING else [])
|
||||
}
|
||||
|
||||
def get_test_pipeline(self, model, tokenizer, feature_extractor):
|
||||
def get_test_pipeline(self, model, tokenizer, feature_extractor, image_processor):
|
||||
if tokenizer is None:
|
||||
# Side effect of no Fast Tokenizer class for these model, so skipping
|
||||
# But the slow tokenizer test should still run as they're quite small
|
||||
|
||||
@@ -33,8 +33,10 @@ from huggingface_hub import HfFolder, Repository, create_repo, delete_repo, set_
|
||||
from requests.exceptions import HTTPError
|
||||
from transformers import (
|
||||
FEATURE_EXTRACTOR_MAPPING,
|
||||
IMAGE_PROCESSOR_MAPPING,
|
||||
TOKENIZER_MAPPING,
|
||||
AutoFeatureExtractor,
|
||||
AutoImageProcessor,
|
||||
AutoModelForSequenceClassification,
|
||||
AutoTokenizer,
|
||||
DistilBertForSequenceClassification,
|
||||
@@ -154,8 +156,6 @@ def get_tiny_feature_extractor_from_checkpoint(checkpoint, tiny_config, feature_
|
||||
feature_extractor = None
|
||||
except Exception:
|
||||
feature_extractor = None
|
||||
if hasattr(tiny_config, "image_size") and feature_extractor:
|
||||
feature_extractor = feature_extractor.__class__(size=tiny_config.image_size, crop_size=tiny_config.image_size)
|
||||
|
||||
# Audio Spectogram Transformer specific.
|
||||
if feature_extractor.__class__.__name__ == "ASTFeatureExtractor":
|
||||
@@ -168,9 +168,28 @@ def get_tiny_feature_extractor_from_checkpoint(checkpoint, tiny_config, feature_
|
||||
feature_extractor = feature_extractor.__class__(
|
||||
feature_size=tiny_config.input_feat_per_channel, num_mel_bins=tiny_config.input_feat_per_channel
|
||||
)
|
||||
# TODO remove this, once those have been moved to `image_processor`.
|
||||
if hasattr(tiny_config, "image_size") and feature_extractor:
|
||||
feature_extractor = feature_extractor.__class__(size=tiny_config.image_size, crop_size=tiny_config.image_size)
|
||||
return feature_extractor
|
||||
|
||||
|
||||
def get_tiny_image_processor_from_checkpoint(checkpoint, tiny_config, image_processor_class):
    """Build an image processor for a tiny test model, or return `None` if none can be created.

    The processor is first loaded with `AutoImageProcessor.from_pretrained(checkpoint)`. If that
    raises, `image_processor_class` (when given) is instantiated with no arguments instead. When
    `tiny_config` exposes an `image_size` attribute, the processor is re-created from its own
    class with both `size` and `crop_size` set to that value.

    Args:
        checkpoint: Identifier passed to `AutoImageProcessor.from_pretrained`.
        tiny_config: Configuration object; only its optional `image_size` attribute is read.
        image_processor_class: Fallback class instantiated without arguments, or `None`.

    Returns:
        The image processor instance, or `None` when neither loading strategy succeeded.
    """
    try:
        image_processor = AutoImageProcessor.from_pretrained(checkpoint)
    except Exception:
        # Best effort on purpose: any loading failure falls back to the default-constructed
        # class, and any failure there simply means "no image processor available".
        image_processor = None
        if image_processor_class is not None:
            try:
                image_processor = image_processor_class()
            except Exception:
                image_processor = None

    # Compare against None explicitly so that a processor object which happens to be falsy
    # (e.g. its class defines `__bool__` or `__len__`) is still resized.
    if hasattr(tiny_config, "image_size") and image_processor is not None:
        image_processor = image_processor.__class__(size=tiny_config.image_size, crop_size=tiny_config.image_size)
    return image_processor
|
||||
|
||||
|
||||
class ANY:
|
||||
def __init__(self, *_types):
|
||||
self._types = _types
|
||||
@@ -184,7 +203,9 @@ class ANY:
|
||||
|
||||
class PipelineTestCaseMeta(type):
|
||||
def __new__(mcs, name, bases, dct):
|
||||
def gen_test(ModelClass, checkpoint, tiny_config, tokenizer_class, feature_extractor_class):
|
||||
def gen_test(
|
||||
ModelClass, checkpoint, tiny_config, tokenizer_class, feature_extractor_class, image_processor_class
|
||||
):
|
||||
@skipIf(
|
||||
tiny_config is None,
|
||||
"TinyConfig does not exist, make sure that you defined a `_CONFIG_FOR_DOC` variable in the modeling"
|
||||
@@ -231,16 +252,21 @@ class PipelineTestCaseMeta(type):
|
||||
self.skipTest(f"Ignoring {ModelClass}, cannot create a simple tokenizer")
|
||||
else:
|
||||
tokenizer = None
|
||||
|
||||
feature_extractor = get_tiny_feature_extractor_from_checkpoint(
|
||||
checkpoint, tiny_config, feature_extractor_class
|
||||
)
|
||||
|
||||
if tokenizer is None and feature_extractor is None:
|
||||
self.skipTest(
|
||||
f"Ignoring {ModelClass}, cannot create a tokenizer or feature_extractor (PerceiverConfig with"
|
||||
" no FastTokenizer ?)"
|
||||
image_processor = get_tiny_image_processor_from_checkpoint(
|
||||
checkpoint, tiny_config, image_processor_class
|
||||
)
|
||||
pipeline, examples = self.get_test_pipeline(model, tokenizer, feature_extractor)
|
||||
|
||||
if tokenizer is None and feature_extractor is None and image_processor:
|
||||
self.skipTest(
|
||||
f"Ignoring {ModelClass}, cannot create a tokenizer or feature_extractor or image_processor"
|
||||
" (PerceiverConfig with no FastTokenizer ?)"
|
||||
)
|
||||
pipeline, examples = self.get_test_pipeline(model, tokenizer, feature_extractor, image_processor)
|
||||
if pipeline is None:
|
||||
# The test can disable itself, but it should be very marginal
|
||||
# Concerns: Wav2Vec2ForCTC without tokenizer test (FastTokenizer don't exist)
|
||||
@@ -283,6 +309,10 @@ class PipelineTestCaseMeta(type):
|
||||
feature_extractor_name = (
|
||||
feature_extractor_class.__name__ if feature_extractor_class else "nofeature_extractor"
|
||||
)
|
||||
image_processor_class = IMAGE_PROCESSOR_MAPPING.get(configuration, None)
|
||||
image_processor_name = (
|
||||
image_processor_class.__name__ if image_processor_class else "noimage_processor"
|
||||
)
|
||||
if not tokenizer_classes:
|
||||
# We need to test even if there are no tokenizers.
|
||||
tokenizer_classes = [None]
|
||||
@@ -300,7 +330,7 @@ class PipelineTestCaseMeta(type):
|
||||
else:
|
||||
tokenizer_name = "notokenizer"
|
||||
|
||||
test_name = f"test_{prefix}_{configuration.__name__}_{model_architecture.__name__}_{tokenizer_name}_{feature_extractor_name}"
|
||||
test_name = f"test_{prefix}_{configuration.__name__}_{model_architecture.__name__}_{tokenizer_name}_{feature_extractor_name}_{image_processor_name}"
|
||||
|
||||
if tokenizer_class is not None or feature_extractor_class is not None:
|
||||
dct[test_name] = gen_test(
|
||||
@@ -309,6 +339,7 @@ class PipelineTestCaseMeta(type):
|
||||
tiny_config,
|
||||
tokenizer_class,
|
||||
feature_extractor_class,
|
||||
image_processor_class,
|
||||
)
|
||||
|
||||
@abstractmethod
|
||||
|
||||
@@ -53,7 +53,7 @@ class ConversationalPipelineTests(unittest.TestCase, metaclass=PipelineTestCaseM
|
||||
else []
|
||||
)
|
||||
|
||||
def get_test_pipeline(self, model, tokenizer, feature_extractor):
|
||||
def get_test_pipeline(self, model, tokenizer, feature_extractor, image_processor):
|
||||
conversation_agent = ConversationalPipeline(model=model, tokenizer=tokenizer)
|
||||
return conversation_agent, [Conversation("Hi there!")]
|
||||
|
||||
|
||||
@@ -47,7 +47,7 @@ class DepthEstimationPipelineTests(unittest.TestCase, metaclass=PipelineTestCase
|
||||
|
||||
model_mapping = MODEL_FOR_DEPTH_ESTIMATION_MAPPING
|
||||
|
||||
def get_test_pipeline(self, model, tokenizer, feature_extractor):
|
||||
def get_test_pipeline(self, model, tokenizer, feature_extractor, image_processor):
|
||||
depth_estimator = DepthEstimationPipeline(model=model, feature_extractor=feature_extractor)
|
||||
return depth_estimator, [
|
||||
"./tests/fixtures/tests_samples/COCO/000000039769.png",
|
||||
|
||||
@@ -59,7 +59,7 @@ class DocumentQuestionAnsweringPipelineTests(unittest.TestCase, metaclass=Pipeli
|
||||
|
||||
@require_pytesseract
|
||||
@require_vision
|
||||
def get_test_pipeline(self, model, tokenizer, feature_extractor):
|
||||
def get_test_pipeline(self, model, tokenizer, feature_extractor, image_processor):
|
||||
dqa_pipeline = pipeline(
|
||||
"document-question-answering", model=model, tokenizer=tokenizer, feature_extractor=feature_extractor
|
||||
)
|
||||
|
||||
@@ -175,7 +175,7 @@ class FeatureExtractionPipelineTests(unittest.TestCase, metaclass=PipelineTestCa
|
||||
raise ValueError("We expect lists of floats, nothing else")
|
||||
return shape
|
||||
|
||||
def get_test_pipeline(self, model, tokenizer, feature_extractor):
|
||||
def get_test_pipeline(self, model, tokenizer, feature_extractor, image_processor):
|
||||
if tokenizer is None:
|
||||
self.skipTest("No tokenizer")
|
||||
return
|
||||
|
||||
@@ -206,7 +206,7 @@ class FillMaskPipelineTests(unittest.TestCase, metaclass=PipelineTestCaseMeta):
|
||||
unmasker.tokenizer.pad_token = None
|
||||
self.run_pipeline_test(unmasker, [])
|
||||
|
||||
def get_test_pipeline(self, model, tokenizer, feature_extractor):
|
||||
def get_test_pipeline(self, model, tokenizer, feature_extractor, image_processor):
|
||||
if tokenizer is None or tokenizer.mask_token_id is None:
|
||||
self.skipTest("The provided tokenizer has no mask token, (probably reformer or wav2vec2)")
|
||||
|
||||
|
||||
@@ -49,7 +49,7 @@ class ImageClassificationPipelineTests(unittest.TestCase, metaclass=PipelineTest
|
||||
model_mapping = MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING
|
||||
tf_model_mapping = TF_MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING
|
||||
|
||||
def get_test_pipeline(self, model, tokenizer, feature_extractor):
|
||||
def get_test_pipeline(self, model, tokenizer, feature_extractor, image_processor):
|
||||
image_classifier = ImageClassificationPipeline(model=model, feature_extractor=feature_extractor, top_k=2)
|
||||
examples = [
|
||||
Image.open("./tests/fixtures/tests_samples/COCO/000000039769.png"),
|
||||
|
||||
@@ -26,6 +26,7 @@ from transformers import (
|
||||
MODEL_FOR_INSTANCE_SEGMENTATION_MAPPING,
|
||||
MODEL_FOR_SEMANTIC_SEGMENTATION_MAPPING,
|
||||
AutoFeatureExtractor,
|
||||
AutoImageProcessor,
|
||||
AutoModelForImageSegmentation,
|
||||
AutoModelForInstanceSegmentation,
|
||||
DetrForSegmentation,
|
||||
@@ -80,8 +81,10 @@ class ImageSegmentationPipelineTests(unittest.TestCase, metaclass=PipelineTestCa
|
||||
+ (MODEL_FOR_INSTANCE_SEGMENTATION_MAPPING.items() if MODEL_FOR_INSTANCE_SEGMENTATION_MAPPING else [])
|
||||
}
|
||||
|
||||
def get_test_pipeline(self, model, tokenizer, feature_extractor):
|
||||
image_segmenter = ImageSegmentationPipeline(model=model, feature_extractor=feature_extractor)
|
||||
def get_test_pipeline(self, model, tokenizer, feature_extractor, image_processor):
|
||||
image_segmenter = ImageSegmentationPipeline(
|
||||
model=model, feature_extractor=feature_extractor, image_processor=image_processor
|
||||
)
|
||||
return image_segmenter, [
|
||||
"./tests/fixtures/tests_samples/COCO/000000039769.png",
|
||||
"./tests/fixtures/tests_samples/COCO/000000039769.png",
|
||||
@@ -139,7 +142,11 @@ class ImageSegmentationPipelineTests(unittest.TestCase, metaclass=PipelineTestCa
|
||||
"./tests/fixtures/tests_samples/COCO/000000039769.png",
|
||||
]
|
||||
outputs = image_segmenter(
|
||||
batch, threshold=0.0, mask_threshold=0, overlap_mask_area_threshold=0, batch_size=batch_size
|
||||
batch,
|
||||
threshold=0.0,
|
||||
mask_threshold=0,
|
||||
overlap_mask_area_threshold=0,
|
||||
batch_size=batch_size,
|
||||
)
|
||||
self.assertEqual(len(batch), len(outputs))
|
||||
self.assertEqual(len(outputs[0]), n)
|
||||
@@ -188,10 +195,10 @@ class ImageSegmentationPipelineTests(unittest.TestCase, metaclass=PipelineTestCa
|
||||
model_id = "hf-internal-testing/tiny-detr-mobilenetsv3-panoptic"
|
||||
|
||||
model = AutoModelForImageSegmentation.from_pretrained(model_id)
|
||||
feature_extractor = AutoFeatureExtractor.from_pretrained(model_id)
|
||||
image_processor = AutoImageProcessor.from_pretrained(model_id)
|
||||
image_segmenter = ImageSegmentationPipeline(
|
||||
model=model,
|
||||
feature_extractor=feature_extractor,
|
||||
image_processor=image_processor,
|
||||
subtask="panoptic",
|
||||
threshold=0.0,
|
||||
mask_threshold=0.0,
|
||||
|
||||
@@ -36,7 +36,7 @@ class ImageToTextPipelineTests(unittest.TestCase, metaclass=PipelineTestCaseMeta
|
||||
model_mapping = MODEL_FOR_VISION_2_SEQ_MAPPING
|
||||
tf_model_mapping = TF_MODEL_FOR_VISION_2_SEQ_MAPPING
|
||||
|
||||
def get_test_pipeline(self, model, tokenizer, feature_extractor):
|
||||
def get_test_pipeline(self, model, tokenizer, feature_extractor, image_processor):
|
||||
pipe = pipeline("image-to-text", model=model, tokenizer=tokenizer, feature_extractor=feature_extractor)
|
||||
examples = [
|
||||
Image.open("./tests/fixtures/tests_samples/COCO/000000039769.png"),
|
||||
|
||||
@@ -51,7 +51,7 @@ else:
|
||||
class ObjectDetectionPipelineTests(unittest.TestCase, metaclass=PipelineTestCaseMeta):
|
||||
model_mapping = MODEL_FOR_OBJECT_DETECTION_MAPPING
|
||||
|
||||
def get_test_pipeline(self, model, tokenizer, feature_extractor):
|
||||
def get_test_pipeline(self, model, tokenizer, feature_extractor, image_processor):
|
||||
object_detector = ObjectDetectionPipeline(model=model, feature_extractor=feature_extractor)
|
||||
return object_detector, ["./tests/fixtures/tests_samples/COCO/000000039769.png"]
|
||||
|
||||
|
||||
@@ -31,7 +31,7 @@ class QAPipelineTests(unittest.TestCase, metaclass=PipelineTestCaseMeta):
|
||||
model_mapping = MODEL_FOR_QUESTION_ANSWERING_MAPPING
|
||||
tf_model_mapping = TF_MODEL_FOR_QUESTION_ANSWERING_MAPPING
|
||||
|
||||
def get_test_pipeline(self, model, tokenizer, feature_extractor):
|
||||
def get_test_pipeline(self, model, tokenizer, feature_extractor, image_processor):
|
||||
if isinstance(model.config, LxmertConfig):
|
||||
# This is an bimodal model, we need to find a more consistent way
|
||||
# to switch on those models.
|
||||
|
||||
@@ -34,7 +34,7 @@ class SummarizationPipelineTests(unittest.TestCase, metaclass=PipelineTestCaseMe
|
||||
model_mapping = MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING
|
||||
tf_model_mapping = TF_MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING
|
||||
|
||||
def get_test_pipeline(self, model, tokenizer, feature_extractor):
|
||||
def get_test_pipeline(self, model, tokenizer, feature_extractor, image_processor):
|
||||
summarizer = SummarizationPipeline(model=model, tokenizer=tokenizer)
|
||||
return summarizer, ["(CNN)The Palestinian Authority officially became", "Some other text"]
|
||||
|
||||
|
||||
@@ -34,7 +34,7 @@ class Text2TextGenerationPipelineTests(unittest.TestCase, metaclass=PipelineTest
|
||||
model_mapping = MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING
|
||||
tf_model_mapping = TF_MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING
|
||||
|
||||
def get_test_pipeline(self, model, tokenizer, feature_extractor):
|
||||
def get_test_pipeline(self, model, tokenizer, feature_extractor, image_processor):
|
||||
generator = Text2TextGenerationPipeline(model=model, tokenizer=tokenizer)
|
||||
return generator, ["Something to write", "Something else"]
|
||||
|
||||
|
||||
@@ -129,7 +129,7 @@ class TextClassificationPipelineTests(unittest.TestCase, metaclass=PipelineTestC
|
||||
outputs = text_classifier("Birds are a type of animal")
|
||||
self.assertEqual(nested_simplify(outputs), [{"label": "POSITIVE", "score": 0.988}])
|
||||
|
||||
def get_test_pipeline(self, model, tokenizer, feature_extractor):
|
||||
def get_test_pipeline(self, model, tokenizer, feature_extractor, image_processor):
|
||||
text_classifier = TextClassificationPipeline(model=model, tokenizer=tokenizer)
|
||||
return text_classifier, ["HuggingFace is in", "This is another test"]
|
||||
|
||||
|
||||
@@ -143,7 +143,7 @@ class TextGenerationPipelineTests(unittest.TestCase, metaclass=PipelineTestCaseM
|
||||
],
|
||||
)
|
||||
|
||||
def get_test_pipeline(self, model, tokenizer, feature_extractor):
|
||||
def get_test_pipeline(self, model, tokenizer, feature_extractor, image_processor):
|
||||
text_generator = TextGenerationPipeline(model=model, tokenizer=tokenizer)
|
||||
return text_generator, ["This is a test", "Another test"]
|
||||
|
||||
|
||||
@@ -37,7 +37,7 @@ class TokenClassificationPipelineTests(unittest.TestCase, metaclass=PipelineTest
|
||||
model_mapping = MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING
|
||||
tf_model_mapping = TF_MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING
|
||||
|
||||
def get_test_pipeline(self, model, tokenizer, feature_extractor):
|
||||
def get_test_pipeline(self, model, tokenizer, feature_extractor, image_processor):
|
||||
token_classifier = TokenClassificationPipeline(model=model, tokenizer=tokenizer)
|
||||
return token_classifier, ["A simple string", "A simple string that is quite a bit longer"]
|
||||
|
||||
|
||||
@@ -34,7 +34,7 @@ class TranslationPipelineTests(unittest.TestCase, metaclass=PipelineTestCaseMeta
|
||||
model_mapping = MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING
|
||||
tf_model_mapping = TF_MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING
|
||||
|
||||
def get_test_pipeline(self, model, tokenizer, feature_extractor):
|
||||
def get_test_pipeline(self, model, tokenizer, feature_extractor, image_processor):
|
||||
if isinstance(model.config, MBartConfig):
|
||||
src_lang, tgt_lang = list(tokenizer.lang_code_to_id.keys())[:2]
|
||||
translator = TranslationPipeline(model=model, tokenizer=tokenizer, src_lang=src_lang, tgt_lang=tgt_lang)
|
||||
|
||||
@@ -35,7 +35,7 @@ from .test_pipelines_common import ANY, PipelineTestCaseMeta
|
||||
class VideoClassificationPipelineTests(unittest.TestCase, metaclass=PipelineTestCaseMeta):
|
||||
model_mapping = MODEL_FOR_VIDEO_CLASSIFICATION_MAPPING
|
||||
|
||||
def get_test_pipeline(self, model, tokenizer, feature_extractor):
|
||||
def get_test_pipeline(self, model, tokenizer, feature_extractor, image_processor):
|
||||
example_video_filepath = hf_hub_download(
|
||||
repo_id="nateraw/video-demo", filename="archery.mp4", repo_type="dataset"
|
||||
)
|
||||
|
||||
@@ -36,7 +36,7 @@ else:
|
||||
class VisualQuestionAnsweringPipelineTests(unittest.TestCase, metaclass=PipelineTestCaseMeta):
|
||||
model_mapping = MODEL_FOR_VISUAL_QUESTION_ANSWERING_MAPPING
|
||||
|
||||
def get_test_pipeline(self, model, tokenizer, feature_extractor):
|
||||
def get_test_pipeline(self, model, tokenizer, feature_extractor, image_processor):
|
||||
vqa_pipeline = pipeline("visual-question-answering", model="hf-internal-testing/tiny-vilt-random-vqa")
|
||||
examples = [
|
||||
{
|
||||
|
||||
@@ -30,7 +30,7 @@ class ZeroShotClassificationPipelineTests(unittest.TestCase, metaclass=PipelineT
|
||||
model_mapping = MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING
|
||||
tf_model_mapping = TF_MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING
|
||||
|
||||
def get_test_pipeline(self, model, tokenizer, feature_extractor):
|
||||
def get_test_pipeline(self, model, tokenizer, feature_extractor, image_processor):
|
||||
classifier = ZeroShotClassificationPipeline(
|
||||
model=model, tokenizer=tokenizer, candidate_labels=["polics", "health"]
|
||||
)
|
||||
|
||||
@@ -37,7 +37,7 @@ class ZeroShotImageClassificationPipelineTests(unittest.TestCase, metaclass=Pipe
|
||||
# and only CLIP would be there for now.
|
||||
# model_mapping = {CLIPConfig: CLIPModel}
|
||||
|
||||
# def get_test_pipeline(self, model, tokenizer, feature_extractor):
|
||||
# def get_test_pipeline(self, model, tokenizer, feature_extractor, image_processor):
|
||||
# if tokenizer is None:
|
||||
# # Side effect of no Fast Tokenizer class for these model, so skipping
|
||||
# # But the slow tokenizer test should still run as they're quite small
|
||||
|
||||
@@ -36,7 +36,7 @@ class ZeroShotObjectDetectionPipelineTests(unittest.TestCase, metaclass=Pipeline
|
||||
|
||||
model_mapping = MODEL_FOR_ZERO_SHOT_OBJECT_DETECTION_MAPPING
|
||||
|
||||
def get_test_pipeline(self, model, tokenizer, feature_extractor):
|
||||
def get_test_pipeline(self, model, tokenizer, feature_extractor, image_processor):
|
||||
object_detector = pipeline(
|
||||
"zero-shot-object-detection", model="hf-internal-testing/tiny-random-owlvit-object-detection"
|
||||
)
|
||||
|
||||
Reference in New Issue
Block a user