Supporting ImageProcessor in place of FeatureExtractor for pipelines (#20851)
* Fixing the pipeline with image processor. * Update the slow test. * Using only the first image processor. * Include exclusion mechanism for Image processor. * Do not handle Gitconfig, deemed as a bug. * Apply suggestions from code review Co-authored-by: amyeroberts <22614925+amyeroberts@users.noreply.github.com> * Remove `conversational` changes. They are not supposed to be here. * Address first row of comments. * Remove OneFormer modifications. Co-authored-by: amyeroberts <22614925+amyeroberts@users.noreply.github.com>
This commit is contained in:
@@ -31,8 +31,10 @@ from huggingface_hub import model_info
|
||||
from ..configuration_utils import PretrainedConfig
|
||||
from ..dynamic_module_utils import get_class_from_dynamic_module
|
||||
from ..feature_extraction_utils import PreTrainedFeatureExtractor
|
||||
from ..image_processing_utils import BaseImageProcessor
|
||||
from ..models.auto.configuration_auto import AutoConfig
|
||||
from ..models.auto.feature_extraction_auto import FEATURE_EXTRACTOR_MAPPING, AutoFeatureExtractor
|
||||
from ..models.auto.image_processing_auto import IMAGE_PROCESSOR_MAPPING, AutoImageProcessor
|
||||
from ..models.auto.modeling_auto import AutoModelForDepthEstimation
|
||||
from ..models.auto.tokenization_auto import TOKENIZER_MAPPING, AutoTokenizer
|
||||
from ..tokenization_utils import PreTrainedTokenizer
|
||||
@@ -374,6 +376,7 @@ SUPPORTED_TASKS = {
|
||||
}
|
||||
|
||||
NO_FEATURE_EXTRACTOR_TASKS = set()
|
||||
NO_IMAGE_PROCESSOR_TASKS = set()
|
||||
NO_TOKENIZER_TASKS = set()
|
||||
# Those model configs are special, they are generic over their task, meaning
|
||||
# any tokenizer/feature_extractor might be used for a given model so we cannot
|
||||
@@ -383,6 +386,7 @@ MULTI_MODEL_CONFIGS = {"SpeechEncoderDecoderConfig", "VisionEncoderDecoderConfig
|
||||
for task, values in SUPPORTED_TASKS.items():
|
||||
if values["type"] == "text":
|
||||
NO_FEATURE_EXTRACTOR_TASKS.add(task)
|
||||
NO_IMAGE_PROCESSOR_TASKS.add(task)
|
||||
elif values["type"] in {"audio", "image", "video"}:
|
||||
NO_TOKENIZER_TASKS.add(task)
|
||||
elif values["type"] != "multimodal":
|
||||
@@ -482,6 +486,7 @@ def pipeline(
|
||||
config: Optional[Union[str, PretrainedConfig]] = None,
|
||||
tokenizer: Optional[Union[str, PreTrainedTokenizer, PreTrainedTokenizerFast]] = None,
|
||||
feature_extractor: Optional[Union[str, PreTrainedFeatureExtractor]] = None,
|
||||
image_processor: Optional[Union[str, BaseImageProcessor]] = None,
|
||||
framework: Optional[str] = None,
|
||||
revision: Optional[str] = None,
|
||||
use_fast: bool = True,
|
||||
@@ -766,6 +771,7 @@ def pipeline(
|
||||
|
||||
load_tokenizer = type(model_config) in TOKENIZER_MAPPING or model_config.tokenizer_class is not None
|
||||
load_feature_extractor = type(model_config) in FEATURE_EXTRACTOR_MAPPING or feature_extractor is not None
|
||||
load_image_processor = type(model_config) in IMAGE_PROCESSOR_MAPPING or image_processor is not None
|
||||
|
||||
if (
|
||||
tokenizer is None
|
||||
@@ -799,6 +805,8 @@ def pipeline(
|
||||
|
||||
if task in NO_FEATURE_EXTRACTOR_TASKS:
|
||||
load_feature_extractor = False
|
||||
if task in NO_IMAGE_PROCESSOR_TASKS:
|
||||
load_image_processor = False
|
||||
|
||||
if load_tokenizer:
|
||||
# Try to infer tokenizer from model or config name (if provided as str)
|
||||
@@ -829,6 +837,27 @@ def pipeline(
|
||||
tokenizer_identifier, use_fast=use_fast, _from_pipeline=task, **hub_kwargs, **tokenizer_kwargs
|
||||
)
|
||||
|
||||
if load_image_processor:
|
||||
# Try to infer image processor from model or config name (if provided as str)
|
||||
if image_processor is None:
|
||||
if isinstance(model_name, str):
|
||||
image_processor = model_name
|
||||
elif isinstance(config, str):
|
||||
image_processor = config
|
||||
else:
|
||||
# Impossible to guess what is the right image_processor here
|
||||
raise Exception(
|
||||
"Impossible to guess which image processor to use. "
|
||||
"Please provide a PreTrainedImageProcessor class or a path/identifier "
|
||||
"to a pretrained image processor."
|
||||
)
|
||||
|
||||
# Instantiate image_processor if needed
|
||||
if isinstance(image_processor, (str, tuple)):
|
||||
image_processor = AutoImageProcessor.from_pretrained(
|
||||
image_processor, _from_pipeline=task, **hub_kwargs, **model_kwargs
|
||||
)
|
||||
|
||||
if load_feature_extractor:
|
||||
# Try to infer feature extractor from model or config name (if provided as str)
|
||||
if feature_extractor is None:
|
||||
@@ -897,6 +926,9 @@ def pipeline(
|
||||
if torch_dtype is not None:
|
||||
kwargs["torch_dtype"] = torch_dtype
|
||||
|
||||
if image_processor is not None:
|
||||
kwargs["image_processor"] = image_processor
|
||||
|
||||
if device is not None:
|
||||
kwargs["device"] = device
|
||||
|
||||
|
||||
@@ -31,6 +31,7 @@ from packaging import version
|
||||
|
||||
from ..dynamic_module_utils import custom_object_save
|
||||
from ..feature_extraction_utils import PreTrainedFeatureExtractor
|
||||
from ..image_processing_utils import BaseImageProcessor
|
||||
from ..modelcard import ModelCard
|
||||
from ..models.auto.configuration_auto import AutoConfig
|
||||
from ..tokenization_utils import PreTrainedTokenizer
|
||||
@@ -743,6 +744,7 @@ class Pipeline(_ScikitCompat):
|
||||
model: Union["PreTrainedModel", "TFPreTrainedModel"],
|
||||
tokenizer: Optional[PreTrainedTokenizer] = None,
|
||||
feature_extractor: Optional[PreTrainedFeatureExtractor] = None,
|
||||
image_processor: Optional[BaseImageProcessor] = None,
|
||||
modelcard: Optional[ModelCard] = None,
|
||||
framework: Optional[str] = None,
|
||||
task: str = "",
|
||||
@@ -759,6 +761,7 @@ class Pipeline(_ScikitCompat):
|
||||
self.model = model
|
||||
self.tokenizer = tokenizer
|
||||
self.feature_extractor = feature_extractor
|
||||
self.image_processor = image_processor
|
||||
self.modelcard = modelcard
|
||||
self.framework = framework
|
||||
if is_torch_available() and self.framework == "pt":
|
||||
@@ -1012,7 +1015,9 @@ class Pipeline(_ScikitCompat):
|
||||
if "TOKENIZERS_PARALLELISM" not in os.environ:
|
||||
logger.info("Disabling tokenizer parallelism, we're using DataLoader multithreading already")
|
||||
os.environ["TOKENIZERS_PARALLELISM"] = "false"
|
||||
collate_fn = no_collate_fn if batch_size == 1 else pad_collate_fn(self.tokenizer, self.feature_extractor)
|
||||
# TODO hack by collating feature_extractor and image_processor
|
||||
feature_extractor = self.feature_extractor if self.feature_extractor is not None else self.image_processor
|
||||
collate_fn = no_collate_fn if batch_size == 1 else pad_collate_fn(self.tokenizer, feature_extractor)
|
||||
dataloader = DataLoader(dataset, num_workers=num_workers, batch_size=batch_size, collate_fn=collate_fn)
|
||||
model_iterator = PipelineIterator(dataloader, self.forward, forward_params, loader_batch_size=batch_size)
|
||||
final_iterator = PipelineIterator(model_iterator, self.postprocess, postprocess_params)
|
||||
@@ -1121,7 +1126,10 @@ class ChunkPipeline(Pipeline):
|
||||
)
|
||||
num_workers = 1
|
||||
dataset = PipelineChunkIterator(inputs, self.preprocess, preprocess_params)
|
||||
collate_fn = no_collate_fn if batch_size == 1 else pad_collate_fn(self.tokenizer, self.feature_extractor)
|
||||
|
||||
# TODO hack by collating feature_extractor and image_processor
|
||||
feature_extractor = self.feature_extractor if self.feature_extractor is not None else self.image_processor
|
||||
collate_fn = no_collate_fn if batch_size == 1 else pad_collate_fn(self.tokenizer, feature_extractor)
|
||||
dataloader = DataLoader(dataset, num_workers=num_workers, batch_size=batch_size, collate_fn=collate_fn)
|
||||
model_iterator = PipelinePackIterator(dataloader, self.forward, forward_params, loader_batch_size=batch_size)
|
||||
final_iterator = PipelineIterator(model_iterator, self.postprocess, postprocess_params)
|
||||
|
||||
@@ -67,6 +67,12 @@ class ImageSegmentationPipeline(Pipeline):
|
||||
def __init__(self, *args, **kwargs):
|
||||
super().__init__(*args, **kwargs)
|
||||
|
||||
if self.image_processor is None and self.feature_extractor is not None:
|
||||
# Backward compatible change, if users called
|
||||
# ImageSegmentationPipeline(.., feature_extractor=MyFeatureExtractor())
|
||||
# then we should keep working
|
||||
self.image_processor = self.feature_extractor
|
||||
|
||||
if self.framework == "tf":
|
||||
raise ValueError(f"The {self.__class__} is only available in PyTorch.")
|
||||
|
||||
@@ -137,7 +143,7 @@ class ImageSegmentationPipeline(Pipeline):
|
||||
def preprocess(self, image):
|
||||
image = load_image(image)
|
||||
target_size = [(image.height, image.width)]
|
||||
inputs = self.feature_extractor(images=[image], return_tensors="pt")
|
||||
inputs = self.image_processor(images=[image], return_tensors="pt")
|
||||
inputs["target_size"] = target_size
|
||||
return inputs
|
||||
|
||||
@@ -152,10 +158,10 @@ class ImageSegmentationPipeline(Pipeline):
|
||||
):
|
||||
|
||||
fn = None
|
||||
if subtask in {"panoptic", None} and hasattr(self.feature_extractor, "post_process_panoptic_segmentation"):
|
||||
fn = self.feature_extractor.post_process_panoptic_segmentation
|
||||
elif subtask in {"instance", None} and hasattr(self.feature_extractor, "post_process_instance_segmentation"):
|
||||
fn = self.feature_extractor.post_process_instance_segmentation
|
||||
if subtask in {"panoptic", None} and hasattr(self.image_processor, "post_process_panoptic_segmentation"):
|
||||
fn = self.image_processor.post_process_panoptic_segmentation
|
||||
elif subtask in {"instance", None} and hasattr(self.image_processor, "post_process_instance_segmentation"):
|
||||
fn = self.image_processor.post_process_instance_segmentation
|
||||
|
||||
if fn is not None:
|
||||
outputs = fn(
|
||||
@@ -176,8 +182,8 @@ class ImageSegmentationPipeline(Pipeline):
|
||||
score = segment["score"]
|
||||
annotation.append({"score": score, "label": label, "mask": mask})
|
||||
|
||||
elif subtask in {"semantic", None} and hasattr(self.feature_extractor, "post_process_semantic_segmentation"):
|
||||
outputs = self.feature_extractor.post_process_semantic_segmentation(
|
||||
elif subtask in {"semantic", None} and hasattr(self.image_processor, "post_process_semantic_segmentation"):
|
||||
outputs = self.image_processor.post_process_semantic_segmentation(
|
||||
model_outputs, target_sizes=model_outputs["target_size"]
|
||||
)[0]
|
||||
|
||||
|
||||
Reference in New Issue
Block a user