Supporting ImageProcessor in place of FeatureExtractor for pipelines (#20851)

* Fixing the pipeline with image processor.

* Update the slow test.

* Using only the first image processor.

* Include exclusion mechanism for Image processor.

* Do not handle Gitconfig, deemed as a bug.

* Apply suggestions from code review

Co-authored-by: amyeroberts <22614925+amyeroberts@users.noreply.github.com>

* Remove `conversational` changes. They are not supposed to be here.

* Address first row of comments.

* Remove OneFormer modifications.

Co-authored-by: amyeroberts <22614925+amyeroberts@users.noreply.github.com>
This commit is contained in:
Nicolas Patry
2023-01-25 10:16:31 +01:00
committed by GitHub
parent efdbad56ab
commit 99e7905422
28 changed files with 138 additions and 47 deletions

View File

@@ -31,8 +31,10 @@ from huggingface_hub import model_info
from ..configuration_utils import PretrainedConfig
from ..dynamic_module_utils import get_class_from_dynamic_module
from ..feature_extraction_utils import PreTrainedFeatureExtractor
from ..image_processing_utils import BaseImageProcessor
from ..models.auto.configuration_auto import AutoConfig
from ..models.auto.feature_extraction_auto import FEATURE_EXTRACTOR_MAPPING, AutoFeatureExtractor
from ..models.auto.image_processing_auto import IMAGE_PROCESSOR_MAPPING, AutoImageProcessor
from ..models.auto.modeling_auto import AutoModelForDepthEstimation
from ..models.auto.tokenization_auto import TOKENIZER_MAPPING, AutoTokenizer
from ..tokenization_utils import PreTrainedTokenizer
@@ -374,6 +376,7 @@ SUPPORTED_TASKS = {
}
NO_FEATURE_EXTRACTOR_TASKS = set()
NO_IMAGE_PROCESSOR_TASKS = set()
NO_TOKENIZER_TASKS = set()
# Those model configs are special, they are generic over their task, meaning
# any tokenizer/feature_extractor might be used for a given model so we cannot
@@ -383,6 +386,7 @@ MULTI_MODEL_CONFIGS = {"SpeechEncoderDecoderConfig", "VisionEncoderDecoderConfig
for task, values in SUPPORTED_TASKS.items():
if values["type"] == "text":
NO_FEATURE_EXTRACTOR_TASKS.add(task)
NO_IMAGE_PROCESSOR_TASKS.add(task)
elif values["type"] in {"audio", "image", "video"}:
NO_TOKENIZER_TASKS.add(task)
elif values["type"] != "multimodal":
@@ -482,6 +486,7 @@ def pipeline(
config: Optional[Union[str, PretrainedConfig]] = None,
tokenizer: Optional[Union[str, PreTrainedTokenizer, PreTrainedTokenizerFast]] = None,
feature_extractor: Optional[Union[str, PreTrainedFeatureExtractor]] = None,
image_processor: Optional[Union[str, BaseImageProcessor]] = None,
framework: Optional[str] = None,
revision: Optional[str] = None,
use_fast: bool = True,
@@ -766,6 +771,7 @@ def pipeline(
load_tokenizer = type(model_config) in TOKENIZER_MAPPING or model_config.tokenizer_class is not None
load_feature_extractor = type(model_config) in FEATURE_EXTRACTOR_MAPPING or feature_extractor is not None
load_image_processor = type(model_config) in IMAGE_PROCESSOR_MAPPING or image_processor is not None
if (
tokenizer is None
@@ -799,6 +805,8 @@ def pipeline(
if task in NO_FEATURE_EXTRACTOR_TASKS:
load_feature_extractor = False
if task in NO_IMAGE_PROCESSOR_TASKS:
load_image_processor = False
if load_tokenizer:
# Try to infer tokenizer from model or config name (if provided as str)
@@ -829,6 +837,27 @@ def pipeline(
tokenizer_identifier, use_fast=use_fast, _from_pipeline=task, **hub_kwargs, **tokenizer_kwargs
)
if load_image_processor:
# Try to infer image processor from model or config name (if provided as str)
if image_processor is None:
if isinstance(model_name, str):
image_processor = model_name
elif isinstance(config, str):
image_processor = config
else:
# Impossible to guess what is the right image_processor here
raise Exception(
"Impossible to guess which image processor to use. "
"Please provide a PreTrainedImageProcessor class or a path/identifier "
"to a pretrained image processor."
)
# Instantiate image_processor if needed
if isinstance(image_processor, (str, tuple)):
image_processor = AutoImageProcessor.from_pretrained(
image_processor, _from_pipeline=task, **hub_kwargs, **model_kwargs
)
if load_feature_extractor:
# Try to infer feature extractor from model or config name (if provided as str)
if feature_extractor is None:
@@ -897,6 +926,9 @@ def pipeline(
if torch_dtype is not None:
kwargs["torch_dtype"] = torch_dtype
if image_processor is not None:
kwargs["image_processor"] = image_processor
if device is not None:
kwargs["device"] = device

View File

@@ -31,6 +31,7 @@ from packaging import version
from ..dynamic_module_utils import custom_object_save
from ..feature_extraction_utils import PreTrainedFeatureExtractor
from ..image_processing_utils import BaseImageProcessor
from ..modelcard import ModelCard
from ..models.auto.configuration_auto import AutoConfig
from ..tokenization_utils import PreTrainedTokenizer
@@ -743,6 +744,7 @@ class Pipeline(_ScikitCompat):
model: Union["PreTrainedModel", "TFPreTrainedModel"],
tokenizer: Optional[PreTrainedTokenizer] = None,
feature_extractor: Optional[PreTrainedFeatureExtractor] = None,
image_processor: Optional[BaseImageProcessor] = None,
modelcard: Optional[ModelCard] = None,
framework: Optional[str] = None,
task: str = "",
@@ -759,6 +761,7 @@ class Pipeline(_ScikitCompat):
self.model = model
self.tokenizer = tokenizer
self.feature_extractor = feature_extractor
self.image_processor = image_processor
self.modelcard = modelcard
self.framework = framework
if is_torch_available() and self.framework == "pt":
@@ -1012,7 +1015,9 @@ class Pipeline(_ScikitCompat):
if "TOKENIZERS_PARALLELISM" not in os.environ:
logger.info("Disabling tokenizer parallelism, we're using DataLoader multithreading already")
os.environ["TOKENIZERS_PARALLELISM"] = "false"
collate_fn = no_collate_fn if batch_size == 1 else pad_collate_fn(self.tokenizer, self.feature_extractor)
# TODO hack by collating feature_extractor and image_processor
feature_extractor = self.feature_extractor if self.feature_extractor is not None else self.image_processor
collate_fn = no_collate_fn if batch_size == 1 else pad_collate_fn(self.tokenizer, feature_extractor)
dataloader = DataLoader(dataset, num_workers=num_workers, batch_size=batch_size, collate_fn=collate_fn)
model_iterator = PipelineIterator(dataloader, self.forward, forward_params, loader_batch_size=batch_size)
final_iterator = PipelineIterator(model_iterator, self.postprocess, postprocess_params)
@@ -1121,7 +1126,10 @@ class ChunkPipeline(Pipeline):
)
num_workers = 1
dataset = PipelineChunkIterator(inputs, self.preprocess, preprocess_params)
collate_fn = no_collate_fn if batch_size == 1 else pad_collate_fn(self.tokenizer, self.feature_extractor)
# TODO hack by collating feature_extractor and image_processor
feature_extractor = self.feature_extractor if self.feature_extractor is not None else self.image_processor
collate_fn = no_collate_fn if batch_size == 1 else pad_collate_fn(self.tokenizer, feature_extractor)
dataloader = DataLoader(dataset, num_workers=num_workers, batch_size=batch_size, collate_fn=collate_fn)
model_iterator = PipelinePackIterator(dataloader, self.forward, forward_params, loader_batch_size=batch_size)
final_iterator = PipelineIterator(model_iterator, self.postprocess, postprocess_params)

View File

@@ -67,6 +67,12 @@ class ImageSegmentationPipeline(Pipeline):
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
if self.image_processor is None and self.feature_extractor is not None:
# Backward compatible change, if users called
# ImageSegmentationPipeline(.., feature_extractor=MyFeatureExtractor())
# then we should keep working
self.image_processor = self.feature_extractor
if self.framework == "tf":
raise ValueError(f"The {self.__class__} is only available in PyTorch.")
@@ -137,7 +143,7 @@ class ImageSegmentationPipeline(Pipeline):
def preprocess(self, image):
image = load_image(image)
target_size = [(image.height, image.width)]
inputs = self.feature_extractor(images=[image], return_tensors="pt")
inputs = self.image_processor(images=[image], return_tensors="pt")
inputs["target_size"] = target_size
return inputs
@@ -152,10 +158,10 @@ class ImageSegmentationPipeline(Pipeline):
):
fn = None
if subtask in {"panoptic", None} and hasattr(self.feature_extractor, "post_process_panoptic_segmentation"):
fn = self.feature_extractor.post_process_panoptic_segmentation
elif subtask in {"instance", None} and hasattr(self.feature_extractor, "post_process_instance_segmentation"):
fn = self.feature_extractor.post_process_instance_segmentation
if subtask in {"panoptic", None} and hasattr(self.image_processor, "post_process_panoptic_segmentation"):
fn = self.image_processor.post_process_panoptic_segmentation
elif subtask in {"instance", None} and hasattr(self.image_processor, "post_process_instance_segmentation"):
fn = self.image_processor.post_process_instance_segmentation
if fn is not None:
outputs = fn(
@@ -176,8 +182,8 @@ class ImageSegmentationPipeline(Pipeline):
score = segment["score"]
annotation.append({"score": score, "label": label, "mask": mask})
elif subtask in {"semantic", None} and hasattr(self.feature_extractor, "post_process_semantic_segmentation"):
outputs = self.feature_extractor.post_process_semantic_segmentation(
elif subtask in {"semantic", None} and hasattr(self.image_processor, "post_process_semantic_segmentation"):
outputs = self.image_processor.post_process_semantic_segmentation(
model_outputs, target_sizes=model_outputs["target_size"]
)[0]