Supporting ImageProcessor in place of FeatureExtractor for pipelines (#20851)

* Fixing the pipeline with image processor.

* Update the slow test.

* Using only the first image processor.

* Include exclusion mecanism for Image processor.

* Do not handle Gitconfig, deemed as a bug.

* Apply suggestions from code review

Co-authored-by: amyeroberts <22614925+amyeroberts@users.noreply.github.com>

* Remove `conversational` changes. They are not supposed to be here.

* Address first row of comments.

* Remove OneFormer modifications.

Co-authored-by: amyeroberts <22614925+amyeroberts@users.noreply.github.com>
This commit is contained in:
Nicolas Patry
2023-01-25 10:16:31 +01:00
committed by GitHub
parent efdbad56ab
commit 99e7905422
28 changed files with 138 additions and 47 deletions

View File

@@ -33,8 +33,10 @@ from huggingface_hub import HfFolder, Repository, create_repo, delete_repo, set_
from requests.exceptions import HTTPError
from transformers import (
FEATURE_EXTRACTOR_MAPPING,
IMAGE_PROCESSOR_MAPPING,
TOKENIZER_MAPPING,
AutoFeatureExtractor,
AutoImageProcessor,
AutoModelForSequenceClassification,
AutoTokenizer,
DistilBertForSequenceClassification,
@@ -154,8 +156,6 @@ def get_tiny_feature_extractor_from_checkpoint(checkpoint, tiny_config, feature_
feature_extractor = None
except Exception:
feature_extractor = None
if hasattr(tiny_config, "image_size") and feature_extractor:
feature_extractor = feature_extractor.__class__(size=tiny_config.image_size, crop_size=tiny_config.image_size)
# Audio Spectogram Transformer specific.
if feature_extractor.__class__.__name__ == "ASTFeatureExtractor":
@@ -168,9 +168,28 @@ def get_tiny_feature_extractor_from_checkpoint(checkpoint, tiny_config, feature_
feature_extractor = feature_extractor.__class__(
feature_size=tiny_config.input_feat_per_channel, num_mel_bins=tiny_config.input_feat_per_channel
)
# TODO remove this, once those have been moved to `image_processor`.
if hasattr(tiny_config, "image_size") and feature_extractor:
feature_extractor = feature_extractor.__class__(size=tiny_config.image_size, crop_size=tiny_config.image_size)
return feature_extractor
def get_tiny_image_processor_from_checkpoint(checkpoint, tiny_config, image_processor_class):
try:
image_processor = AutoImageProcessor.from_pretrained(checkpoint)
except Exception:
try:
if image_processor_class is not None:
image_processor = image_processor_class()
else:
image_processor = None
except Exception:
image_processor = None
if hasattr(tiny_config, "image_size") and image_processor:
image_processor = image_processor.__class__(size=tiny_config.image_size, crop_size=tiny_config.image_size)
return image_processor
class ANY:
def __init__(self, *_types):
self._types = _types
@@ -184,7 +203,9 @@ class ANY:
class PipelineTestCaseMeta(type):
def __new__(mcs, name, bases, dct):
def gen_test(ModelClass, checkpoint, tiny_config, tokenizer_class, feature_extractor_class):
def gen_test(
ModelClass, checkpoint, tiny_config, tokenizer_class, feature_extractor_class, image_processor_class
):
@skipIf(
tiny_config is None,
"TinyConfig does not exist, make sure that you defined a `_CONFIG_FOR_DOC` variable in the modeling"
@@ -231,16 +252,21 @@ class PipelineTestCaseMeta(type):
self.skipTest(f"Ignoring {ModelClass}, cannot create a simple tokenizer")
else:
tokenizer = None
feature_extractor = get_tiny_feature_extractor_from_checkpoint(
checkpoint, tiny_config, feature_extractor_class
)
if tokenizer is None and feature_extractor is None:
image_processor = get_tiny_image_processor_from_checkpoint(
checkpoint, tiny_config, image_processor_class
)
if tokenizer is None and feature_extractor is None and image_processor:
self.skipTest(
f"Ignoring {ModelClass}, cannot create a tokenizer or feature_extractor (PerceiverConfig with"
" no FastTokenizer ?)"
f"Ignoring {ModelClass}, cannot create a tokenizer or feature_extractor or image_processor"
" (PerceiverConfig with no FastTokenizer ?)"
)
pipeline, examples = self.get_test_pipeline(model, tokenizer, feature_extractor)
pipeline, examples = self.get_test_pipeline(model, tokenizer, feature_extractor, image_processor)
if pipeline is None:
# The test can disable itself, but it should be very marginal
# Concerns: Wav2Vec2ForCTC without tokenizer test (FastTokenizer don't exist)
@@ -283,6 +309,10 @@ class PipelineTestCaseMeta(type):
feature_extractor_name = (
feature_extractor_class.__name__ if feature_extractor_class else "nofeature_extractor"
)
image_processor_class = IMAGE_PROCESSOR_MAPPING.get(configuration, None)
image_processor_name = (
image_processor_class.__name__ if image_processor_class else "noimage_processor"
)
if not tokenizer_classes:
# We need to test even if there are no tokenizers.
tokenizer_classes = [None]
@@ -300,7 +330,7 @@ class PipelineTestCaseMeta(type):
else:
tokenizer_name = "notokenizer"
test_name = f"test_{prefix}_{configuration.__name__}_{model_architecture.__name__}_{tokenizer_name}_{feature_extractor_name}"
test_name = f"test_{prefix}_{configuration.__name__}_{model_architecture.__name__}_{tokenizer_name}_{feature_extractor_name}_{image_processor_name}"
if tokenizer_class is not None or feature_extractor_class is not None:
dct[test_name] = gen_test(
@@ -309,6 +339,7 @@ class PipelineTestCaseMeta(type):
tiny_config,
tokenizer_class,
feature_extractor_class,
image_processor_class,
)
@abstractmethod