diff --git a/.github/workflows/model-templates.yml b/.github/workflows/model-templates.yml index 9c5e5a6d1c..ab0f7a9aad 100644 --- a/.github/workflows/model-templates.yml +++ b/.github/workflows/model-templates.yml @@ -37,6 +37,7 @@ jobs: - name: Install dependencies run: | pip install --upgrade pip + sudo apt -y update && sudo apt install -y libsndfile1-dev pip install .[dev] - name: Create model files run: | diff --git a/docs/source/main_classes/pipelines.rst b/docs/source/main_classes/pipelines.rst index df003f490b..df87ddd306 100644 --- a/docs/source/main_classes/pipelines.rst +++ b/docs/source/main_classes/pipelines.rst @@ -36,6 +36,7 @@ There are two categories of pipeline abstractions to be aware about: - :class:`~transformers.ZeroShotClassificationPipeline` - :class:`~transformers.Text2TextGenerationPipeline` - :class:`~transformers.TableQuestionAnsweringPipeline` + - :class:`~transformers.ImageClassificationPipeline` The pipeline abstraction ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -79,6 +80,13 @@ FillMaskPipeline :special-members: __call__ :members: +ImageClassificationPipeline +======================================================================================================================= + +.. autoclass:: transformers.ImageClassificationPipeline + :special-members: __call__ + :members: + NerPipeline ======================================================================================================================= diff --git a/docs/source/model_doc/auto.rst b/docs/source/model_doc/auto.rst index e0e76c7795..e6aa9ad57e 100644 --- a/docs/source/model_doc/auto.rst +++ b/docs/source/model_doc/auto.rst @@ -128,6 +128,13 @@ AutoModelForTableQuestionAnswering :members: +AutoModelForImageClassification +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. autoclass:: transformers.AutoModelForImageClassification + :members: + + TFAutoModel ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/src/transformers/__init__.py b/src/transformers/__init__.py index b1de18192c..3e5fb363b7 100755 --- a/src/transformers/__init__.py +++ b/src/transformers/__init__.py @@ -244,6 +244,7 @@ _import_structure = { "CsvPipelineDataFormat", "FeatureExtractionPipeline", "FillMaskPipeline", + "ImageClassificationPipeline", "JsonPipelineDataFormat", "NerPipeline", "PipedPipelineDataFormat", @@ -483,6 +484,7 @@ if is_torch_available(): "MODEL_WITH_LM_HEAD_MAPPING", "AutoModel", "AutoModelForCausalLM", + "AutoModelForImageClassification", "AutoModelForMaskedLM", "AutoModelForMultipleChoice", "AutoModelForNextSentencePrediction", @@ -1640,6 +1642,7 @@ if TYPE_CHECKING: CsvPipelineDataFormat, FeatureExtractionPipeline, FillMaskPipeline, + ImageClassificationPipeline, JsonPipelineDataFormat, NerPipeline, PipedPipelineDataFormat, @@ -1845,6 +1848,7 @@ if TYPE_CHECKING: MODEL_WITH_LM_HEAD_MAPPING, AutoModel, AutoModelForCausalLM, + AutoModelForImageClassification, AutoModelForMaskedLM, AutoModelForMultipleChoice, AutoModelForNextSentencePrediction, diff --git a/src/transformers/feature_extraction_utils.py b/src/transformers/feature_extraction_utils.py index f7bf49c400..430666b04f 100644 --- a/src/transformers/feature_extraction_utils.py +++ b/src/transformers/feature_extraction_utils.py @@ -226,7 +226,7 @@ class FeatureExtractionMixin: :func:`~transformers.feature_extraction_utils.FeatureExtractionMixin.save_pretrained` method, e.g., ``./my_model_directory/``. - a path or url to a saved feature extractor JSON `file`, e.g., - ``./my_model_directory/feature_extraction_config.json``. + ``./my_model_directory/preprocessor_config.json``. cache_dir (:obj:`str` or :obj:`os.PathLike`, `optional`): Path to a directory in which a downloaded pretrained model feature extractor should be cached if the standard cache should not be used. diff --git a/src/transformers/models/auto/feature_extraction_auto.py b/src/transformers/models/auto/feature_extraction_auto.py index 496e4d5b74..6c6a3a7051 100644 --- a/src/transformers/models/auto/feature_extraction_auto.py +++ b/src/transformers/models/auto/feature_extraction_auto.py @@ -14,34 +14,26 @@ # limitations under the License. """ AutoFeatureExtractor class. """ +import os from collections import OrderedDict +from transformers import DeiTFeatureExtractor, Speech2TextFeatureExtractor, ViTFeatureExtractor + +from ... import DeiTConfig, PretrainedConfig, Speech2TextConfig, ViTConfig, Wav2Vec2Config from ...feature_extraction_utils import FeatureExtractionMixin -from ...file_utils import is_speech_available, is_vision_available -from ..wav2vec2.feature_extraction_wav2vec2 import Wav2Vec2FeatureExtractor -from .configuration_auto import replace_list_option_in_docstrings - - -if is_speech_available(): - from ..speech_to_text.feature_extraction_speech_to_text import Speech2TextFeatureExtractor -else: - Speech2TextFeatureExtractor = None - -if is_vision_available(): - from ..deit.feature_extraction_deit import DeiTFeatureExtractor - from ..vit.feature_extraction_vit import ViTFeatureExtractor -else: - DeiTFeatureExtractor = None - ViTFeatureExtractor = None - # Build the list of all feature extractors +from ...file_utils import FEATURE_EXTRACTOR_NAME +from ..wav2vec2.feature_extraction_wav2vec2 import Wav2Vec2FeatureExtractor +from .configuration_auto import AutoConfig, replace_list_option_in_docstrings + + FEATURE_EXTRACTOR_MAPPING = OrderedDict( [ - ("deit", DeiTFeatureExtractor), - ("s2t", Speech2TextFeatureExtractor), - ("vit", ViTFeatureExtractor), - ("wav2vec2", Wav2Vec2FeatureExtractor), + (DeiTConfig, DeiTFeatureExtractor), + (Speech2TextConfig, Speech2TextFeatureExtractor), + (ViTConfig, ViTFeatureExtractor), + (Wav2Vec2Config, Wav2Vec2FeatureExtractor), ] ) @@ -89,7 +81,7 @@ class AutoFeatureExtractor: :func:`~transformers.feature_extraction_utils.FeatureExtractionMixin.save_pretrained` method, e.g., ``./my_model_directory/``. - a path or url to a saved feature extractor JSON `file`, e.g., - ``./my_model_directory/feature_extraction_config.json``. + ``./my_model_directory/preprocessor_config.json``. cache_dir (:obj:`str` or :obj:`os.PathLike`, `optional`): Path to a directory in which a downloaded pretrained model feature extractor should be cached if the standard cache should not be used. @@ -134,20 +126,29 @@ class AutoFeatureExtractor: >>> feature_extractor = AutoFeatureExtractor.from_pretrained('./test/saved_model/') """ + config = kwargs.pop("config", None) + kwargs["_from_auto"] = True + + is_feature_extraction_file = os.path.isfile(pretrained_model_name_or_path) + is_directory = os.path.isdir(pretrained_model_name_or_path) and os.path.exists( + os.path.join(pretrained_model_name_or_path, FEATURE_EXTRACTOR_NAME) + ) + + if not is_feature_extraction_file and not is_directory: + if not isinstance(config, PretrainedConfig): + config = AutoConfig.from_pretrained(pretrained_model_name_or_path, **kwargs) + kwargs["_from_auto"] = True config_dict, _ = FeatureExtractionMixin.get_feature_extractor_dict(pretrained_model_name_or_path, **kwargs) - if "feature_extractor_type" in config_dict: + if type(config) in FEATURE_EXTRACTOR_MAPPING.keys(): + return FEATURE_EXTRACTOR_MAPPING[type(config)].from_dict(config_dict, **kwargs) + elif "feature_extractor_type" in config_dict: feature_extractor_class = feature_extractor_class_from_name(config_dict["feature_extractor_type"]) return feature_extractor_class.from_dict(config_dict, **kwargs) - else: - # Fallback: use pattern matching on the string. - for pattern, feature_extractor_class in FEATURE_EXTRACTOR_MAPPING.items(): - if pattern in str(pretrained_model_name_or_path): - return feature_extractor_class.from_dict(config_dict, **kwargs) raise ValueError( f"Unrecognized model in {pretrained_model_name_or_path}. Should have a `feature_extractor_type` key in " - "its feature_extraction_config.json, or contain one of the following strings " + f"its {FEATURE_EXTRACTOR_NAME}, or contain one of the following strings " f"in its name: {', '.join(FEATURE_EXTRACTOR_MAPPING.keys())}" ) diff --git a/src/transformers/models/speech_to_text/processing_speech_to_text.py b/src/transformers/models/speech_to_text/processing_speech_to_text.py index af79e9c64a..4f46217562 100644 --- a/src/transformers/models/speech_to_text/processing_speech_to_text.py +++ b/src/transformers/models/speech_to_text/processing_speech_to_text.py @@ -97,7 +97,7 @@ class Speech2TextProcessor: :meth:`~transformers.PreTrainedFeatureExtractor.save_pretrained` method, e.g., ``./my_model_directory/``. - a path or url to a saved feature extractor JSON `file`, e.g., - ``./my_model_directory/feature_extraction_config.json``. + ``./my_model_directory/preprocessor_config.json``. **kwargs Additional keyword arguments passed along to both :class:`~transformers.PreTrainedFeatureExtractor` and :class:`~transformers.PreTrainedTokenizer` diff --git a/src/transformers/models/wav2vec2/processing_wav2vec2.py b/src/transformers/models/wav2vec2/processing_wav2vec2.py index bafbcdebbc..04f9233fdf 100644 --- a/src/transformers/models/wav2vec2/processing_wav2vec2.py +++ b/src/transformers/models/wav2vec2/processing_wav2vec2.py @@ -96,7 +96,7 @@ class Wav2Vec2Processor: :meth:`~transformers.SequenceFeatureExtractor.save_pretrained` method, e.g., ``./my_model_directory/``. - a path or url to a saved feature extractor JSON `file`, e.g., - ``./my_model_directory/feature_extraction_config.json``. + ``./my_model_directory/preprocessor_config.json``. **kwargs Additional keyword arguments passed along to both :class:`~transformers.SequenceFeatureExtractor` and :class:`~transformers.PreTrainedTokenizer` diff --git a/src/transformers/pipelines/__init__.py b/src/transformers/pipelines/__init__.py index e16e96654e..09b8e58a91 100755 --- a/src/transformers/pipelines/__init__.py +++ b/src/transformers/pipelines/__init__.py @@ -20,9 +20,12 @@ import warnings from typing import TYPE_CHECKING, Any, Dict, Optional, Tuple, Union from ..configuration_utils import PretrainedConfig +from ..feature_extraction_utils import PreTrainedFeatureExtractor from ..file_utils import is_tf_available, is_torch_available from ..modelcard import ModelCard -from ..models.auto.tokenization_auto import AutoTokenizer +from ..models.auto.configuration_auto import AutoConfig +from ..models.auto.feature_extraction_auto import FEATURE_EXTRACTOR_MAPPING, AutoFeatureExtractor +from ..models.auto.tokenization_auto import TOKENIZER_MAPPING, AutoTokenizer from ..tokenization_utils import PreTrainedTokenizer from ..utils import logging from .automatic_speech_recognition import AutomaticSpeechRecognitionPipeline @@ -40,6 +43,7 @@ from .base import ( from .conversational import Conversation, ConversationalPipeline from .feature_extraction import FeatureExtractionPipeline from .fill_mask import FillMaskPipeline +from .image_classification import ImageClassificationPipeline from .question_answering import QuestionAnsweringArgumentHandler, QuestionAnsweringPipeline from .table_question_answering import TableQuestionAnsweringArgumentHandler, TableQuestionAnsweringPipeline from .text2text_generation import SummarizationPipeline, Text2TextGenerationPipeline, TranslationPipeline @@ -79,6 +83,7 @@ if is_torch_available(): MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING, AutoModel, AutoModelForCausalLM, + AutoModelForImageClassification, AutoModelForMaskedLM, AutoModelForQuestionAnswering, AutoModelForSeq2SeqLM, @@ -198,6 +203,12 @@ SUPPORTED_TASKS = { "pt": AutoModelForCausalLM if is_torch_available() else None, "default": {"model": {"pt": "microsoft/DialoGPT-medium", "tf": "microsoft/DialoGPT-medium"}}, }, + "image-classification": { + "impl": ImageClassificationPipeline, + "tf": None, + "pt": AutoModelForImageClassification if is_torch_available() else None, + "default": {"model": {"pt": "google/vit-base-patch16-224"}}, + }, } @@ -252,6 +263,7 @@ def pipeline( model: Optional = None, config: Optional[Union[str, PretrainedConfig]] = None, tokenizer: Optional[Union[str, PreTrainedTokenizer]] = None, + feature_extractor: Optional[Union[str, PreTrainedFeatureExtractor]] = None, framework: Optional[str] = None, revision: Optional[str] = None, use_fast: bool = True, @@ -309,6 +321,18 @@ def pipeline( :obj:`model` is not specified or not a string, then the default tokenizer for :obj:`config` is loaded (if it is a string). However, if :obj:`config` is also not given or not a string, then the default tokenizer for the given :obj:`task` will be loaded. + feature_extractor (:obj:`str` or :obj:`~transformers.PreTrainedFeatureExtractor`, `optional`): + The feature extractor that will be used by the pipeline to encode data for the model. This can be a model + identifier or an actual pretrained feature extractor inheriting from + :class:`~transformers.PreTrainedFeatureExtractor`. + + Feature extractors are used for non-NLP models, such as Speech or Vision models as well as multi-modal + models. Multi-modal models will also require a tokenizer to be passed. + + If not provided, the default feature extractor for the given :obj:`model` will be loaded (if it is a + string). If :obj:`model` is not specified or not a string, then the default feature extractor for + :obj:`config` is loaded (if it is a string). However, if :obj:`config` is also not given or not a string, + then the default feature extractor for the given :obj:`task` will be loaded. framework (:obj:`str`, `optional`): The framework to use, either :obj:`"pt"` for PyTorch or :obj:`"tf"` for TensorFlow. The specified framework must be installed. @@ -359,19 +383,7 @@ def pipeline( # At that point framework might still be undetermined model = get_default_model(targeted_task, framework, task_options) - # Try to infer tokenizer from model or config name (if provided as str) - if tokenizer is None: - if isinstance(model, str): - tokenizer = model - elif isinstance(config, str): - tokenizer = config - else: - # Impossible to guest what is the right tokenizer here - raise Exception( - "Impossible to guess which tokenizer to use. " - "Please provided a PreTrainedTokenizer class or a path/identifier to a pretrained tokenizer." - ) - + model_name = model if isinstance(model, str) else None modelcard = None # Try to infer modelcard from model or config name (if provided as str) if isinstance(model, str): @@ -388,19 +400,6 @@ def pipeline( # Retrieve use_auth_token and add it to model_kwargs to be used in .from_pretrained model_kwargs["use_auth_token"] = model_kwargs.get("use_auth_token", use_auth_token) - # Instantiate tokenizer if needed - if isinstance(tokenizer, (str, tuple)): - if isinstance(tokenizer, tuple): - # For tuple we have (tokenizer name, {kwargs}) - use_fast = tokenizer[1].pop("use_fast", use_fast) - tokenizer = AutoTokenizer.from_pretrained( - tokenizer[0], use_fast=use_fast, revision=revision, _from_pipeline=task, **tokenizer[1] - ) - else: - tokenizer = AutoTokenizer.from_pretrained( - tokenizer, revision=revision, use_fast=use_fast, _from_pipeline=task, **model_kwargs - ) - # Instantiate config if needed if isinstance(config, str): config = AutoConfig.from_pretrained(config, revision=revision, _from_pipeline=task, **model_kwargs) @@ -434,6 +433,61 @@ def pipeline( model, config=config, revision=revision, _from_pipeline=task, **model_kwargs ) + model_config = model.config + + load_tokenizer = type(model_config) in TOKENIZER_MAPPING + load_feature_extractor = type(model_config) in FEATURE_EXTRACTOR_MAPPING + + if load_tokenizer: + # Try to infer tokenizer from model or config name (if provided as str) + if tokenizer is None: + if isinstance(model_name, str): + tokenizer = model_name + elif isinstance(config, str): + tokenizer = config + else: + # Impossible to guess what is the right tokenizer here + raise Exception( + "Impossible to guess which tokenizer to use. " + "Please provide a PreTrainedTokenizer class or a path/identifier to a pretrained tokenizer." + ) + + # Instantiate tokenizer if needed + if isinstance(tokenizer, (str, tuple)): + if isinstance(tokenizer, tuple): + # For tuple we have (tokenizer name, {kwargs}) + use_fast = tokenizer[1].pop("use_fast", use_fast) + tokenizer_identifier = tokenizer[0] + tokenizer_kwargs = tokenizer[1] + else: + tokenizer_identifier = tokenizer + tokenizer_kwargs = model_kwargs + + tokenizer = AutoTokenizer.from_pretrained( + tokenizer_identifier, revision=revision, use_fast=use_fast, _from_pipeline=task, **tokenizer_kwargs + ) + + if load_feature_extractor: + # Try to infer feature extractor from model or config name (if provided as str) + if feature_extractor is None: + if isinstance(model_name, str): + feature_extractor = model_name + elif isinstance(config, str): + feature_extractor = config + else: + # Impossible to guess what is the right feature_extractor here + raise Exception( + "Impossible to guess which feature extractor to use. " + "Please provide a PreTrainedFeatureExtractor class or a path/identifier " + "to a pretrained feature extractor." + ) + + # Instantiate feature_extractor if needed + if isinstance(feature_extractor, (str, tuple)): + feature_extractor = AutoFeatureExtractor.from_pretrained( + feature_extractor, revision=revision, _from_pipeline=task, **model_kwargs + ) + if task == "translation" and model.config.task_specific_params: for key in model.config.task_specific_params: if key.startswith("translation"): @@ -444,4 +498,16 @@ def pipeline( ) break - return task_class(model=model, tokenizer=tokenizer, modelcard=modelcard, framework=framework, task=task, **kwargs) + if tokenizer is not None: + kwargs["tokenizer"] = tokenizer + + if feature_extractor is not None: + kwargs["feature_extractor"] = feature_extractor + + return task_class( + model=model, + modelcard=modelcard, + framework=framework, + task=task, + **kwargs, + ) diff --git a/src/transformers/pipelines/base.py b/src/transformers/pipelines/base.py index 63ddd79971..05bf389b8a 100644 --- a/src/transformers/pipelines/base.py +++ b/src/transformers/pipelines/base.py @@ -23,6 +23,7 @@ from contextlib import contextmanager from os.path import abspath, exists from typing import TYPE_CHECKING, Any, Dict, List, Optional, Union +from ..feature_extraction_utils import PreTrainedFeatureExtractor from ..file_utils import add_end_docstrings, is_tf_available, is_torch_available from ..modelcard import ModelCard from ..tokenization_utils import PreTrainedTokenizer, TruncationStrategy @@ -522,7 +523,8 @@ class Pipeline(_ScikitCompat): def __init__( self, model: Union["PreTrainedModel", "TFPreTrainedModel"], - tokenizer: PreTrainedTokenizer, + tokenizer: Optional[PreTrainedTokenizer] = None, + feature_extractor: Optional[PreTrainedFeatureExtractor] = None, modelcard: Optional[ModelCard] = None, framework: Optional[str] = None, task: str = "", @@ -537,6 +539,7 @@ class Pipeline(_ScikitCompat): self.task = task self.model = model self.tokenizer = tokenizer + self.feature_extractor = feature_extractor self.modelcard = modelcard self.framework = framework self.device = device if framework == "tf" else torch.device("cpu" if device < 0 else f"cuda:{device}") @@ -565,7 +568,13 @@ class Pipeline(_ScikitCompat): os.makedirs(save_directory, exist_ok=True) self.model.save_pretrained(save_directory) - self.tokenizer.save_pretrained(save_directory) + + if self.tokenizer is not None: + self.tokenizer.save_pretrained(save_directory) + + if self.feature_extractor is not None: + self.feature_extractor.save_pretrained(save_directory) + if self.modelcard is not None: self.modelcard.save_pretrained(save_directory) @@ -630,7 +639,14 @@ class Pipeline(_ScikitCompat): The list of models supported by the pipeline, or a dictionary with model class values. """ if not isinstance(supported_models, list): # Create from a model mapping - supported_models = [item[1].__name__ for item in supported_models.items()] + supported_models_names = [] + for config, model in supported_models.items(): + # Mapping can now contain tuples of models for the same configuration. + if isinstance(model, tuple): + supported_models_names.extend([_model.__name__ for _model in model]) + else: + supported_models_names.append(model.__name__) + supported_models = supported_models_names if self.model.__class__.__name__ not in supported_models: raise PipelineException( self.task, diff --git a/src/transformers/pipelines/image_classification.py b/src/transformers/pipelines/image_classification.py new file mode 100644 index 0000000000..eb0410f322 --- /dev/null +++ b/src/transformers/pipelines/image_classification.py @@ -0,0 +1,129 @@ +import os +from typing import TYPE_CHECKING, List, Optional, Union + +import requests + +from ..feature_extraction_utils import PreTrainedFeatureExtractor +from ..file_utils import add_end_docstrings, is_torch_available, is_vision_available, requires_backends +from ..utils import logging +from .base import PIPELINE_INIT_ARGS, Pipeline + + +if TYPE_CHECKING: + from ..modeling_tf_utils import TFPreTrainedModel + from ..modeling_utils import PreTrainedModel + +if is_vision_available(): + from PIL import Image + +if is_torch_available(): + import torch + + from ..models.auto.modeling_auto import MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING + +logger = logging.get_logger(__name__) + + +@add_end_docstrings(PIPELINE_INIT_ARGS) +class ImageClassificationPipeline(Pipeline): + """ + Image classification pipeline using any :obj:`AutoModelForImageClassification`. This pipeline predicts the class of + an image. + + This image classification pipeline can currently be loaded from :func:`~transformers.pipeline` using the following + task identifier: :obj:`"image-classification"`. + + See the list of available models on `huggingface.co/models + `__. + """ + + def __init__( + self, + model: Union["PreTrainedModel", "TFPreTrainedModel"], + feature_extractor: PreTrainedFeatureExtractor, + framework: Optional[str] = None, + **kwargs + ): + super().__init__(model, feature_extractor=feature_extractor, framework=framework, **kwargs) + + if self.framework == "tf": + raise ValueError(f"The {self.__class__} is only available in PyTorch.") + + requires_backends(self, "vision") + + self.check_model_type(MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING) + + self.feature_extractor = feature_extractor + + @staticmethod + def load_image(image: Union[str, "Image.Image"]): + if isinstance(image, str): + if image.startswith("http://") or image.startswith("https://"): + # We need to actually check for a real protocol, otherwise it's impossible to use a local file + # like http_huggingface_co.png + return Image.open(requests.get(image, stream=True).raw) + elif os.path.isfile(image): + return Image.open(image) + elif isinstance(image, Image.Image): + return image + + raise ValueError( + "Incorrect format used for image. Should be an url linking to an image, a local path, or a PIL image." + ) + + def __call__(self, images: Union[str, List[str], "Image", List["Image"]], top_k=5): + """ + Assign labels to the image(s) passed as inputs. + + Args: + images (:obj:`str`, :obj:`List[str]`, :obj:`PIL.Image` or :obj:`List[PIL.Image]`): + The pipeline handles three types of images: + + - A string containing a http link pointing to an image + - A string containing a local path to an image + - An image loaded in PIL directly + + The pipeline accepts either a single image or a batch of images, which must then be passed as a string. + Images in a batch must all be in the same format: all as http links, all as local paths, or all as PIL + images. + top_k (:obj:`int`, `optional`, defaults to 5): + The number of top labels that will be returned by the pipeline. + + Return: + A dictionary or a list of dictionaries containing result. If the input is a single image, will return a + dictionary, if the input is a list of several images, will return a list of dictionaries corresponding to + the images. + + The dictionaries contain the following keys: + + - **label** (:obj:`str`) -- The label identified by the model. + - **score** (:obj:`int`) -- The score attributed by the model for that label. + """ + is_batched = isinstance(images, list) + + if not is_batched: + images = [images] + + images = [self.load_image(image) for image in images] + + with torch.no_grad(): + inputs = self.feature_extractor(images=images, return_tensors="pt") + outputs = self.model(**inputs) + + probs = outputs.logits.softmax(-1) + scores, ids = probs.topk(top_k) + + scores = scores.tolist() + ids = ids.tolist() + + if not is_batched: + scores, ids = scores[0], ids[0] + labels = [{"score": score, "label": self.model.config.id2label[_id]} for score, _id in zip(scores, ids)] + else: + labels = [] + for scores, ids in zip(scores, ids): + labels.append( + [{"score": score, "label": self.model.config.id2label[_id]} for score, _id in zip(scores, ids)] + ) + + return labels diff --git a/src/transformers/utils/dummy_pt_objects.py b/src/transformers/utils/dummy_pt_objects.py index b4b160dbe3..158c7f7381 100644 --- a/src/transformers/utils/dummy_pt_objects.py +++ b/src/transformers/utils/dummy_pt_objects.py @@ -376,6 +376,15 @@ class AutoModelForCausalLM: requires_backends(self, ["torch"]) +class AutoModelForImageClassification: + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + @classmethod + def from_pretrained(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + class AutoModelForMaskedLM: def __init__(self, *args, **kwargs): requires_backends(self, ["torch"]) diff --git a/tests/fixtures/coco.jpg b/tests/fixtures/coco.jpg new file mode 100644 index 0000000000..d32344928e Binary files /dev/null and b/tests/fixtures/coco.jpg differ diff --git a/tests/fixtures/preprocessor_config.json b/tests/fixtures/preprocessor_config.json new file mode 100644 index 0000000000..cf0c5dce6c --- /dev/null +++ b/tests/fixtures/preprocessor_config.json @@ -0,0 +1,3 @@ +{ + "feature_extractor_type": "Wav2Vec2FeatureExtractor" +} \ No newline at end of file diff --git a/tests/test_feature_extraction_auto.py b/tests/test_feature_extraction_auto.py index 71ee32c230..7502e84224 100644 --- a/tests/test_feature_extraction_auto.py +++ b/tests/test_feature_extraction_auto.py @@ -16,9 +16,10 @@ import os import unittest -from transformers import FEATURE_EXTRACTOR_MAPPING, AutoFeatureExtractor, Wav2Vec2FeatureExtractor +from transformers import AutoFeatureExtractor, Wav2Vec2FeatureExtractor +SAMPLE_FEATURE_EXTRACTION_CONFIG_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "fixtures") SAMPLE_FEATURE_EXTRACTION_CONFIG = os.path.join( os.path.dirname(os.path.abspath(__file__)), "fixtures/dummy_feature_extractor_config.json" ) @@ -29,16 +30,10 @@ class AutoFeatureExtractorTest(unittest.TestCase): config = AutoFeatureExtractor.from_pretrained("facebook/wav2vec2-base-960h") self.assertIsInstance(config, Wav2Vec2FeatureExtractor) + def test_feature_extractor_from_local_directory(self): + config = AutoFeatureExtractor.from_pretrained(SAMPLE_FEATURE_EXTRACTION_CONFIG_DIR) + self.assertIsInstance(config, Wav2Vec2FeatureExtractor) + def test_feature_extractor_from_local_file(self): config = AutoFeatureExtractor.from_pretrained(SAMPLE_FEATURE_EXTRACTION_CONFIG) self.assertIsInstance(config, Wav2Vec2FeatureExtractor) - - def test_pattern_matching_fallback(self): - """ - In cases where config.json doesn't include a model_type, - perform a few safety checks on the config mapping's order. - """ - # no key string should be included in a later key string (typical failure case) - keys = list(FEATURE_EXTRACTOR_MAPPING.keys()) - for i, key in enumerate(keys): - self.assertFalse(any(key in later_key for later_key in keys[i + 1 :])) diff --git a/tests/test_pipelines_image_classification.py b/tests/test_pipelines_image_classification.py new file mode 100644 index 0000000000..32b1317461 --- /dev/null +++ b/tests/test_pipelines_image_classification.py @@ -0,0 +1,115 @@ +# Copyright 2021 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import unittest + +from transformers import ( + AutoFeatureExtractor, + AutoModelForImageClassification, + PreTrainedTokenizer, + is_vision_available, +) +from transformers.pipelines import ImageClassificationPipeline, pipeline +from transformers.testing_utils import require_torch, require_vision + + +if is_vision_available(): + from PIL import Image +else: + + class Image: + @staticmethod + def open(*args, **kwargs): + pass + + +@require_vision +@require_torch +class ImageClassificationPipelineTests(unittest.TestCase): + pipeline_task = "image-classification" + small_models = ["lysandre/tiny-vit-random"] # Models tested without the @slow decorator + valid_inputs = [ + {"images": "http://images.cocodataset.org/val2017/000000039769.jpg"}, + { + "images": [ + "http://images.cocodataset.org/val2017/000000039769.jpg", + "http://images.cocodataset.org/val2017/000000039769.jpg", + ] + }, + {"images": "tests/fixtures/coco.jpg"}, + {"images": ["tests/fixtures/coco.jpg", "tests/fixtures/coco.jpg"]}, + {"images": Image.open("tests/fixtures/coco.jpg")}, + {"images": [Image.open("tests/fixtures/coco.jpg"), Image.open("tests/fixtures/coco.jpg")]}, + {"images": [Image.open("tests/fixtures/coco.jpg"), "tests/fixtures/coco.jpg"]}, + ] + + def test_small_model_from_factory(self): + for small_model in self.small_models: + + image_classifier = pipeline("image-classification", model=small_model) + + for valid_input in self.valid_inputs: + output = image_classifier(**valid_input) + top_k = valid_input.get("top_k", 5) + + def assert_valid_pipeline_output(pipeline_output): + self.assertTrue(isinstance(pipeline_output, list)) + self.assertEqual(len(pipeline_output), top_k) + for label_result in pipeline_output: + self.assertTrue(isinstance(label_result, dict)) + self.assertIn("label", label_result) + self.assertIn("score", label_result) + + if isinstance(valid_input["images"], list): + self.assertEqual(len(valid_input["images"]), len(output)) + for individual_output in output: + assert_valid_pipeline_output(individual_output) + else: + assert_valid_pipeline_output(output) + + def test_small_model_from_pipeline(self): + for small_model in self.small_models: + + model = AutoModelForImageClassification.from_pretrained(small_model) + feature_extractor = AutoFeatureExtractor.from_pretrained(small_model) + image_classifier = ImageClassificationPipeline(model=model, feature_extractor=feature_extractor) + + for valid_input in self.valid_inputs: + output = image_classifier(**valid_input) + top_k = valid_input.get("top_k", 5) + + def assert_valid_pipeline_output(pipeline_output): + self.assertTrue(isinstance(pipeline_output, list)) + self.assertEqual(len(pipeline_output), top_k) + for label_result in pipeline_output: + self.assertTrue(isinstance(label_result, dict)) + self.assertIn("label", label_result) + self.assertIn("score", label_result) + + if isinstance(valid_input["images"], list): + # When images are batched, pipeline output is a list of lists of dictionaries + self.assertEqual(len(valid_input["images"]), len(output)) + for individual_output in output: + assert_valid_pipeline_output(individual_output) + else: + # When images are batched, pipeline output is a list of dictionaries + assert_valid_pipeline_output(output) + + def test_custom_tokenizer(self): + tokenizer = PreTrainedTokenizer() + + # Assert that the pipeline can be initialized with a feature extractor that is not in any mapping + image_classifier = pipeline("image-classification", model=self.small_models[0], tokenizer=tokenizer) + + self.assertIs(image_classifier.tokenizer, tokenizer)