From babeff5524bf3d5d62cfa70e1297158a755b0810 Mon Sep 17 00:00:00 2001 From: Patrick Deutschmann Date: Fri, 3 Jun 2022 13:40:22 +0200 Subject: [PATCH] Add support for Perceiver ONNX export (#17213) * Start adding perceiver support for ONNX * Fix pad token bug for fast tokenizers * Fix formatting * Make get_preprocesor more opinionated (processor priority, otherwise tokenizer/feature extractor) * Clean docs format * Minor cleanup following @sgugger's comments * Fix typo in docs * Fix another docs typo * Fix one more typo in docs * Update src/transformers/onnx/utils.py Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com> * Update src/transformers/onnx/utils.py Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com> * Update src/transformers/onnx/utils.py Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com> Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com> --- docs/source/en/serialization.mdx | 1 + .../perceiver/configuration_perceiver.py | 69 ++++++++++++++++++- .../models/perceiver/modeling_perceiver.py | 11 +-- src/transformers/onnx/__main__.py | 26 ++++--- src/transformers/onnx/features.py | 22 ++++++ src/transformers/onnx/utils.py | 41 +++++++++++ tests/onnx/test_onnx_v2.py | 38 +++++----- 7 files changed, 178 insertions(+), 30 deletions(-) diff --git a/docs/source/en/serialization.mdx b/docs/source/en/serialization.mdx index c85c85f40c..e6ba52e39f 100644 --- a/docs/source/en/serialization.mdx +++ b/docs/source/en/serialization.mdx @@ -70,6 +70,7 @@ Ready-made configurations include the following architectures: - mBART - MobileBERT - OpenAI GPT-2 +- Perceiver - PLBart - RoBERTa - RoFormer diff --git a/src/transformers/models/perceiver/configuration_perceiver.py b/src/transformers/models/perceiver/configuration_perceiver.py index fdf1f01243..0c97974441 100644 --- a/src/transformers/models/perceiver/configuration_perceiver.py +++ b/src/transformers/models/perceiver/configuration_perceiver.py @@ -14,8 +14,15 @@ # limitations under the License. """ Perceiver model configuration""" +from collections import OrderedDict +from typing import Any, Mapping, Optional, Union + from ...configuration_utils import PretrainedConfig -from ...utils import logging +from ...feature_extraction_utils import FeatureExtractionMixin +from ...onnx import OnnxConfig +from ...onnx.utils import compute_effective_axis_dimension +from ...tokenization_utils_base import PreTrainedTokenizerBase +from ...utils import TensorType, logging logger = logging.get_logger(__name__) @@ -172,3 +179,63 @@ class PerceiverConfig(PretrainedConfig): self.audio_samples_per_frame = audio_samples_per_frame self.samples_per_patch = samples_per_patch self.output_shape = output_shape + + +class PerceiverOnnxConfig(OnnxConfig): + @property + def inputs(self) -> Mapping[str, Mapping[int, str]]: + if self.task == "multiple-choice": + dynamic_axis = {0: "batch", 1: "choice", 2: "sequence"} + else: + dynamic_axis = {0: "batch", 1: "sequence"} + return OrderedDict( + [ + ("inputs", dynamic_axis), + ("attention_mask", dynamic_axis), + ] + ) + + @property + def atol_for_validation(self) -> float: + return 1e-4 + + def generate_dummy_inputs( + self, + preprocessor: Union["PreTrainedTokenizerBase", "FeatureExtractionMixin"], + batch_size: int = -1, + seq_length: int = -1, + num_choices: int = -1, + is_pair: bool = False, + framework: Optional[TensorType] = None, + num_channels: int = 3, + image_width: int = 40, + image_height: int = 40, + ) -> Mapping[str, Any]: + # copied from `transformers.onnx.config.OnnxConfig` and slightly altered/simplified + + if isinstance(preprocessor, PreTrainedTokenizerBase): + # If dynamic axis (-1) we forward with a fixed dimension of 2 samples to avoid optimizations made by ONNX + batch_size = compute_effective_axis_dimension( + batch_size, fixed_dimension=OnnxConfig.default_fixed_batch, num_token_to_add=0 + ) + # If dynamic axis (-1) we forward with a fixed dimension of 8 tokens to avoid optimizations made by ONNX + token_to_add = preprocessor.num_special_tokens_to_add(is_pair) + seq_length = compute_effective_axis_dimension( + seq_length, fixed_dimension=OnnxConfig.default_fixed_sequence, num_token_to_add=token_to_add + ) + # Generate dummy inputs according to compute batch and sequence + dummy_input = [" ".join(["a"]) * seq_length] * batch_size + inputs = dict(preprocessor(dummy_input, return_tensors=framework)) + inputs["inputs"] = inputs.pop("input_ids") + return inputs + elif isinstance(preprocessor, FeatureExtractionMixin) and preprocessor.model_input_names[0] == "pixel_values": + # If dynamic axis (-1) we forward with a fixed dimension of 2 samples to avoid optimizations made by ONNX + batch_size = compute_effective_axis_dimension(batch_size, fixed_dimension=OnnxConfig.default_fixed_batch) + dummy_input = self._generate_dummy_images(batch_size, num_channels, image_height, image_width) + inputs = dict(preprocessor(images=dummy_input, return_tensors=framework)) + inputs["inputs"] = inputs.pop("pixel_values") + return inputs + else: + raise ValueError( + "Unable to generate dummy inputs for the model. Please provide a tokenizer or a preprocessor." + ) diff --git a/src/transformers/models/perceiver/modeling_perceiver.py b/src/transformers/models/perceiver/modeling_perceiver.py index d5b66bd80b..364bc67c8d 100755 --- a/src/transformers/models/perceiver/modeling_perceiver.py +++ b/src/transformers/models/perceiver/modeling_perceiver.py @@ -2735,7 +2735,9 @@ def _check_or_build_spatial_positions(pos, index_dims, batch_size): """ if pos is None: pos = build_linear_positions(index_dims) - pos = torch.broadcast_to(pos[None], (batch_size,) + pos.shape) + # equivalent to `torch.broadcast_to(pos[None], (batch_size,) + pos.shape)` + # but `torch.broadcast_to` cannot be converted to ONNX + pos = pos[None].expand((batch_size,) + pos.shape) pos = torch.reshape(pos, [batch_size, np.prod(index_dims), -1]) else: # Just a warning label: you probably don't want your spatial features to @@ -2840,7 +2842,8 @@ class PerceiverEmbeddingDecoder(nn.Module): def forward(self, hidden_states: torch.Tensor, embedding_layer: torch.Tensor) -> torch.Tensor: batch_size, seq_len, d_model = hidden_states.shape - output = torch.matmul(hidden_states.reshape([-1, d_model]), embedding_layer.weight.T) # Flatten batch dim + # Flatten batch dim + output = torch.matmul(hidden_states.reshape([-1, d_model]), embedding_layer.weight.transpose(0, 1)) output = output + self.bias return output.reshape([batch_size, seq_len, self.vocab_size]) @@ -3166,9 +3169,9 @@ class PerceiverImagePreprocessor(AbstractPreprocessor): if self.prep_type != "patches": # move channels to last dimension, as the _build_network_inputs method below expects this if inputs.ndim == 4: - inputs = torch.moveaxis(inputs, 1, -1) + inputs = torch.permute(inputs, (0, 2, 3, 1)) elif inputs.ndim == 5: - inputs = torch.moveaxis(inputs, 2, -1) + inputs = torch.permute(inputs, (0, 1, 3, 4, 2)) else: raise ValueError("Unsupported data format for conv1x1.") diff --git a/src/transformers/onnx/__main__.py b/src/transformers/onnx/__main__.py index 6e3b4404cd..6d665b3556 100644 --- a/src/transformers/onnx/__main__.py +++ b/src/transformers/onnx/__main__.py @@ -15,9 +15,8 @@ from argparse import ArgumentParser from pathlib import Path -from ..models.auto import AutoConfig, AutoFeatureExtractor, AutoTokenizer -from ..models.auto.feature_extraction_auto import FEATURE_EXTRACTOR_MAPPING_NAMES -from ..models.auto.tokenization_auto import TOKENIZER_MAPPING_NAMES +from ..models.auto import AutoFeatureExtractor, AutoProcessor, AutoTokenizer +from ..onnx.utils import get_preprocessor from ..utils import logging from .convert import export, validate_model_outputs from .features import FeaturesManager @@ -43,6 +42,13 @@ def main(): ) parser.add_argument("output", type=Path, help="Path indicating where to store generated ONNX model.") parser.add_argument("--cache_dir", type=str, default=None, help="Path indicating where to store cache.") + parser.add_argument( + "--preprocessor", + type=str, + choices=["auto", "tokenizer", "feature_extractor", "processor"], + default="auto", + help="Which type of preprocessor to use. 'auto' tries to automatically detect it.", + ) # Retrieve CLI arguments args = parser.parse_args() @@ -51,15 +57,17 @@ def main(): if not args.output.parent.exists(): args.output.parent.mkdir(parents=True) - # Check the modality of the inputs and instantiate the appropriate preprocessor - # TODO(lewtun): Refactor this as a function if we need to check modalities elsewhere as well - config = AutoConfig.from_pretrained(args.model) - if config.model_type in TOKENIZER_MAPPING_NAMES: + # Instantiate the appropriate preprocessor + if args.preprocessor == "auto": + preprocessor = get_preprocessor(args.model) + elif args.preprocessor == "tokenizer": preprocessor = AutoTokenizer.from_pretrained(args.model) - elif config.model_type in FEATURE_EXTRACTOR_MAPPING_NAMES: + elif args.preprocessor == "feature_extractor": preprocessor = AutoFeatureExtractor.from_pretrained(args.model) + elif args.preprocessor == "processor": + preprocessor = AutoProcessor.from_pretrained(args.model) else: - raise ValueError(f"Unsupported model type: {config.model_type}") + raise ValueError(f"Unknown preprocessor type '{args.preprocessor}'") # Allocate the model model = FeaturesManager.get_model_from_feature( diff --git a/src/transformers/onnx/features.py b/src/transformers/onnx/features.py index 9013618e02..83d424655a 100644 --- a/src/transformers/onnx/features.py +++ b/src/transformers/onnx/features.py @@ -26,6 +26,7 @@ from ..models.m2m_100 import M2M100OnnxConfig from ..models.marian import MarianOnnxConfig from ..models.mbart import MBartOnnxConfig from ..models.mobilebert import MobileBertOnnxConfig +from ..models.perceiver.configuration_perceiver import PerceiverOnnxConfig from ..models.roberta import RobertaOnnxConfig from ..models.roformer import RoFormerOnnxConfig from ..models.squeezebert import SqueezeBertOnnxConfig @@ -332,6 +333,12 @@ class FeaturesManager: "m2m-100": supported_features_mapping( "default", "default-with-past", "seq2seq-lm", "seq2seq-lm-with-past", onnx_config_cls=M2M100OnnxConfig ), + "perceiver": supported_features_mapping( + "image-classification", + "masked-lm", + "sequence-classification", + onnx_config_cls=PerceiverOnnxConfig, + ), "roberta": supported_features_mapping( "default", "masked-lm", @@ -516,3 +523,18 @@ class FeaturesManager: ) return model.config.model_type, FeaturesManager._SUPPORTED_MODEL_TYPE[model_type][feature] + + def get_config(model_type: str, feature: str) -> OnnxConfig: + """ + Gets the OnnxConfig for a model_type and feature combination. + + Args: + model_type (`str`): + The model type to retrieve the config for. + feature (`str`): + The feature to retrieve the config for. + + Returns: + `OnnxConfig`: config for the combination + """ + return FeaturesManager._SUPPORTED_MODEL_TYPE[model_type][feature] diff --git a/src/transformers/onnx/utils.py b/src/transformers/onnx/utils.py index def160e6c7..83b18c6ab5 100644 --- a/src/transformers/onnx/utils.py +++ b/src/transformers/onnx/utils.py @@ -14,6 +14,9 @@ from ctypes import c_float, sizeof from enum import Enum +from typing import Optional, Union + +from .. import AutoFeatureExtractor, AutoProcessor, AutoTokenizer class ParameterFormat(Enum): @@ -61,3 +64,41 @@ def compute_serialized_parameters_size(num_parameters: int, dtype: ParameterForm Size (in byte) taken to save all the parameters """ return num_parameters * dtype.size + + +def get_preprocessor(model_name: str) -> Optional[Union[AutoTokenizer, AutoFeatureExtractor, AutoProcessor]]: + """ + Gets a preprocessor (tokenizer, feature extractor or processor) that is available for `model_name`. + + Args: + model_name (`str`): Name of the model for which a preprocessor are loaded. + + Returns: + `Optional[Union[AutoTokenizer, AutoFeatureExtractor, AutoProcessor]]`: + If a processor is found, it is returned. Otherwise, if a tokenizer or a feature extractor exists, it is + returned. If both a tokenizer and a feature extractor exist, an error is raised. The function returns + `None` if no preprocessor is found. + """ + try: + return AutoProcessor.from_pretrained(model_name) + except (ValueError, OSError, KeyError): + tokenizer, feature_extractor = None, None + try: + tokenizer = AutoTokenizer.from_pretrained(model_name) + except (OSError, KeyError): + pass + try: + feature_extractor = AutoFeatureExtractor.from_pretrained(model_name) + except (OSError, KeyError): + pass + + if tokenizer is not None and feature_extractor is not None: + raise ValueError( + f"Couldn't auto-detect preprocessor for {model_name}. Found both a tokenizer and a feature extractor." + ) + elif tokenizer is None and feature_extractor is None: + return None + elif tokenizer is not None: + return tokenizer + else: + return feature_extractor diff --git a/tests/onnx/test_onnx_v2.py b/tests/onnx/test_onnx_v2.py index d5115a9b3e..2b5a02bd59 100644 --- a/tests/onnx/test_onnx_v2.py +++ b/tests/onnx/test_onnx_v2.py @@ -6,7 +6,7 @@ from unittest.mock import patch import pytest from parameterized import parameterized -from transformers import AutoConfig, AutoFeatureExtractor, AutoTokenizer, is_tf_available, is_torch_available +from transformers import AutoConfig, PreTrainedTokenizerBase, is_tf_available, is_torch_available from transformers.onnx import ( EXTERNAL_DATA_FORMAT_SIZE_LIMIT, OnnxConfig, @@ -15,7 +15,11 @@ from transformers.onnx import ( export, validate_model_outputs, ) -from transformers.onnx.utils import compute_effective_axis_dimension, compute_serialized_parameters_size +from transformers.onnx.utils import ( + compute_effective_axis_dimension, + compute_serialized_parameters_size, + get_preprocessor, +) from transformers.testing_utils import require_onnx, require_rjieba, require_tf, require_torch, require_vision, slow @@ -189,6 +193,8 @@ PYTORCH_EXPORT_MODELS = { ("deit", "facebook/deit-small-patch16-224"), ("beit", "microsoft/beit-base-patch16-224"), ("data2vec-text", "facebook/data2vec-text-base"), + ("perceiver", "deepmind/language-perceiver", ("masked-lm", "sequence-classification")), + ("perceiver", "deepmind/vision-perceiver-conv", ("image-classification",)), } PYTORCH_EXPORT_WITH_PAST_MODELS = { @@ -226,10 +232,15 @@ TENSORFLOW_EXPORT_SEQ2SEQ_WITH_PAST_MODELS = {} def _get_models_to_test(export_models_list): models_to_test = [] if is_torch_available() or is_tf_available(): - for name, model in export_models_list: - for feature, onnx_config_class_constructor in FeaturesManager.get_supported_features_for_model_type( - name - ).items(): + for name, model, *features in export_models_list: + if features: + feature_config_mapping = { + feature: FeaturesManager.get_config(name, feature) for _ in features for feature in _ + } + else: + feature_config_mapping = FeaturesManager.get_supported_features_for_model_type(name) + + for feature, onnx_config_class_constructor in feature_config_mapping.items(): models_to_test.append((f"{name}_{feature}", name, model, feature, onnx_config_class_constructor)) return sorted(models_to_test) else: @@ -261,16 +272,11 @@ class OnnxExportTestCaseV2(TestCase): f" {onnx_config.torch_onnx_minimum_version}, got: {torch_version}" ) - # Check the modality of the inputs and instantiate the appropriate preprocessor - if model.main_input_name == "input_ids": - preprocessor = AutoTokenizer.from_pretrained(model_name) - # Useful for causal lm models that do not use pad tokens. - if not getattr(config, "pad_token_id", None): - config.pad_token_id = preprocessor.eos_token_id - elif model.main_input_name == "pixel_values": - preprocessor = AutoFeatureExtractor.from_pretrained(model_name) - else: - raise ValueError(f"Unsupported model input name: {model.main_input_name}") + preprocessor = get_preprocessor(model_name) + + # Useful for causal lm models that do not use pad tokens. + if isinstance(preprocessor, PreTrainedTokenizerBase) and not getattr(config, "pad_token_id", None): + config.pad_token_id = preprocessor.eos_token_id with NamedTemporaryFile("w") as output: try: