Add ONNX export for ViT (#15658)

* Add ONNX support for ViT * Refactor to use generic preprocessor * Add vision dep to tests * Extend ONNX slow tests to ViT * Add dummy image generator * Use model_type to determine modality * Add deprecation warnings for tokenizer argument * Add warning when overwriting the preprocessor * Add optional args to docstrings * Add minimum PyTorch version to OnnxConfig * Refactor OnnxConfig class variables from CONSTANT_NAME to snake_case * Add reasonable value for default atol Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com>
2022-03-09 17:36:59 +01:00
parent b7fa1e3dee
commit 50dd314d93
12 changed files with 270 additions and 91 deletions
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -783,7 +783,7 @@ jobs:
                      - v0.4-torch-{{ checksum "setup.py" }}
                      - v0.4-{{ checksum "setup.py" }}
            - run: pip install --upgrade pip
-            - run: pip install .[torch,testing,sentencepiece,onnxruntime]
+            - run: pip install .[torch,testing,sentencepiece,onnxruntime,vision]
            - save_cache:
                  key: v0.4-onnx-{{ checksum "setup.py" }}
                  paths:
@@ -816,7 +816,7 @@ jobs:
                      - v0.4-torch-{{ checksum "setup.py" }}
                      - v0.4-{{ checksum "setup.py" }}
            - run: pip install --upgrade pip
-            - run: pip install .[torch,testing,sentencepiece,onnxruntime]
+            - run: pip install .[torch,testing,sentencepiece,onnxruntime,vision]
            - save_cache:
                  key: v0.4-onnx-{{ checksum "setup.py" }}
                  paths:
--- a/docs/source/serialization.mdx
+++ b/docs/source/serialization.mdx
@@ -62,6 +62,7 @@ Ready-made configurations include the following architectures:
 - PLBart
 - RoBERTa
 - T5
 - ViT
 - XLM-RoBERTa
 - XLM-RoBERTa-XL
--- a/src/transformers/models/bart/configuration_bart.py
+++ b/src/transformers/models/bart/configuration_bart.py
@@ -358,13 +358,13 @@ class BartOnnxConfig(OnnxSeq2SeqConfigWithPast):
        # Did not use super(OnnxConfigWithPast, self).generate_dummy_inputs for code clarity.
        # If dynamic axis (-1) we forward with a fixed dimension of 2 samples to avoid optimizations made by ONNX
        batch_size = compute_effective_axis_dimension(
-            batch_size, fixed_dimension=OnnxConfig.DEFAULT_FIXED_BATCH, num_token_to_add=0
+            batch_size, fixed_dimension=OnnxConfig.default_fixed_batch, num_token_to_add=0
        )
        # If dynamic axis (-1) we forward with a fixed dimension of 8 tokens to avoid optimizations made by ONNX
        token_to_add = tokenizer.num_special_tokens_to_add(is_pair)
        seq_length = compute_effective_axis_dimension(
-            seq_length, fixed_dimension=OnnxConfig.DEFAULT_FIXED_SEQUENCE, num_token_to_add=token_to_add
+            seq_length, fixed_dimension=OnnxConfig.default_fixed_sequence, num_token_to_add=token_to_add
        )
        # Generate dummy inputs according to compute batch and sequence
--- a/src/transformers/models/marian/configuration_marian.py
+++ b/src/transformers/models/marian/configuration_marian.py
@@ -346,13 +346,13 @@ class MarianOnnxConfig(OnnxSeq2SeqConfigWithPast):
        # Did not use super(OnnxConfigWithPast, self).generate_dummy_inputs for code clarity.
        # If dynamic axis (-1) we forward with a fixed dimension of 2 samples to avoid optimizations made by ONNX
        batch_size = compute_effective_axis_dimension(
-            batch_size, fixed_dimension=OnnxConfig.DEFAULT_FIXED_BATCH, num_token_to_add=0
+            batch_size, fixed_dimension=OnnxConfig.default_fixed_batch, num_token_to_add=0
        )
        # If dynamic axis (-1) we forward with a fixed dimension of 8 tokens to avoid optimizations made by ONNX
        token_to_add = tokenizer.num_special_tokens_to_add(is_pair)
        seq_length = compute_effective_axis_dimension(
-            seq_length, fixed_dimension=OnnxConfig.DEFAULT_FIXED_SEQUENCE, num_token_to_add=token_to_add
+            seq_length, fixed_dimension=OnnxConfig.default_fixed_sequence, num_token_to_add=token_to_add
        )
        # Generate dummy inputs according to compute batch and sequence
--- a/src/transformers/models/mbart/configuration_mbart.py
+++ b/src/transformers/models/mbart/configuration_mbart.py
@@ -343,13 +343,13 @@ class MBartOnnxConfig(OnnxSeq2SeqConfigWithPast):
        # Did not use super(OnnxConfigWithPast, self).generate_dummy_inputs for code clarity.
        # If dynamic axis (-1) we forward with a fixed dimension of 2 samples to avoid optimizations made by ONNX
        batch_size = compute_effective_axis_dimension(
-            batch_size, fixed_dimension=OnnxConfig.DEFAULT_FIXED_BATCH, num_token_to_add=0
+            batch_size, fixed_dimension=OnnxConfig.default_fixed_batch, num_token_to_add=0
        )
        # If dynamic axis (-1) we forward with a fixed dimension of 8 tokens to avoid optimizations made by ONNX
        token_to_add = tokenizer.num_special_tokens_to_add(is_pair)
        seq_length = compute_effective_axis_dimension(
-            seq_length, fixed_dimension=OnnxConfig.DEFAULT_FIXED_SEQUENCE, num_token_to_add=token_to_add
+            seq_length, fixed_dimension=OnnxConfig.default_fixed_sequence, num_token_to_add=token_to_add
        )
        # Generate dummy inputs according to compute batch and sequence
--- a/src/transformers/models/vit/init.py
+++ b/src/transformers/models/vit/init.py
@@ -21,7 +21,7 @@ from ...file_utils import _LazyModule, is_flax_available, is_tf_available, is_to
 _import_structure = {
-    "configuration_vit": ["VIT_PRETRAINED_CONFIG_ARCHIVE_MAP", "ViTConfig"],
+    "configuration_vit": ["VIT_PRETRAINED_CONFIG_ARCHIVE_MAP", "ViTConfig", "ViTOnnxConfig"],
 }
 if is_vision_available():
@@ -51,7 +51,7 @@ if is_flax_available():
    ]
 if TYPE_CHECKING:
-    from .configuration_vit import VIT_PRETRAINED_CONFIG_ARCHIVE_MAP, ViTConfig
+    from .configuration_vit import VIT_PRETRAINED_CONFIG_ARCHIVE_MAP, ViTConfig, ViTOnnxConfig
    if is_vision_available():
        from .feature_extraction_vit import ViTFeatureExtractor
--- a/src/transformers/models/vit/configuration_vit.py
+++ b/src/transformers/models/vit/configuration_vit.py
@@ -14,7 +14,13 @@
 # limitations under the License.
 """ ViT model configuration"""
 from collections import OrderedDict
 from typing import Mapping
 from packaging import version
 from ...configuration_utils import PretrainedConfig
 from ...onnx import OnnxConfig
 from ...utils import logging
@@ -119,3 +125,20 @@ class ViTConfig(PretrainedConfig):
        self.num_channels = num_channels
        self.qkv_bias = qkv_bias
        self.encoder_stride = encoder_stride
 class ViTOnnxConfig(OnnxConfig):
    torch_onnx_minimum_version = version.parse("1.11")
    @property
    def inputs(self) -> Mapping[str, Mapping[int, str]]:
        return OrderedDict(
            [
                ("pixel_values", {0: "batch", 1: "sequence"}),
            ]
        )
    @property
    def atol_for_validation(self) -> float:
        return 1e-4
--- a/src/transformers/onnx/main.py
+++ b/src/transformers/onnx/main.py
@@ -15,8 +15,9 @@
 from argparse import ArgumentParser
 from pathlib import Path
-from transformers.models.auto import AutoTokenizer
+from ..models.auto import AutoConfig, AutoFeatureExtractor, AutoTokenizer
-
+from ..models.auto.feature_extraction_auto import FEATURE_EXTRACTOR_MAPPING_NAMES
 from ..models.auto.tokenization_auto import TOKENIZER_MAPPING_NAMES
 from ..utils import logging
 from .convert import export, validate_model_outputs
 from .features import FeaturesManager
@@ -46,8 +47,17 @@ def main():
    if not args.output.parent.exists():
        args.output.parent.mkdir(parents=True)
    # Check the modality of the inputs and instantiate the appropriate preprocessor
    # TODO(lewtun): Refactor this as a function if we need to check modalities elsewhere as well
    config = AutoConfig.from_pretrained(args.model)
    if config.model_type in TOKENIZER_MAPPING_NAMES:
        preprocessor = AutoTokenizer.from_pretrained(args.model)
    elif config.model_type in FEATURE_EXTRACTOR_MAPPING_NAMES:
        preprocessor = AutoFeatureExtractor.from_pretrained(args.model)
    else:
        raise ValueError(f"Unsupported model type: {config.model_type}")
    # Allocate the model
    tokenizer = AutoTokenizer.from_pretrained(args.model)
    model = FeaturesManager.get_model_from_feature(args.feature, args.model)
    model_kind, model_onnx_config = FeaturesManager.check_supported_model_or_raise(model, feature=args.feature)
    onnx_config = model_onnx_config(model.config)
@@ -62,12 +72,18 @@ def main():
            f"At least  {onnx_config.default_onnx_opset} is required."
        )
-    onnx_inputs, onnx_outputs = export(tokenizer, model, onnx_config, args.opset, args.output)
+    onnx_inputs, onnx_outputs = export(
        preprocessor,
        model,
        onnx_config,
        args.opset,
        args.output,
    )
    if args.atol is None:
        args.atol = onnx_config.atol_for_validation
-    validate_model_outputs(onnx_config, tokenizer, model, args.output, onnx_outputs, args.atol)
+    validate_model_outputs(onnx_config, preprocessor, model, args.output, onnx_outputs, args.atol)
    logger.info(f"All good, model saved at: {args.output.as_posix()}")
--- a/src/transformers/onnx/config.py
+++ b/src/transformers/onnx/config.py
@@ -13,15 +13,31 @@
 # limitations under the License.
 import copy
 import dataclasses
 import warnings
 from abc import ABC, abstractmethod
 from collections import OrderedDict
-from typing import Any, Callable, Dict, Iterable, List, Mapping, Optional, Tuple
+from typing import TYPE_CHECKING, Any, Callable, Dict, Iterable, List, Mapping, Optional, Tuple, Union
-from transformers import PretrainedConfig, PreTrainedTokenizer, TensorType, is_torch_available
+import numpy as np
 from packaging import version
 from ..file_utils import TensorType, is_torch_available, is_vision_available
 from ..utils import logging
 from .utils import ParameterFormat, compute_effective_axis_dimension, compute_serialized_parameters_size
 if TYPE_CHECKING:
    from ..configuration_utils import PretrainedConfig
    from ..feature_extraction_utils import FeatureExtractionMixin
    from ..tokenization_utils_base import PreTrainedTokenizerBase
 if is_vision_available():
    from PIL import Image
 logger = logging.get_logger(__name__)
 DEFAULT_ONNX_OPSET = 11
 # 2 Gb
@@ -54,10 +70,10 @@ class OnnxConfig(ABC):
    Base class for ONNX exportable model describing metadata on how to export the model through the ONNX format.
    """
-    DEFAULT_FIXED_BATCH = 2
+    default_fixed_batch = 2
-    DEFAULT_FIXED_SEQUENCE = 8
+    default_fixed_sequence = 8
-
+    torch_onnx_minimum_version = version.parse("1.8")
-    _TASKS_TO_COMMON_OUTPUTS = {
+    _tasks_to_common_outputs = {
        "default": OrderedDict({"last_hidden_state": {0: "batch", 1: "sequence"}}),
        "masked-lm": OrderedDict({"logits": {0: "batch", 1: "sequence"}}),
        "causal-lm": OrderedDict({"logits": {0: "batch", 1: "sequence"}}),
@@ -71,14 +87,15 @@ class OnnxConfig(ABC):
                "end_logits": {0: "batch", 1: "sequence"},
            }
        ),
        "image-classification": OrderedDict({"logits": {0: "batch", 1: "sequence"}}),
    }
-    def __init__(self, config: PretrainedConfig, task: str = "default", patching_specs: List[PatchingSpec] = None):
+    def __init__(self, config: "PretrainedConfig", task: str = "default", patching_specs: List[PatchingSpec] = None):
        self._config = config
-        if task not in self._TASKS_TO_COMMON_OUTPUTS:
+        if task not in self._tasks_to_common_outputs:
            raise ValueError(
-                f"{task} is not a supported task, supported tasks: {self._TASKS_TO_COMMON_OUTPUTS.keys()}"
+                f"{task} is not a supported task, supported tasks: {self._tasks_to_common_outputs.keys()}"
            )
        self.task = task
@@ -90,7 +107,7 @@ class OnnxConfig(ABC):
            self._patching_specs.append(final_spec)
    @classmethod
-    def from_model_config(cls, config: PretrainedConfig, task: str = "default") -> "OnnxConfig":
+    def from_model_config(cls, config: "PretrainedConfig", task: str = "default") -> "OnnxConfig":
        """
        Instantiate a OnnxConfig for a specific model
@@ -121,7 +138,7 @@ class OnnxConfig(ABC):
        Returns:
            For each output: its name associated to the axes symbolic name and the axis position within the tensor
        """
-        common_outputs = self._TASKS_TO_COMMON_OUTPUTS[self.task]
+        common_outputs = self._tasks_to_common_outputs[self.task]
        return copy.deepcopy(common_outputs)
    @property
@@ -146,7 +163,7 @@ class OnnxConfig(ABC):
            Integer > 0
        """
        # Using 2 avoid ONNX making assumption about single sample batch
-        return OnnxConfig.DEFAULT_FIXED_BATCH
+        return OnnxConfig.default_fixed_batch
    @property
    def default_sequence_length(self) -> int:
@@ -156,7 +173,7 @@ class OnnxConfig(ABC):
        Returns:
            Integer > 0
        """
-        return OnnxConfig.DEFAULT_FIXED_SEQUENCE
+        return OnnxConfig.default_fixed_sequence
    @property
    def default_onnx_opset(self) -> int:
@@ -178,6 +195,21 @@ class OnnxConfig(ABC):
        """
        return 1e-5
    @property
    def is_torch_support_available(self) -> bool:
        """
        The minimum PyTorch version required to export the model.
        Returns:
            `bool`: Whether the installed version of PyTorch is compatible with the model.
        """
        if is_torch_available():
            from transformers.file_utils import torch_version
            return torch_version >= self.torch_onnx_minimum_version
        else:
            return False
    @staticmethod
    def use_external_data_format(num_parameters: int) -> bool:
        """
@@ -195,42 +227,85 @@ class OnnxConfig(ABC):
            >= EXTERNAL_DATA_FORMAT_SIZE_LIMIT
        )
    def _generate_dummy_images(
        self, batch_size: int = 2, num_channels: int = 3, image_height: int = 40, image_width: int = 40
    ):
        images = []
        for _ in range(batch_size):
            data = np.random.rand(image_height, image_width, num_channels) * 255
            images.append(Image.fromarray(data.astype("uint8")).convert("RGB"))
        return images
    def generate_dummy_inputs(
        self,
-        tokenizer: PreTrainedTokenizer,
+        preprocessor: Union["PreTrainedTokenizerBase", "FeatureExtractionMixin"],
        batch_size: int = -1,
        seq_length: int = -1,
        is_pair: bool = False,
        framework: Optional[TensorType] = None,
        num_channels: int = 3,
        image_width: int = 40,
        image_height: int = 40,
        tokenizer: "PreTrainedTokenizerBase" = None,
    ) -> Mapping[str, Any]:
        """
        Generate inputs to provide to the ONNX exporter for the specific framework
        Args:
-            tokenizer: The tokenizer associated with this model configuration
+            preprocessor: ([`PreTrainedTokenizerBase`] or [`FeatureExtractionMixin`]):
-            batch_size: The batch size (int) to export the model for (-1 means dynamic axis)
+                The preprocessor associated with this model configuration.
-            seq_length: The sequence length (int) to export the model for (-1 means dynamic axis)
+            batch_size (`int`, *optional*, defaults to -1):
-            is_pair: Indicate if the input is a pair (sentence 1, sentence 2)
+                The batch size to export the model for (-1 means dynamic axis).
-            framework: The framework (optional) the tokenizer will generate tensor for
+            seq_length (`int`, *optional*, defaults to -1):
                The sequence length to export the model for (-1 means dynamic axis).
            is_pair (`bool`, *optional*, defaults to `False`):
                Indicate if the input is a pair (sentence 1, sentence 2)
            framework (`TensorType`, *optional*, defaults to `None`):
                The framework (PyTorch or TensorFlow) that the tokenizer will generate tensors for.
            num_channels (`int`, *optional*, defaults to 3):
                The number of channels of the generated images.
            image_width (`int`, *optional*, defaults to 40):
                The width of the generated images.
            image_height (`int`, *optional*, defaults to 40):
                The height of the generated images.
        Returns:
            Mapping[str, Tensor] holding the kwargs to provide to the model's forward function
        """
        from ..feature_extraction_utils import FeatureExtractionMixin
        from ..tokenization_utils_base import PreTrainedTokenizerBase
-        # If dynamic axis (-1) we forward with a fixed dimension of 2 samples to avoid optimizations made by ONNX
+        if isinstance(preprocessor, PreTrainedTokenizerBase) and tokenizer is not None:
-        batch_size = compute_effective_axis_dimension(
+            raise ValueError("You cannot provide both a tokenizer and a preprocessor to generate dummy inputs.")
-            batch_size, fixed_dimension=OnnxConfig.DEFAULT_FIXED_BATCH, num_token_to_add=0
+        if tokenizer is not None:
-        )
+            warnings.warn(
-
+                "The `tokenizer` argument is deprecated and will be removed in version 5 of Transformers. Use `preprocessor` instead.",
-        # If dynamic axis (-1) we forward with a fixed dimension of 8 tokens to avoid optimizations made by ONNX
+                FutureWarning,
-        token_to_add = tokenizer.num_special_tokens_to_add(is_pair)
+            )
-        seq_length = compute_effective_axis_dimension(
+            logger.warning("Overwriting the `preprocessor` argument with `tokenizer` to generate dummmy inputs.")
-            seq_length, fixed_dimension=OnnxConfig.DEFAULT_FIXED_SEQUENCE, num_token_to_add=token_to_add
+            preprocessor = tokenizer
-        )
+        if isinstance(preprocessor, PreTrainedTokenizerBase):
-
+            # If dynamic axis (-1) we forward with a fixed dimension of 2 samples to avoid optimizations made by ONNX
-        # Generate dummy inputs according to compute batch and sequence
+            batch_size = compute_effective_axis_dimension(
-        dummy_input = [" ".join([tokenizer.unk_token]) * seq_length] * batch_size
+                batch_size, fixed_dimension=OnnxConfig.default_fixed_batch, num_token_to_add=0
-        return dict(tokenizer(dummy_input, return_tensors=framework))
+            )
            # If dynamic axis (-1) we forward with a fixed dimension of 8 tokens to avoid optimizations made by ONNX
            token_to_add = preprocessor.num_special_tokens_to_add(is_pair)
            seq_length = compute_effective_axis_dimension(
                seq_length, fixed_dimension=OnnxConfig.default_fixed_sequence, num_token_to_add=token_to_add
            )
            # Generate dummy inputs according to compute batch and sequence
            dummy_input = [" ".join([preprocessor.unk_token]) * seq_length] * batch_size
            return dict(preprocessor(dummy_input, return_tensors=framework))
        elif isinstance(preprocessor, FeatureExtractionMixin) and preprocessor.model_input_names[0] == "pixel_values":
            # If dynamic axis (-1) we forward with a fixed dimension of 2 samples to avoid optimizations made by ONNX
            batch_size = compute_effective_axis_dimension(batch_size, fixed_dimension=OnnxConfig.default_fixed_batch)
            dummy_input = self._generate_dummy_images(batch_size, num_channels, image_height, image_width)
            return dict(preprocessor(images=dummy_input, return_tensors=framework))
        else:
            raise ValueError(
                "Unable to generate dummy inputs for the model. Please provide a tokenizer or a preprocessor."
            )
    def patch_ops(self):
        for spec in self._patching_specs:
@@ -264,7 +339,7 @@ class OnnxConfig(ABC):
 class OnnxConfigWithPast(OnnxConfig, ABC):
    def __init__(
        self,
-        config: PretrainedConfig,
+        config: "PretrainedConfig",
        task: str = "default",
        patching_specs: List[PatchingSpec] = None,
        use_past: bool = False,
@@ -273,7 +348,7 @@ class OnnxConfigWithPast(OnnxConfig, ABC):
        self.use_past = use_past
    @classmethod
-    def with_past(cls, config: PretrainedConfig, task: str = "default") -> "OnnxConfigWithPast":
+    def with_past(cls, config: "PretrainedConfig", task: str = "default") -> "OnnxConfigWithPast":
        """
        Instantiate a OnnxConfig with `use_past` attribute set to True
@@ -326,7 +401,7 @@ class OnnxConfigWithPast(OnnxConfig, ABC):
    def generate_dummy_inputs(
        self,
-        tokenizer: PreTrainedTokenizer,
+        tokenizer: "PreTrainedTokenizerBase",
        batch_size: int = -1,
        seq_length: int = -1,
        is_pair: bool = False,
@@ -445,7 +520,7 @@ class OnnxSeq2SeqConfigWithPast(OnnxConfigWithPast):
    def generate_dummy_inputs(
        self,
-        tokenizer: PreTrainedTokenizer,
+        tokenizer: "PreTrainedTokenizerBase",
        batch_size: int = -1,
        seq_length: int = -1,
        is_pair: bool = False,
--- a/src/transformers/onnx/convert.py
+++ b/src/transformers/onnx/convert.py
@@ -12,18 +12,29 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import warnings
 from inspect import signature
 from itertools import chain
 from pathlib import Path
-from typing import Iterable, List, Tuple, Union
+from typing import TYPE_CHECKING, Iterable, List, Tuple, Union
 import numpy as np
 from packaging.version import Version, parse
-from transformers import PreTrainedModel, PreTrainedTokenizer, TensorType, TFPreTrainedModel, is_torch_available
+from ..file_utils import TensorType, is_tf_available, is_torch_available, is_torch_onnx_dict_inputs_support_available
-from transformers.file_utils import is_tf_available, is_torch_onnx_dict_inputs_support_available
+from ..utils import logging
-from transformers.onnx.config import OnnxConfig
+from .config import OnnxConfig
-from transformers.utils import logging
+
 if is_torch_available():
    from ..modeling_utils import PreTrainedModel
 if is_tf_available():
    from ..modeling_tf_utils import TFPreTrainedModel
 if TYPE_CHECKING:
    from ..feature_extraction_utils import FeatureExtractionMixin
    from ..tokenization_utils import PreTrainedTokenizer
 logger = logging.get_logger(__name__)  # pylint: disable=invalid-name
@@ -63,18 +74,19 @@ def check_onnxruntime_requirements(minimum_version: Version):
 def export_pytorch(
-    tokenizer: PreTrainedTokenizer,
+    preprocessor: Union["PreTrainedTokenizer", "FeatureExtractionMixin"],
-    model: PreTrainedModel,
+    model: "PreTrainedModel",
    config: OnnxConfig,
    opset: int,
    output: Path,
    tokenizer: "PreTrainedTokenizer" = None,
 ) -> Tuple[List[str], List[str]]:
    """
    Export a PyTorch model to an ONNX Intermediate Representation (IR)
    Args:
-        tokenizer ([`PreTrainedTokenizer`]):
+        preprocessor: ([`PreTrainedTokenizer`] or [`FeatureExtractionMixin`]):
-            The tokenizer used for encoding the data.
+            The preprocessor used for encoding the data.
        model ([`PreTrainedModel`]):
            The model to export.
        config ([`~onnx.config.OnnxConfig`]):
@@ -88,6 +100,11 @@ def export_pytorch(
        `Tuple[List[str], List[str]]`: A tuple with an ordered list of the model's inputs, and the named inputs from
        the ONNX configuration.
    """
    if tokenizer is not None:
        warnings.warn(
            "The `tokenizer` argument is deprecated and will be removed in version 5 of Transformers. Use `preprocessor` instead.",
            FutureWarning,
        )
    if issubclass(type(model), PreTrainedModel):
        import torch
        from torch.onnx import export as onnx_export
@@ -106,7 +123,9 @@ def export_pytorch(
            # Ensure inputs match
            # TODO: Check when exporting QA we provide "is_pair=True"
-            model_inputs = config.generate_dummy_inputs(tokenizer, framework=TensorType.PYTORCH)
+            model_inputs = config.generate_dummy_inputs(
                preprocessor, tokenizer=tokenizer, framework=TensorType.PYTORCH
            )
            inputs_match, matched_inputs = ensure_model_and_config_inputs_match(model, model_inputs.keys())
            onnx_outputs = list(config.outputs.keys())
@@ -163,18 +182,19 @@ def export_pytorch(
 def export_tensorflow(
-    tokenizer: PreTrainedTokenizer,
+    preprocessor: Union["PreTrainedTokenizer", "FeatureExtractionMixin"],
-    model: TFPreTrainedModel,
+    model: "TFPreTrainedModel",
    config: OnnxConfig,
    opset: int,
    output: Path,
    tokenizer: "PreTrainedTokenizer" = None,
 ) -> Tuple[List[str], List[str]]:
    """
    Export a TensorFlow model to an ONNX Intermediate Representation (IR)
    Args:
-        tokenizer ([`PreTrainedTokenizer`]):
+        preprocessor: ([`PreTrainedTokenizer`] or [`FeatureExtractionMixin`]):
-            The tokenizer used for encoding the data.
+            The preprocessor used for encoding the data.
        model ([`TFPreTrainedModel`]):
            The model to export.
        config ([`~onnx.config.OnnxConfig`]):
@@ -193,6 +213,12 @@ def export_tensorflow(
    import onnx
    import tf2onnx
    if tokenizer is not None:
        warnings.warn(
            "The `tokenizer` argument is deprecated and will be removed in version 5 of Transformers. Use `preprocessor` instead.",
            FutureWarning,
        )
    model.config.return_dict = True
    # Check if we need to override certain configuration item
@@ -203,7 +229,7 @@ def export_tensorflow(
            setattr(model.config, override_config_key, override_config_value)
    # Ensure inputs match
-    model_inputs = config.generate_dummy_inputs(tokenizer, framework=TensorType.TENSORFLOW)
+    model_inputs = config.generate_dummy_inputs(preprocessor, tokenizer=tokenizer, framework=TensorType.TENSORFLOW)
    inputs_match, matched_inputs = ensure_model_and_config_inputs_match(model, model_inputs.keys())
    onnx_outputs = list(config.outputs.keys())
@@ -216,18 +242,19 @@ def export_tensorflow(
 def export(
-    tokenizer: PreTrainedTokenizer,
+    preprocessor: Union["PreTrainedTokenizer", "FeatureExtractionMixin"],
-    model: Union[PreTrainedModel, TFPreTrainedModel],
+    model: Union["PreTrainedModel", "TFPreTrainedModel"],
    config: OnnxConfig,
    opset: int,
    output: Path,
    tokenizer: "PreTrainedTokenizer" = None,
 ) -> Tuple[List[str], List[str]]:
    """
    Export a Pytorch or TensorFlow model to an ONNX Intermediate Representation (IR)
    Args:
-        tokenizer ([`PreTrainedTokenizer`]):
+        preprocessor: ([`PreTrainedTokenizer`] or [`FeatureExtractionMixin`]):
-            The tokenizer used for encoding the data.
+            The preprocessor used for encoding the data.
        model ([`PreTrainedModel`] or [`TFPreTrainedModel`]):
            The model to export.
        config ([`~onnx.config.OnnxConfig`]):
@@ -246,26 +273,37 @@ def export(
            "Cannot convert because neither PyTorch nor TensorFlow are not installed. "
            "Please install torch or tensorflow first."
        )
    if tokenizer is not None:
        warnings.warn(
            "The `tokenizer` argument is deprecated and will be removed in version 5 of Transformers. Use `preprocessor` instead.",
            FutureWarning,
        )
    if is_torch_available():
-        from transformers.file_utils import torch_version
+        from ..file_utils import torch_version
        if not is_torch_onnx_dict_inputs_support_available():
            raise AssertionError(f"Unsupported PyTorch version, minimum required is 1.8.0, got: {torch_version}")
        if not config.is_torch_support_available:
            logger.warning(
                f"Unsupported PyTorch version for this model. Minimum required is {config.torch_onnx_minimum_version}, got: {torch_version}"
            )
    if is_torch_available() and issubclass(type(model), PreTrainedModel):
-        return export_pytorch(tokenizer, model, config, opset, output)
+        return export_pytorch(preprocessor, model, config, opset, output, tokenizer=tokenizer)
    elif is_tf_available() and issubclass(type(model), TFPreTrainedModel):
-        return export_tensorflow(tokenizer, model, config, opset, output)
+        return export_tensorflow(preprocessor, model, config, opset, output, tokenizer=tokenizer)
 def validate_model_outputs(
    config: OnnxConfig,
-    tokenizer: PreTrainedTokenizer,
+    preprocessor: Union["PreTrainedTokenizer", "FeatureExtractionMixin"],
-    reference_model: Union[PreTrainedModel, TFPreTrainedModel],
+    reference_model: Union["PreTrainedModel", "TFPreTrainedModel"],
    onnx_model: Path,
    onnx_named_outputs: List[str],
    atol: float,
    tokenizer: "PreTrainedTokenizer" = None,
 ):
    from onnxruntime import InferenceSession, SessionOptions
@@ -274,9 +312,13 @@ def validate_model_outputs(
    # TODO: generate inputs with a different batch_size and seq_len that was used for conversion to properly test
    # dynamic input shapes.
    if issubclass(type(reference_model), PreTrainedModel):
-        reference_model_inputs = config.generate_dummy_inputs(tokenizer, framework=TensorType.PYTORCH)
+        reference_model_inputs = config.generate_dummy_inputs(
            preprocessor, tokenizer=tokenizer, framework=TensorType.PYTORCH
        )
    else:
-        reference_model_inputs = config.generate_dummy_inputs(tokenizer, framework=TensorType.TENSORFLOW)
+        reference_model_inputs = config.generate_dummy_inputs(
            preprocessor, tokenizer=tokenizer, framework=TensorType.TENSORFLOW
        )
    # Create ONNX Runtime session
    options = SessionOptions()
@@ -354,7 +396,7 @@ def validate_model_outputs(
 def ensure_model_and_config_inputs_match(
-    model: Union[PreTrainedModel, TFPreTrainedModel], model_inputs: Iterable[str]
+    model: Union["PreTrainedModel", "TFPreTrainedModel"], model_inputs: Iterable[str]
 ) -> Tuple[bool, List[str]]:
    """
--- a/src/transformers/onnx/features.py
+++ b/src/transformers/onnx/features.py
@@ -17,6 +17,7 @@ from ..models.marian import MarianOnnxConfig
 from ..models.mbart import MBartOnnxConfig
 from ..models.roberta import RobertaOnnxConfig
 from ..models.t5 import T5OnnxConfig
 from ..models.vit import ViTOnnxConfig
 from ..models.xlm_roberta import XLMRobertaOnnxConfig
 from ..utils import logging
 from .config import OnnxConfig
@@ -28,6 +29,7 @@ if is_torch_available():
    from transformers.models.auto import (
        AutoModel,
        AutoModelForCausalLM,
        AutoModelForImageClassification,
        AutoModelForMaskedLM,
        AutoModelForMultipleChoice,
        AutoModelForQuestionAnswering,
@@ -90,6 +92,7 @@ class FeaturesManager:
            "token-classification": AutoModelForTokenClassification,
            "multiple-choice": AutoModelForMultipleChoice,
            "question-answering": AutoModelForQuestionAnswering,
            "image-classification": AutoModelForImageClassification,
        }
    elif is_tf_available():
        _TASKS_TO_AUTOMODELS = {
@@ -244,6 +247,7 @@ class FeaturesManager:
            "question-answering",
            onnx_config_cls=ElectraOnnxConfig,
        ),
        "vit": supported_features_mapping("default", "image-classification", onnx_config_cls=ViTOnnxConfig),
    }
    AVAILABLE_FEATURES = sorted(reduce(lambda s1, s2: s1 | s2, (v.keys() for v in _SUPPORTED_MODEL_TYPE.values())))
--- a/tests/onnx/test_onnx_v2.py
+++ b/tests/onnx/test_onnx_v2.py
@@ -3,23 +3,25 @@ from tempfile import NamedTemporaryFile
 from unittest import TestCase
 from unittest.mock import patch
 import pytest
 from parameterized import parameterized
-from transformers import AutoConfig, AutoTokenizer, is_tf_available, is_torch_available
+from transformers import AutoConfig, AutoFeatureExtractor, AutoTokenizer, is_tf_available, is_torch_available
 from transformers.onnx import (
    EXTERNAL_DATA_FORMAT_SIZE_LIMIT,
    OnnxConfig,
    OnnxConfigWithPast,
    ParameterFormat,
    export,
    validate_model_outputs,
 )
 from transformers.onnx.config import OnnxConfigWithPast
 if is_torch_available() or is_tf_available():
    from transformers.onnx.features import FeaturesManager
 from transformers.onnx.utils import compute_effective_axis_dimension, compute_serialized_parameters_size
-from transformers.testing_utils import require_onnx, require_tf, require_torch, slow
+from transformers.testing_utils import require_onnx, require_tf, require_torch, require_vision, slow
@require_onnx
@@ -178,6 +180,7 @@ PYTORCH_EXPORT_MODELS = {
    ("roberta", "roberta-base"),
    ("xlm-roberta", "xlm-roberta-base"),
    ("layoutlm", "microsoft/layoutlm-base-uncased"),
    ("vit", "google/vit-base-patch16-224"),
 }
 PYTORCH_EXPORT_WITH_PAST_MODELS = {
@@ -241,25 +244,38 @@ class OnnxExportTestCaseV2(TestCase):
    def _onnx_export(self, test_name, name, model_name, feature, onnx_config_class_constructor):
        from transformers.onnx import export
        tokenizer = AutoTokenizer.from_pretrained(model_name)
        config = AutoConfig.from_pretrained(model_name)
        # Useful for causal lm models that do not use pad tokens.
        if not getattr(config, "pad_token_id", None):
            config.pad_token_id = tokenizer.eos_token_id
        model_class = FeaturesManager.get_model_class_for_feature(feature)
        config = AutoConfig.from_pretrained(model_name)
        model = model_class.from_config(config)
        onnx_config = onnx_config_class_constructor(model.config)
        if is_torch_available():
            from transformers.file_utils import torch_version
            if torch_version < onnx_config.torch_onnx_minimum_version:
                pytest.skip(
                    f"Skipping due to incompatible PyTorch version. Minimum required is {onnx_config.torch_onnx_minimum_version}, got: {torch_version}"
                )
        # Check the modality of the inputs and instantiate the appropriate preprocessor
        if model.main_input_name == "input_ids":
            preprocessor = AutoTokenizer.from_pretrained(model_name)
            # Useful for causal lm models that do not use pad tokens.
            if not getattr(config, "pad_token_id", None):
                config.pad_token_id = preprocessor.eos_token_id
        elif model.main_input_name == "pixel_values":
            preprocessor = AutoFeatureExtractor.from_pretrained(model_name)
        else:
            raise ValueError(f"Unsupported model input name: {model.main_input_name}")
        with NamedTemporaryFile("w") as output:
            try:
                onnx_inputs, onnx_outputs = export(
-                    tokenizer, model, onnx_config, onnx_config.default_onnx_opset, Path(output.name)
+                    preprocessor, model, onnx_config, onnx_config.default_onnx_opset, Path(output.name)
                )
                validate_model_outputs(
                    onnx_config,
-                    tokenizer,
+                    preprocessor,
                    model,
                    Path(output.name),
                    onnx_outputs,
@@ -271,6 +287,7 @@ class OnnxExportTestCaseV2(TestCase):
    @parameterized.expand(_get_models_to_test(PYTORCH_EXPORT_MODELS))
    @slow
    @require_torch
    @require_vision
    def test_pytorch_export(self, test_name, name, model_name, feature, onnx_config_class_constructor):
        self._onnx_export(test_name, name, model_name, feature, onnx_config_class_constructor)
@@ -291,6 +308,7 @@ class OnnxExportTestCaseV2(TestCase):
    @parameterized.expand(_get_models_to_test(TENSORFLOW_EXPORT_DEFAULT_MODELS))
    @slow
    @require_tf
    @require_vision
    def test_tensorflow_export(self, test_name, name, model_name, feature, onnx_config_class_constructor):
        self._onnx_export(test_name, name, model_name, feature, onnx_config_class_constructor)