Add ONNX export for ViT (#15658)
* Add ONNX support for ViT * Refactor to use generic preprocessor * Add vision dep to tests * Extend ONNX slow tests to ViT * Add dummy image generator * Use model_type to determine modality * Add deprecation warnings for tokenizer argument * Add warning when overwriting the preprocessor * Add optional args to docstrings * Add minimum PyTorch version to OnnxConfig * Refactor OnnxConfig class variables from CONSTANT_NAME to snake_case * Add reasonable value for default atol Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com>
This commit is contained in:
@@ -783,7 +783,7 @@ jobs:
|
||||
- v0.4-torch-{{ checksum "setup.py" }}
|
||||
- v0.4-{{ checksum "setup.py" }}
|
||||
- run: pip install --upgrade pip
|
||||
- run: pip install .[torch,testing,sentencepiece,onnxruntime]
|
||||
- run: pip install .[torch,testing,sentencepiece,onnxruntime,vision]
|
||||
- save_cache:
|
||||
key: v0.4-onnx-{{ checksum "setup.py" }}
|
||||
paths:
|
||||
@@ -816,7 +816,7 @@ jobs:
|
||||
- v0.4-torch-{{ checksum "setup.py" }}
|
||||
- v0.4-{{ checksum "setup.py" }}
|
||||
- run: pip install --upgrade pip
|
||||
- run: pip install .[torch,testing,sentencepiece,onnxruntime]
|
||||
- run: pip install .[torch,testing,sentencepiece,onnxruntime,vision]
|
||||
- save_cache:
|
||||
key: v0.4-onnx-{{ checksum "setup.py" }}
|
||||
paths:
|
||||
|
||||
@@ -62,6 +62,7 @@ Ready-made configurations include the following architectures:
|
||||
- PLBart
|
||||
- RoBERTa
|
||||
- T5
|
||||
- ViT
|
||||
- XLM-RoBERTa
|
||||
- XLM-RoBERTa-XL
|
||||
|
||||
|
||||
@@ -358,13 +358,13 @@ class BartOnnxConfig(OnnxSeq2SeqConfigWithPast):
|
||||
# Did not use super(OnnxConfigWithPast, self).generate_dummy_inputs for code clarity.
|
||||
# If dynamic axis (-1) we forward with a fixed dimension of 2 samples to avoid optimizations made by ONNX
|
||||
batch_size = compute_effective_axis_dimension(
|
||||
batch_size, fixed_dimension=OnnxConfig.DEFAULT_FIXED_BATCH, num_token_to_add=0
|
||||
batch_size, fixed_dimension=OnnxConfig.default_fixed_batch, num_token_to_add=0
|
||||
)
|
||||
|
||||
# If dynamic axis (-1) we forward with a fixed dimension of 8 tokens to avoid optimizations made by ONNX
|
||||
token_to_add = tokenizer.num_special_tokens_to_add(is_pair)
|
||||
seq_length = compute_effective_axis_dimension(
|
||||
seq_length, fixed_dimension=OnnxConfig.DEFAULT_FIXED_SEQUENCE, num_token_to_add=token_to_add
|
||||
seq_length, fixed_dimension=OnnxConfig.default_fixed_sequence, num_token_to_add=token_to_add
|
||||
)
|
||||
|
||||
# Generate dummy inputs according to compute batch and sequence
|
||||
|
||||
@@ -346,13 +346,13 @@ class MarianOnnxConfig(OnnxSeq2SeqConfigWithPast):
|
||||
# Did not use super(OnnxConfigWithPast, self).generate_dummy_inputs for code clarity.
|
||||
# If dynamic axis (-1) we forward with a fixed dimension of 2 samples to avoid optimizations made by ONNX
|
||||
batch_size = compute_effective_axis_dimension(
|
||||
batch_size, fixed_dimension=OnnxConfig.DEFAULT_FIXED_BATCH, num_token_to_add=0
|
||||
batch_size, fixed_dimension=OnnxConfig.default_fixed_batch, num_token_to_add=0
|
||||
)
|
||||
|
||||
# If dynamic axis (-1) we forward with a fixed dimension of 8 tokens to avoid optimizations made by ONNX
|
||||
token_to_add = tokenizer.num_special_tokens_to_add(is_pair)
|
||||
seq_length = compute_effective_axis_dimension(
|
||||
seq_length, fixed_dimension=OnnxConfig.DEFAULT_FIXED_SEQUENCE, num_token_to_add=token_to_add
|
||||
seq_length, fixed_dimension=OnnxConfig.default_fixed_sequence, num_token_to_add=token_to_add
|
||||
)
|
||||
|
||||
# Generate dummy inputs according to compute batch and sequence
|
||||
|
||||
@@ -343,13 +343,13 @@ class MBartOnnxConfig(OnnxSeq2SeqConfigWithPast):
|
||||
# Did not use super(OnnxConfigWithPast, self).generate_dummy_inputs for code clarity.
|
||||
# If dynamic axis (-1) we forward with a fixed dimension of 2 samples to avoid optimizations made by ONNX
|
||||
batch_size = compute_effective_axis_dimension(
|
||||
batch_size, fixed_dimension=OnnxConfig.DEFAULT_FIXED_BATCH, num_token_to_add=0
|
||||
batch_size, fixed_dimension=OnnxConfig.default_fixed_batch, num_token_to_add=0
|
||||
)
|
||||
|
||||
# If dynamic axis (-1) we forward with a fixed dimension of 8 tokens to avoid optimizations made by ONNX
|
||||
token_to_add = tokenizer.num_special_tokens_to_add(is_pair)
|
||||
seq_length = compute_effective_axis_dimension(
|
||||
seq_length, fixed_dimension=OnnxConfig.DEFAULT_FIXED_SEQUENCE, num_token_to_add=token_to_add
|
||||
seq_length, fixed_dimension=OnnxConfig.default_fixed_sequence, num_token_to_add=token_to_add
|
||||
)
|
||||
|
||||
# Generate dummy inputs according to compute batch and sequence
|
||||
|
||||
@@ -21,7 +21,7 @@ from ...file_utils import _LazyModule, is_flax_available, is_tf_available, is_to
|
||||
|
||||
|
||||
_import_structure = {
|
||||
"configuration_vit": ["VIT_PRETRAINED_CONFIG_ARCHIVE_MAP", "ViTConfig"],
|
||||
"configuration_vit": ["VIT_PRETRAINED_CONFIG_ARCHIVE_MAP", "ViTConfig", "ViTOnnxConfig"],
|
||||
}
|
||||
|
||||
if is_vision_available():
|
||||
@@ -51,7 +51,7 @@ if is_flax_available():
|
||||
]
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from .configuration_vit import VIT_PRETRAINED_CONFIG_ARCHIVE_MAP, ViTConfig
|
||||
from .configuration_vit import VIT_PRETRAINED_CONFIG_ARCHIVE_MAP, ViTConfig, ViTOnnxConfig
|
||||
|
||||
if is_vision_available():
|
||||
from .feature_extraction_vit import ViTFeatureExtractor
|
||||
|
||||
@@ -14,7 +14,13 @@
|
||||
# limitations under the License.
|
||||
""" ViT model configuration"""
|
||||
|
||||
from collections import OrderedDict
|
||||
from typing import Mapping
|
||||
|
||||
from packaging import version
|
||||
|
||||
from ...configuration_utils import PretrainedConfig
|
||||
from ...onnx import OnnxConfig
|
||||
from ...utils import logging
|
||||
|
||||
|
||||
@@ -119,3 +125,20 @@ class ViTConfig(PretrainedConfig):
|
||||
self.num_channels = num_channels
|
||||
self.qkv_bias = qkv_bias
|
||||
self.encoder_stride = encoder_stride
|
||||
|
||||
|
||||
class ViTOnnxConfig(OnnxConfig):
|
||||
|
||||
torch_onnx_minimum_version = version.parse("1.11")
|
||||
|
||||
@property
|
||||
def inputs(self) -> Mapping[str, Mapping[int, str]]:
|
||||
return OrderedDict(
|
||||
[
|
||||
("pixel_values", {0: "batch", 1: "sequence"}),
|
||||
]
|
||||
)
|
||||
|
||||
@property
|
||||
def atol_for_validation(self) -> float:
|
||||
return 1e-4
|
||||
|
||||
@@ -15,8 +15,9 @@
|
||||
from argparse import ArgumentParser
|
||||
from pathlib import Path
|
||||
|
||||
from transformers.models.auto import AutoTokenizer
|
||||
|
||||
from ..models.auto import AutoConfig, AutoFeatureExtractor, AutoTokenizer
|
||||
from ..models.auto.feature_extraction_auto import FEATURE_EXTRACTOR_MAPPING_NAMES
|
||||
from ..models.auto.tokenization_auto import TOKENIZER_MAPPING_NAMES
|
||||
from ..utils import logging
|
||||
from .convert import export, validate_model_outputs
|
||||
from .features import FeaturesManager
|
||||
@@ -46,8 +47,17 @@ def main():
|
||||
if not args.output.parent.exists():
|
||||
args.output.parent.mkdir(parents=True)
|
||||
|
||||
# Check the modality of the inputs and instantiate the appropriate preprocessor
|
||||
# TODO(lewtun): Refactor this as a function if we need to check modalities elsewhere as well
|
||||
config = AutoConfig.from_pretrained(args.model)
|
||||
if config.model_type in TOKENIZER_MAPPING_NAMES:
|
||||
preprocessor = AutoTokenizer.from_pretrained(args.model)
|
||||
elif config.model_type in FEATURE_EXTRACTOR_MAPPING_NAMES:
|
||||
preprocessor = AutoFeatureExtractor.from_pretrained(args.model)
|
||||
else:
|
||||
raise ValueError(f"Unsupported model type: {config.model_type}")
|
||||
|
||||
# Allocate the model
|
||||
tokenizer = AutoTokenizer.from_pretrained(args.model)
|
||||
model = FeaturesManager.get_model_from_feature(args.feature, args.model)
|
||||
model_kind, model_onnx_config = FeaturesManager.check_supported_model_or_raise(model, feature=args.feature)
|
||||
onnx_config = model_onnx_config(model.config)
|
||||
@@ -62,12 +72,18 @@ def main():
|
||||
f"At least {onnx_config.default_onnx_opset} is required."
|
||||
)
|
||||
|
||||
onnx_inputs, onnx_outputs = export(tokenizer, model, onnx_config, args.opset, args.output)
|
||||
onnx_inputs, onnx_outputs = export(
|
||||
preprocessor,
|
||||
model,
|
||||
onnx_config,
|
||||
args.opset,
|
||||
args.output,
|
||||
)
|
||||
|
||||
if args.atol is None:
|
||||
args.atol = onnx_config.atol_for_validation
|
||||
|
||||
validate_model_outputs(onnx_config, tokenizer, model, args.output, onnx_outputs, args.atol)
|
||||
validate_model_outputs(onnx_config, preprocessor, model, args.output, onnx_outputs, args.atol)
|
||||
logger.info(f"All good, model saved at: {args.output.as_posix()}")
|
||||
|
||||
|
||||
|
||||
@@ -13,15 +13,31 @@
|
||||
# limitations under the License.
|
||||
import copy
|
||||
import dataclasses
|
||||
import warnings
|
||||
from abc import ABC, abstractmethod
|
||||
from collections import OrderedDict
|
||||
from typing import Any, Callable, Dict, Iterable, List, Mapping, Optional, Tuple
|
||||
from typing import TYPE_CHECKING, Any, Callable, Dict, Iterable, List, Mapping, Optional, Tuple, Union
|
||||
|
||||
from transformers import PretrainedConfig, PreTrainedTokenizer, TensorType, is_torch_available
|
||||
import numpy as np
|
||||
from packaging import version
|
||||
|
||||
from ..file_utils import TensorType, is_torch_available, is_vision_available
|
||||
from ..utils import logging
|
||||
from .utils import ParameterFormat, compute_effective_axis_dimension, compute_serialized_parameters_size
|
||||
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from ..configuration_utils import PretrainedConfig
|
||||
from ..feature_extraction_utils import FeatureExtractionMixin
|
||||
from ..tokenization_utils_base import PreTrainedTokenizerBase
|
||||
|
||||
|
||||
if is_vision_available():
|
||||
from PIL import Image
|
||||
|
||||
logger = logging.get_logger(__name__)
|
||||
|
||||
|
||||
DEFAULT_ONNX_OPSET = 11
|
||||
|
||||
# 2 Gb
|
||||
@@ -54,10 +70,10 @@ class OnnxConfig(ABC):
|
||||
Base class for ONNX exportable model describing metadata on how to export the model through the ONNX format.
|
||||
"""
|
||||
|
||||
DEFAULT_FIXED_BATCH = 2
|
||||
DEFAULT_FIXED_SEQUENCE = 8
|
||||
|
||||
_TASKS_TO_COMMON_OUTPUTS = {
|
||||
default_fixed_batch = 2
|
||||
default_fixed_sequence = 8
|
||||
torch_onnx_minimum_version = version.parse("1.8")
|
||||
_tasks_to_common_outputs = {
|
||||
"default": OrderedDict({"last_hidden_state": {0: "batch", 1: "sequence"}}),
|
||||
"masked-lm": OrderedDict({"logits": {0: "batch", 1: "sequence"}}),
|
||||
"causal-lm": OrderedDict({"logits": {0: "batch", 1: "sequence"}}),
|
||||
@@ -71,14 +87,15 @@ class OnnxConfig(ABC):
|
||||
"end_logits": {0: "batch", 1: "sequence"},
|
||||
}
|
||||
),
|
||||
"image-classification": OrderedDict({"logits": {0: "batch", 1: "sequence"}}),
|
||||
}
|
||||
|
||||
def __init__(self, config: PretrainedConfig, task: str = "default", patching_specs: List[PatchingSpec] = None):
|
||||
def __init__(self, config: "PretrainedConfig", task: str = "default", patching_specs: List[PatchingSpec] = None):
|
||||
self._config = config
|
||||
|
||||
if task not in self._TASKS_TO_COMMON_OUTPUTS:
|
||||
if task not in self._tasks_to_common_outputs:
|
||||
raise ValueError(
|
||||
f"{task} is not a supported task, supported tasks: {self._TASKS_TO_COMMON_OUTPUTS.keys()}"
|
||||
f"{task} is not a supported task, supported tasks: {self._tasks_to_common_outputs.keys()}"
|
||||
)
|
||||
self.task = task
|
||||
|
||||
@@ -90,7 +107,7 @@ class OnnxConfig(ABC):
|
||||
self._patching_specs.append(final_spec)
|
||||
|
||||
@classmethod
|
||||
def from_model_config(cls, config: PretrainedConfig, task: str = "default") -> "OnnxConfig":
|
||||
def from_model_config(cls, config: "PretrainedConfig", task: str = "default") -> "OnnxConfig":
|
||||
"""
|
||||
Instantiate a OnnxConfig for a specific model
|
||||
|
||||
@@ -121,7 +138,7 @@ class OnnxConfig(ABC):
|
||||
Returns:
|
||||
For each output: its name associated to the axes symbolic name and the axis position within the tensor
|
||||
"""
|
||||
common_outputs = self._TASKS_TO_COMMON_OUTPUTS[self.task]
|
||||
common_outputs = self._tasks_to_common_outputs[self.task]
|
||||
return copy.deepcopy(common_outputs)
|
||||
|
||||
@property
|
||||
@@ -146,7 +163,7 @@ class OnnxConfig(ABC):
|
||||
Integer > 0
|
||||
"""
|
||||
# Using 2 avoid ONNX making assumption about single sample batch
|
||||
return OnnxConfig.DEFAULT_FIXED_BATCH
|
||||
return OnnxConfig.default_fixed_batch
|
||||
|
||||
@property
|
||||
def default_sequence_length(self) -> int:
|
||||
@@ -156,7 +173,7 @@ class OnnxConfig(ABC):
|
||||
Returns:
|
||||
Integer > 0
|
||||
"""
|
||||
return OnnxConfig.DEFAULT_FIXED_SEQUENCE
|
||||
return OnnxConfig.default_fixed_sequence
|
||||
|
||||
@property
|
||||
def default_onnx_opset(self) -> int:
|
||||
@@ -178,6 +195,21 @@ class OnnxConfig(ABC):
|
||||
"""
|
||||
return 1e-5
|
||||
|
||||
@property
|
||||
def is_torch_support_available(self) -> bool:
|
||||
"""
|
||||
The minimum PyTorch version required to export the model.
|
||||
|
||||
Returns:
|
||||
`bool`: Whether the installed version of PyTorch is compatible with the model.
|
||||
"""
|
||||
if is_torch_available():
|
||||
from transformers.file_utils import torch_version
|
||||
|
||||
return torch_version >= self.torch_onnx_minimum_version
|
||||
else:
|
||||
return False
|
||||
|
||||
@staticmethod
|
||||
def use_external_data_format(num_parameters: int) -> bool:
|
||||
"""
|
||||
@@ -195,42 +227,85 @@ class OnnxConfig(ABC):
|
||||
>= EXTERNAL_DATA_FORMAT_SIZE_LIMIT
|
||||
)
|
||||
|
||||
def _generate_dummy_images(
|
||||
self, batch_size: int = 2, num_channels: int = 3, image_height: int = 40, image_width: int = 40
|
||||
):
|
||||
images = []
|
||||
for _ in range(batch_size):
|
||||
data = np.random.rand(image_height, image_width, num_channels) * 255
|
||||
images.append(Image.fromarray(data.astype("uint8")).convert("RGB"))
|
||||
return images
|
||||
|
||||
def generate_dummy_inputs(
|
||||
self,
|
||||
tokenizer: PreTrainedTokenizer,
|
||||
preprocessor: Union["PreTrainedTokenizerBase", "FeatureExtractionMixin"],
|
||||
batch_size: int = -1,
|
||||
seq_length: int = -1,
|
||||
is_pair: bool = False,
|
||||
framework: Optional[TensorType] = None,
|
||||
num_channels: int = 3,
|
||||
image_width: int = 40,
|
||||
image_height: int = 40,
|
||||
tokenizer: "PreTrainedTokenizerBase" = None,
|
||||
) -> Mapping[str, Any]:
|
||||
"""
|
||||
Generate inputs to provide to the ONNX exporter for the specific framework
|
||||
|
||||
Args:
|
||||
tokenizer: The tokenizer associated with this model configuration
|
||||
batch_size: The batch size (int) to export the model for (-1 means dynamic axis)
|
||||
seq_length: The sequence length (int) to export the model for (-1 means dynamic axis)
|
||||
is_pair: Indicate if the input is a pair (sentence 1, sentence 2)
|
||||
framework: The framework (optional) the tokenizer will generate tensor for
|
||||
preprocessor: ([`PreTrainedTokenizerBase`] or [`FeatureExtractionMixin`]):
|
||||
The preprocessor associated with this model configuration.
|
||||
batch_size (`int`, *optional*, defaults to -1):
|
||||
The batch size to export the model for (-1 means dynamic axis).
|
||||
seq_length (`int`, *optional*, defaults to -1):
|
||||
The sequence length to export the model for (-1 means dynamic axis).
|
||||
is_pair (`bool`, *optional*, defaults to `False`):
|
||||
Indicate if the input is a pair (sentence 1, sentence 2)
|
||||
framework (`TensorType`, *optional*, defaults to `None`):
|
||||
The framework (PyTorch or TensorFlow) that the tokenizer will generate tensors for.
|
||||
num_channels (`int`, *optional*, defaults to 3):
|
||||
The number of channels of the generated images.
|
||||
image_width (`int`, *optional*, defaults to 40):
|
||||
The width of the generated images.
|
||||
image_height (`int`, *optional*, defaults to 40):
|
||||
The height of the generated images.
|
||||
|
||||
Returns:
|
||||
Mapping[str, Tensor] holding the kwargs to provide to the model's forward function
|
||||
"""
|
||||
from ..feature_extraction_utils import FeatureExtractionMixin
|
||||
from ..tokenization_utils_base import PreTrainedTokenizerBase
|
||||
|
||||
if isinstance(preprocessor, PreTrainedTokenizerBase) and tokenizer is not None:
|
||||
raise ValueError("You cannot provide both a tokenizer and a preprocessor to generate dummy inputs.")
|
||||
if tokenizer is not None:
|
||||
warnings.warn(
|
||||
"The `tokenizer` argument is deprecated and will be removed in version 5 of Transformers. Use `preprocessor` instead.",
|
||||
FutureWarning,
|
||||
)
|
||||
logger.warning("Overwriting the `preprocessor` argument with `tokenizer` to generate dummmy inputs.")
|
||||
preprocessor = tokenizer
|
||||
if isinstance(preprocessor, PreTrainedTokenizerBase):
|
||||
# If dynamic axis (-1) we forward with a fixed dimension of 2 samples to avoid optimizations made by ONNX
|
||||
batch_size = compute_effective_axis_dimension(
|
||||
batch_size, fixed_dimension=OnnxConfig.DEFAULT_FIXED_BATCH, num_token_to_add=0
|
||||
batch_size, fixed_dimension=OnnxConfig.default_fixed_batch, num_token_to_add=0
|
||||
)
|
||||
|
||||
# If dynamic axis (-1) we forward with a fixed dimension of 8 tokens to avoid optimizations made by ONNX
|
||||
token_to_add = tokenizer.num_special_tokens_to_add(is_pair)
|
||||
token_to_add = preprocessor.num_special_tokens_to_add(is_pair)
|
||||
seq_length = compute_effective_axis_dimension(
|
||||
seq_length, fixed_dimension=OnnxConfig.DEFAULT_FIXED_SEQUENCE, num_token_to_add=token_to_add
|
||||
seq_length, fixed_dimension=OnnxConfig.default_fixed_sequence, num_token_to_add=token_to_add
|
||||
)
|
||||
|
||||
# Generate dummy inputs according to compute batch and sequence
|
||||
dummy_input = [" ".join([tokenizer.unk_token]) * seq_length] * batch_size
|
||||
return dict(tokenizer(dummy_input, return_tensors=framework))
|
||||
dummy_input = [" ".join([preprocessor.unk_token]) * seq_length] * batch_size
|
||||
return dict(preprocessor(dummy_input, return_tensors=framework))
|
||||
elif isinstance(preprocessor, FeatureExtractionMixin) and preprocessor.model_input_names[0] == "pixel_values":
|
||||
# If dynamic axis (-1) we forward with a fixed dimension of 2 samples to avoid optimizations made by ONNX
|
||||
batch_size = compute_effective_axis_dimension(batch_size, fixed_dimension=OnnxConfig.default_fixed_batch)
|
||||
dummy_input = self._generate_dummy_images(batch_size, num_channels, image_height, image_width)
|
||||
return dict(preprocessor(images=dummy_input, return_tensors=framework))
|
||||
else:
|
||||
raise ValueError(
|
||||
"Unable to generate dummy inputs for the model. Please provide a tokenizer or a preprocessor."
|
||||
)
|
||||
|
||||
def patch_ops(self):
|
||||
for spec in self._patching_specs:
|
||||
@@ -264,7 +339,7 @@ class OnnxConfig(ABC):
|
||||
class OnnxConfigWithPast(OnnxConfig, ABC):
|
||||
def __init__(
|
||||
self,
|
||||
config: PretrainedConfig,
|
||||
config: "PretrainedConfig",
|
||||
task: str = "default",
|
||||
patching_specs: List[PatchingSpec] = None,
|
||||
use_past: bool = False,
|
||||
@@ -273,7 +348,7 @@ class OnnxConfigWithPast(OnnxConfig, ABC):
|
||||
self.use_past = use_past
|
||||
|
||||
@classmethod
|
||||
def with_past(cls, config: PretrainedConfig, task: str = "default") -> "OnnxConfigWithPast":
|
||||
def with_past(cls, config: "PretrainedConfig", task: str = "default") -> "OnnxConfigWithPast":
|
||||
"""
|
||||
Instantiate a OnnxConfig with `use_past` attribute set to True
|
||||
|
||||
@@ -326,7 +401,7 @@ class OnnxConfigWithPast(OnnxConfig, ABC):
|
||||
|
||||
def generate_dummy_inputs(
|
||||
self,
|
||||
tokenizer: PreTrainedTokenizer,
|
||||
tokenizer: "PreTrainedTokenizerBase",
|
||||
batch_size: int = -1,
|
||||
seq_length: int = -1,
|
||||
is_pair: bool = False,
|
||||
@@ -445,7 +520,7 @@ class OnnxSeq2SeqConfigWithPast(OnnxConfigWithPast):
|
||||
|
||||
def generate_dummy_inputs(
|
||||
self,
|
||||
tokenizer: PreTrainedTokenizer,
|
||||
tokenizer: "PreTrainedTokenizerBase",
|
||||
batch_size: int = -1,
|
||||
seq_length: int = -1,
|
||||
is_pair: bool = False,
|
||||
|
||||
@@ -12,18 +12,29 @@
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
import warnings
|
||||
from inspect import signature
|
||||
from itertools import chain
|
||||
from pathlib import Path
|
||||
from typing import Iterable, List, Tuple, Union
|
||||
from typing import TYPE_CHECKING, Iterable, List, Tuple, Union
|
||||
|
||||
import numpy as np
|
||||
from packaging.version import Version, parse
|
||||
|
||||
from transformers import PreTrainedModel, PreTrainedTokenizer, TensorType, TFPreTrainedModel, is_torch_available
|
||||
from transformers.file_utils import is_tf_available, is_torch_onnx_dict_inputs_support_available
|
||||
from transformers.onnx.config import OnnxConfig
|
||||
from transformers.utils import logging
|
||||
from ..file_utils import TensorType, is_tf_available, is_torch_available, is_torch_onnx_dict_inputs_support_available
|
||||
from ..utils import logging
|
||||
from .config import OnnxConfig
|
||||
|
||||
|
||||
if is_torch_available():
|
||||
from ..modeling_utils import PreTrainedModel
|
||||
|
||||
if is_tf_available():
|
||||
from ..modeling_tf_utils import TFPreTrainedModel
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from ..feature_extraction_utils import FeatureExtractionMixin
|
||||
from ..tokenization_utils import PreTrainedTokenizer
|
||||
|
||||
|
||||
logger = logging.get_logger(__name__) # pylint: disable=invalid-name
|
||||
@@ -63,18 +74,19 @@ def check_onnxruntime_requirements(minimum_version: Version):
|
||||
|
||||
|
||||
def export_pytorch(
|
||||
tokenizer: PreTrainedTokenizer,
|
||||
model: PreTrainedModel,
|
||||
preprocessor: Union["PreTrainedTokenizer", "FeatureExtractionMixin"],
|
||||
model: "PreTrainedModel",
|
||||
config: OnnxConfig,
|
||||
opset: int,
|
||||
output: Path,
|
||||
tokenizer: "PreTrainedTokenizer" = None,
|
||||
) -> Tuple[List[str], List[str]]:
|
||||
"""
|
||||
Export a PyTorch model to an ONNX Intermediate Representation (IR)
|
||||
|
||||
Args:
|
||||
tokenizer ([`PreTrainedTokenizer`]):
|
||||
The tokenizer used for encoding the data.
|
||||
preprocessor: ([`PreTrainedTokenizer`] or [`FeatureExtractionMixin`]):
|
||||
The preprocessor used for encoding the data.
|
||||
model ([`PreTrainedModel`]):
|
||||
The model to export.
|
||||
config ([`~onnx.config.OnnxConfig`]):
|
||||
@@ -88,6 +100,11 @@ def export_pytorch(
|
||||
`Tuple[List[str], List[str]]`: A tuple with an ordered list of the model's inputs, and the named inputs from
|
||||
the ONNX configuration.
|
||||
"""
|
||||
if tokenizer is not None:
|
||||
warnings.warn(
|
||||
"The `tokenizer` argument is deprecated and will be removed in version 5 of Transformers. Use `preprocessor` instead.",
|
||||
FutureWarning,
|
||||
)
|
||||
if issubclass(type(model), PreTrainedModel):
|
||||
import torch
|
||||
from torch.onnx import export as onnx_export
|
||||
@@ -106,7 +123,9 @@ def export_pytorch(
|
||||
|
||||
# Ensure inputs match
|
||||
# TODO: Check when exporting QA we provide "is_pair=True"
|
||||
model_inputs = config.generate_dummy_inputs(tokenizer, framework=TensorType.PYTORCH)
|
||||
model_inputs = config.generate_dummy_inputs(
|
||||
preprocessor, tokenizer=tokenizer, framework=TensorType.PYTORCH
|
||||
)
|
||||
inputs_match, matched_inputs = ensure_model_and_config_inputs_match(model, model_inputs.keys())
|
||||
onnx_outputs = list(config.outputs.keys())
|
||||
|
||||
@@ -163,18 +182,19 @@ def export_pytorch(
|
||||
|
||||
|
||||
def export_tensorflow(
|
||||
tokenizer: PreTrainedTokenizer,
|
||||
model: TFPreTrainedModel,
|
||||
preprocessor: Union["PreTrainedTokenizer", "FeatureExtractionMixin"],
|
||||
model: "TFPreTrainedModel",
|
||||
config: OnnxConfig,
|
||||
opset: int,
|
||||
output: Path,
|
||||
tokenizer: "PreTrainedTokenizer" = None,
|
||||
) -> Tuple[List[str], List[str]]:
|
||||
"""
|
||||
Export a TensorFlow model to an ONNX Intermediate Representation (IR)
|
||||
|
||||
Args:
|
||||
tokenizer ([`PreTrainedTokenizer`]):
|
||||
The tokenizer used for encoding the data.
|
||||
preprocessor: ([`PreTrainedTokenizer`] or [`FeatureExtractionMixin`]):
|
||||
The preprocessor used for encoding the data.
|
||||
model ([`TFPreTrainedModel`]):
|
||||
The model to export.
|
||||
config ([`~onnx.config.OnnxConfig`]):
|
||||
@@ -193,6 +213,12 @@ def export_tensorflow(
|
||||
import onnx
|
||||
import tf2onnx
|
||||
|
||||
if tokenizer is not None:
|
||||
warnings.warn(
|
||||
"The `tokenizer` argument is deprecated and will be removed in version 5 of Transformers. Use `preprocessor` instead.",
|
||||
FutureWarning,
|
||||
)
|
||||
|
||||
model.config.return_dict = True
|
||||
|
||||
# Check if we need to override certain configuration item
|
||||
@@ -203,7 +229,7 @@ def export_tensorflow(
|
||||
setattr(model.config, override_config_key, override_config_value)
|
||||
|
||||
# Ensure inputs match
|
||||
model_inputs = config.generate_dummy_inputs(tokenizer, framework=TensorType.TENSORFLOW)
|
||||
model_inputs = config.generate_dummy_inputs(preprocessor, tokenizer=tokenizer, framework=TensorType.TENSORFLOW)
|
||||
inputs_match, matched_inputs = ensure_model_and_config_inputs_match(model, model_inputs.keys())
|
||||
onnx_outputs = list(config.outputs.keys())
|
||||
|
||||
@@ -216,18 +242,19 @@ def export_tensorflow(
|
||||
|
||||
|
||||
def export(
|
||||
tokenizer: PreTrainedTokenizer,
|
||||
model: Union[PreTrainedModel, TFPreTrainedModel],
|
||||
preprocessor: Union["PreTrainedTokenizer", "FeatureExtractionMixin"],
|
||||
model: Union["PreTrainedModel", "TFPreTrainedModel"],
|
||||
config: OnnxConfig,
|
||||
opset: int,
|
||||
output: Path,
|
||||
tokenizer: "PreTrainedTokenizer" = None,
|
||||
) -> Tuple[List[str], List[str]]:
|
||||
"""
|
||||
Export a Pytorch or TensorFlow model to an ONNX Intermediate Representation (IR)
|
||||
|
||||
Args:
|
||||
tokenizer ([`PreTrainedTokenizer`]):
|
||||
The tokenizer used for encoding the data.
|
||||
preprocessor: ([`PreTrainedTokenizer`] or [`FeatureExtractionMixin`]):
|
||||
The preprocessor used for encoding the data.
|
||||
model ([`PreTrainedModel`] or [`TFPreTrainedModel`]):
|
||||
The model to export.
|
||||
config ([`~onnx.config.OnnxConfig`]):
|
||||
@@ -246,26 +273,37 @@ def export(
|
||||
"Cannot convert because neither PyTorch nor TensorFlow are not installed. "
|
||||
"Please install torch or tensorflow first."
|
||||
)
|
||||
if tokenizer is not None:
|
||||
warnings.warn(
|
||||
"The `tokenizer` argument is deprecated and will be removed in version 5 of Transformers. Use `preprocessor` instead.",
|
||||
FutureWarning,
|
||||
)
|
||||
|
||||
if is_torch_available():
|
||||
from transformers.file_utils import torch_version
|
||||
from ..file_utils import torch_version
|
||||
|
||||
if not is_torch_onnx_dict_inputs_support_available():
|
||||
raise AssertionError(f"Unsupported PyTorch version, minimum required is 1.8.0, got: {torch_version}")
|
||||
|
||||
if not config.is_torch_support_available:
|
||||
logger.warning(
|
||||
f"Unsupported PyTorch version for this model. Minimum required is {config.torch_onnx_minimum_version}, got: {torch_version}"
|
||||
)
|
||||
|
||||
if is_torch_available() and issubclass(type(model), PreTrainedModel):
|
||||
return export_pytorch(tokenizer, model, config, opset, output)
|
||||
return export_pytorch(preprocessor, model, config, opset, output, tokenizer=tokenizer)
|
||||
elif is_tf_available() and issubclass(type(model), TFPreTrainedModel):
|
||||
return export_tensorflow(tokenizer, model, config, opset, output)
|
||||
return export_tensorflow(preprocessor, model, config, opset, output, tokenizer=tokenizer)
|
||||
|
||||
|
||||
def validate_model_outputs(
|
||||
config: OnnxConfig,
|
||||
tokenizer: PreTrainedTokenizer,
|
||||
reference_model: Union[PreTrainedModel, TFPreTrainedModel],
|
||||
preprocessor: Union["PreTrainedTokenizer", "FeatureExtractionMixin"],
|
||||
reference_model: Union["PreTrainedModel", "TFPreTrainedModel"],
|
||||
onnx_model: Path,
|
||||
onnx_named_outputs: List[str],
|
||||
atol: float,
|
||||
tokenizer: "PreTrainedTokenizer" = None,
|
||||
):
|
||||
from onnxruntime import InferenceSession, SessionOptions
|
||||
|
||||
@@ -274,9 +312,13 @@ def validate_model_outputs(
|
||||
# TODO: generate inputs with a different batch_size and seq_len that was used for conversion to properly test
|
||||
# dynamic input shapes.
|
||||
if issubclass(type(reference_model), PreTrainedModel):
|
||||
reference_model_inputs = config.generate_dummy_inputs(tokenizer, framework=TensorType.PYTORCH)
|
||||
reference_model_inputs = config.generate_dummy_inputs(
|
||||
preprocessor, tokenizer=tokenizer, framework=TensorType.PYTORCH
|
||||
)
|
||||
else:
|
||||
reference_model_inputs = config.generate_dummy_inputs(tokenizer, framework=TensorType.TENSORFLOW)
|
||||
reference_model_inputs = config.generate_dummy_inputs(
|
||||
preprocessor, tokenizer=tokenizer, framework=TensorType.TENSORFLOW
|
||||
)
|
||||
|
||||
# Create ONNX Runtime session
|
||||
options = SessionOptions()
|
||||
@@ -354,7 +396,7 @@ def validate_model_outputs(
|
||||
|
||||
|
||||
def ensure_model_and_config_inputs_match(
|
||||
model: Union[PreTrainedModel, TFPreTrainedModel], model_inputs: Iterable[str]
|
||||
model: Union["PreTrainedModel", "TFPreTrainedModel"], model_inputs: Iterable[str]
|
||||
) -> Tuple[bool, List[str]]:
|
||||
"""
|
||||
|
||||
|
||||
@@ -17,6 +17,7 @@ from ..models.marian import MarianOnnxConfig
|
||||
from ..models.mbart import MBartOnnxConfig
|
||||
from ..models.roberta import RobertaOnnxConfig
|
||||
from ..models.t5 import T5OnnxConfig
|
||||
from ..models.vit import ViTOnnxConfig
|
||||
from ..models.xlm_roberta import XLMRobertaOnnxConfig
|
||||
from ..utils import logging
|
||||
from .config import OnnxConfig
|
||||
@@ -28,6 +29,7 @@ if is_torch_available():
|
||||
from transformers.models.auto import (
|
||||
AutoModel,
|
||||
AutoModelForCausalLM,
|
||||
AutoModelForImageClassification,
|
||||
AutoModelForMaskedLM,
|
||||
AutoModelForMultipleChoice,
|
||||
AutoModelForQuestionAnswering,
|
||||
@@ -90,6 +92,7 @@ class FeaturesManager:
|
||||
"token-classification": AutoModelForTokenClassification,
|
||||
"multiple-choice": AutoModelForMultipleChoice,
|
||||
"question-answering": AutoModelForQuestionAnswering,
|
||||
"image-classification": AutoModelForImageClassification,
|
||||
}
|
||||
elif is_tf_available():
|
||||
_TASKS_TO_AUTOMODELS = {
|
||||
@@ -244,6 +247,7 @@ class FeaturesManager:
|
||||
"question-answering",
|
||||
onnx_config_cls=ElectraOnnxConfig,
|
||||
),
|
||||
"vit": supported_features_mapping("default", "image-classification", onnx_config_cls=ViTOnnxConfig),
|
||||
}
|
||||
|
||||
AVAILABLE_FEATURES = sorted(reduce(lambda s1, s2: s1 | s2, (v.keys() for v in _SUPPORTED_MODEL_TYPE.values())))
|
||||
|
||||
@@ -3,23 +3,25 @@ from tempfile import NamedTemporaryFile
|
||||
from unittest import TestCase
|
||||
from unittest.mock import patch
|
||||
|
||||
import pytest
|
||||
|
||||
from parameterized import parameterized
|
||||
from transformers import AutoConfig, AutoTokenizer, is_tf_available, is_torch_available
|
||||
from transformers import AutoConfig, AutoFeatureExtractor, AutoTokenizer, is_tf_available, is_torch_available
|
||||
from transformers.onnx import (
|
||||
EXTERNAL_DATA_FORMAT_SIZE_LIMIT,
|
||||
OnnxConfig,
|
||||
OnnxConfigWithPast,
|
||||
ParameterFormat,
|
||||
export,
|
||||
validate_model_outputs,
|
||||
)
|
||||
from transformers.onnx.config import OnnxConfigWithPast
|
||||
|
||||
|
||||
if is_torch_available() or is_tf_available():
|
||||
from transformers.onnx.features import FeaturesManager
|
||||
|
||||
from transformers.onnx.utils import compute_effective_axis_dimension, compute_serialized_parameters_size
|
||||
from transformers.testing_utils import require_onnx, require_tf, require_torch, slow
|
||||
from transformers.testing_utils import require_onnx, require_tf, require_torch, require_vision, slow
|
||||
|
||||
|
||||
@require_onnx
|
||||
@@ -178,6 +180,7 @@ PYTORCH_EXPORT_MODELS = {
|
||||
("roberta", "roberta-base"),
|
||||
("xlm-roberta", "xlm-roberta-base"),
|
||||
("layoutlm", "microsoft/layoutlm-base-uncased"),
|
||||
("vit", "google/vit-base-patch16-224"),
|
||||
}
|
||||
|
||||
PYTORCH_EXPORT_WITH_PAST_MODELS = {
|
||||
@@ -241,25 +244,38 @@ class OnnxExportTestCaseV2(TestCase):
|
||||
def _onnx_export(self, test_name, name, model_name, feature, onnx_config_class_constructor):
|
||||
from transformers.onnx import export
|
||||
|
||||
tokenizer = AutoTokenizer.from_pretrained(model_name)
|
||||
config = AutoConfig.from_pretrained(model_name)
|
||||
|
||||
# Useful for causal lm models that do not use pad tokens.
|
||||
if not getattr(config, "pad_token_id", None):
|
||||
config.pad_token_id = tokenizer.eos_token_id
|
||||
|
||||
model_class = FeaturesManager.get_model_class_for_feature(feature)
|
||||
config = AutoConfig.from_pretrained(model_name)
|
||||
model = model_class.from_config(config)
|
||||
onnx_config = onnx_config_class_constructor(model.config)
|
||||
|
||||
if is_torch_available():
|
||||
from transformers.file_utils import torch_version
|
||||
|
||||
if torch_version < onnx_config.torch_onnx_minimum_version:
|
||||
pytest.skip(
|
||||
f"Skipping due to incompatible PyTorch version. Minimum required is {onnx_config.torch_onnx_minimum_version}, got: {torch_version}"
|
||||
)
|
||||
|
||||
# Check the modality of the inputs and instantiate the appropriate preprocessor
|
||||
if model.main_input_name == "input_ids":
|
||||
preprocessor = AutoTokenizer.from_pretrained(model_name)
|
||||
# Useful for causal lm models that do not use pad tokens.
|
||||
if not getattr(config, "pad_token_id", None):
|
||||
config.pad_token_id = preprocessor.eos_token_id
|
||||
elif model.main_input_name == "pixel_values":
|
||||
preprocessor = AutoFeatureExtractor.from_pretrained(model_name)
|
||||
else:
|
||||
raise ValueError(f"Unsupported model input name: {model.main_input_name}")
|
||||
|
||||
with NamedTemporaryFile("w") as output:
|
||||
try:
|
||||
onnx_inputs, onnx_outputs = export(
|
||||
tokenizer, model, onnx_config, onnx_config.default_onnx_opset, Path(output.name)
|
||||
preprocessor, model, onnx_config, onnx_config.default_onnx_opset, Path(output.name)
|
||||
)
|
||||
validate_model_outputs(
|
||||
onnx_config,
|
||||
tokenizer,
|
||||
preprocessor,
|
||||
model,
|
||||
Path(output.name),
|
||||
onnx_outputs,
|
||||
@@ -271,6 +287,7 @@ class OnnxExportTestCaseV2(TestCase):
|
||||
@parameterized.expand(_get_models_to_test(PYTORCH_EXPORT_MODELS))
|
||||
@slow
|
||||
@require_torch
|
||||
@require_vision
|
||||
def test_pytorch_export(self, test_name, name, model_name, feature, onnx_config_class_constructor):
|
||||
self._onnx_export(test_name, name, model_name, feature, onnx_config_class_constructor)
|
||||
|
||||
@@ -291,6 +308,7 @@ class OnnxExportTestCaseV2(TestCase):
|
||||
@parameterized.expand(_get_models_to_test(TENSORFLOW_EXPORT_DEFAULT_MODELS))
|
||||
@slow
|
||||
@require_tf
|
||||
@require_vision
|
||||
def test_tensorflow_export(self, test_name, name, model_name, feature, onnx_config_class_constructor):
|
||||
self._onnx_export(test_name, name, model_name, feature, onnx_config_class_constructor)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user