From 16f0b7d72c6d4e122957392c342b074aa2c5c519 Mon Sep 17 00:00:00 2001 From: lewtun Date: Tue, 11 Jan 2022 18:06:05 +0100 Subject: [PATCH] Update ONNX docs (#14904) * Remove docs for deprecated ONNX export * Tidy up the CLI help messages * Revamp ONNX docs * Update auto-config table * Use DistilBERT as example for consistency * Wrap up first pass at ONNX docs * Fix table check * Add tweaks and introduction * Add cross-ref * Fix missing import * Fix style * Add permalinks to ONNX configs * Clarify role of OrderedDict * Update docs/source/serialization.mdx Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com> * Add doctest syntax to code blocks * Remove permalinks * Revert "Remove permalinks" This reverts commit 099701daf0db27823457867938efdb2d4f22a7c1. Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com> --- docs/source/serialization.mdx | 514 ++++++++++++++++++------------ src/transformers/onnx/__main__.py | 12 +- utils/check_table.py | 4 +- 3 files changed, 319 insertions(+), 211 deletions(-) diff --git a/docs/source/serialization.mdx b/docs/source/serialization.mdx index 85b8ee8005..e2b52cd652 100644 --- a/docs/source/serialization.mdx +++ b/docs/source/serialization.mdx @@ -10,27 +10,38 @@ an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express o specific language governing permissions and limitations under the License. --> -# Exporting transformers models +# Exporting 🤗 Transformers Models -## ONNX / ONNXRuntime +If you need to deploy 🤗 Transformers models in production environments, we +recommend exporting them to a serialized format that can be loaded and executed +on specialized runtimes and hardware. In this guide, we'll show you how to +export 🤗 Transformers models in two widely used formats: ONNX and TorchScript. 
-Projects [ONNX (Open Neural Network eXchange)](http://onnx.ai) and [ONNXRuntime (ORT)](https://microsoft.github.io/onnxruntime/) are part of an effort from leading industries in the AI field to provide a -unified and community-driven format to store and, by extension, efficiently execute neural network leveraging a variety -of hardware and dedicated optimizations. +Once exported, a model can be optimized for inference via techniques such as +quantization and pruning. If you are interested in optimizing your models to run +with maximum efficiency, check out the [🤗 Optimum +library](https://github.com/huggingface/optimum). +## ONNX -Starting from transformers v2.10.0 we partnered with ONNX Runtime to provide an easy export of transformers models to -the ONNX format. You can have a look at the effort by looking at our joint blog post [Accelerate your NLP pipelines -using Hugging Face Transformers and ONNX Runtime](https://medium.com/microsoftazure/accelerate-your-nlp-pipelines-using-hugging-face-transformers-and-onnx-runtime-2443578f4333). +The [ONNX (Open Neural Network eXchange)](http://onnx.ai) project is an open +standard that defines a common set of operators and a common file format to +represent deep learning models in a wide variety of frameworks, including +PyTorch and TensorFlow. When a model is exported to the ONNX format, these +operators are used to construct a computational graph (often called an +_intermediate representation_) which represents the flow of data through the +neural network. +By exposing a graph with standardized operators and data types, ONNX makes it +easy to switch between frameworks. For example, a model trained in PyTorch can +be exported to ONNX format and then imported in TensorFlow (and vice versa). -### Configuration-based approach +🤗 Transformers provides a `transformers.onnx` package that enables you to +convert model checkpoints to an ONNX graph by leveraging configuration objects. 
+These configuration objects come ready made for a number of model architectures, +and are designed to be easily extendable to other architectures. -Transformers v4.9.0 introduces a new package: `transformers.onnx`. This package allows converting checkpoints to an -ONNX graph by leveraging configuration objects. These configuration objects come ready made for a number of model -architectures, and are made to be easily extendable to other architectures. - -Ready-made configurations include the following models: +Ready-made configurations include the following architectures: @@ -50,24 +61,30 @@ Ready-made configurations include the following models: - T5 - XLM-RoBERTa -This conversion is handled with the PyTorch version of models - it, therefore, requires PyTorch to be installed. If you -would like to be able to convert from TensorFlow, please let us know by opening an issue. +The ONNX conversion is supported for the PyTorch versions of the models. If you +would like to be able to convert a TensorFlow model, please let us know by +opening an issue. - +In the next two sections, we'll show you how to: -The models showcased here are close to fully feature complete, but do lack some features that are currently in -development. Namely, the ability to handle the past key values for decoder models is currently in the works. +* Export a supported model using the `transformers.onnx` package. +* Export a custom model for an unsupported architecture. 
- +### Exporting a model to ONNX -#### Converting an ONNX model using the `transformers.onnx` package +To export a 🤗 Transformers model to ONNX, you'll first need to install some +extra dependencies: -The package may be used as a Python module: +```bash +pip install transformers[onnx] +``` + +The `transformers.onnx` package can then be used as a Python module: ```bash python -m transformers.onnx --help -usage: Hugging Face ONNX Exporter tool [-h] -m MODEL -f {pytorch} [--features {default}] [--opset OPSET] [--atol ATOL] output +usage: Hugging Face Transformers ONNX exporter [-h] -m MODEL [--feature {causal-lm, ...}] [--opset OPSET] [--atol ATOL] output positional arguments: output Path indicating where to store generated ONNX model. @@ -75,232 +92,323 @@ positional arguments: optional arguments: -h, --help show this help message and exit -m MODEL, --model MODEL - Model's name of path on disk to load. - --features {default} Export the model with some additional features. - --opset OPSET ONNX opset version to export the model with (default 12). - --atol ATOL Absolute difference tolerance when validating the model. + Model ID on huggingface.co or path on disk to load model from. + --feature {causal-lm, ...} + The type of features to export the model with. + --opset OPSET ONNX opset version to export the model with. + --atol ATOL Absolute difference tolerence when validating the model. ``` Exporting a checkpoint using a ready-made configuration can be done as follows: ```bash -python -m transformers.onnx --model=bert-base-cased onnx/bert-base-cased/ +python -m transformers.onnx --model=distilbert-base-uncased onnx/ ``` -This exports an ONNX graph of the mentioned checkpoint. Here it is *bert-base-cased*, but it can be any model from the -hub, or a local path. - -It will be exported under `onnx/bert-base-cased`. You should see similar logs: +which should show the following logs: ```bash Validating ONNX model... 
- -[✓] ONNX model outputs' name match reference model ({'pooler_output', 'last_hidden_state'} -- Validating ONNX Model output "last_hidden_state": - -[✓] (2, 8, 768) matchs (2, 8, 768) - -[✓] all values close (atol: 0.0001) -- Validating ONNX Model output "pooler_output": - -[✓] (2, 768) matchs (2, 768) - -[✓] all values close (atol: 0.0001) -All good, model saved at: onnx/bert-base-cased/model.onnx + -[✓] ONNX model outputs' name match reference model ({'last_hidden_state'}) + - Validating ONNX Model output "last_hidden_state": + -[✓] (2, 8, 768) matches (2, 8, 768) + -[✓] all values close (atol: 1e-05) +All good, model saved at: onnx/model.onnx ``` -This export can now be used in the ONNX inference runtime: +This exports an ONNX graph of the checkpoint defined by the `--model` argument. +In this example it is `distilbert-base-uncased`, but it can be any model on the +Hugging Face Hub or one that's stored locally. + +The resulting `model.onnx` file can then be run on one of the [many +accelerators](https://onnx.ai/supported-tools.html#deployModel) that support the +ONNX standard. 
For example, we can load and run the model with [ONNX +Runtime](https://onnxruntime.ai/) as follows: ```python -import onnxruntime as ort +>>> from transformers import AutoTokenizer +>>> from onnxruntime import InferenceSession -from transformers import BertTokenizerFast - -tokenizer = BertTokenizerFast.from_pretrained("bert-base-cased") - -ort_session = ort.InferenceSession("onnx/bert-base-cased/model.onnx") - -inputs = tokenizer("Using BERT in ONNX!", return_tensors="np") -outputs = ort_session.run(["last_hidden_state", "pooler_output"], dict(inputs)) +>>> tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased") +>>> session = InferenceSession("onnx/model.onnx") +>>> # ONNX Runtime expects NumPy arrays as input +>>> inputs = tokenizer("Using DistilBERT with ONNX Runtime!", return_tensors="np") +>>> outputs = session.run(output_names=["last_hidden_state"], input_feed=dict(inputs)) ``` -The outputs used (`["last_hidden_state", "pooler_output"]`) can be obtained by taking a look at the ONNX -configuration of each model. For example, for BERT: +The required output names (i.e. `["last_hidden_state"]`) can be obtained by +taking a look at the ONNX configuration of each model. For example, for +DistilBERT we have: ```python -from transformers.models.bert import BertOnnxConfig, BertConfig +>>> from transformers.models.distilbert import DistilBertConfig, DistilBertOnnxConfig -config = BertConfig() -onnx_config = BertOnnxConfig(config) -output_keys = list(onnx_config.outputs.keys()) +>>> config = DistilBertConfig() +>>> onnx_config = DistilBertOnnxConfig(config) +>>> print(list(onnx_config.outputs.keys())) +["last_hidden_state"] ``` -#### Implementing a custom configuration for an unsupported architecture +### Selecting features for different model topologies -Let's take a look at the changes necessary to add a custom configuration for an unsupported architecture. 
Firstly, we -will need a custom ONNX configuration object that details the model inputs and outputs. The BERT ONNX configuration is -visible below: +Each ready-made configuration comes with a set of _features_ that enable you to +export models for different types of topologies or tasks. As shown in the table +below, each feature is associated with a different auto class: + +| Feature | Auto Class | +| ------------------------------------ | ------------------------------------ | +| `causal-lm`, `causal-lm-with-past` | `AutoModelForCausalLM` | +| `default`, `default-with-past` | `AutoModel` | +| `masked-lm` | `AutoModelForMaskedLM` | +| `question-answering` | `AutoModelForQuestionAnswering` | +| `seq2seq-lm`, `seq2seq-lm-with-past` | `AutoModelForSeq2SeqLM` | +| `sequence-classification` | `AutoModelForSequenceClassification` | +| `token-classification` | `AutoModelForTokenClassification` | + +For each configuration, you can find the list of supported features via the +`FeaturesManager`. For example, for DistilBERT we have: ```python -class BertOnnxConfig(OnnxConfig): - @property - def inputs(self) -> Mapping[str, Mapping[int, str]]: - return OrderedDict( - [ - ("input_ids", {0: "batch", 1: "sequence"}), - ("attention_mask", {0: "batch", 1: "sequence"}), - ("token_type_ids", {0: "batch", 1: "sequence"}), - ] - ) +>>> from transformers.onnx.features import FeaturesManager - @property - def outputs(self) -> Mapping[str, Mapping[int, str]]: - return OrderedDict([("last_hidden_state", {0: "batch", 1: "sequence"}), ("pooler_output", {0: "batch"})]) +>>> distilbert_features = list(FeaturesManager.get_supported_features_for_model_type("distilbert").keys()) +>>> print(distilbert_features) +["default", "masked-lm", "causal-lm", "sequence-classification", "token-classification", "question-answering"] ``` -Let's understand what's happening here. This configuration has two properties: the inputs, and the outputs. 
- -The inputs return a dictionary, where each key corresponds to an expected input, and each value indicates the axis of -that input. - -For BERT, there are three necessary inputs. These three inputs are of similar shape, which is made up of two -dimensions: the batch is the first dimension, and the second is the sequence. - -The outputs return a similar dictionary, where, once again, each key corresponds to an expected output, and each value -indicates the axis of that output. - -Once this is done, a single step remains: adding this configuration object to the initialisation of the model class, -and to the general `transformers` initialisation. - -An important fact to notice is the use of *OrderedDict* in both inputs and outputs properties. This is a requirements -as inputs are matched against their relative position within the *PreTrainedModel.forward()* prototype and outputs are -match against there position in the returned *BaseModelOutputX* instance. - -An example of such an addition is visible here, for the MBart model: [Making MBART ONNX-convertible](https://github.com/huggingface/transformers/pull/13049/commits/d097adcebd89a520f04352eb215a85916934204f) - -If you would like to contribute your addition to the library, we recommend you implement tests. An example of such -tests is visible here: [Adding tests to the MBART ONNX conversion](https://github.com/huggingface/transformers/pull/13049/commits/5d642f65abf45ceeb72bd855ca7bfe2506a58e6a) - -### Graph conversion - - - -The approach detailed here is bing deprecated. We recommend you follow the part above for an up to date approach. - - - -Exporting a model is done through the script *convert_graph_to_onnx.py* at the root of the transformers sources. The -following command shows how easy it is to export a BERT model from the library, simply run: +You can then pass one of these features to the `--feature` argument in the +`transformers.onnx` package. 
For example, to export a text-classification model +we can pick a fine-tuned model from the Hub and run: ```bash -python convert_graph_to_onnx.py --framework --model bert-base-cased bert-base-cased.onnx +python -m transformers.onnx --model=distilbert-base-uncased-finetuned-sst-2-english \ + --feature=sequence-classification onnx/ ``` -The conversion tool works for both PyTorch and Tensorflow models and ensures: - -- The model and its weights are correctly initialized from the Hugging Face model hub or a local checkpoint. -- The inputs and outputs are correctly generated to their ONNX counterpart. -- The generated model can be correctly loaded through onnxruntime. - - - -Currently, inputs and outputs are always exported with dynamic sequence axes preventing some optimizations on the -ONNX Runtime. If you would like to see such support for fixed-length inputs/outputs, please open up an issue on -transformers. - - - -Also, the conversion tool supports different options which let you tune the behavior of the generated model: - -- **Change the target opset version of the generated model.** (More recent opset generally supports more operators and - enables faster inference) - -- **Export pipeline-specific prediction heads.** (Allow to export model along with its task-specific prediction - head(s)) - -- **Use the external data format (PyTorch only).** (Lets you export model which size is above 2Gb ([More info](https://github.com/pytorch/pytorch/pull/33062))) - - -### Optimizations - -ONNXRuntime includes some transformers-specific transformations to leverage optimized operations in the graph. 
Below -are some of the operators which can be enabled to speed up inference through ONNXRuntime (*see note below*): - -- Constant folding -- Attention Layer fusing -- Skip connection LayerNormalization fusing -- FastGeLU approximation - -Some of the optimizations performed by ONNX runtime can be hardware specific and thus lead to different performances if -used on another machine with a different hardware configuration than the one used for exporting the model. For this -reason, when using `convert_graph_to_onnx.py` optimizations are not enabled, ensuring the model can be easily -exported to various hardware. Optimizations can then be enabled when loading the model through ONNX runtime for -inference. - - - - -When quantization is enabled (see below), `convert_graph_to_onnx.py` script will enable optimizations on the -model because quantization would modify the underlying graph making it impossible for ONNX runtime to do the -optimizations afterwards. - - - - - -For more information about the optimizations enabled by ONNXRuntime, please have a look at the [ONNXRuntime Github](https://github.com/microsoft/onnxruntime/tree/master/onnxruntime/python/tools/transformers). - - - -### Quantization - -ONNX exporter supports generating a quantized version of the model to allow efficient inference. - -Quantization works by converting the memory representation of the parameters in the neural network to a compact integer -format. By default, weights of a neural network are stored as single-precision float (*float32*) which can express a -wide-range of floating-point numbers with decent precision. These properties are especially interesting at training -where you want fine-grained representation. - -On the other hand, after the training phase, it has been shown one can greatly reduce the range and the precision of -*float32* numbers without changing the performances of the neural network. 
- -More technically, *float32* parameters are converted to a type requiring fewer bits to represent each number, thus -reducing the overall size of the model. Here, we are enabling *float32* mapping to *int8* values (a non-floating, -single byte, number representation) according to the following formula: - -$$y_{float32} = scale * x_{int8} - zero\_point$$ - - - -The quantization process will infer the parameter *scale* and *zero_point* from the neural network parameters - - - -Leveraging tiny-integers has numerous advantages when it comes to inference: - -- Storing fewer bits instead of 32 bits for the *float32* reduces the size of the model and makes it load faster. -- Integer operations execute a magnitude faster on modern hardware -- Integer operations require less power to do the computations - -In order to convert a transformers model to ONNX IR with quantized weights you just need to specify `--quantize` when -using `convert_graph_to_onnx.py`. Also, you can have a look at the `quantize()` utility-method in this same script -file. - -Example of quantized BERT model export: +which will display the following logs: ```bash -python convert_graph_to_onnx.py --framework --model bert-base-cased --quantize bert-base-cased.onnx +Validating ONNX model... + -[✓] ONNX model outputs' name match reference model ({'logits'}) + - Validating ONNX Model output "logits": + -[✓] (2, 2) matches (2, 2) + -[✓] all values close (atol: 1e-05) +All good, model saved at: onnx/model.onnx +``` + +Notice that in this case, the output names from the fine-tuned model are +`logits` instead of the `last_hidden_state` we saw with the +`distilbert-base-uncased` checkpoint earlier. This is expected since the +fine-tuned model has a sequence classification head. + + + +The features that have a `with-past` suffix (e.g. `causal-lm-with-past`) +correspond to model topologies with precomputed hidden states (key and values +in the attention blocks) that can be used for fast autoregressive decoding. 
+ + + + +### Exporting a model for an unsupported architecture + +If you wish to export a model whose architecture is not natively supported by +the library, there are three main steps to follow: + +1. Implement a custom ONNX configuration. +2. Export the model to ONNX. +3. Validate the outputs of the PyTorch and exported models. + +In this section, we'll look at how DistilBERT was implemented to show what's +involved with each step. + +#### Implementing a custom ONNX configuration + +Let's start with the ONNX configuration object. We provide three abstract +classes that you should inherit from, depending on the type of model +architecture you wish to export: + +* Encoder-based models inherit from [`OnnxConfig`](https://github.com/huggingface/transformers/blob/c4fa908fa98c3d538462c537d29b7613dd71306e/src/transformers/onnx/config.py#L52) +* Decoder-based models inherit from [`OnnxConfigWithPast`](https://github.com/huggingface/transformers/blob/c4fa908fa98c3d538462c537d29b7613dd71306e/src/transformers/onnx/config.py#L264) +* Encoder-decoder models inherit from [`OnnxSeq2SeqConfigWithPast`](https://github.com/huggingface/transformers/blob/c4fa908fa98c3d538462c537d29b7613dd71306e/src/transformers/onnx/config.py#L399) + + + +A good way to implement a custom ONNX configuration is to look at the existing +implementation in the `configuration_.py` file of a similar architecture. + + + +Since DistilBERT is an encoder-based model, its configuration inherits from +`OnnxConfig`: + +```python +>>> from typing import Mapping, OrderedDict +>>> from transformers.onnx import OnnxConfig + + +>>> class DistilBertOnnxConfig(OnnxConfig): +... @property +... def inputs(self) -> Mapping[str, Mapping[int, str]]: +... return OrderedDict( +... [ +... ("input_ids", {0: "batch", 1: "sequence"}), +... ("attention_mask", {0: "batch", 1: "sequence"}), +... ] +... 
) +``` + +Every configuration object must implement the `inputs` property and return a +mapping, where each key corresponds to an expected input, and each value +indicates the axis of that input. For DistilBERT, we can see that two inputs are +required: `input_ids` and `attention_mask`. These inputs have the same shape of +`(batch_size, sequence_length)` which is why we see the same axes used in the +configuration. + + + +Notice that the `inputs` property for `DistilBertOnnxConfig` returns an +`OrderedDict`. This ensures that the inputs are matched with their relative +position within the `PreTrainedModel.forward()` method when tracing the graph. +We recommend using an `OrderedDict` for the `inputs` and `outputs` properties +when implementing custom ONNX configurations. + + + +Once you have implemented an ONNX configuration, you can instantiate it by +providing the base model's configuration as follows: + +```python +>>> from transformers import AutoConfig + +>>> config = AutoConfig.from_pretrained("distilbert-base-uncased") +>>> onnx_config = DistilBertOnnxConfig(config) +``` + +The resulting object has several useful properties. For example, you can view the +ONNX operator set that will be used during the export: + +```python +>>> print(onnx_config.default_onnx_opset) +11 +``` + +You can also view the outputs associated with the model as follows: + +```python +>>> print(onnx_config.outputs) +OrderedDict([("last_hidden_state", {0: "batch", 1: "sequence"})]) +``` + +Notice that the outputs property follows the same structure as the inputs; it +returns an `OrderedDict` of named outputs and their shapes. The output structure +is linked to the choice of feature that the configuration is initialized with. +By default, the ONNX configuration is initialized with the `default` feature +that corresponds to exporting a model loaded with the `AutoModel` class. 
If you +want to export a different model topology, just provide a different feature to +the `task` argument when you initialize the ONNX configuration. For example, if +we wished to export DistilBERT with a sequence classification head, we could +use: + +```python +>>> from transformers import AutoConfig + +>>> config = AutoConfig.from_pretrained("distilbert-base-uncased") +>>> onnx_config_for_seq_clf = DistilBertOnnxConfig(config, task="sequence-classification") +>>> print(onnx_config_for_seq_clf.outputs) +OrderedDict([('logits', {0: 'batch'})]) ``` -Quantization support requires ONNX Runtime >= 1.4.0 +All of the base properties and methods associated with +[`OnnxConfig`] +and the other configuration classes can be overridden if needed. Check out +[`BartOnnxConfig`] +for an advanced example. +#### Exporting the model + +Once you have implemented the ONNX configuration, the next step is to export the +model. Here we can use the `export()` function provided by the +`transformers.onnx` package. This function expects the ONNX configuration, along +with the base model and tokenizer, and the path to save the exported file: + +```python +>>> from pathlib import Path +>>> from transformers.onnx import export +>>> from transformers import AutoTokenizer, AutoModel + +>>> onnx_path = Path("model.onnx") +>>> model_ckpt = "distilbert-base-uncased" +>>> base_model = AutoModel.from_pretrained(model_ckpt) +>>> tokenizer = AutoTokenizer.from_pretrained(model_ckpt) + +>>> onnx_inputs, onnx_outputs = export(tokenizer, base_model, onnx_config, onnx_config.default_onnx_opset, onnx_path) +``` + +The `onnx_inputs` and `onnx_outputs` returned by the `export()` function are +lists of the keys defined in the `inputs` and `outputs` properties of the +configuration. 
Once the model is exported, you can test that the model is well +formed as follows: + +```python +>>> import onnx + +>>> onnx_model = onnx.load("model.onnx") +>>> onnx.checker.check_model(onnx_model) +``` + -When exporting quantized model you will end up with two different ONNX files. The one specified at the end of the -above command will contain the original ONNX model storing *float32* weights. The second one, with `-quantized` -suffix, will hold the quantized parameters. +If your model is larger than 2GB, you will see that many additional files are +created during the export. This is _expected_ because ONNX uses [Protocol +Buffers](https://developers.google.com/protocol-buffers/) to store the model and +these have a size limit of 2GB. See the [ONNX +documentation](https://github.com/onnx/onnx/blob/master/docs/ExternalData.md) +for instructions on how to load models with external data. +#### Validating the model outputs + +The final step is to validate that the outputs from the base and exported model +agree within some absolute tolerance. Here we can use the +`validate_model_outputs()` function provided by the `transformers.onnx` package +as follows: + +```python +>>> from transformers.onnx import validate_model_outputs + +>>> validate_model_outputs( +... onnx_config, tokenizer, base_model, onnx_path, onnx_outputs, onnx_config.atol_for_validation +... ) +``` + +This function uses the `OnnxConfig.generate_dummy_inputs()` method to generate +inputs for the base and exported model, and the absolute tolerance can be +defined in the configuration. We generally find numerical agreement in the 1e-6 +to 1e-4 range, although anything smaller than 1e-3 is likely to be OK. + +### Contributing a new configuration to 🤗 Transformers + +We are looking to expand the set of ready-made configurations and welcome +contributions from the community! 
If you would like to contribute your addition +to the library, you will need to: + +* Implement the ONNX configuration in the corresponding `configuration_<model_name>.py` +file +* Include the model architecture and corresponding features in +[`onnx.features.FeaturesManager`](https://github.com/huggingface/transformers/blob/c4fa908fa98c3d538462c537d29b7613dd71306e/src/transformers/onnx/features.py#L71) +* Add your model architecture to the tests in +`test_onnx_v2.py` + +Check out how the configuration for [IBERT was +contributed](https://github.com/huggingface/transformers/pull/14868/files) to +get an idea of what's involved. + ## TorchScript diff --git a/src/transformers/onnx/__main__.py b/src/transformers/onnx/__main__.py index eb5d2773b0..bb54717289 100644 --- a/src/transformers/onnx/__main__.py +++ b/src/transformers/onnx/__main__.py @@ -23,17 +23,17 @@ from .features import FeaturesManager def main(): - parser = ArgumentParser("Hugging Face ONNX Exporter tool") - parser.add_argument("-m", "--model", type=str, required=True, help="Model's name of path on disk to load.") + parser = ArgumentParser("Hugging Face Transformers ONNX exporter") + parser.add_argument( + "-m", "--model", type=str, required=True, help="Model ID on huggingface.co or path on disk to load model from." + ) parser.add_argument( "--feature", choices=list(FeaturesManager.AVAILABLE_FEATURES), default="default", - help="Export the model with some additional feature.", - ) - parser.add_argument( - "--opset", type=int, default=None, help="ONNX opset version to export the model with (default 12)." + help="The type of features to export the model with.", ) + parser.add_argument("--opset", type=int, default=None, help="ONNX opset version to export the model with.") parser.add_argument( "--atol", type=float, default=None, help="Absolute difference tolerence when validating the model." 
) diff --git a/utils/check_table.py b/utils/check_table.py index 17ab62e4aa..9c81937f44 100644 --- a/utils/check_table.py +++ b/utils/check_table.py @@ -207,11 +207,11 @@ def get_onnx_model_list(): def check_onnx_model_list(overwrite=False): - """Check the model list in the serialization.rst is consistent with the state of the lib and maybe `overwrite`.""" + """Check the model list in the serialization.mdx is consistent with the state of the lib and maybe `overwrite`.""" current_list, start_index, end_index, lines = _find_text_in_file( filename=os.path.join(PATH_TO_DOCS, "serialization.mdx"), start_prompt="", - end_prompt="This conversion is handled with the PyTorch version of models ", + end_prompt="The ONNX conversion is supported for the PyTorch versions of the models.", ) new_list = get_onnx_model_list()