diff --git a/.github/workflows/self-push.yml b/.github/workflows/self-push.yml index c474df21bf..04d09267c6 100644 --- a/.github/workflows/self-push.yml +++ b/.github/workflows/self-push.yml @@ -45,7 +45,7 @@ jobs: source .env/bin/activate pip install --upgrade pip pip install torch!=1.6.0 - pip install .[sklearn,testing] + pip install .[sklearn,testing,onnxruntime] - name: Are GPUs recognized by our DL frameworks run: | diff --git a/.github/workflows/self-scheduled.yml b/.github/workflows/self-scheduled.yml index e1ccb853f4..b291176633 100644 --- a/.github/workflows/self-scheduled.yml +++ b/.github/workflows/self-scheduled.yml @@ -42,7 +42,7 @@ jobs: source .env/bin/activate pip install --upgrade pip pip install torch!=1.6.0 - pip install .[sklearn,testing] + pip install .[sklearn,testing,onnxruntime] - name: Are GPUs recognized by our DL frameworks run: | diff --git a/setup.py b/setup.py index 145e8e0d39..912b384668 100644 --- a/setup.py +++ b/setup.py @@ -74,16 +74,17 @@ extras["tf"] = [ # "onnxconverter-common", # "keras2onnx" "onnxconverter-common @ git+git://github.com/microsoft/onnxconverter-common.git@f64ca15989b6dc95a1f3507ff6e4c395ba12dff5#egg=onnxconverter-common", - "keras2onnx @ git+git://github.com/onnx/keras-onnx.git@cbdc75cb950b16db7f0a67be96a278f8d2953b48#egg=keras2onnx" + "keras2onnx @ git+git://github.com/onnx/keras-onnx.git@cbdc75cb950b16db7f0a67be96a278f8d2953b48#egg=keras2onnx", ] extras["tf-cpu"] = [ "tensorflow-cpu", # "onnxconverter-common", # "keras2onnx" "onnxconverter-common @ git+git://github.com/microsoft/onnxconverter-common.git@f64ca15989b6dc95a1f3507ff6e4c395ba12dff5#egg=onnxconverter-common", - "keras2onnx @ git+git://github.com/onnx/keras-onnx.git@cbdc75cb950b16db7f0a67be96a278f8d2953b48#egg=keras2onnx" + "keras2onnx @ git+git://github.com/onnx/keras-onnx.git@cbdc75cb950b16db7f0a67be96a278f8d2953b48#egg=keras2onnx", ] extras["torch"] = ["torch"] +extras["onnxruntime"] = ["onnxruntime>=1.4.0", "onnxruntime-tools>=1.4.2"] 
extras["serving"] = ["pydantic", "uvicorn", "fastapi", "starlette"] extras["all"] = extras["serving"] + ["tensorflow", "torch"] diff --git a/src/transformers/convert_graph_to_onnx.py b/src/transformers/convert_graph_to_onnx.py index f98ca912be..990895a501 100644 --- a/src/transformers/convert_graph_to_onnx.py +++ b/src/transformers/convert_graph_to_onnx.py @@ -364,32 +364,29 @@ def quantize(onnx_model_path: Path) -> Path: Returns: The Path generated for the quantized """ - try: - import onnx - from onnxruntime.quantization import QuantizationMode, quantize + import onnx + from onnxruntime.quantization import QuantizationMode, quantize - onnx_model = onnx.load(onnx_model_path.as_posix()) + onnx_model = onnx.load(onnx_model_path.as_posix()) - # Discussed with @yufenglee from ONNX runtime, this will be address in the next release of onnxruntime - print( - "As of onnxruntime 1.4.0, models larger than 2GB will fail to quantize due to protobuf constraint.\n" - "This limitation will be removed in the next release of onnxruntime." - ) + # Discussed with @yufenglee from ONNX runtime, this will be addressed in the next release of onnxruntime + print( + "As of onnxruntime 1.4.0, models larger than 2GB will fail to quantize due to protobuf constraint.\n" + "This limitation will be removed in the next release of onnxruntime." 
+ ) - quantized_model = quantize( - model=onnx_model, quantization_mode=QuantizationMode.IntegerOps, force_fusions=True, symmetric_weight=True, - ) + quantized_model = quantize( + model=onnx_model, quantization_mode=QuantizationMode.IntegerOps, force_fusions=True, symmetric_weight=True, + ) - # Append "-quantized" at the end of the model's name - quantized_model_path = generate_identified_filename(onnx_model_path, "-quantized") + # Append "-quantized" at the end of the model's name + quantized_model_path = generate_identified_filename(onnx_model_path, "-quantized") - # Save model - print(f"Quantized model has been written at {quantized_model_path}: \N{heavy check mark}") - onnx.save_model(quantized_model, quantized_model_path.as_posix()) + # Save model + print(f"Quantized model has been written at {quantized_model_path}: \N{heavy check mark}") + onnx.save_model(quantized_model, quantized_model_path.as_posix()) - return quantized_model_path - except Exception as ie: - print(f"Error while quantizing the model:\n{str(ie)}") + return quantized_model_path def verify(path: Path):