diff --git a/.github/workflows/self-push.yml b/.github/workflows/self-push.yml
index c474df21bf..04d09267c6 100644
--- a/.github/workflows/self-push.yml
+++ b/.github/workflows/self-push.yml
@@ -45,7 +45,7 @@ jobs:
         source .env/bin/activate
         pip install --upgrade pip
         pip install torch!=1.6.0
-        pip install .[sklearn,testing]
+        pip install .[sklearn,testing,onnxruntime]
 
     - name: Are GPUs recognized by our DL frameworks
       run: |
diff --git a/.github/workflows/self-scheduled.yml b/.github/workflows/self-scheduled.yml
index e1ccb853f4..b291176633 100644
--- a/.github/workflows/self-scheduled.yml
+++ b/.github/workflows/self-scheduled.yml
@@ -42,7 +42,7 @@ jobs:
         source .env/bin/activate
         pip install --upgrade pip
         pip install torch!=1.6.0
-        pip install .[sklearn,testing]
+        pip install .[sklearn,testing,onnxruntime]
 
     - name: Are GPUs recognized by our DL frameworks
       run: |
diff --git a/setup.py b/setup.py
index 145e8e0d39..912b384668 100644
--- a/setup.py
+++ b/setup.py
@@ -74,16 +74,17 @@ extras["tf"] = [
     # "onnxconverter-common",
     # "keras2onnx"
     "onnxconverter-common @ git+git://github.com/microsoft/onnxconverter-common.git@f64ca15989b6dc95a1f3507ff6e4c395ba12dff5#egg=onnxconverter-common",
-    "keras2onnx @ git+git://github.com/onnx/keras-onnx.git@cbdc75cb950b16db7f0a67be96a278f8d2953b48#egg=keras2onnx"
+    "keras2onnx @ git+git://github.com/onnx/keras-onnx.git@cbdc75cb950b16db7f0a67be96a278f8d2953b48#egg=keras2onnx",
 ]
 extras["tf-cpu"] = [
     "tensorflow-cpu",
     # "onnxconverter-common",
     # "keras2onnx"
     "onnxconverter-common @ git+git://github.com/microsoft/onnxconverter-common.git@f64ca15989b6dc95a1f3507ff6e4c395ba12dff5#egg=onnxconverter-common",
-    "keras2onnx @ git+git://github.com/onnx/keras-onnx.git@cbdc75cb950b16db7f0a67be96a278f8d2953b48#egg=keras2onnx"
+    "keras2onnx @ git+git://github.com/onnx/keras-onnx.git@cbdc75cb950b16db7f0a67be96a278f8d2953b48#egg=keras2onnx",
 ]
 extras["torch"] = ["torch"]
+extras["onnxruntime"] = ["onnxruntime>=1.4.0", "onnxruntime-tools>=1.4.2"]
 
 extras["serving"] = ["pydantic", "uvicorn", "fastapi", "starlette"]
 extras["all"] = extras["serving"] + ["tensorflow", "torch"]
diff --git a/src/transformers/convert_graph_to_onnx.py b/src/transformers/convert_graph_to_onnx.py
index f98ca912be..990895a501 100644
--- a/src/transformers/convert_graph_to_onnx.py
+++ b/src/transformers/convert_graph_to_onnx.py
@@ -364,32 +364,29 @@ def quantize(onnx_model_path: Path) -> Path:
 
     Returns: The Path generated for the quantized
     """
-    try:
-        import onnx
-        from onnxruntime.quantization import QuantizationMode, quantize
+    import onnx
+    from onnxruntime.quantization import QuantizationMode, quantize
 
-        onnx_model = onnx.load(onnx_model_path.as_posix())
+    onnx_model = onnx.load(onnx_model_path.as_posix())
 
-        # Discussed with @yufenglee from ONNX runtime, this will be address in the next release of onnxruntime
-        print(
-            "As of onnxruntime 1.4.0, models larger than 2GB will fail to quantize due to protobuf constraint.\n"
-            "This limitation will be removed in the next release of onnxruntime."
-        )
+    # Discussed with @yufenglee from ONNX runtime, this will be addressed in the next release of onnxruntime
+    print(
+        "As of onnxruntime 1.4.0, models larger than 2GB will fail to quantize due to protobuf constraint.\n"
+        "This limitation will be removed in the next release of onnxruntime."
+    )
 
-        quantized_model = quantize(
-            model=onnx_model, quantization_mode=QuantizationMode.IntegerOps, force_fusions=True, symmetric_weight=True,
-        )
+    quantized_model = quantize(
+        model=onnx_model, quantization_mode=QuantizationMode.IntegerOps, force_fusions=True, symmetric_weight=True,
+    )
 
-        # Append "-quantized" at the end of the model's name
-        quantized_model_path = generate_identified_filename(onnx_model_path, "-quantized")
+    # Append "-quantized" at the end of the model's name
+    quantized_model_path = generate_identified_filename(onnx_model_path, "-quantized")
 
-        # Save model
-        print(f"Quantized model has been written at {quantized_model_path}: \N{heavy check mark}")
-        onnx.save_model(quantized_model, quantized_model_path.as_posix())
+    # Save model
+    print(f"Quantized model has been written at {quantized_model_path}: \N{heavy check mark}")
+    onnx.save_model(quantized_model, quantized_model_path.as_posix())
 
-        return quantized_model_path
-    except Exception as ie:
-        print(f"Error while quantizing the model:\n{str(ie)}")
+    return quantized_model_path
 
 
 def verify(path: Path):