Onnx fix test (#10663)
* Allow passing kwargs to the model's from_pretrained when using pipeline. * Disable the use of past_key_values for GPT2 when exporting to ONNX. * style * Remove comment. * Appease the documentation gods * Fix style Co-authored-by: Lysandre <lysandre.debut@reseau.eseo.fr>
This commit is contained in:
@@ -222,7 +222,9 @@ def infer_shapes(nlp: Pipeline, framework: str) -> Tuple[List[str], List[str], D
|
|||||||
return input_vars, output_names, dynamic_axes, tokens
|
return input_vars, output_names, dynamic_axes, tokens
|
||||||
|
|
||||||
|
|
||||||
def load_graph_from_args(pipeline_name: str, framework: str, model: str, tokenizer: Optional[str] = None) -> Pipeline:
|
def load_graph_from_args(
|
||||||
|
pipeline_name: str, framework: str, model: str, tokenizer: Optional[str] = None, **models_kwargs
|
||||||
|
) -> Pipeline:
|
||||||
"""
|
"""
|
||||||
Convert the set of arguments provided through the CLI to an actual pipeline reference (tokenizer + model
|
Convert the set of arguments provided through the CLI to an actual pipeline reference (tokenizer + model
|
||||||
|
|
||||||
@@ -248,7 +250,7 @@ def load_graph_from_args(pipeline_name: str, framework: str, model: str, tokeniz
|
|||||||
print(f"Loading pipeline (model: {model}, tokenizer: {tokenizer})")
|
print(f"Loading pipeline (model: {model}, tokenizer: {tokenizer})")
|
||||||
|
|
||||||
# Allocate tokenizer and model
|
# Allocate tokenizer and model
|
||||||
return pipeline(pipeline_name, model=model, tokenizer=tokenizer, framework=framework)
|
return pipeline(pipeline_name, model=model, tokenizer=tokenizer, framework=framework, model_kwargs=models_kwargs)
|
||||||
|
|
||||||
|
|
||||||
def convert_pytorch(nlp: Pipeline, opset: int, output: Path, use_external_format: bool):
|
def convert_pytorch(nlp: Pipeline, opset: int, output: Path, use_external_format: bool):
|
||||||
@@ -335,6 +337,7 @@ def convert(
|
|||||||
tokenizer: Optional[str] = None,
|
tokenizer: Optional[str] = None,
|
||||||
use_external_format: bool = False,
|
use_external_format: bool = False,
|
||||||
pipeline_name: str = "feature-extraction",
|
pipeline_name: str = "feature-extraction",
|
||||||
|
**model_kwargs
|
||||||
):
|
):
|
||||||
"""
|
"""
|
||||||
Convert the pipeline object to the ONNX Intermediate Representation (IR) format
|
Convert the pipeline object to the ONNX Intermediate Representation (IR) format
|
||||||
@@ -347,6 +350,7 @@ def convert(
|
|||||||
tokenizer: The name of the model to load for the pipeline, default to the model's name if not provided
|
tokenizer: The name of the model to load for the pipeline, default to the model's name if not provided
|
||||||
use_external_format: Split the model definition from its parameters to allow model bigger than 2GB (PyTorch only)
|
use_external_format: Split the model definition from its parameters to allow model bigger than 2GB (PyTorch only)
|
||||||
pipeline_name: The kind of pipeline to instantiate (ner, question-answering, etc.)
|
pipeline_name: The kind of pipeline to instantiate (ner, question-answering, etc.)
|
||||||
|
model_kwargs: Keyword arguments to be forwarded to the model constructor
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
|
|
||||||
@@ -354,7 +358,7 @@ def convert(
|
|||||||
print(f"ONNX opset version set to: {opset}")
|
print(f"ONNX opset version set to: {opset}")
|
||||||
|
|
||||||
# Load the pipeline
|
# Load the pipeline
|
||||||
nlp = load_graph_from_args(pipeline_name, framework, model, tokenizer)
|
nlp = load_graph_from_args(pipeline_name, framework, model, tokenizer, **model_kwargs)
|
||||||
|
|
||||||
if not output.parent.exists():
|
if not output.parent.exists():
|
||||||
print(f"Creating folder {output.parent}")
|
print(f"Creating folder {output.parent}")
|
||||||
|
|||||||
@@ -246,6 +246,7 @@ def pipeline(
|
|||||||
framework: Optional[str] = None,
|
framework: Optional[str] = None,
|
||||||
revision: Optional[str] = None,
|
revision: Optional[str] = None,
|
||||||
use_fast: bool = True,
|
use_fast: bool = True,
|
||||||
|
model_kwargs: Dict[str, Any] = {},
|
||||||
**kwargs
|
**kwargs
|
||||||
) -> Pipeline:
|
) -> Pipeline:
|
||||||
"""
|
"""
|
||||||
@@ -307,6 +308,9 @@ def pipeline(
|
|||||||
artifacts on huggingface.co, so ``revision`` can be any identifier allowed by git.
|
artifacts on huggingface.co, so ``revision`` can be any identifier allowed by git.
|
||||||
use_fast (:obj:`bool`, `optional`, defaults to :obj:`True`):
|
use_fast (:obj:`bool`, `optional`, defaults to :obj:`True`):
|
||||||
Whether or not to use a Fast tokenizer if possible (a :class:`~transformers.PreTrainedTokenizerFast`).
|
Whether or not to use a Fast tokenizer if possible (a :class:`~transformers.PreTrainedTokenizerFast`).
|
||||||
|
model_kwargs:
|
||||||
|
Additional dictionary of keyword arguments passed along to the model's :obj:`from_pretrained(...,
|
||||||
|
**model_kwargs)` function.
|
||||||
kwargs:
|
kwargs:
|
||||||
Additional keyword arguments passed along to the specific pipeline init (see the documentation for the
|
Additional keyword arguments passed along to the specific pipeline init (see the documentation for the
|
||||||
corresponding pipeline class for possible values).
|
corresponding pipeline class for possible values).
|
||||||
@@ -383,7 +387,6 @@ def pipeline(
|
|||||||
# Instantiate model if needed
|
# Instantiate model if needed
|
||||||
if isinstance(model, str):
|
if isinstance(model, str):
|
||||||
# Handle transparent TF/PT model conversion
|
# Handle transparent TF/PT model conversion
|
||||||
model_kwargs = {}
|
|
||||||
if framework == "pt" and model.endswith(".h5"):
|
if framework == "pt" and model.endswith(".h5"):
|
||||||
model_kwargs["from_tf"] = True
|
model_kwargs["from_tf"] = True
|
||||||
logger.warning(
|
logger.warning(
|
||||||
|
|||||||
@@ -38,19 +38,23 @@ class FuncNonContiguousArgs:
|
|||||||
|
|
||||||
|
|
||||||
class OnnxExportTestCase(unittest.TestCase):
|
class OnnxExportTestCase(unittest.TestCase):
|
||||||
MODEL_TO_TEST = ["bert-base-cased", "gpt2", "roberta-base"]
|
MODEL_TO_TEST = [
|
||||||
|
# (model_name, model_kwargs)
|
||||||
|
("bert-base-cased", {}),
|
||||||
|
("gpt2", {"use_cache": False}), # We don't support exporting GPT2 past keys anymore
|
||||||
|
]
|
||||||
|
|
||||||
@require_tf
|
@require_tf
|
||||||
@slow
|
@slow
|
||||||
def test_export_tensorflow(self):
|
def test_export_tensorflow(self):
|
||||||
for model in OnnxExportTestCase.MODEL_TO_TEST:
|
for model, model_kwargs in OnnxExportTestCase.MODEL_TO_TEST:
|
||||||
self._test_export(model, "tf", 12)
|
self._test_export(model, "tf", 12, **model_kwargs)
|
||||||
|
|
||||||
@require_torch
|
@require_torch
|
||||||
@slow
|
@slow
|
||||||
def test_export_pytorch(self):
|
def test_export_pytorch(self):
|
||||||
for model in OnnxExportTestCase.MODEL_TO_TEST:
|
for model, model_kwargs in OnnxExportTestCase.MODEL_TO_TEST:
|
||||||
self._test_export(model, "pt", 12)
|
self._test_export(model, "pt", 12, **model_kwargs)
|
||||||
|
|
||||||
@require_torch
|
@require_torch
|
||||||
@slow
|
@slow
|
||||||
@@ -71,8 +75,8 @@ class OnnxExportTestCase(unittest.TestCase):
|
|||||||
@require_tf
|
@require_tf
|
||||||
@slow
|
@slow
|
||||||
def test_quantize_tf(self):
|
def test_quantize_tf(self):
|
||||||
for model in OnnxExportTestCase.MODEL_TO_TEST:
|
for model, model_kwargs in OnnxExportTestCase.MODEL_TO_TEST:
|
||||||
path = self._test_export(model, "tf", 12)
|
path = self._test_export(model, "tf", 12, **model_kwargs)
|
||||||
quantized_path = quantize(Path(path))
|
quantized_path = quantize(Path(path))
|
||||||
|
|
||||||
# Ensure the actual quantized model is not bigger than the original one
|
# Ensure the actual quantized model is not bigger than the original one
|
||||||
@@ -82,15 +86,15 @@ class OnnxExportTestCase(unittest.TestCase):
|
|||||||
@require_torch
|
@require_torch
|
||||||
@slow
|
@slow
|
||||||
def test_quantize_pytorch(self):
|
def test_quantize_pytorch(self):
|
||||||
for model in OnnxExportTestCase.MODEL_TO_TEST:
|
for model, model_kwargs in OnnxExportTestCase.MODEL_TO_TEST:
|
||||||
path = self._test_export(model, "pt", 12)
|
path = self._test_export(model, "pt", 12, **model_kwargs)
|
||||||
quantized_path = quantize(path)
|
quantized_path = quantize(path)
|
||||||
|
|
||||||
# Ensure the actual quantized model is not bigger than the original one
|
# Ensure the actual quantized model is not bigger than the original one
|
||||||
if quantized_path.stat().st_size >= Path(path).stat().st_size:
|
if quantized_path.stat().st_size >= Path(path).stat().st_size:
|
||||||
self.fail("Quantized model is bigger than initial ONNX model")
|
self.fail("Quantized model is bigger than initial ONNX model")
|
||||||
|
|
||||||
def _test_export(self, model, framework, opset, tokenizer=None):
|
def _test_export(self, model, framework, opset, tokenizer=None, **model_kwargs):
|
||||||
try:
|
try:
|
||||||
# Compute path
|
# Compute path
|
||||||
with TemporaryDirectory() as tempdir:
|
with TemporaryDirectory() as tempdir:
|
||||||
@@ -101,7 +105,7 @@ class OnnxExportTestCase(unittest.TestCase):
|
|||||||
path.parent.rmdir()
|
path.parent.rmdir()
|
||||||
|
|
||||||
# Export
|
# Export
|
||||||
convert(framework, model, path, opset, tokenizer)
|
convert(framework, model, path, opset, tokenizer, **model_kwargs)
|
||||||
|
|
||||||
return path
|
return path
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
|||||||
Reference in New Issue
Block a user