[Test refactor 1/5] Per-folder tests reorganization (#15725)

* Per-folder tests reorganization

Co-authored-by: sgugger <sylvain.gugger@gmail.com>
Co-authored-by: Stas Bekman <stas@stason.org>
This commit is contained in:
Lysandre Debut
2022-02-23 15:46:28 -05:00
committed by GitHub
parent fecb08c2b8
commit 29c10a41d0
438 changed files with 636 additions and 565 deletions

0
tests/onnx/__init__.py Normal file
View File

195
tests/onnx/test_onnx.py Normal file
View File

@@ -0,0 +1,195 @@
# Copyright 2020 The HuggingFace Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import unittest
from pathlib import Path
from tempfile import NamedTemporaryFile, TemporaryDirectory
from transformers import BertConfig, BertTokenizerFast, FeatureExtractionPipeline
from transformers.convert_graph_to_onnx import (
convert,
ensure_valid_input,
generate_identified_filename,
infer_shapes,
quantize,
)
from transformers.testing_utils import require_tf, require_tokenizers, require_torch, slow
class FuncContiguousArgs:
def forward(self, input_ids, token_type_ids, attention_mask):
return None
class FuncNonContiguousArgs:
def forward(self, input_ids, some_other_args, token_type_ids, attention_mask):
return None
class OnnxExportTestCase(unittest.TestCase):
MODEL_TO_TEST = [
# (model_name, model_kwargs)
("bert-base-cased", {}),
("gpt2", {"use_cache": False}), # We don't support exporting GPT2 past keys anymore
]
@require_tf
@slow
def test_export_tensorflow(self):
for model, model_kwargs in OnnxExportTestCase.MODEL_TO_TEST:
self._test_export(model, "tf", 12, **model_kwargs)
@require_torch
@slow
def test_export_pytorch(self):
for model, model_kwargs in OnnxExportTestCase.MODEL_TO_TEST:
self._test_export(model, "pt", 12, **model_kwargs)
@require_torch
@slow
def test_export_custom_bert_model(self):
from transformers import BertModel
vocab = ["[UNK]", "[SEP]", "[CLS]", "[PAD]", "[MASK]", "some", "other", "words"]
with NamedTemporaryFile(mode="w+t") as vocab_file:
vocab_file.write("\n".join(vocab))
vocab_file.flush()
tokenizer = BertTokenizerFast(vocab_file.name)
with TemporaryDirectory() as bert_save_dir:
model = BertModel(BertConfig(vocab_size=len(vocab)))
model.save_pretrained(bert_save_dir)
self._test_export(bert_save_dir, "pt", 12, tokenizer)
@require_tf
@slow
def test_quantize_tf(self):
for model, model_kwargs in OnnxExportTestCase.MODEL_TO_TEST:
path = self._test_export(model, "tf", 12, **model_kwargs)
quantized_path = quantize(Path(path))
# Ensure the actual quantized model is not bigger than the original one
if quantized_path.stat().st_size >= Path(path).stat().st_size:
self.fail("Quantized model is bigger than initial ONNX model")
@require_torch
@slow
def test_quantize_pytorch(self):
for model, model_kwargs in OnnxExportTestCase.MODEL_TO_TEST:
path = self._test_export(model, "pt", 12, **model_kwargs)
quantized_path = quantize(path)
# Ensure the actual quantized model is not bigger than the original one
if quantized_path.stat().st_size >= Path(path).stat().st_size:
self.fail("Quantized model is bigger than initial ONNX model")
def _test_export(self, model, framework, opset, tokenizer=None, **model_kwargs):
try:
# Compute path
with TemporaryDirectory() as tempdir:
path = Path(tempdir).joinpath("model.onnx")
# Remove folder if exists
if path.parent.exists():
path.parent.rmdir()
# Export
convert(framework, model, path, opset, tokenizer, **model_kwargs)
return path
except Exception as e:
self.fail(e)
@require_torch
@require_tokenizers
@slow
def test_infer_dynamic_axis_pytorch(self):
"""
Validate the dynamic axis generated for each parameters are correct
"""
from transformers import BertModel
model = BertModel(BertConfig.from_pretrained("lysandre/tiny-bert-random"))
tokenizer = BertTokenizerFast.from_pretrained("lysandre/tiny-bert-random")
self._test_infer_dynamic_axis(model, tokenizer, "pt")
@require_tf
@require_tokenizers
@slow
def test_infer_dynamic_axis_tf(self):
"""
Validate the dynamic axis generated for each parameters are correct
"""
from transformers import TFBertModel
model = TFBertModel(BertConfig.from_pretrained("lysandre/tiny-bert-random"))
tokenizer = BertTokenizerFast.from_pretrained("lysandre/tiny-bert-random")
self._test_infer_dynamic_axis(model, tokenizer, "tf")
def _test_infer_dynamic_axis(self, model, tokenizer, framework):
feature_extractor = FeatureExtractionPipeline(model, tokenizer)
variable_names = ["input_ids", "token_type_ids", "attention_mask", "output_0", "output_1"]
input_vars, output_vars, shapes, tokens = infer_shapes(feature_extractor, framework)
# Assert all variables are present
self.assertEqual(len(shapes), len(variable_names))
self.assertTrue(all([var_name in shapes for var_name in variable_names]))
self.assertSequenceEqual(variable_names[:3], input_vars)
self.assertSequenceEqual(variable_names[3:], output_vars)
# Assert inputs are {0: batch, 1: sequence}
for var_name in ["input_ids", "token_type_ids", "attention_mask"]:
self.assertDictEqual(shapes[var_name], {0: "batch", 1: "sequence"})
# Assert outputs are {0: batch, 1: sequence} and {0: batch}
self.assertDictEqual(shapes["output_0"], {0: "batch", 1: "sequence"})
self.assertDictEqual(shapes["output_1"], {0: "batch"})
def test_ensure_valid_input(self):
"""
Validate parameters are correctly exported
GPT2 has "past" parameter in the middle of input_ids, token_type_ids and attention_mask.
ONNX doesn't support export with a dictionary, only a tuple. Thus we need to ensure we remove
token_type_ids and attention_mask for now to not having a None tensor in the middle
"""
# All generated args are valid
input_names = ["input_ids", "attention_mask", "token_type_ids"]
tokens = {"input_ids": [1, 2, 3, 4], "attention_mask": [0, 0, 0, 0], "token_type_ids": [1, 1, 1, 1]}
ordered_input_names, inputs_args = ensure_valid_input(FuncContiguousArgs(), tokens, input_names)
# Should have exactly the same number of args (all are valid)
self.assertEqual(len(inputs_args), 3)
# Should have exactly the same input names
self.assertEqual(set(ordered_input_names), set(input_names))
# Parameter should be reordered according to their respective place in the function:
# (input_ids, token_type_ids, attention_mask)
self.assertEqual(inputs_args, (tokens["input_ids"], tokens["token_type_ids"], tokens["attention_mask"]))
# Generated args are interleaved with another args (for instance parameter "past" in GPT2)
ordered_input_names, inputs_args = ensure_valid_input(FuncNonContiguousArgs(), tokens, input_names)
# Should have exactly the one arg (all before the one not provided "some_other_args")
self.assertEqual(len(inputs_args), 1)
self.assertEqual(len(ordered_input_names), 1)
# Should have only "input_ids"
self.assertEqual(inputs_args[0], tokens["input_ids"])
self.assertEqual(ordered_input_names[0], "input_ids")
def test_generate_identified_name(self):
generated = generate_identified_filename(Path("/home/something/my_fake_model.onnx"), "-test")
self.assertEqual("/home/something/my_fake_model-test.onnx", generated.as_posix())

308
tests/onnx/test_onnx_v2.py Normal file
View File

@@ -0,0 +1,308 @@
from pathlib import Path
from tempfile import NamedTemporaryFile
from unittest import TestCase
from unittest.mock import patch
from parameterized import parameterized
from transformers import AutoConfig, AutoTokenizer, is_tf_available, is_torch_available
from transformers.onnx import (
EXTERNAL_DATA_FORMAT_SIZE_LIMIT,
OnnxConfig,
ParameterFormat,
export,
validate_model_outputs,
)
from transformers.onnx.config import OnnxConfigWithPast
if is_torch_available() or is_tf_available():
from transformers.onnx.features import FeaturesManager
from transformers.onnx.utils import compute_effective_axis_dimension, compute_serialized_parameters_size
from transformers.testing_utils import require_onnx, require_tf, require_torch, slow
@require_onnx
class OnnxUtilsTestCaseV2(TestCase):
"""
Cover all the utilities involved to export ONNX models
"""
@require_torch
@patch("transformers.onnx.convert.is_torch_onnx_dict_inputs_support_available", return_value=False)
def test_ensure_pytorch_version_ge_1_8_0(self, mock_is_torch_onnx_dict_inputs_support_available):
"""
Ensure we raise an Exception if the pytorch version is unsupported (< 1.8.0)
"""
self.assertRaises(AssertionError, export, None, None, None, None, None)
mock_is_torch_onnx_dict_inputs_support_available.assert_called()
def test_compute_effective_axis_dimension(self):
"""
When exporting ONNX model with dynamic axis (batch or sequence) we set batch_size and/or sequence_length = -1.
We cannot generate an effective tensor with axis dim == -1, so we trick by using some "fixed" values
(> 1 to avoid ONNX squeezing the axis).
This test ensure we are correctly replacing generated batch / sequence tensor with axis > 1
"""
# Dynamic axis (batch, no token added by the tokenizer)
self.assertEqual(compute_effective_axis_dimension(-1, fixed_dimension=2, num_token_to_add=0), 2)
# Static axis (batch, no token added by the tokenizer)
self.assertEqual(compute_effective_axis_dimension(0, fixed_dimension=2, num_token_to_add=0), 2)
# Dynamic axis (sequence, token added by the tokenizer 2 (no pair))
self.assertEqual(compute_effective_axis_dimension(0, fixed_dimension=8, num_token_to_add=2), 6)
self.assertEqual(compute_effective_axis_dimension(0, fixed_dimension=8, num_token_to_add=2), 6)
# Dynamic axis (sequence, token added by the tokenizer 3 (pair))
self.assertEqual(compute_effective_axis_dimension(0, fixed_dimension=8, num_token_to_add=3), 5)
self.assertEqual(compute_effective_axis_dimension(0, fixed_dimension=8, num_token_to_add=3), 5)
def test_compute_parameters_serialized_size(self):
"""
This test ensures we compute a "correct" approximation of the underlying storage requirement (size) for all the
parameters for the specified parameter's dtype.
"""
self.assertEqual(compute_serialized_parameters_size(2, ParameterFormat.Float), 2 * ParameterFormat.Float.size)
def test_flatten_output_collection_property(self):
"""
This test ensures we correctly flatten nested collection such as the one we use when returning past_keys.
past_keys = Tuple[Tuple]
ONNX exporter will export nested collections as ${collection_name}.${level_idx_0}.${level_idx_1}...${idx_n}
"""
self.assertEqual(
OnnxConfig.flatten_output_collection_property("past_key", [[0], [1], [2]]),
{
"past_key.0": 0,
"past_key.1": 1,
"past_key.2": 2,
},
)
class OnnxConfigTestCaseV2(TestCase):
"""
Cover the test for models default.
Default means no specific features is being enabled on the model.
"""
@patch.multiple(OnnxConfig, __abstractmethods__=set())
def test_use_external_data_format(self):
"""
External data format is required only if the serialized size of the parameters if bigger than 2Gb
"""
TWO_GB_LIMIT = EXTERNAL_DATA_FORMAT_SIZE_LIMIT
# No parameters
self.assertFalse(OnnxConfig.use_external_data_format(0))
# Some parameters
self.assertFalse(OnnxConfig.use_external_data_format(1))
# Almost 2Gb parameters
self.assertFalse(OnnxConfig.use_external_data_format((TWO_GB_LIMIT - 1) // ParameterFormat.Float.size))
# Exactly 2Gb parameters
self.assertTrue(OnnxConfig.use_external_data_format(TWO_GB_LIMIT))
# More than 2Gb parameters
self.assertTrue(OnnxConfig.use_external_data_format((TWO_GB_LIMIT + 1) // ParameterFormat.Float.size))
class OnnxConfigWithPastTestCaseV2(TestCase):
"""
Cover the tests for model which have use_cache feature (i.e. "with_past" for ONNX)
"""
SUPPORTED_WITH_PAST_CONFIGS = {}
# SUPPORTED_WITH_PAST_CONFIGS = {
# ("BART", BartConfig),
# ("GPT2", GPT2Config),
# # ("T5", T5Config)
# }
@patch.multiple(OnnxConfigWithPast, __abstractmethods__=set())
def test_use_past(self):
"""
Ensure the use_past variable is correctly being set
"""
for name, config in OnnxConfigWithPastTestCaseV2.SUPPORTED_WITH_PAST_CONFIGS:
with self.subTest(name):
self.assertFalse(
OnnxConfigWithPast.from_model_config(config()).use_past,
"OnnxConfigWithPast.from_model_config() should not use_past",
)
self.assertTrue(
OnnxConfigWithPast.with_past(config()).use_past,
"OnnxConfigWithPast.from_model_config() should use_past",
)
@patch.multiple(OnnxConfigWithPast, __abstractmethods__=set())
def test_values_override(self):
"""
Ensure the use_past variable correctly set the `use_cache` value in model's configuration
"""
for name, config in OnnxConfigWithPastTestCaseV2.SUPPORTED_WITH_PAST_CONFIGS:
with self.subTest(name):
# without past
onnx_config_default = OnnxConfigWithPast.from_model_config(config())
self.assertIsNotNone(onnx_config_default.values_override, "values_override should not be None")
self.assertIn("use_cache", onnx_config_default.values_override, "use_cache should be present")
self.assertFalse(
onnx_config_default.values_override["use_cache"], "use_cache should be False if not using past"
)
# with past
onnx_config_default = OnnxConfigWithPast.with_past(config())
self.assertIsNotNone(onnx_config_default.values_override, "values_override should not be None")
self.assertIn("use_cache", onnx_config_default.values_override, "use_cache should be present")
self.assertTrue(
onnx_config_default.values_override["use_cache"], "use_cache should be False if not using past"
)
PYTORCH_EXPORT_MODELS = {
("albert", "hf-internal-testing/tiny-albert"),
("bert", "bert-base-cased"),
("ibert", "kssteven/ibert-roberta-base"),
("camembert", "camembert-base"),
("distilbert", "distilbert-base-cased"),
("electra", "google/electra-base-generator"),
("roberta", "roberta-base"),
("xlm-roberta", "xlm-roberta-base"),
("layoutlm", "microsoft/layoutlm-base-uncased"),
}
PYTORCH_EXPORT_WITH_PAST_MODELS = {
("gpt2", "gpt2"),
("gpt-neo", "EleutherAI/gpt-neo-125M"),
}
PYTORCH_EXPORT_SEQ2SEQ_WITH_PAST_MODELS = {
("bart", "facebook/bart-base"),
("mbart", "sshleifer/tiny-mbart"),
("t5", "t5-small"),
("marian", "Helsinki-NLP/opus-mt-en-de"),
}
TENSORFLOW_EXPORT_DEFAULT_MODELS = {
("albert", "hf-internal-testing/tiny-albert"),
("bert", "bert-base-cased"),
("ibert", "kssteven/ibert-roberta-base"),
("camembert", "camembert-base"),
("distilbert", "distilbert-base-cased"),
("roberta", "roberta-base"),
("xlm-roberta", "xlm-roberta-base"),
("layoutlm", "microsoft/layoutlm-base-uncased"),
}
TENSORFLOW_EXPORT_WITH_PAST_MODELS = {
("gpt2", "gpt2"),
("gpt-neo", "EleutherAI/gpt-neo-125M"),
}
TENSORFLOW_EXPORT_SEQ2SEQ_WITH_PAST_MODELS = {
("bart", "facebook/bart-base"),
("mbart", "sshleifer/tiny-mbart"),
("t5", "t5-small"),
("marian", "Helsinki-NLP/opus-mt-en-de"),
}
def _get_models_to_test(export_models_list):
models_to_test = []
if is_torch_available() or is_tf_available():
for (name, model) in export_models_list:
for feature, onnx_config_class_constructor in FeaturesManager.get_supported_features_for_model_type(
name
).items():
models_to_test.append((f"{name}_{feature}", name, model, feature, onnx_config_class_constructor))
return sorted(models_to_test)
else:
# Returning some dummy test that should not be ever called because of the @require_torch / @require_tf
# decorators.
# The reason for not returning an empty list is because parameterized.expand complains when it's empty.
return [("dummy", "dummy", "dummy", "dummy", OnnxConfig.from_model_config)]
class OnnxExportTestCaseV2(TestCase):
"""
Integration tests ensuring supported models are correctly exported
"""
def _onnx_export(self, test_name, name, model_name, feature, onnx_config_class_constructor):
from transformers.onnx import export
tokenizer = AutoTokenizer.from_pretrained(model_name)
config = AutoConfig.from_pretrained(model_name)
# Useful for causal lm models that do not use pad tokens.
if not getattr(config, "pad_token_id", None):
config.pad_token_id = tokenizer.eos_token_id
model_class = FeaturesManager.get_model_class_for_feature(feature)
model = model_class.from_config(config)
onnx_config = onnx_config_class_constructor(model.config)
with NamedTemporaryFile("w") as output:
try:
onnx_inputs, onnx_outputs = export(
tokenizer, model, onnx_config, onnx_config.default_onnx_opset, Path(output.name)
)
validate_model_outputs(
onnx_config,
tokenizer,
model,
Path(output.name),
onnx_outputs,
onnx_config.atol_for_validation,
)
except (RuntimeError, ValueError) as e:
self.fail(f"{name}, {feature} -> {e}")
@parameterized.expand(_get_models_to_test(PYTORCH_EXPORT_MODELS))
@slow
@require_torch
def test_pytorch_export(self, test_name, name, model_name, feature, onnx_config_class_constructor):
self._onnx_export(test_name, name, model_name, feature, onnx_config_class_constructor)
@parameterized.expand(_get_models_to_test(PYTORCH_EXPORT_WITH_PAST_MODELS))
@slow
@require_torch
def test_pytorch_export_with_past(self, test_name, name, model_name, feature, onnx_config_class_constructor):
self._onnx_export(test_name, name, model_name, feature, onnx_config_class_constructor)
@parameterized.expand(_get_models_to_test(PYTORCH_EXPORT_SEQ2SEQ_WITH_PAST_MODELS))
@slow
@require_torch
def test_pytorch_export_seq2seq_with_past(
self, test_name, name, model_name, feature, onnx_config_class_constructor
):
self._onnx_export(test_name, name, model_name, feature, onnx_config_class_constructor)
@parameterized.expand(_get_models_to_test(TENSORFLOW_EXPORT_DEFAULT_MODELS))
@slow
@require_tf
def test_tensorflow_export(self, test_name, name, model_name, feature, onnx_config_class_constructor):
self._onnx_export(test_name, name, model_name, feature, onnx_config_class_constructor)
@parameterized.expand(_get_models_to_test(TENSORFLOW_EXPORT_WITH_PAST_MODELS))
@slow
@require_tf
def test_tensorflow_export_with_past(self, test_name, name, model_name, feature, onnx_config_class_constructor):
self._onnx_export(test_name, name, model_name, feature, onnx_config_class_constructor)
@parameterized.expand(_get_models_to_test(TENSORFLOW_EXPORT_SEQ2SEQ_WITH_PAST_MODELS))
@slow
@require_tf
def test_tensorflow_export_seq2seq_with_past(
self, test_name, name, model_name, feature, onnx_config_class_constructor
):
self._onnx_export(test_name, name, model_name, feature, onnx_config_class_constructor)