[Test refactor 1/5] Per-folder tests reorganization (#15725)

* Per-folder tests reorganization

Co-authored-by: sgugger <sylvain.gugger@gmail.com>
Co-authored-by: Stas Bekman <stas@stason.org>
This commit is contained in:
Lysandre Debut
2022-02-23 15:46:28 -05:00
committed by GitHub
parent fecb08c2b8
commit 29c10a41d0
438 changed files with 636 additions and 565 deletions

0
tests/utils/__init__.py Normal file
View File

View File

@@ -0,0 +1,64 @@
# Copyright 2020 The HuggingFace Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import unittest
from transformers import is_torch_available
from transformers.testing_utils import require_torch
if is_torch_available():
import torch
from transformers.activations import gelu_new, gelu_python, get_activation
@require_torch
class TestActivations(unittest.TestCase):
def test_gelu_versions(self):
x = torch.tensor([-100, -1, -0.1, 0, 0.1, 1.0, 100])
torch_builtin = get_activation("gelu")
self.assertTrue(torch.allclose(gelu_python(x), torch_builtin(x)))
self.assertFalse(torch.allclose(gelu_python(x), gelu_new(x)))
def test_gelu_10(self):
x = torch.tensor([-100, -1, -0.1, 0, 0.1, 1.0, 100])
torch_builtin = get_activation("gelu")
gelu10 = get_activation("gelu_10")
y_gelu = torch_builtin(x)
y_gelu_10 = gelu10(x)
clipped_mask = torch.where(y_gelu_10 < 10.0, 1, 0)
self.assertTrue(torch.max(y_gelu_10).item() == 10.0)
self.assertTrue(torch.allclose(y_gelu * clipped_mask, y_gelu_10 * clipped_mask))
def test_get_activation(self):
get_activation("swish")
get_activation("silu")
get_activation("relu")
get_activation("tanh")
get_activation("gelu_new")
get_activation("gelu_fast")
get_activation("gelu_python")
get_activation("gelu_10")
get_activation("quick_gelu")
get_activation("mish")
get_activation("linear")
get_activation("sigmoid")
with self.assertRaises(KeyError):
get_activation("bogus")
with self.assertRaises(KeyError):
get_activation(None)

View File

@@ -0,0 +1,59 @@
# Copyright 2020 The HuggingFace Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import unittest
import numpy as np
from transformers import is_tf_available
from transformers.testing_utils import require_tf
if is_tf_available():
import tensorflow as tf
from transformers.activations_tf import get_tf_activation
@require_tf
class TestTFActivations(unittest.TestCase):
def test_gelu_10(self):
x = tf.constant([-100, -1.0, -0.1, 0, 0.1, 1.0, 100.0])
gelu = get_tf_activation("gelu")
gelu10 = get_tf_activation("gelu_10")
y_gelu = gelu(x)
y_gelu_10 = gelu10(x)
clipped_mask = tf.where(y_gelu_10 < 10.0, 1.0, 0.0)
self.assertEqual(tf.math.reduce_max(y_gelu_10).numpy().item(), 10.0)
self.assertTrue(np.allclose(y_gelu * clipped_mask, y_gelu_10 * clipped_mask))
def test_get_activation(self):
get_tf_activation("swish")
get_tf_activation("silu")
get_tf_activation("gelu")
get_tf_activation("relu")
get_tf_activation("tanh")
get_tf_activation("gelu_new")
get_tf_activation("gelu_fast")
get_tf_activation("gelu_10")
get_tf_activation("mish")
get_tf_activation("quick_gelu")
get_tf_activation("glu")
with self.assertRaises(KeyError):
get_tf_activation("bogus")
with self.assertRaises(KeyError):
get_tf_activation(None)

File diff suppressed because it is too large Load Diff

32
tests/utils/test_cli.py Normal file
View File

@@ -0,0 +1,32 @@
# coding=utf-8
# Copyright 2019-present, the HuggingFace Inc. team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import unittest
from unittest.mock import patch
from transformers.testing_utils import CaptureStd
class CLITest(unittest.TestCase):
@patch("sys.argv", ["fakeprogrampath", "env"])
def test_cli_env(self):
# test transformers-cli env
import transformers.commands.transformers_cli
with CaptureStd() as cs:
transformers.commands.transformers_cli.main()
self.assertIn("Python version", cs.out)
self.assertIn("Platform", cs.out)
self.assertIn("Using distributed or parallel set-up in script?", cs.out)

View File

@@ -0,0 +1,114 @@
# coding=utf-8
# Copyright 2019-present, the HuggingFace Inc. team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import doctest
import logging
import os
import unittest
from pathlib import Path
from typing import List, Union
import transformers
from transformers.testing_utils import require_tf, require_torch, slow
logger = logging.getLogger()
@unittest.skip("Temporarily disable the doc tests.")
@require_torch
@require_tf
@slow
class TestCodeExamples(unittest.TestCase):
def analyze_directory(
self,
directory: Path,
identifier: Union[str, None] = None,
ignore_files: Union[List[str], None] = None,
n_identifier: Union[str, List[str], None] = None,
only_modules: bool = True,
):
"""
Runs through the specific directory, looking for the files identified with `identifier`. Executes
the doctests in those files
Args:
directory (`Path`): Directory containing the files
identifier (`str`): Will parse files containing this
ignore_files (`List[str]`): List of files to skip
n_identifier (`str` or `List[str]`): Will not parse files containing this/these identifiers.
only_modules (`bool`): Whether to only analyze modules
"""
files = [file for file in os.listdir(directory) if os.path.isfile(os.path.join(directory, file))]
if identifier is not None:
files = [file for file in files if identifier in file]
if n_identifier is not None:
if isinstance(n_identifier, List):
for n_ in n_identifier:
files = [file for file in files if n_ not in file]
else:
files = [file for file in files if n_identifier not in file]
ignore_files = ignore_files or []
ignore_files.append("__init__.py")
files = [file for file in files if file not in ignore_files]
for file in files:
# Open all files
print("Testing", file)
if only_modules:
module_identifier = file.split(".")[0]
try:
module_identifier = getattr(transformers, module_identifier)
suite = doctest.DocTestSuite(module_identifier)
result = unittest.TextTestRunner().run(suite)
self.assertIs(len(result.failures), 0)
except AttributeError:
logger.info(f"{module_identifier} is not a module.")
else:
result = doctest.testfile(str(".." / directory / file), optionflags=doctest.ELLIPSIS)
self.assertIs(result.failed, 0)
def test_modeling_examples(self):
transformers_directory = Path("src/transformers")
files = "modeling"
ignore_files = [
"modeling_ctrl.py",
"modeling_tf_ctrl.py",
]
self.analyze_directory(transformers_directory, identifier=files, ignore_files=ignore_files)
def test_tokenization_examples(self):
transformers_directory = Path("src/transformers")
files = "tokenization"
self.analyze_directory(transformers_directory, identifier=files)
def test_configuration_examples(self):
transformers_directory = Path("src/transformers")
files = "configuration"
self.analyze_directory(transformers_directory, identifier=files)
def test_remaining_examples(self):
transformers_directory = Path("src/transformers")
n_identifiers = ["configuration", "modeling", "tokenization"]
self.analyze_directory(transformers_directory, n_identifier=n_identifiers)
def test_doc_sources(self):
doc_source_directory = Path("docs/source")
ignore_files = ["favicon.ico"]
self.analyze_directory(doc_source_directory, ignore_files=ignore_files, only_modules=False)

View File

@@ -0,0 +1,181 @@
# Copyright 2020 The HuggingFace Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import contextlib
import importlib
import io
import json
import tempfile
import unittest
from pathlib import Path
import transformers
# Try to import everything from transformers to ensure every object can be loaded.
from transformers import * # noqa F406
from transformers.file_utils import (
CONFIG_NAME,
FLAX_WEIGHTS_NAME,
TF2_WEIGHTS_NAME,
WEIGHTS_NAME,
ContextManagers,
EntryNotFoundError,
RepositoryNotFoundError,
RevisionNotFoundError,
filename_to_url,
get_file_from_repo,
get_from_cache,
has_file,
hf_bucket_url,
)
from transformers.testing_utils import DUMMY_UNKNOWN_IDENTIFIER
MODEL_ID = DUMMY_UNKNOWN_IDENTIFIER
# An actual model hosted on huggingface.co
REVISION_ID_DEFAULT = "main"
# Default branch name
REVISION_ID_ONE_SPECIFIC_COMMIT = "f2c752cfc5c0ab6f4bdec59acea69eefbee381c2"
# One particular commit (not the top of `main`)
REVISION_ID_INVALID = "aaaaaaa"
# This commit does not exist, so we should 404.
PINNED_SHA1 = "d9e9f15bc825e4b2c9249e9578f884bbcb5e3684"
# Sha-1 of config.json on the top of `main`, for checking purposes
PINNED_SHA256 = "4b243c475af8d0a7754e87d7d096c92e5199ec2fe168a2ee7998e3b8e9bcb1d3"
# Sha-256 of pytorch_model.bin on the top of `main`, for checking purposes
# Dummy contexts to test `ContextManagers`
@contextlib.contextmanager
def context_en():
print("Welcome!")
yield
print("Bye!")
@contextlib.contextmanager
def context_fr():
print("Bonjour!")
yield
print("Au revoir!")
class TestImportMechanisms(unittest.TestCase):
def test_module_spec_available(self):
# If the spec is missing, importlib would not be able to import the module dynamically.
assert transformers.__spec__ is not None
assert importlib.util.find_spec("transformers") is not None
class GetFromCacheTests(unittest.TestCase):
def test_bogus_url(self):
# This lets us simulate no connection
# as the error raised is the same
# `ConnectionError`
url = "https://bogus"
with self.assertRaisesRegex(ValueError, "Connection error"):
_ = get_from_cache(url)
def test_file_not_found(self):
# Valid revision (None) but missing file.
url = hf_bucket_url(MODEL_ID, filename="missing.bin")
with self.assertRaisesRegex(EntryNotFoundError, "404 Client Error"):
_ = get_from_cache(url)
def test_model_not_found(self):
# Invalid model file.
url = hf_bucket_url("bert-base", filename="pytorch_model.bin")
with self.assertRaisesRegex(RepositoryNotFoundError, "404 Client Error"):
_ = get_from_cache(url)
def test_revision_not_found(self):
# Valid file but missing revision
url = hf_bucket_url(MODEL_ID, filename=CONFIG_NAME, revision=REVISION_ID_INVALID)
with self.assertRaisesRegex(RevisionNotFoundError, "404 Client Error"):
_ = get_from_cache(url)
def test_standard_object(self):
url = hf_bucket_url(MODEL_ID, filename=CONFIG_NAME, revision=REVISION_ID_DEFAULT)
filepath = get_from_cache(url, force_download=True)
metadata = filename_to_url(filepath)
self.assertEqual(metadata, (url, f'"{PINNED_SHA1}"'))
def test_standard_object_rev(self):
# Same object, but different revision
url = hf_bucket_url(MODEL_ID, filename=CONFIG_NAME, revision=REVISION_ID_ONE_SPECIFIC_COMMIT)
filepath = get_from_cache(url, force_download=True)
metadata = filename_to_url(filepath)
self.assertNotEqual(metadata[1], f'"{PINNED_SHA1}"')
# Caution: check that the etag is *not* equal to the one from `test_standard_object`
def test_lfs_object(self):
url = hf_bucket_url(MODEL_ID, filename=WEIGHTS_NAME, revision=REVISION_ID_DEFAULT)
filepath = get_from_cache(url, force_download=True)
metadata = filename_to_url(filepath)
self.assertEqual(metadata, (url, f'"{PINNED_SHA256}"'))
def test_has_file(self):
self.assertTrue(has_file("hf-internal-testing/tiny-bert-pt-only", WEIGHTS_NAME))
self.assertFalse(has_file("hf-internal-testing/tiny-bert-pt-only", TF2_WEIGHTS_NAME))
self.assertFalse(has_file("hf-internal-testing/tiny-bert-pt-only", FLAX_WEIGHTS_NAME))
def test_get_file_from_repo_distant(self):
# `get_file_from_repo` returns None if the file does not exist
self.assertIsNone(get_file_from_repo("bert-base-cased", "ahah.txt"))
# The function raises if the repository does not exist.
with self.assertRaisesRegex(EnvironmentError, "is not a valid model identifier"):
get_file_from_repo("bert-base-case", "config.json")
# The function raises if the revision does not exist.
with self.assertRaisesRegex(EnvironmentError, "is not a valid git identifier"):
get_file_from_repo("bert-base-cased", "config.json", revision="ahaha")
resolved_file = get_file_from_repo("bert-base-cased", "config.json")
# The name is the cached name which is not very easy to test, so instead we load the content.
config = json.loads(open(resolved_file, "r").read())
self.assertEqual(config["hidden_size"], 768)
def test_get_file_from_repo_local(self):
with tempfile.TemporaryDirectory() as tmp_dir:
filename = Path(tmp_dir) / "a.txt"
filename.touch()
self.assertEqual(get_file_from_repo(tmp_dir, "a.txt"), str(filename))
self.assertIsNone(get_file_from_repo(tmp_dir, "b.txt"))
class ContextManagerTests(unittest.TestCase):
@unittest.mock.patch("sys.stdout", new_callable=io.StringIO)
def test_no_context(self, mock_stdout):
with ContextManagers([]):
print("Transformers are awesome!")
# The print statement adds a new line at the end of the output
self.assertEqual(mock_stdout.getvalue(), "Transformers are awesome!\n")
@unittest.mock.patch("sys.stdout", new_callable=io.StringIO)
def test_one_context(self, mock_stdout):
with ContextManagers([context_en()]):
print("Transformers are awesome!")
# The output should be wrapped with an English welcome and goodbye
self.assertEqual(mock_stdout.getvalue(), "Welcome!\nTransformers are awesome!\nBye!\n")
@unittest.mock.patch("sys.stdout", new_callable=io.StringIO)
def test_two_context(self, mock_stdout):
with ContextManagers([context_fr(), context_en()]):
print("Transformers are awesome!")
# The output should be wrapped with an English and French welcome and goodbye
self.assertEqual(mock_stdout.getvalue(), "Bonjour!\nWelcome!\nTransformers are awesome!\nBye!\nAu revoir!\n")

View File

@@ -0,0 +1,230 @@
# Copyright 2020 The HuggingFace Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
import unittest
from argparse import Namespace
from dataclasses import dataclass, field
from enum import Enum
from typing import List, Optional
from transformers import HfArgumentParser, TrainingArguments
from transformers.hf_argparser import string_to_bool
def list_field(default=None, metadata=None):
return field(default_factory=lambda: default, metadata=metadata)
@dataclass
class BasicExample:
foo: int
bar: float
baz: str
flag: bool
@dataclass
class WithDefaultExample:
foo: int = 42
baz: str = field(default="toto", metadata={"help": "help message"})
@dataclass
class WithDefaultBoolExample:
foo: bool = False
baz: bool = True
opt: Optional[bool] = None
class BasicEnum(Enum):
titi = "titi"
toto = "toto"
@dataclass
class EnumExample:
foo: BasicEnum = "toto"
def __post_init__(self):
self.foo = BasicEnum(self.foo)
@dataclass
class OptionalExample:
foo: Optional[int] = None
bar: Optional[float] = field(default=None, metadata={"help": "help message"})
baz: Optional[str] = None
ces: Optional[List[str]] = list_field(default=[])
des: Optional[List[int]] = list_field(default=[])
@dataclass
class ListExample:
foo_int: List[int] = list_field(default=[])
bar_int: List[int] = list_field(default=[1, 2, 3])
foo_str: List[str] = list_field(default=["Hallo", "Bonjour", "Hello"])
foo_float: List[float] = list_field(default=[0.1, 0.2, 0.3])
@dataclass
class RequiredExample:
required_list: List[int] = field()
required_str: str = field()
required_enum: BasicEnum = field()
def __post_init__(self):
self.required_enum = BasicEnum(self.required_enum)
class HfArgumentParserTest(unittest.TestCase):
def argparsersEqual(self, a: argparse.ArgumentParser, b: argparse.ArgumentParser) -> bool:
"""
Small helper to check pseudo-equality of parsed arguments on `ArgumentParser` instances.
"""
self.assertEqual(len(a._actions), len(b._actions))
for x, y in zip(a._actions, b._actions):
xx = {k: v for k, v in vars(x).items() if k != "container"}
yy = {k: v for k, v in vars(y).items() if k != "container"}
self.assertEqual(xx, yy)
def test_basic(self):
parser = HfArgumentParser(BasicExample)
expected = argparse.ArgumentParser()
expected.add_argument("--foo", type=int, required=True)
expected.add_argument("--bar", type=float, required=True)
expected.add_argument("--baz", type=str, required=True)
expected.add_argument("--flag", type=string_to_bool, default=False, const=True, nargs="?")
self.argparsersEqual(parser, expected)
args = ["--foo", "1", "--baz", "quux", "--bar", "0.5"]
(example,) = parser.parse_args_into_dataclasses(args, look_for_args_file=False)
self.assertFalse(example.flag)
def test_with_default(self):
parser = HfArgumentParser(WithDefaultExample)
expected = argparse.ArgumentParser()
expected.add_argument("--foo", default=42, type=int)
expected.add_argument("--baz", default="toto", type=str, help="help message")
self.argparsersEqual(parser, expected)
def test_with_default_bool(self):
parser = HfArgumentParser(WithDefaultBoolExample)
expected = argparse.ArgumentParser()
expected.add_argument("--foo", type=string_to_bool, default=False, const=True, nargs="?")
expected.add_argument("--baz", type=string_to_bool, default=True, const=True, nargs="?")
# A boolean no_* argument always has to come after its "default: True" regular counter-part
# and its default must be set to False
expected.add_argument("--no_baz", action="store_false", default=False, dest="baz")
expected.add_argument("--opt", type=string_to_bool, default=None)
self.argparsersEqual(parser, expected)
args = parser.parse_args([])
self.assertEqual(args, Namespace(foo=False, baz=True, opt=None))
args = parser.parse_args(["--foo", "--no_baz"])
self.assertEqual(args, Namespace(foo=True, baz=False, opt=None))
args = parser.parse_args(["--foo", "--baz"])
self.assertEqual(args, Namespace(foo=True, baz=True, opt=None))
args = parser.parse_args(["--foo", "True", "--baz", "True", "--opt", "True"])
self.assertEqual(args, Namespace(foo=True, baz=True, opt=True))
args = parser.parse_args(["--foo", "False", "--baz", "False", "--opt", "False"])
self.assertEqual(args, Namespace(foo=False, baz=False, opt=False))
def test_with_enum(self):
parser = HfArgumentParser(EnumExample)
expected = argparse.ArgumentParser()
expected.add_argument("--foo", default="toto", choices=["titi", "toto"], type=str)
self.argparsersEqual(parser, expected)
args = parser.parse_args([])
self.assertEqual(args.foo, "toto")
enum_ex = parser.parse_args_into_dataclasses([])[0]
self.assertEqual(enum_ex.foo, BasicEnum.toto)
args = parser.parse_args(["--foo", "titi"])
self.assertEqual(args.foo, "titi")
enum_ex = parser.parse_args_into_dataclasses(["--foo", "titi"])[0]
self.assertEqual(enum_ex.foo, BasicEnum.titi)
def test_with_list(self):
parser = HfArgumentParser(ListExample)
expected = argparse.ArgumentParser()
expected.add_argument("--foo_int", nargs="+", default=[], type=int)
expected.add_argument("--bar_int", nargs="+", default=[1, 2, 3], type=int)
expected.add_argument("--foo_str", nargs="+", default=["Hallo", "Bonjour", "Hello"], type=str)
expected.add_argument("--foo_float", nargs="+", default=[0.1, 0.2, 0.3], type=float)
self.argparsersEqual(parser, expected)
args = parser.parse_args([])
self.assertEqual(
args,
Namespace(foo_int=[], bar_int=[1, 2, 3], foo_str=["Hallo", "Bonjour", "Hello"], foo_float=[0.1, 0.2, 0.3]),
)
args = parser.parse_args("--foo_int 1 --bar_int 2 3 --foo_str a b c --foo_float 0.1 0.7".split())
self.assertEqual(args, Namespace(foo_int=[1], bar_int=[2, 3], foo_str=["a", "b", "c"], foo_float=[0.1, 0.7]))
def test_with_optional(self):
parser = HfArgumentParser(OptionalExample)
expected = argparse.ArgumentParser()
expected.add_argument("--foo", default=None, type=int)
expected.add_argument("--bar", default=None, type=float, help="help message")
expected.add_argument("--baz", default=None, type=str)
expected.add_argument("--ces", nargs="+", default=[], type=str)
expected.add_argument("--des", nargs="+", default=[], type=int)
self.argparsersEqual(parser, expected)
args = parser.parse_args([])
self.assertEqual(args, Namespace(foo=None, bar=None, baz=None, ces=[], des=[]))
args = parser.parse_args("--foo 12 --bar 3.14 --baz 42 --ces a b c --des 1 2 3".split())
self.assertEqual(args, Namespace(foo=12, bar=3.14, baz="42", ces=["a", "b", "c"], des=[1, 2, 3]))
def test_with_required(self):
parser = HfArgumentParser(RequiredExample)
expected = argparse.ArgumentParser()
expected.add_argument("--required_list", nargs="+", type=int, required=True)
expected.add_argument("--required_str", type=str, required=True)
expected.add_argument("--required_enum", type=str, choices=["titi", "toto"], required=True)
self.argparsersEqual(parser, expected)
def test_parse_dict(self):
parser = HfArgumentParser(BasicExample)
args_dict = {
"foo": 12,
"bar": 3.14,
"baz": "42",
"flag": True,
}
parsed_args = parser.parse_dict(args_dict)[0]
args = BasicExample(**args_dict)
self.assertEqual(parsed_args, args)
def test_integration_training_args(self):
parser = HfArgumentParser(TrainingArguments)
self.assertIsNotNone(parser)

View File

@@ -0,0 +1,487 @@
# coding=utf-8
# Copyright 2021 HuggingFace Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import unittest
import datasets
import numpy as np
from transformers import is_torch_available, is_vision_available
from transformers.testing_utils import require_torch, require_vision
if is_torch_available():
import torch
if is_vision_available():
import PIL.Image
from transformers import ImageFeatureExtractionMixin
from transformers.image_utils import load_image
def get_random_image(height, width):
random_array = np.random.randint(0, 256, (height, width, 3), dtype=np.uint8)
return PIL.Image.fromarray(random_array)
@require_vision
class ImageFeatureExtractionTester(unittest.TestCase):
def test_conversion_image_to_array(self):
feature_extractor = ImageFeatureExtractionMixin()
image = get_random_image(16, 32)
# Conversion with defaults (rescale + channel first)
array1 = feature_extractor.to_numpy_array(image)
self.assertTrue(array1.dtype, np.float32)
self.assertEqual(array1.shape, (3, 16, 32))
# Conversion with rescale and not channel first
array2 = feature_extractor.to_numpy_array(image, channel_first=False)
self.assertTrue(array2.dtype, np.float32)
self.assertEqual(array2.shape, (16, 32, 3))
self.assertTrue(np.array_equal(array1, array2.transpose(2, 0, 1)))
# Conversion with no rescale and channel first
array3 = feature_extractor.to_numpy_array(image, rescale=False)
self.assertTrue(array3.dtype, np.uint8)
self.assertEqual(array3.shape, (3, 16, 32))
self.assertTrue(np.array_equal(array1, array3.astype(np.float32) / 255.0))
# Conversion with no rescale and not channel first
array4 = feature_extractor.to_numpy_array(image, rescale=False, channel_first=False)
self.assertTrue(array4.dtype, np.uint8)
self.assertEqual(array4.shape, (16, 32, 3))
self.assertTrue(np.array_equal(array2, array4.astype(np.float32) / 255.0))
def test_conversion_array_to_array(self):
feature_extractor = ImageFeatureExtractionMixin()
array = np.random.randint(0, 256, (16, 32, 3), dtype=np.uint8)
# By default, rescale (for an array of ints) and channel permute
array1 = feature_extractor.to_numpy_array(array)
self.assertTrue(array1.dtype, np.float32)
self.assertEqual(array1.shape, (3, 16, 32))
self.assertTrue(np.array_equal(array1, array.transpose(2, 0, 1).astype(np.float32) / 255.0))
# Same with no permute
array2 = feature_extractor.to_numpy_array(array, channel_first=False)
self.assertTrue(array2.dtype, np.float32)
self.assertEqual(array2.shape, (16, 32, 3))
self.assertTrue(np.array_equal(array2, array.astype(np.float32) / 255.0))
# Force rescale to False
array3 = feature_extractor.to_numpy_array(array, rescale=False)
self.assertTrue(array3.dtype, np.uint8)
self.assertEqual(array3.shape, (3, 16, 32))
self.assertTrue(np.array_equal(array3, array.transpose(2, 0, 1)))
# Force rescale to False and no channel permute
array4 = feature_extractor.to_numpy_array(array, rescale=False, channel_first=False)
self.assertTrue(array4.dtype, np.uint8)
self.assertEqual(array4.shape, (16, 32, 3))
self.assertTrue(np.array_equal(array4, array))
# Now test the default rescale for a float array (defaults to False)
array5 = feature_extractor.to_numpy_array(array2)
self.assertTrue(array5.dtype, np.float32)
self.assertEqual(array5.shape, (3, 16, 32))
self.assertTrue(np.array_equal(array5, array1))
@require_torch
def test_conversion_torch_to_array(self):
feature_extractor = ImageFeatureExtractionMixin()
tensor = torch.randint(0, 256, (16, 32, 3))
array = tensor.numpy()
# By default, rescale (for a tensor of ints) and channel permute
array1 = feature_extractor.to_numpy_array(array)
self.assertTrue(array1.dtype, np.float32)
self.assertEqual(array1.shape, (3, 16, 32))
self.assertTrue(np.array_equal(array1, array.transpose(2, 0, 1).astype(np.float32) / 255.0))
# Same with no permute
array2 = feature_extractor.to_numpy_array(array, channel_first=False)
self.assertTrue(array2.dtype, np.float32)
self.assertEqual(array2.shape, (16, 32, 3))
self.assertTrue(np.array_equal(array2, array.astype(np.float32) / 255.0))
# Force rescale to False
array3 = feature_extractor.to_numpy_array(array, rescale=False)
self.assertTrue(array3.dtype, np.uint8)
self.assertEqual(array3.shape, (3, 16, 32))
self.assertTrue(np.array_equal(array3, array.transpose(2, 0, 1)))
# Force rescale to False and no channel permute
array4 = feature_extractor.to_numpy_array(array, rescale=False, channel_first=False)
self.assertTrue(array4.dtype, np.uint8)
self.assertEqual(array4.shape, (16, 32, 3))
self.assertTrue(np.array_equal(array4, array))
# Now test the default rescale for a float tensor (defaults to False)
array5 = feature_extractor.to_numpy_array(array2)
self.assertTrue(array5.dtype, np.float32)
self.assertEqual(array5.shape, (3, 16, 32))
self.assertTrue(np.array_equal(array5, array1))
def test_conversion_image_to_image(self):
feature_extractor = ImageFeatureExtractionMixin()
image = get_random_image(16, 32)
# On an image, `to_pil_image1` is a noop.
image1 = feature_extractor.to_pil_image(image)
self.assertTrue(isinstance(image, PIL.Image.Image))
self.assertTrue(np.array_equal(np.array(image), np.array(image1)))
def test_conversion_array_to_image(self):
feature_extractor = ImageFeatureExtractionMixin()
array = np.random.randint(0, 256, (16, 32, 3), dtype=np.uint8)
# By default, no rescale (for an array of ints)
image1 = feature_extractor.to_pil_image(array)
self.assertTrue(isinstance(image1, PIL.Image.Image))
self.assertTrue(np.array_equal(np.array(image1), array))
# If the array is channel-first, proper reordering of the channels is done.
image2 = feature_extractor.to_pil_image(array.transpose(2, 0, 1))
self.assertTrue(isinstance(image2, PIL.Image.Image))
self.assertTrue(np.array_equal(np.array(image2), array))
# If the array has floating type, it's rescaled by default.
image3 = feature_extractor.to_pil_image(array.astype(np.float32) / 255.0)
self.assertTrue(isinstance(image3, PIL.Image.Image))
self.assertTrue(np.array_equal(np.array(image3), array))
# You can override the default to rescale.
image4 = feature_extractor.to_pil_image(array.astype(np.float32), rescale=False)
self.assertTrue(isinstance(image4, PIL.Image.Image))
self.assertTrue(np.array_equal(np.array(image4), array))
# And with floats + channel first.
image5 = feature_extractor.to_pil_image(array.transpose(2, 0, 1).astype(np.float32) / 255.0)
self.assertTrue(isinstance(image5, PIL.Image.Image))
self.assertTrue(np.array_equal(np.array(image5), array))
@require_torch
def test_conversion_tensor_to_image(self):
feature_extractor = ImageFeatureExtractionMixin()
tensor = torch.randint(0, 256, (16, 32, 3))
array = tensor.numpy()
# By default, no rescale (for a tensor of ints)
image1 = feature_extractor.to_pil_image(tensor)
self.assertTrue(isinstance(image1, PIL.Image.Image))
self.assertTrue(np.array_equal(np.array(image1), array))
# If the tensor is channel-first, proper reordering of the channels is done.
image2 = feature_extractor.to_pil_image(tensor.permute(2, 0, 1))
self.assertTrue(isinstance(image2, PIL.Image.Image))
self.assertTrue(np.array_equal(np.array(image2), array))
# If the tensor has floating type, it's rescaled by default.
image3 = feature_extractor.to_pil_image(tensor.float() / 255.0)
self.assertTrue(isinstance(image3, PIL.Image.Image))
self.assertTrue(np.array_equal(np.array(image3), array))
# You can override the default to rescale.
image4 = feature_extractor.to_pil_image(tensor.float(), rescale=False)
self.assertTrue(isinstance(image4, PIL.Image.Image))
self.assertTrue(np.array_equal(np.array(image4), array))
# And with floats + channel first.
image5 = feature_extractor.to_pil_image(tensor.permute(2, 0, 1).float() / 255.0)
self.assertTrue(isinstance(image5, PIL.Image.Image))
self.assertTrue(np.array_equal(np.array(image5), array))
def test_resize_image_and_array(self):
feature_extractor = ImageFeatureExtractionMixin()
image = get_random_image(16, 32)
array = np.array(image)
# Size can be an int or a tuple of ints.
resized_image = feature_extractor.resize(image, 8)
self.assertTrue(isinstance(resized_image, PIL.Image.Image))
self.assertEqual(resized_image.size, (8, 8))
resized_image1 = feature_extractor.resize(image, (8, 16))
self.assertTrue(isinstance(resized_image1, PIL.Image.Image))
self.assertEqual(resized_image1.size, (8, 16))
# Passing an array converts it to a PIL Image.
resized_image2 = feature_extractor.resize(array, 8)
self.assertTrue(isinstance(resized_image2, PIL.Image.Image))
self.assertEqual(resized_image2.size, (8, 8))
self.assertTrue(np.array_equal(np.array(resized_image), np.array(resized_image2)))
resized_image3 = feature_extractor.resize(image, (8, 16))
self.assertTrue(isinstance(resized_image3, PIL.Image.Image))
self.assertEqual(resized_image3.size, (8, 16))
self.assertTrue(np.array_equal(np.array(resized_image1), np.array(resized_image3)))
def test_resize_image_and_array_non_default_to_square(self):
feature_extractor = ImageFeatureExtractionMixin()
heights_widths = [
# height, width
# square image
(28, 28),
(27, 27),
# rectangular image: h < w
(28, 34),
(29, 35),
# rectangular image: h > w
(34, 28),
(35, 29),
]
# single integer or single integer in tuple/list
sizes = [22, 27, 28, 36, [22], (27,)]
for (height, width), size in zip(heights_widths, sizes):
for max_size in (None, 37, 1000):
image = get_random_image(height, width)
array = np.array(image)
size = size[0] if isinstance(size, (list, tuple)) else size
# Size can be an int or a tuple of ints.
# If size is an int, smaller edge of the image will be matched to this number.
# i.e, if height > width, then image will be rescaled to (size * height / width, size).
if height < width:
exp_w, exp_h = (int(size * width / height), size)
if max_size is not None and max_size < exp_w:
exp_w, exp_h = max_size, int(max_size * exp_h / exp_w)
elif width < height:
exp_w, exp_h = (size, int(size * height / width))
if max_size is not None and max_size < exp_h:
exp_w, exp_h = int(max_size * exp_w / exp_h), max_size
else:
exp_w, exp_h = (size, size)
if max_size is not None and max_size < size:
exp_w, exp_h = max_size, max_size
resized_image = feature_extractor.resize(image, size=size, default_to_square=False, max_size=max_size)
self.assertTrue(isinstance(resized_image, PIL.Image.Image))
self.assertEqual(resized_image.size, (exp_w, exp_h))
# Passing an array converts it to a PIL Image.
resized_image2 = feature_extractor.resize(array, size=size, default_to_square=False, max_size=max_size)
self.assertTrue(isinstance(resized_image2, PIL.Image.Image))
self.assertEqual(resized_image2.size, (exp_w, exp_h))
self.assertTrue(np.array_equal(np.array(resized_image), np.array(resized_image2)))
@require_torch
def test_resize_tensor(self):
feature_extractor = ImageFeatureExtractionMixin()
tensor = torch.randint(0, 256, (16, 32, 3))
array = tensor.numpy()
# Size can be an int or a tuple of ints.
resized_image = feature_extractor.resize(tensor, 8)
self.assertTrue(isinstance(resized_image, PIL.Image.Image))
self.assertEqual(resized_image.size, (8, 8))
resized_image1 = feature_extractor.resize(tensor, (8, 16))
self.assertTrue(isinstance(resized_image1, PIL.Image.Image))
self.assertEqual(resized_image1.size, (8, 16))
# Check we get the same results as with NumPy arrays.
resized_image2 = feature_extractor.resize(array, 8)
self.assertTrue(np.array_equal(np.array(resized_image), np.array(resized_image2)))
resized_image3 = feature_extractor.resize(array, (8, 16))
self.assertTrue(np.array_equal(np.array(resized_image1), np.array(resized_image3)))
def test_normalize_image(self):
feature_extractor = ImageFeatureExtractionMixin()
image = get_random_image(16, 32)
array = np.array(image)
mean = [0.1, 0.5, 0.9]
std = [0.2, 0.4, 0.6]
# PIL Image are converted to NumPy arrays for the normalization
normalized_image = feature_extractor.normalize(image, mean, std)
self.assertTrue(isinstance(normalized_image, np.ndarray))
self.assertEqual(normalized_image.shape, (3, 16, 32))
# During the conversion rescale and channel first will be applied.
expected = array.transpose(2, 0, 1).astype(np.float32) / 255.0
np_mean = np.array(mean).astype(np.float32)[:, None, None]
np_std = np.array(std).astype(np.float32)[:, None, None]
expected = (expected - np_mean) / np_std
self.assertTrue(np.array_equal(normalized_image, expected))
def test_normalize_array(self):
feature_extractor = ImageFeatureExtractionMixin()
array = np.random.random((16, 32, 3))
mean = [0.1, 0.5, 0.9]
std = [0.2, 0.4, 0.6]
# mean and std can be passed as lists or NumPy arrays.
expected = (array - np.array(mean)) / np.array(std)
normalized_array = feature_extractor.normalize(array, mean, std)
self.assertTrue(np.array_equal(normalized_array, expected))
normalized_array = feature_extractor.normalize(array, np.array(mean), np.array(std))
self.assertTrue(np.array_equal(normalized_array, expected))
# Normalize will detect automatically if channel first or channel last is used.
array = np.random.random((3, 16, 32))
expected = (array - np.array(mean)[:, None, None]) / np.array(std)[:, None, None]
normalized_array = feature_extractor.normalize(array, mean, std)
self.assertTrue(np.array_equal(normalized_array, expected))
normalized_array = feature_extractor.normalize(array, np.array(mean), np.array(std))
self.assertTrue(np.array_equal(normalized_array, expected))
@require_torch
def test_normalize_tensor(self):
feature_extractor = ImageFeatureExtractionMixin()
tensor = torch.rand(16, 32, 3)
mean = [0.1, 0.5, 0.9]
std = [0.2, 0.4, 0.6]
# mean and std can be passed as lists or tensors.
expected = (tensor - torch.tensor(mean)) / torch.tensor(std)
normalized_tensor = feature_extractor.normalize(tensor, mean, std)
self.assertTrue(torch.equal(normalized_tensor, expected))
normalized_tensor = feature_extractor.normalize(tensor, torch.tensor(mean), torch.tensor(std))
self.assertTrue(torch.equal(normalized_tensor, expected))
# Normalize will detect automatically if channel first or channel last is used.
tensor = torch.rand(3, 16, 32)
expected = (tensor - torch.tensor(mean)[:, None, None]) / torch.tensor(std)[:, None, None]
normalized_tensor = feature_extractor.normalize(tensor, mean, std)
self.assertTrue(torch.equal(normalized_tensor, expected))
normalized_tensor = feature_extractor.normalize(tensor, torch.tensor(mean), torch.tensor(std))
self.assertTrue(torch.equal(normalized_tensor, expected))
def test_center_crop_image(self):
feature_extractor = ImageFeatureExtractionMixin()
image = get_random_image(16, 32)
# Test various crop sizes: bigger on all dimensions, on one of the dimensions only and on both dimensions.
crop_sizes = [8, (8, 64), 20, (32, 64)]
for size in crop_sizes:
cropped_image = feature_extractor.center_crop(image, size)
self.assertTrue(isinstance(cropped_image, PIL.Image.Image))
# PIL Image.size is transposed compared to NumPy or PyTorch (width first instead of height first).
expected_size = (size, size) if isinstance(size, int) else (size[1], size[0])
self.assertEqual(cropped_image.size, expected_size)
def test_center_crop_array(self):
feature_extractor = ImageFeatureExtractionMixin()
image = get_random_image(16, 32)
array = feature_extractor.to_numpy_array(image)
# Test various crop sizes: bigger on all dimensions, on one of the dimensions only and on both dimensions.
crop_sizes = [8, (8, 64), 20, (32, 64)]
for size in crop_sizes:
cropped_array = feature_extractor.center_crop(array, size)
self.assertTrue(isinstance(cropped_array, np.ndarray))
expected_size = (size, size) if isinstance(size, int) else size
self.assertEqual(cropped_array.shape[-2:], expected_size)
# Check result is consistent with PIL.Image.crop
cropped_image = feature_extractor.center_crop(image, size)
self.assertTrue(np.array_equal(cropped_array, feature_extractor.to_numpy_array(cropped_image)))
@require_torch
def test_center_crop_tensor(self):
feature_extractor = ImageFeatureExtractionMixin()
image = get_random_image(16, 32)
array = feature_extractor.to_numpy_array(image)
tensor = torch.tensor(array)
# Test various crop sizes: bigger on all dimensions, on one of the dimensions only and on both dimensions.
crop_sizes = [8, (8, 64), 20, (32, 64)]
for size in crop_sizes:
cropped_tensor = feature_extractor.center_crop(tensor, size)
self.assertTrue(isinstance(cropped_tensor, torch.Tensor))
expected_size = (size, size) if isinstance(size, int) else size
self.assertEqual(cropped_tensor.shape[-2:], expected_size)
# Check result is consistent with PIL.Image.crop
cropped_image = feature_extractor.center_crop(image, size)
self.assertTrue(torch.equal(cropped_tensor, torch.tensor(feature_extractor.to_numpy_array(cropped_image))))
@require_vision
class LoadImageTester(unittest.TestCase):
def test_load_img_local(self):
img = load_image("./tests/fixtures/tests_samples/COCO/000000039769.png")
img_arr = np.array(img)
self.assertEqual(
img_arr.shape,
(480, 640, 3),
)
def test_load_img_rgba(self):
dataset = datasets.load_dataset("hf-internal-testing/fixtures_image_utils", "image", split="test")
img = load_image(dataset[0]["file"]) # img with mode RGBA
img_arr = np.array(img)
self.assertEqual(
img_arr.shape,
(512, 512, 3),
)
def test_load_img_la(self):
dataset = datasets.load_dataset("hf-internal-testing/fixtures_image_utils", "image", split="test")
img = load_image(dataset[1]["file"]) # img with mode LA
img_arr = np.array(img)
self.assertEqual(
img_arr.shape,
(512, 768, 3),
)
def test_load_img_l(self):
dataset = datasets.load_dataset("hf-internal-testing/fixtures_image_utils", "image", split="test")
img = load_image(dataset[2]["file"]) # img with mode L
img_arr = np.array(img)
self.assertEqual(
img_arr.shape,
(381, 225, 3),
)
def test_load_img_exif_transpose(self):
dataset = datasets.load_dataset("hf-internal-testing/fixtures_image_utils", "image", split="test")
img_file = dataset[3]["file"]
img_without_exif_transpose = PIL.Image.open(img_file)
img_arr_without_exif_transpose = np.array(img_without_exif_transpose)
self.assertEqual(
img_arr_without_exif_transpose.shape,
(333, 500, 3),
)
img_with_exif_transpose = load_image(img_file)
img_arr_with_exif_transpose = np.array(img_with_exif_transpose)
self.assertEqual(
img_arr_with_exif_transpose.shape,
(500, 333, 3),
)

139
tests/utils/test_logging.py Normal file
View File

@@ -0,0 +1,139 @@
# Copyright 2020 The HuggingFace Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import unittest
from unittest.mock import patch
import transformers.models.bart.tokenization_bart
from transformers import AutoConfig, logging
from transformers.testing_utils import CaptureLogger, mockenv, mockenv_context
from transformers.utils.logging import disable_progress_bar, enable_progress_bar
class HfArgumentParserTest(unittest.TestCase):
def test_set_level(self):
logger = logging.get_logger()
# the current default level is logging.WARNING
level_origin = logging.get_verbosity()
logging.set_verbosity_error()
self.assertEqual(logger.getEffectiveLevel(), logging.get_verbosity())
logging.set_verbosity_warning()
self.assertEqual(logger.getEffectiveLevel(), logging.get_verbosity())
logging.set_verbosity_info()
self.assertEqual(logger.getEffectiveLevel(), logging.get_verbosity())
logging.set_verbosity_debug()
self.assertEqual(logger.getEffectiveLevel(), logging.get_verbosity())
# restore to the original level
logging.set_verbosity(level_origin)
def test_integration(self):
level_origin = logging.get_verbosity()
logger = logging.get_logger("transformers.models.bart.tokenization_bart")
msg = "Testing 1, 2, 3"
# should be able to log warnings (if default settings weren't overridden by `pytest --log-level-all`)
if level_origin <= logging.WARNING:
with CaptureLogger(logger) as cl:
logger.warning(msg)
self.assertEqual(cl.out, msg + "\n")
# this is setting the level for all of `transformers.*` loggers
logging.set_verbosity_error()
# should not be able to log warnings
with CaptureLogger(logger) as cl:
logger.warning(msg)
self.assertEqual(cl.out, "")
# should be able to log warnings again
logging.set_verbosity_warning()
with CaptureLogger(logger) as cl:
logger.warning(msg)
self.assertEqual(cl.out, msg + "\n")
# restore to the original level
logging.set_verbosity(level_origin)
@mockenv(TRANSFORMERS_VERBOSITY="error")
def test_env_override(self):
# reset for the env var to take effect, next time some logger call is made
transformers.utils.logging._reset_library_root_logger()
# this action activates the env var
_ = logging.get_logger("transformers.models.bart.tokenization_bart")
env_level_str = os.getenv("TRANSFORMERS_VERBOSITY", None)
env_level = logging.log_levels[env_level_str]
current_level = logging.get_verbosity()
self.assertEqual(
env_level,
current_level,
f"TRANSFORMERS_VERBOSITY={env_level_str}/{env_level}, but internal verbosity is {current_level}",
)
# restore to the original level
os.environ["TRANSFORMERS_VERBOSITY"] = ""
transformers.utils.logging._reset_library_root_logger()
@mockenv(TRANSFORMERS_VERBOSITY="super-error")
def test_env_invalid_override(self):
# reset for the env var to take effect, next time some logger call is made
transformers.utils.logging._reset_library_root_logger()
logger = logging.logging.getLogger()
with CaptureLogger(logger) as cl:
# this action activates the env var
logging.get_logger("transformers.models.bart.tokenization_bart")
self.assertIn("Unknown option TRANSFORMERS_VERBOSITY=super-error", cl.out)
# no need to restore as nothing was changed
def test_advisory_warnings(self):
# testing `logger.warning_advice()`
logger = logging.get_logger("transformers.models.bart.tokenization_bart")
msg = "Testing 1, 2, 3"
with mockenv_context(TRANSFORMERS_NO_ADVISORY_WARNINGS="1"):
# nothing should be logged as env var disables this method
with CaptureLogger(logger) as cl:
logger.warning_advice(msg)
self.assertEqual(cl.out, "")
with mockenv_context(TRANSFORMERS_NO_ADVISORY_WARNINGS=""):
# should log normally as TRANSFORMERS_NO_ADVISORY_WARNINGS is unset
with CaptureLogger(logger) as cl:
logger.warning_advice(msg)
self.assertEqual(cl.out, msg + "\n")
def test_set_progress_bar_enabled():
TINY_MODEL = "hf-internal-testing/tiny-random-distilbert"
with patch("tqdm.auto.tqdm") as mock_tqdm:
disable_progress_bar()
_ = AutoConfig.from_pretrained(TINY_MODEL, force_download=True)
mock_tqdm.assert_not_called()
mock_tqdm.reset_mock()
enable_progress_bar()
_ = AutoConfig.from_pretrained(TINY_MODEL, force_download=True)
mock_tqdm.assert_called()

View File

@@ -0,0 +1,81 @@
# coding=utf-8
# Copyright 2019 HuggingFace Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import json
import os
import tempfile
import unittest
from transformers.modelcard import ModelCard
class ModelCardTester(unittest.TestCase):
def setUp(self):
self.inputs_dict = {
"model_details": {
"Organization": "testing",
"Model date": "today",
"Model version": "v2.1, Developed by Test Corp in 2019.",
"Architecture": "Convolutional Neural Network.",
},
"metrics": "BLEU and ROUGE-1",
"evaluation_data": {
"Datasets": {"BLEU": "My-great-dataset-v1", "ROUGE-1": "My-short-dataset-v2.1"},
"Preprocessing": "See details on https://arxiv.org/pdf/1810.03993.pdf",
},
"training_data": {
"Dataset": "English Wikipedia dump dated 2018-12-01",
"Preprocessing": "Using SentencePiece vocabulary of size 52k tokens. See details on https://arxiv.org/pdf/1810.03993.pdf",
},
"quantitative_analyses": {"BLEU": 55.1, "ROUGE-1": 76},
}
def test_model_card_common_properties(self):
modelcard = ModelCard.from_dict(self.inputs_dict)
self.assertTrue(hasattr(modelcard, "model_details"))
self.assertTrue(hasattr(modelcard, "intended_use"))
self.assertTrue(hasattr(modelcard, "factors"))
self.assertTrue(hasattr(modelcard, "metrics"))
self.assertTrue(hasattr(modelcard, "evaluation_data"))
self.assertTrue(hasattr(modelcard, "training_data"))
self.assertTrue(hasattr(modelcard, "quantitative_analyses"))
self.assertTrue(hasattr(modelcard, "ethical_considerations"))
self.assertTrue(hasattr(modelcard, "caveats_and_recommendations"))
def test_model_card_to_json_string(self):
modelcard = ModelCard.from_dict(self.inputs_dict)
obj = json.loads(modelcard.to_json_string())
for key, value in self.inputs_dict.items():
self.assertEqual(obj[key], value)
def test_model_card_to_json_file(self):
model_card_first = ModelCard.from_dict(self.inputs_dict)
with tempfile.TemporaryDirectory() as tmpdirname:
filename = os.path.join(tmpdirname, "modelcard.json")
model_card_first.to_json_file(filename)
model_card_second = ModelCard.from_json_file(filename)
self.assertEqual(model_card_second.to_dict(), model_card_first.to_dict())
def test_model_card_from_and_save_pretrained(self):
model_card_first = ModelCard.from_dict(self.inputs_dict)
with tempfile.TemporaryDirectory() as tmpdirname:
model_card_first.save_pretrained(tmpdirname)
model_card_second = ModelCard.from_pretrained(tmpdirname)
self.assertEqual(model_card_second.to_dict(), model_card_first.to_dict())

View File

@@ -0,0 +1,109 @@
# coding=utf-8
# Copyright 2020 The Hugging Face Team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import unittest
from dataclasses import dataclass
from typing import Optional
from transformers.file_utils import ModelOutput
@dataclass
class ModelOutputTest(ModelOutput):
a: float
b: Optional[float] = None
c: Optional[float] = None
class ModelOutputTester(unittest.TestCase):
def test_get_attributes(self):
x = ModelOutputTest(a=30)
self.assertEqual(x.a, 30)
self.assertIsNone(x.b)
self.assertIsNone(x.c)
with self.assertRaises(AttributeError):
_ = x.d
def test_index_with_ints_and_slices(self):
x = ModelOutputTest(a=30, b=10)
self.assertEqual(x[0], 30)
self.assertEqual(x[1], 10)
self.assertEqual(x[:2], (30, 10))
self.assertEqual(x[:], (30, 10))
x = ModelOutputTest(a=30, c=10)
self.assertEqual(x[0], 30)
self.assertEqual(x[1], 10)
self.assertEqual(x[:2], (30, 10))
self.assertEqual(x[:], (30, 10))
def test_index_with_strings(self):
x = ModelOutputTest(a=30, b=10)
self.assertEqual(x["a"], 30)
self.assertEqual(x["b"], 10)
with self.assertRaises(KeyError):
_ = x["c"]
x = ModelOutputTest(a=30, c=10)
self.assertEqual(x["a"], 30)
self.assertEqual(x["c"], 10)
with self.assertRaises(KeyError):
_ = x["b"]
def test_dict_like_properties(self):
x = ModelOutputTest(a=30)
self.assertEqual(list(x.keys()), ["a"])
self.assertEqual(list(x.values()), [30])
self.assertEqual(list(x.items()), [("a", 30)])
self.assertEqual(list(x), ["a"])
x = ModelOutputTest(a=30, b=10)
self.assertEqual(list(x.keys()), ["a", "b"])
self.assertEqual(list(x.values()), [30, 10])
self.assertEqual(list(x.items()), [("a", 30), ("b", 10)])
self.assertEqual(list(x), ["a", "b"])
x = ModelOutputTest(a=30, c=10)
self.assertEqual(list(x.keys()), ["a", "c"])
self.assertEqual(list(x.values()), [30, 10])
self.assertEqual(list(x.items()), [("a", 30), ("c", 10)])
self.assertEqual(list(x), ["a", "c"])
with self.assertRaises(Exception):
x = x.update({"d": 20})
with self.assertRaises(Exception):
del x["a"]
with self.assertRaises(Exception):
_ = x.pop("a")
with self.assertRaises(Exception):
_ = x.setdefault("d", 32)
def test_set_attributes(self):
x = ModelOutputTest(a=30)
x.a = 10
self.assertEqual(x.a, 10)
self.assertEqual(x["a"], 10)
def test_set_keys(self):
x = ModelOutputTest(a=30)
x["a"] = 10
self.assertEqual(x.a, 10)
self.assertEqual(x["a"], 10)
def test_instantiate_from_dict(self):
x = ModelOutputTest({"a": 30, "b": 10})
self.assertEqual(list(x.keys()), ["a", "b"])
self.assertEqual(x.a, 30)
self.assertEqual(x.b, 10)

View File

@@ -0,0 +1,357 @@
# coding=utf-8
# Copyright 2019 HuggingFace Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import copy
import os
import tempfile
from importlib import import_module
from transformers import is_tf_available
from transformers.models.auto import get_values
from transformers.testing_utils import _tf_gpu_memory_limit, require_tf, slow
from ..test_modeling_tf_common import ids_tensor
if is_tf_available():
import numpy as np
import tensorflow as tf
from transformers import (
TF_MODEL_FOR_CAUSAL_LM_MAPPING,
TF_MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING,
TF_MODEL_FOR_MASKED_LM_MAPPING,
TF_MODEL_FOR_MULTIPLE_CHOICE_MAPPING,
TF_MODEL_FOR_NEXT_SENTENCE_PREDICTION_MAPPING,
TF_MODEL_FOR_PRETRAINING_MAPPING,
TF_MODEL_FOR_QUESTION_ANSWERING_MAPPING,
TF_MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING,
TF_MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING,
TF_MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING,
TFSharedEmbeddings,
)
if _tf_gpu_memory_limit is not None:
gpus = tf.config.list_physical_devices("GPU")
for gpu in gpus:
# Restrict TensorFlow to only allocate x GB of memory on the GPUs
try:
tf.config.set_logical_device_configuration(
gpu, [tf.config.LogicalDeviceConfiguration(memory_limit=_tf_gpu_memory_limit)]
)
logical_gpus = tf.config.list_logical_devices("GPU")
print("Logical GPUs", logical_gpus)
except RuntimeError as e:
# Virtual devices must be set before GPUs have been initialized
print(e)
@require_tf
class TFCoreModelTesterMixin:
model_tester = None
all_model_classes = ()
all_generative_model_classes = ()
test_mismatched_shapes = True
test_resize_embeddings = True
test_head_masking = True
is_encoder_decoder = False
def _prepare_for_class(self, inputs_dict, model_class, return_labels=False) -> dict:
inputs_dict = copy.deepcopy(inputs_dict)
if model_class in get_values(TF_MODEL_FOR_MULTIPLE_CHOICE_MAPPING):
inputs_dict = {
k: tf.tile(tf.expand_dims(v, 1), (1, self.model_tester.num_choices) + (1,) * (v.ndim - 1))
if isinstance(v, tf.Tensor) and v.ndim > 0
else v
for k, v in inputs_dict.items()
}
if return_labels:
if model_class in get_values(TF_MODEL_FOR_MULTIPLE_CHOICE_MAPPING):
inputs_dict["labels"] = tf.ones(self.model_tester.batch_size, dtype=tf.int32)
elif model_class in get_values(TF_MODEL_FOR_QUESTION_ANSWERING_MAPPING):
inputs_dict["start_positions"] = tf.zeros(self.model_tester.batch_size, dtype=tf.int32)
inputs_dict["end_positions"] = tf.zeros(self.model_tester.batch_size, dtype=tf.int32)
elif model_class in [
*get_values(TF_MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING),
*get_values(TF_MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING),
]:
inputs_dict["labels"] = tf.zeros(self.model_tester.batch_size, dtype=tf.int32)
elif model_class in get_values(TF_MODEL_FOR_NEXT_SENTENCE_PREDICTION_MAPPING):
inputs_dict["next_sentence_label"] = tf.zeros(self.model_tester.batch_size, dtype=tf.int32)
elif model_class in [
*get_values(TF_MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING),
*get_values(TF_MODEL_FOR_CAUSAL_LM_MAPPING),
*get_values(TF_MODEL_FOR_MASKED_LM_MAPPING),
*get_values(TF_MODEL_FOR_PRETRAINING_MAPPING),
*get_values(TF_MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING),
]:
inputs_dict["labels"] = tf.zeros(
(self.model_tester.batch_size, self.model_tester.seq_length), dtype=tf.int32
)
return inputs_dict
@slow
def test_graph_mode(self):
config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
for model_class in self.all_model_classes:
inputs = self._prepare_for_class(inputs_dict, model_class)
model = model_class(config)
@tf.function
def run_in_graph_mode():
return model(inputs)
outputs = run_in_graph_mode()
self.assertIsNotNone(outputs)
@slow
def test_xla_mode(self):
config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
for model_class in self.all_model_classes:
inputs = self._prepare_for_class(inputs_dict, model_class)
model = model_class(config)
@tf.function(experimental_compile=True)
def run_in_graph_mode():
return model(inputs)
outputs = run_in_graph_mode()
self.assertIsNotNone(outputs)
@slow
def test_saved_model_creation(self):
config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
config.output_hidden_states = False
config.output_attentions = False
if hasattr(config, "use_cache"):
config.use_cache = False
model_class = self.all_model_classes[0]
class_inputs_dict = self._prepare_for_class(inputs_dict, model_class)
model = model_class(config)
model(class_inputs_dict)
with tempfile.TemporaryDirectory() as tmpdirname:
model.save_pretrained(tmpdirname, saved_model=True)
saved_model_dir = os.path.join(tmpdirname, "saved_model", "1")
self.assertTrue(os.path.exists(saved_model_dir))
@slow
def test_saved_model_creation_extended(self):
config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
config.output_hidden_states = True
config.output_attentions = True
if hasattr(config, "use_cache"):
config.use_cache = True
encoder_seq_length = getattr(self.model_tester, "encoder_seq_length", self.model_tester.seq_length)
encoder_key_length = getattr(self.model_tester, "key_length", encoder_seq_length)
for model_class in self.all_model_classes:
class_inputs_dict = self._prepare_for_class(inputs_dict, model_class)
model = model_class(config)
num_out = len(model(class_inputs_dict))
with tempfile.TemporaryDirectory() as tmpdirname:
model.save_pretrained(tmpdirname, saved_model=True)
saved_model_dir = os.path.join(tmpdirname, "saved_model", "1")
model = tf.keras.models.load_model(saved_model_dir)
outputs = model(class_inputs_dict)
if self.is_encoder_decoder:
output_hidden_states = outputs["encoder_hidden_states"]
output_attentions = outputs["encoder_attentions"]
else:
output_hidden_states = outputs["hidden_states"]
output_attentions = outputs["attentions"]
self.assertEqual(len(outputs), num_out)
expected_num_layers = getattr(
self.model_tester, "expected_num_hidden_layers", self.model_tester.num_hidden_layers + 1
)
self.assertEqual(len(output_hidden_states), expected_num_layers)
self.assertListEqual(
list(output_hidden_states[0].shape[-2:]),
[self.model_tester.seq_length, self.model_tester.hidden_size],
)
self.assertEqual(len(output_attentions), self.model_tester.num_hidden_layers)
self.assertListEqual(
list(output_attentions[0].shape[-3:]),
[self.model_tester.num_attention_heads, encoder_seq_length, encoder_key_length],
)
@slow
def test_mixed_precision(self):
tf.keras.mixed_precision.experimental.set_policy("mixed_float16")
config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
for model_class in self.all_model_classes:
class_inputs_dict = self._prepare_for_class(inputs_dict, model_class)
model = model_class(config)
outputs = model(class_inputs_dict)
self.assertIsNotNone(outputs)
tf.keras.mixed_precision.experimental.set_policy("float32")
@slow
def test_train_pipeline_custom_model(self):
config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
# head_mask and decoder_head_mask has different shapes than other input args
if "head_mask" in inputs_dict:
del inputs_dict["head_mask"]
if "decoder_head_mask" in inputs_dict:
del inputs_dict["decoder_head_mask"]
if "cross_attn_head_mask" in inputs_dict:
del inputs_dict["cross_attn_head_mask"]
tf_main_layer_classes = set(
module_member
for model_class in self.all_model_classes
for module in (import_module(model_class.__module__),)
for module_member_name in dir(module)
if module_member_name.endswith("MainLayer")
for module_member in (getattr(module, module_member_name),)
if isinstance(module_member, type)
and tf.keras.layers.Layer in module_member.__bases__
and getattr(module_member, "_keras_serializable", False)
)
for main_layer_class in tf_main_layer_classes:
# T5MainLayer needs an embed_tokens parameter when called without the inputs_embeds parameter
if "T5" in main_layer_class.__name__:
# Take the same values than in TFT5ModelTester for this shared layer
shared = TFSharedEmbeddings(self.model_tester.vocab_size, self.model_tester.hidden_size, name="shared")
config.use_cache = False
main_layer = main_layer_class(config, embed_tokens=shared)
else:
main_layer = main_layer_class(config)
symbolic_inputs = {
name: tf.keras.Input(tensor.shape[1:], dtype=tensor.dtype) for name, tensor in inputs_dict.items()
}
if hasattr(self.model_tester, "num_labels"):
num_labels = self.model_tester.num_labels
else:
num_labels = 2
X = tf.data.Dataset.from_tensor_slices(
(inputs_dict, np.ones((self.model_tester.batch_size, self.model_tester.seq_length, num_labels, 1)))
).batch(1)
hidden_states = main_layer(symbolic_inputs)[0]
outputs = tf.keras.layers.Dense(num_labels, activation="softmax", name="outputs")(hidden_states)
model = tf.keras.models.Model(inputs=symbolic_inputs, outputs=[outputs])
model.compile(loss="binary_crossentropy", optimizer="adam", metrics=["binary_accuracy"])
model.fit(X, epochs=1)
with tempfile.TemporaryDirectory() as tmpdirname:
filepath = os.path.join(tmpdirname, "keras_model.h5")
model.save(filepath)
if "T5" in main_layer_class.__name__:
model = tf.keras.models.load_model(
filepath,
custom_objects={
main_layer_class.__name__: main_layer_class,
"TFSharedEmbeddings": TFSharedEmbeddings,
},
)
else:
model = tf.keras.models.load_model(
filepath, custom_objects={main_layer_class.__name__: main_layer_class}
)
assert isinstance(model, tf.keras.Model)
model(inputs_dict)
@slow
def test_graph_mode_with_inputs_embeds(self):
config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
for model_class in self.all_model_classes:
model = model_class(config)
inputs = copy.deepcopy(inputs_dict)
if not self.is_encoder_decoder:
input_ids = inputs["input_ids"]
del inputs["input_ids"]
else:
encoder_input_ids = inputs["input_ids"]
decoder_input_ids = inputs.get("decoder_input_ids", encoder_input_ids)
del inputs["input_ids"]
inputs.pop("decoder_input_ids", None)
if not self.is_encoder_decoder:
inputs["inputs_embeds"] = model.get_input_embeddings()(input_ids)
else:
inputs["inputs_embeds"] = model.get_input_embeddings()(encoder_input_ids)
inputs["decoder_inputs_embeds"] = model.get_input_embeddings()(decoder_input_ids)
inputs = self._prepare_for_class(inputs, model_class)
@tf.function
def run_in_graph_mode():
return model(inputs)
outputs = run_in_graph_mode()
self.assertIsNotNone(outputs)
def _generate_random_bad_tokens(self, num_bad_tokens, model):
# special tokens cannot be bad tokens
special_tokens = []
if model.config.bos_token_id is not None:
special_tokens.append(model.config.bos_token_id)
if model.config.pad_token_id is not None:
special_tokens.append(model.config.pad_token_id)
if model.config.eos_token_id is not None:
special_tokens.append(model.config.eos_token_id)
# create random bad tokens that are not special tokens
bad_tokens = []
while len(bad_tokens) < num_bad_tokens:
token = tf.squeeze(ids_tensor((1, 1), self.model_tester.vocab_size), 0).numpy()[0]
if token not in special_tokens:
bad_tokens.append(token)
return bad_tokens
def _check_generated_ids(self, output_ids):
for token_id in output_ids[0].numpy().tolist():
self.assertGreaterEqual(token_id, 0)
self.assertLess(token_id, self.model_tester.vocab_size)
def _check_match_tokens(self, generated_ids, bad_words_ids):
# for all bad word tokens
for bad_word_ids in bad_words_ids:
# for all slices in batch
for generated_ids_slice in generated_ids:
# for all word idx
for i in range(len(bad_word_ids), len(generated_ids_slice)):
# if tokens match
if generated_ids_slice[i - len(bad_word_ids) : i] == bad_word_ids:
return True
return False

View File

@@ -0,0 +1,71 @@
# Copyright 2020 The HuggingFace Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import subprocess
import sys
from transformers.testing_utils import TestCasePlus, require_torch
class OfflineTests(TestCasePlus):
@require_torch
def test_offline_mode(self):
# this test is a bit tricky since TRANSFORMERS_OFFLINE can only be changed before
# `transformers` is loaded, and it's too late for inside pytest - so we are changing it
# while running an external program
# python one-liner segments
# this must be loaded before socket.socket is monkey-patched
load = """
from transformers import BertConfig, BertModel, BertTokenizer
"""
run = """
mname = "lysandre/tiny-bert-random"
BertConfig.from_pretrained(mname)
BertModel.from_pretrained(mname)
BertTokenizer.from_pretrained(mname)
print("success")
"""
mock = """
import socket
def offline_socket(*args, **kwargs): raise socket.error("Offline mode is enabled")
socket.socket = offline_socket
"""
# baseline - just load from_pretrained with normal network
cmd = [sys.executable, "-c", "\n".join([load, run])]
# should succeed
env = self.get_env()
result = subprocess.run(cmd, env=env, check=False, capture_output=True)
self.assertEqual(result.returncode, 0, result.stderr)
self.assertIn("success", result.stdout.decode())
# next emulate no network
cmd = [sys.executable, "-c", "\n".join([load, mock, run])]
# should normally fail as it will fail to lookup the model files w/o the network
env["TRANSFORMERS_OFFLINE"] = "0"
result = subprocess.run(cmd, env=env, check=False, capture_output=True)
self.assertEqual(result.returncode, 1, result.stderr)
# should succeed as TRANSFORMERS_OFFLINE=1 tells it to use local files
env["TRANSFORMERS_OFFLINE"] = "1"
result = subprocess.run(cmd, env=env, check=False, capture_output=True)
self.assertEqual(result.returncode, 0, result.stderr)
self.assertIn("success", result.stdout.decode())

View File

@@ -0,0 +1,120 @@
# coding=utf-8
# Copyright 2019-present, the HuggingFace Inc. team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
#
#
# this test validates that we can stack skip decorators in groups and whether
# they work correctly with other decorators
#
# since the decorators have already built their decision params (like checking
# env[], we can't mock the env and test each of the combinations), so ideally
# the following 4 should be run. But since we have different CI jobs running
# different configs, all combinations should get covered
#
# RUN_SLOW=1 pytest -rA tests/test_skip_decorators.py
# RUN_SLOW=1 CUDA_VISIBLE_DEVICES="" pytest -rA tests/test_skip_decorators.py
# RUN_SLOW=0 pytest -rA tests/test_skip_decorators.py
# RUN_SLOW=0 CUDA_VISIBLE_DEVICES="" pytest -rA tests/test_skip_decorators.py
import os
import unittest
import pytest
from parameterized import parameterized
from transformers.testing_utils import require_torch, require_torch_gpu, slow, torch_device
# skipping in unittest tests
params = [(1,)]
# test that we can stack our skip decorators with 3rd party decorators
def check_slow():
run_slow = bool(os.getenv("RUN_SLOW", 0))
if run_slow:
assert True
else:
assert False, "should have been skipped"
# test that we can stack our skip decorators
def check_slow_torch_cuda():
run_slow = bool(os.getenv("RUN_SLOW", 0))
if run_slow and torch_device == "cuda":
assert True
else:
assert False, "should have been skipped"
@require_torch
class SkipTester(unittest.TestCase):
@slow
@require_torch_gpu
def test_2_skips_slow_first(self):
check_slow_torch_cuda()
@require_torch_gpu
@slow
def test_2_skips_slow_last(self):
check_slow_torch_cuda()
# The combination of any skip decorator, followed by parameterized fails to skip the tests
# 1. @slow manages to correctly skip `test_param_slow_first`
# 2. but then `parameterized` creates new tests, with a unique name for each parameter groups.
# It has no idea that they are to be skipped and so they all run, ignoring @slow
# Therefore skip decorators must come after `parameterized`
#
# @slow
# @parameterized.expand(params)
# def test_param_slow_first(self, param=None):
# check_slow()
# This works as expected:
# 1. `parameterized` creates new tests with unique names
# 2. each of them gets an opportunity to be skipped
@parameterized.expand(params)
@slow
def test_param_slow_last(self, param=None):
check_slow()
# skipping in non-unittest tests
# no problem at all here
@slow
@require_torch_gpu
def test_pytest_2_skips_slow_first():
check_slow_torch_cuda()
@require_torch_gpu
@slow
def test_pytest_2_skips_slow_last():
check_slow_torch_cuda()
@slow
@pytest.mark.parametrize("param", [1])
def test_pytest_param_slow_first(param):
check_slow()
@pytest.mark.parametrize("param", [1])
@slow
def test_pytest_param_slow_last(param):
check_slow()

View File

@@ -0,0 +1,155 @@
# Copyright 2020 The HuggingFace Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import re
import shutil
import sys
import tempfile
import unittest
import black
git_repo_path = os.path.abspath(os.path.dirname(os.path.dirname(os.path.dirname(__file__))))
sys.path.append(os.path.join(git_repo_path, "utils"))
import check_copies # noqa: E402
# This is the reference code that will be used in the tests.
# If BertLMPredictionHead is changed in modeling_bert.py, this code needs to be manually updated.
REFERENCE_CODE = """ def __init__(self, config):
super().__init__()
self.transform = BertPredictionHeadTransform(config)
# The output weights are the same as the input embeddings, but there is
# an output-only bias for each token.
self.decoder = nn.Linear(config.hidden_size, config.vocab_size, bias=False)
self.bias = nn.Parameter(torch.zeros(config.vocab_size))
# Need a link between the two variables so that the bias is correctly resized with `resize_token_embeddings`
self.decoder.bias = self.bias
def forward(self, hidden_states):
hidden_states = self.transform(hidden_states)
hidden_states = self.decoder(hidden_states)
return hidden_states
"""
class CopyCheckTester(unittest.TestCase):
def setUp(self):
self.transformer_dir = tempfile.mkdtemp()
os.makedirs(os.path.join(self.transformer_dir, "models/bert/"))
check_copies.TRANSFORMER_PATH = self.transformer_dir
shutil.copy(
os.path.join(git_repo_path, "src/transformers/models/bert/modeling_bert.py"),
os.path.join(self.transformer_dir, "models/bert/modeling_bert.py"),
)
def tearDown(self):
check_copies.TRANSFORMER_PATH = "src/transformers"
shutil.rmtree(self.transformer_dir)
def check_copy_consistency(self, comment, class_name, class_code, overwrite_result=None):
code = comment + f"\nclass {class_name}(nn.Module):\n" + class_code
if overwrite_result is not None:
expected = comment + f"\nclass {class_name}(nn.Module):\n" + overwrite_result
mode = black.Mode(target_versions={black.TargetVersion.PY35}, line_length=119)
code = black.format_str(code, mode=mode)
fname = os.path.join(self.transformer_dir, "new_code.py")
with open(fname, "w", newline="\n") as f:
f.write(code)
if overwrite_result is None:
self.assertTrue(len(check_copies.is_copy_consistent(fname)) == 0)
else:
check_copies.is_copy_consistent(f.name, overwrite=True)
with open(fname, "r") as f:
self.assertTrue(f.read(), expected)
def test_find_code_in_transformers(self):
code = check_copies.find_code_in_transformers("models.bert.modeling_bert.BertLMPredictionHead")
self.assertEqual(code, REFERENCE_CODE)
def test_is_copy_consistent(self):
# Base copy consistency
self.check_copy_consistency(
"# Copied from transformers.models.bert.modeling_bert.BertLMPredictionHead",
"BertLMPredictionHead",
REFERENCE_CODE + "\n",
)
# With no empty line at the end
self.check_copy_consistency(
"# Copied from transformers.models.bert.modeling_bert.BertLMPredictionHead",
"BertLMPredictionHead",
REFERENCE_CODE,
)
# Copy consistency with rename
self.check_copy_consistency(
"# Copied from transformers.models.bert.modeling_bert.BertLMPredictionHead with Bert->TestModel",
"TestModelLMPredictionHead",
re.sub("Bert", "TestModel", REFERENCE_CODE),
)
# Copy consistency with a really long name
long_class_name = "TestModelWithAReallyLongNameBecauseSomePeopleLikeThatForSomeReason"
self.check_copy_consistency(
f"# Copied from transformers.models.bert.modeling_bert.BertLMPredictionHead with Bert->{long_class_name}",
f"{long_class_name}LMPredictionHead",
re.sub("Bert", long_class_name, REFERENCE_CODE),
)
# Copy consistency with overwrite
self.check_copy_consistency(
"# Copied from transformers.models.bert.modeling_bert.BertLMPredictionHead with Bert->TestModel",
"TestModelLMPredictionHead",
REFERENCE_CODE,
overwrite_result=re.sub("Bert", "TestModel", REFERENCE_CODE),
)
def test_convert_to_localized_md(self):
localized_readme = check_copies.LOCALIZED_READMES["README_zh-hans.md"]
md_list = "1. **[ALBERT](https://huggingface.co/transformers/model_doc/albert.html)** (from Google Research and the Toyota Technological Institute at Chicago) released with the paper [ALBERT: A Lite BERT for Self-supervised Learning of Language Representations](https://arxiv.org/abs/1909.11942), by Zhenzhong Lan, Mingda Chen, Sebastian Goodman, Kevin Gimpel, Piyush Sharma, Radu Soricut.\n1. **[DistilBERT](https://huggingface.co/transformers/model_doc/distilbert.html)** (from HuggingFace), released together with the paper [DistilBERT, a distilled version of BERT: smaller, faster, cheaper and lighter](https://arxiv.org/abs/1910.01108) by Victor Sanh, Lysandre Debut and Thomas Wolf. The same method has been applied to compress GPT2 into [DistilGPT2](https://github.com/huggingface/transformers/tree/master/examples/distillation), RoBERTa into [DistilRoBERTa](https://github.com/huggingface/transformers/tree/master/examples/distillation), Multilingual BERT into [DistilmBERT](https://github.com/huggingface/transformers/tree/master/examples/distillation) and a German version of DistilBERT.\n1. **[ELECTRA](https://huggingface.co/transformers/model_doc/electra.html)** (from Google Research/Stanford University) released with the paper [ELECTRA: Pre-training text encoders as discriminators rather than generators](https://arxiv.org/abs/2003.10555) by Kevin Clark, Minh-Thang Luong, Quoc V. Le, Christopher D. Manning."
localized_md_list = "1. **[ALBERT](https://huggingface.co/transformers/model_doc/albert.html)** (来自 Google Research and the Toyota Technological Institute at Chicago) 伴随论文 [ALBERT: A Lite BERT for Self-supervised Learning of Language Representations](https://arxiv.org/abs/1909.11942), 由 Zhenzhong Lan, Mingda Chen, Sebastian Goodman, Kevin Gimpel, Piyush Sharma, Radu Soricut 发布。\n"
converted_md_list_sample = "1. **[ALBERT](https://huggingface.co/transformers/model_doc/albert.html)** (来自 Google Research and the Toyota Technological Institute at Chicago) 伴随论文 [ALBERT: A Lite BERT for Self-supervised Learning of Language Representations](https://arxiv.org/abs/1909.11942), 由 Zhenzhong Lan, Mingda Chen, Sebastian Goodman, Kevin Gimpel, Piyush Sharma, Radu Soricut 发布。\n1. **[DistilBERT](https://huggingface.co/transformers/model_doc/distilbert.html)** (来自 HuggingFace) 伴随论文 [DistilBERT, a distilled version of BERT: smaller, faster, cheaper and lighter](https://arxiv.org/abs/1910.01108) 由 Victor Sanh, Lysandre Debut and Thomas Wolf 发布。 The same method has been applied to compress GPT2 into [DistilGPT2](https://github.com/huggingface/transformers/tree/master/examples/distillation), RoBERTa into [DistilRoBERTa](https://github.com/huggingface/transformers/tree/master/examples/distillation), Multilingual BERT into [DistilmBERT](https://github.com/huggingface/transformers/tree/master/examples/distillation) and a German version of DistilBERT.\n1. **[ELECTRA](https://huggingface.co/transformers/model_doc/electra.html)** (来自 Google Research/Stanford University) 伴随论文 [ELECTRA: Pre-training text encoders as discriminators rather than generators](https://arxiv.org/abs/2003.10555) 由 Kevin Clark, Minh-Thang Luong, Quoc V. Le, Christopher D. Manning 发布。\n"
num_models_equal, converted_md_list = check_copies.convert_to_localized_md(
md_list, localized_md_list, localized_readme["format_model_list"]
)
self.assertFalse(num_models_equal)
self.assertEqual(converted_md_list, converted_md_list_sample)
num_models_equal, converted_md_list = check_copies.convert_to_localized_md(
md_list, converted_md_list, localized_readme["format_model_list"]
)
# Check whether the number of models is equal to README.md after conversion.
self.assertTrue(num_models_equal)
link_changed_md_list = "1. **[ALBERT](https://huggingface.co/transformers/model_doc/albert.html)** (from Google Research and the Toyota Technological Institute at Chicago) released with the paper [ALBERT: A Lite BERT for Self-supervised Learning of Language Representations](https://arxiv.org/abs/1909.11942), by Zhenzhong Lan, Mingda Chen, Sebastian Goodman, Kevin Gimpel, Piyush Sharma, Radu Soricut."
link_unchanged_md_list = "1. **[ALBERT](https://huggingface.co/transformers/master/model_doc/albert.html)** (来自 Google Research and the Toyota Technological Institute at Chicago) 伴随论文 [ALBERT: A Lite BERT for Self-supervised Learning of Language Representations](https://arxiv.org/abs/1909.11942), 由 Zhenzhong Lan, Mingda Chen, Sebastian Goodman, Kevin Gimpel, Piyush Sharma, Radu Soricut 发布。\n"
converted_md_list_sample = "1. **[ALBERT](https://huggingface.co/transformers/model_doc/albert.html)** (来自 Google Research and the Toyota Technological Institute at Chicago) 伴随论文 [ALBERT: A Lite BERT for Self-supervised Learning of Language Representations](https://arxiv.org/abs/1909.11942), 由 Zhenzhong Lan, Mingda Chen, Sebastian Goodman, Kevin Gimpel, Piyush Sharma, Radu Soricut 发布。\n"
num_models_equal, converted_md_list = check_copies.convert_to_localized_md(
link_changed_md_list, link_unchanged_md_list, localized_readme["format_model_list"]
)
# Check if the model link is synchronized.
self.assertEqual(converted_md_list, converted_md_list_sample)

View File

@@ -0,0 +1,97 @@
# Copyright 2020 The HuggingFace Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import sys
from transformers.testing_utils import TestCasePlus
from transformers.utils.versions import importlib_metadata, require_version, require_version_core
numpy_ver = importlib_metadata.version("numpy")
python_ver = ".".join([str(x) for x in sys.version_info[:3]])
class DependencyVersionCheckTest(TestCasePlus):
def test_core(self):
# lt + different version strings
require_version_core("numpy<1000.4.5")
require_version_core("numpy<1000.4")
require_version_core("numpy<1000")
# le
require_version_core("numpy<=1000.4.5")
require_version_core(f"numpy<={numpy_ver}")
# eq
require_version_core(f"numpy=={numpy_ver}")
# ne
require_version_core("numpy!=1000.4.5")
# ge
require_version_core("numpy>=1.0")
require_version_core("numpy>=1.0.0")
require_version_core(f"numpy>={numpy_ver}")
# gt
require_version_core("numpy>1.0.0")
# mix
require_version_core("numpy>1.0.0,<1000")
# requirement w/o version
require_version_core("numpy")
# unmet requirements due to version conflict
for req in ["numpy==1.0.0", "numpy>=1000.0.0", f"numpy<{numpy_ver}"]:
try:
require_version_core(req)
except ImportError as e:
self.assertIn(f"{req} is required", str(e))
self.assertIn("but found", str(e))
# unmet requirements due to missing module
for req in ["numpipypie>1", "numpipypie2"]:
try:
require_version_core(req)
except importlib_metadata.PackageNotFoundError as e:
self.assertIn(f"The '{req}' distribution was not found and is required by this application", str(e))
self.assertIn("Try: pip install transformers -U", str(e))
# bogus requirements formats:
# 1. whole thing
for req in ["numpy??1.0.0", "numpy1.0.0"]:
try:
require_version_core(req)
except ValueError as e:
self.assertIn("requirement needs to be in the pip package format", str(e))
# 2. only operators
for req in ["numpy=1.0.0", "numpy == 1.00", "numpy<>1.0.0", "numpy><1.00", "numpy>>1.0.0"]:
try:
require_version_core(req)
except ValueError as e:
self.assertIn("need one of ", str(e))
def test_python(self):
# matching requirement
require_version("python>=3.6.0")
# not matching requirements
for req in ["python>9.9.9", "python<3.0.0"]:
try:
require_version_core(req)
except ImportError as e:
self.assertIn(f"{req} is required", str(e))
self.assertIn(f"but found python=={python_ver}", str(e))