[Test refactor 1/5] Per-folder tests reorganization (#15725)
* Per-folder tests reorganization Co-authored-by: sgugger <sylvain.gugger@gmail.com> Co-authored-by: Stas Bekman <stas@stason.org>
This commit is contained in:
0
tests/utils/__init__.py
Normal file
0
tests/utils/__init__.py
Normal file
64
tests/utils/test_activations.py
Normal file
64
tests/utils/test_activations.py
Normal file
@@ -0,0 +1,64 @@
|
||||
# Copyright 2020 The HuggingFace Team. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
import unittest
|
||||
|
||||
from transformers import is_torch_available
|
||||
from transformers.testing_utils import require_torch
|
||||
|
||||
|
||||
if is_torch_available():
|
||||
import torch
|
||||
|
||||
from transformers.activations import gelu_new, gelu_python, get_activation
|
||||
|
||||
|
||||
@require_torch
class TestActivations(unittest.TestCase):
    """Sanity checks for the PyTorch activations returned by `get_activation`."""

    def test_gelu_versions(self):
        """`gelu_python` must match the `"gelu"` activation and must not match `gelu_new`."""
        x = torch.tensor([-100, -1, -0.1, 0, 0.1, 1.0, 100])
        torch_builtin = get_activation("gelu")
        self.assertTrue(torch.allclose(gelu_python(x), torch_builtin(x)))
        self.assertFalse(torch.allclose(gelu_python(x), gelu_new(x)))

    def test_gelu_10(self):
        """`"gelu_10"` must peak at exactly 10.0 and equal `"gelu"` wherever its output is below 10.0."""
        x = torch.tensor([-100, -1, -0.1, 0, 0.1, 1.0, 100])
        torch_builtin = get_activation("gelu")
        gelu10 = get_activation("gelu_10")

        y_gelu = torch_builtin(x)
        y_gelu_10 = gelu10(x)

        # 1 where the `gelu_10` output stayed below 10.0, 0 elsewhere.
        clipped_mask = torch.where(y_gelu_10 < 10.0, 1, 0)

        # assertEqual (instead of assertTrue(a == b)) reports both values when the check fails.
        self.assertEqual(torch.max(y_gelu_10).item(), 10.0)
        self.assertTrue(torch.allclose(y_gelu * clipped_mask, y_gelu_10 * clipped_mask))

    def test_get_activation(self):
        """Every listed name must resolve; an unknown name or `None` must raise `KeyError`."""
        known_names = (
            "swish",
            "silu",
            "relu",
            "tanh",
            "gelu_new",
            "gelu_fast",
            "gelu_python",
            "gelu_10",
            "quick_gelu",
            "mish",
            "linear",
            "sigmoid",
        )
        for name in known_names:
            get_activation(name)

        with self.assertRaises(KeyError):
            get_activation("bogus")
        with self.assertRaises(KeyError):
            get_activation(None)
|
||||
59
tests/utils/test_activations_tf.py
Normal file
59
tests/utils/test_activations_tf.py
Normal file
@@ -0,0 +1,59 @@
|
||||
# Copyright 2020 The HuggingFace Team. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
import unittest
|
||||
|
||||
import numpy as np
|
||||
|
||||
from transformers import is_tf_available
|
||||
from transformers.testing_utils import require_tf
|
||||
|
||||
|
||||
if is_tf_available():
|
||||
import tensorflow as tf
|
||||
|
||||
from transformers.activations_tf import get_tf_activation
|
||||
|
||||
|
||||
@require_tf
class TestTFActivations(unittest.TestCase):
    """Sanity checks for the TensorFlow activations returned by `get_tf_activation`."""

    def test_gelu_10(self):
        """`"gelu_10"` must peak at exactly 10.0 and equal `"gelu"` wherever its output is below 10.0."""
        inputs = tf.constant([-100, -1.0, -0.1, 0, 0.1, 1.0, 100.0])

        plain_fn = get_tf_activation("gelu")
        capped_fn = get_tf_activation("gelu_10")
        plain_out = plain_fn(inputs)
        capped_out = capped_fn(inputs)

        # 1.0 where the capped output stayed below 10.0, 0.0 elsewhere.
        below_cap = tf.where(capped_out < 10.0, 1.0, 0.0)

        peak = tf.math.reduce_max(capped_out).numpy().item()
        self.assertEqual(peak, 10.0)

        outputs_agree = np.allclose(plain_out * below_cap, capped_out * below_cap)
        self.assertTrue(outputs_agree)

    def test_get_activation(self):
        """Every listed name must resolve; an unknown name or `None` must raise `KeyError`."""
        for known_name in (
            "swish",
            "silu",
            "gelu",
            "relu",
            "tanh",
            "gelu_new",
            "gelu_fast",
            "gelu_10",
            "mish",
            "quick_gelu",
            "glu",
        ):
            get_tf_activation(known_name)

        for invalid_name in ("bogus", None):
            with self.assertRaises(KeyError):
                get_tf_activation(invalid_name)
|
||||
1342
tests/utils/test_add_new_model_like.py
Normal file
1342
tests/utils/test_add_new_model_like.py
Normal file
File diff suppressed because it is too large
Load Diff
32
tests/utils/test_cli.py
Normal file
32
tests/utils/test_cli.py
Normal file
@@ -0,0 +1,32 @@
|
||||
# coding=utf-8
|
||||
# Copyright 2019-present, the HuggingFace Inc. team.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
import unittest
|
||||
from unittest.mock import patch
|
||||
|
||||
from transformers.testing_utils import CaptureStd
|
||||
|
||||
|
||||
class CLITest(unittest.TestCase):
    """Smoke test for the `transformers-cli` command-line entry point."""

    @patch("sys.argv", ["fakeprogrampath", "env"])
    def test_cli_env(self):
        """Run the CLI with the `env` sub-command and check the expected headings are printed."""
        # test transformers-cli env
        from transformers.commands import transformers_cli

        with CaptureStd() as captured:
            transformers_cli.main()

        # NOTE(review): the checks run after the `with` block — presumably `CaptureStd` only
        # exposes `.out` once the context has exited; confirm against `testing_utils`.
        expected_snippets = (
            "Python version",
            "Platform",
            "Using distributed or parallel set-up in script?",
        )
        for snippet in expected_snippets:
            self.assertIn(snippet, captured.out)
|
||||
114
tests/utils/test_doc_samples.py
Normal file
114
tests/utils/test_doc_samples.py
Normal file
@@ -0,0 +1,114 @@
|
||||
# coding=utf-8
|
||||
# Copyright 2019-present, the HuggingFace Inc. team.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
import doctest
|
||||
import logging
|
||||
import os
|
||||
import unittest
|
||||
from pathlib import Path
|
||||
from typing import List, Union
|
||||
|
||||
import transformers
|
||||
from transformers.testing_utils import require_tf, require_torch, slow
|
||||
|
||||
|
||||
# `getLogger()` called without a name returns the root logger.
logger = logging.getLogger()
|
||||
|
||||
|
||||
@unittest.skip("Temporarily disable the doc tests.")
@require_torch
@require_tf
@slow
class TestCodeExamples(unittest.TestCase):
    """Runs the doctests found in the library modules and in the documentation sources."""

    def analyze_directory(
        self,
        directory: Path,
        identifier: Union[str, None] = None,
        ignore_files: Union[List[str], None] = None,
        n_identifier: Union[str, List[str], None] = None,
        only_modules: bool = True,
    ):
        """
        Runs through the specific directory, looking for the files identified with `identifier`. Executes
        the doctests in those files

        Args:
            directory (`Path`): Directory containing the files
            identifier (`str`): Will parse files containing this
            ignore_files (`List[str]`): List of files to skip. The list is not modified.
            n_identifier (`str` or `List[str]`): Will not parse files containing this/these identifiers.
            only_modules (`bool`): Whether to only analyze modules
        """
        files = [file for file in os.listdir(directory) if os.path.isfile(os.path.join(directory, file))]

        if identifier is not None:
            files = [file for file in files if identifier in file]

        if n_identifier is not None:
            # A lone string is one identifier; anything else is treated as a collection of them.
            excluded = [n_identifier] if isinstance(n_identifier, str) else n_identifier
            files = [file for file in files if not any(n_ in file for n_ in excluded)]

        # Build a fresh collection rather than appending to the list owned by the caller.
        skipped = set(ignore_files or ()) | {"__init__.py"}
        files = [file for file in files if file not in skipped]

        for file in files:
            print("Testing", file)

            if only_modules:
                module_identifier = file.split(".")[0]
                # Only the attribute lookup is guarded, so an AttributeError raised while the
                # doctests themselves run is no longer reported as "not a module".
                try:
                    module = getattr(transformers, module_identifier)
                except AttributeError:
                    logger.info(f"{module_identifier} is not a module.")
                    continue
                suite = doctest.DocTestSuite(module)
                result = unittest.TextTestRunner().run(suite)
                # Counts are compared by value; `assertIs` on integers relies on int caching.
                self.assertEqual(len(result.failures), 0)
            else:
                result = doctest.testfile(str(".." / directory / file), optionflags=doctest.ELLIPSIS)
                self.assertEqual(result.failed, 0)

    def test_modeling_examples(self):
        """Doctest the `modeling*` files of `src/transformers`, except the two CTRL files."""
        transformers_directory = Path("src/transformers")
        files = "modeling"
        ignore_files = [
            "modeling_ctrl.py",
            "modeling_tf_ctrl.py",
        ]
        self.analyze_directory(transformers_directory, identifier=files, ignore_files=ignore_files)

    def test_tokenization_examples(self):
        """Doctest the `tokenization*` files of `src/transformers`."""
        transformers_directory = Path("src/transformers")
        files = "tokenization"
        self.analyze_directory(transformers_directory, identifier=files)

    def test_configuration_examples(self):
        """Doctest the `configuration*` files of `src/transformers`."""
        transformers_directory = Path("src/transformers")
        files = "configuration"
        self.analyze_directory(transformers_directory, identifier=files)

    def test_remaining_examples(self):
        """Doctest every file of `src/transformers` not covered by the three tests above."""
        transformers_directory = Path("src/transformers")
        n_identifiers = ["configuration", "modeling", "tokenization"]
        self.analyze_directory(transformers_directory, n_identifier=n_identifiers)

    def test_doc_sources(self):
        """Run the files under `docs/source` as doctest files, skipping the favicon."""
        doc_source_directory = Path("docs/source")
        ignore_files = ["favicon.ico"]
        self.analyze_directory(doc_source_directory, ignore_files=ignore_files, only_modules=False)
|
||||
181
tests/utils/test_file_utils.py
Normal file
181
tests/utils/test_file_utils.py
Normal file
@@ -0,0 +1,181 @@
|
||||
# Copyright 2020 The HuggingFace Team. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
import contextlib
|
||||
import importlib
|
||||
import io
|
||||
import json
|
||||
import tempfile
|
||||
import unittest
|
||||
from pathlib import Path
|
||||
|
||||
import transformers
|
||||
|
||||
# Try to import everything from transformers to ensure every object can be loaded.
|
||||
from transformers import * # noqa F406
|
||||
from transformers.file_utils import (
|
||||
CONFIG_NAME,
|
||||
FLAX_WEIGHTS_NAME,
|
||||
TF2_WEIGHTS_NAME,
|
||||
WEIGHTS_NAME,
|
||||
ContextManagers,
|
||||
EntryNotFoundError,
|
||||
RepositoryNotFoundError,
|
||||
RevisionNotFoundError,
|
||||
filename_to_url,
|
||||
get_file_from_repo,
|
||||
get_from_cache,
|
||||
has_file,
|
||||
hf_bucket_url,
|
||||
)
|
||||
from transformers.testing_utils import DUMMY_UNKNOWN_IDENTIFIER
|
||||
|
||||
|
||||
# An actual model hosted on huggingface.co
MODEL_ID = DUMMY_UNKNOWN_IDENTIFIER

# Default branch name
REVISION_ID_DEFAULT = "main"
# One particular commit (not the top of `main`)
REVISION_ID_ONE_SPECIFIC_COMMIT = "f2c752cfc5c0ab6f4bdec59acea69eefbee381c2"
# This commit does not exist, so we should 404.
REVISION_ID_INVALID = "aaaaaaa"

# Sha-1 of config.json on the top of `main`, for checking purposes
PINNED_SHA1 = "d9e9f15bc825e4b2c9249e9578f884bbcb5e3684"
# Sha-256 of pytorch_model.bin on the top of `main`, for checking purposes
PINNED_SHA256 = "4b243c475af8d0a7754e87d7d096c92e5199ec2fe168a2ee7998e3b8e9bcb1d3"
|
||||
|
||||
|
||||
# Dummy contexts to test `ContextManagers`
|
||||
@contextlib.contextmanager
def context_en():
    """Dummy context: prints an English greeting on entry and a farewell once the body completes."""
    greeting, farewell = "Welcome!", "Bye!"
    print(greeting)
    yield
    print(farewell)
|
||||
|
||||
|
||||
@contextlib.contextmanager
def context_fr():
    """Dummy context: prints a French greeting on entry and a farewell once the body completes."""
    greeting, farewell = "Bonjour!", "Au revoir!"
    print(greeting)
    yield
    print(farewell)
|
||||
|
||||
|
||||
class TestImportMechanisms(unittest.TestCase):
    """Checks that the `transformers` package carries the metadata needed for dynamic imports."""

    def test_module_spec_available(self):
        """The package must expose a module spec and be discoverable through `importlib`."""
        # `import importlib` alone does not guarantee that the `util` submodule is loaded.
        import importlib.util

        # If the spec is missing, importlib would not be able to import the module dynamically.
        # unittest assertions are used because bare `assert` statements are removed under `python -O`.
        self.assertIsNotNone(transformers.__spec__)
        self.assertIsNotNone(importlib.util.find_spec("transformers"))
|
||||
|
||||
|
||||
class GetFromCacheTests(unittest.TestCase):
    """Tests for the download/cache helpers imported from `transformers.file_utils`."""

    def test_bogus_url(self):
        """An unreachable host must surface as a `ValueError` mentioning a connection error."""
        # This lets us simulate no connection
        # as the error raised is the same
        # `ConnectionError`
        url = "https://bogus"
        with self.assertRaisesRegex(ValueError, "Connection error"):
            _ = get_from_cache(url)

    def test_file_not_found(self):
        """A missing file in an existing repo must raise `EntryNotFoundError`."""
        # Valid revision (None) but missing file.
        url = hf_bucket_url(MODEL_ID, filename="missing.bin")
        with self.assertRaisesRegex(EntryNotFoundError, "404 Client Error"):
            _ = get_from_cache(url)

    def test_model_not_found(self):
        """An unknown repo id must raise `RepositoryNotFoundError`."""
        # Invalid model file.
        url = hf_bucket_url("bert-base", filename="pytorch_model.bin")
        with self.assertRaisesRegex(RepositoryNotFoundError, "404 Client Error"):
            _ = get_from_cache(url)

    def test_revision_not_found(self):
        """An unknown revision of an existing file must raise `RevisionNotFoundError`."""
        # Valid file but missing revision
        url = hf_bucket_url(MODEL_ID, filename=CONFIG_NAME, revision=REVISION_ID_INVALID)
        with self.assertRaisesRegex(RevisionNotFoundError, "404 Client Error"):
            _ = get_from_cache(url)

    def test_standard_object(self):
        """The cached config on the default revision must record the url and the pinned SHA-1 etag."""
        url = hf_bucket_url(MODEL_ID, filename=CONFIG_NAME, revision=REVISION_ID_DEFAULT)
        filepath = get_from_cache(url, force_download=True)
        metadata = filename_to_url(filepath)
        self.assertEqual(metadata, (url, f'"{PINNED_SHA1}"'))

    def test_standard_object_rev(self):
        """The same file at another commit must carry a different etag."""
        # Same object, but different revision
        url = hf_bucket_url(MODEL_ID, filename=CONFIG_NAME, revision=REVISION_ID_ONE_SPECIFIC_COMMIT)
        filepath = get_from_cache(url, force_download=True)
        metadata = filename_to_url(filepath)
        # Caution: check that the etag is *not* equal to the one from `test_standard_object`
        self.assertNotEqual(metadata[1], f'"{PINNED_SHA1}"')

    def test_lfs_object(self):
        """The cached weights file must record the url and the pinned SHA-256 etag."""
        url = hf_bucket_url(MODEL_ID, filename=WEIGHTS_NAME, revision=REVISION_ID_DEFAULT)
        filepath = get_from_cache(url, force_download=True)
        metadata = filename_to_url(filepath)
        self.assertEqual(metadata, (url, f'"{PINNED_SHA256}"'))

    def test_has_file(self):
        """`has_file` must report only the PyTorch weights for the PyTorch-only test repo."""
        self.assertTrue(has_file("hf-internal-testing/tiny-bert-pt-only", WEIGHTS_NAME))
        self.assertFalse(has_file("hf-internal-testing/tiny-bert-pt-only", TF2_WEIGHTS_NAME))
        self.assertFalse(has_file("hf-internal-testing/tiny-bert-pt-only", FLAX_WEIGHTS_NAME))

    def test_get_file_from_repo_distant(self):
        """Covers the missing-file, missing-repo, missing-revision and success paths on a remote repo."""
        # `get_file_from_repo` returns None if the file does not exist
        self.assertIsNone(get_file_from_repo("bert-base-cased", "ahah.txt"))

        # The function raises if the repository does not exist.
        with self.assertRaisesRegex(EnvironmentError, "is not a valid model identifier"):
            get_file_from_repo("bert-base-case", "config.json")

        # The function raises if the revision does not exist.
        with self.assertRaisesRegex(EnvironmentError, "is not a valid git identifier"):
            get_file_from_repo("bert-base-cased", "config.json", revision="ahaha")

        resolved_file = get_file_from_repo("bert-base-cased", "config.json")
        # The name is the cached name which is not very easy to test, so instead we load the content.
        # The file is opened in a `with` block so the handle is closed deterministically.
        with open(resolved_file, "r", encoding="utf-8") as config_file:
            config = json.load(config_file)
        self.assertEqual(config["hidden_size"], 768)

    def test_get_file_from_repo_local(self):
        """A local directory must resolve existing files to their path and missing ones to `None`."""
        with tempfile.TemporaryDirectory() as tmp_dir:
            filename = Path(tmp_dir) / "a.txt"
            filename.touch()
            self.assertEqual(get_file_from_repo(tmp_dir, "a.txt"), str(filename))

            self.assertIsNone(get_file_from_repo(tmp_dir, "b.txt"))
|
||||
|
||||
|
||||
class ContextManagerTests(unittest.TestCase):
    """Checks what gets printed when a block runs inside `ContextManagers` with 0, 1 or 2 contexts."""

    @unittest.mock.patch("sys.stdout", new_callable=io.StringIO)
    def test_no_context(self, mock_stdout):
        """With an empty list only the body's own output is printed."""
        with ContextManagers([]):
            print("Transformers are awesome!")

        # The print statement adds a new line at the end of the output
        printed = mock_stdout.getvalue()
        self.assertEqual(printed, "Transformers are awesome!\n")

    @unittest.mock.patch("sys.stdout", new_callable=io.StringIO)
    def test_one_context(self, mock_stdout):
        """With one context the body's output sits between that context's two messages."""
        with ContextManagers([context_en()]):
            print("Transformers are awesome!")

        # The output should be wrapped with an English welcome and goodbye
        printed = mock_stdout.getvalue()
        self.assertEqual(printed, "Welcome!\nTransformers are awesome!\nBye!\n")

    @unittest.mock.patch("sys.stdout", new_callable=io.StringIO)
    def test_two_context(self, mock_stdout):
        """With two contexts the first listed one prints outermost, the second innermost."""
        with ContextManagers([context_fr(), context_en()]):
            print("Transformers are awesome!")

        # The output should be wrapped with an English and French welcome and goodbye
        printed = mock_stdout.getvalue()
        self.assertEqual(printed, "Bonjour!\nWelcome!\nTransformers are awesome!\nBye!\nAu revoir!\n")
|
||||
230
tests/utils/test_hf_argparser.py
Normal file
230
tests/utils/test_hf_argparser.py
Normal file
@@ -0,0 +1,230 @@
|
||||
# Copyright 2020 The HuggingFace Team. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
import argparse
|
||||
import unittest
|
||||
from argparse import Namespace
|
||||
from dataclasses import dataclass, field
|
||||
from enum import Enum
|
||||
from typing import List, Optional
|
||||
|
||||
from transformers import HfArgumentParser, TrainingArguments
|
||||
from transformers.hf_argparser import string_to_bool
|
||||
|
||||
|
||||
def list_field(default=None, metadata=None):
    """
    Build a dataclass field whose default may be a mutable value such as a list.

    Args:
        default: Value each new instance starts with. It is shallow-copied per instance, so mutating one
            instance's value affects neither other instances nor the object passed in here.
        metadata: Optional mapping forwarded unchanged to `dataclasses.field`.

    Returns:
        The field object produced by `dataclasses.field`.
    """
    import copy

    # Returning `default` itself would hand the very same object to every instance.
    return field(default_factory=lambda: copy.copy(default), metadata=metadata)
|
||||
|
||||
|
||||
# Dataclass with four fields of basic types, all declared without a default value.
@dataclass
class BasicExample:
    foo: int
    bar: float
    baz: str
    flag: bool
|
||||
|
||||
|
||||
# Dataclass whose fields all have defaults; `baz` additionally carries a "help" metadata entry.
@dataclass
class WithDefaultExample:
    foo: int = 42
    baz: str = field(default="toto", metadata={"help": "help message"})
|
||||
|
||||
|
||||
# Dataclass covering the three boolean default cases: False, True and an optional bool left as None.
@dataclass
class WithDefaultBoolExample:
    foo: bool = False
    baz: bool = True
    opt: Optional[bool] = None
|
||||
|
||||
|
||||
# Two-member enum; each member's value is the same string as its name.
class BasicEnum(Enum):
    titi = "titi"
    toto = "toto"
|
||||
|
||||
|
||||
# Dataclass whose `foo` field is annotated as `BasicEnum` but defaults to the plain string "toto".
@dataclass
class EnumExample:
    foo: BasicEnum = "toto"

    def __post_init__(self):
        # Convert whatever was stored (e.g. the string default) into a `BasicEnum` member.
        self.foo = BasicEnum(self.foo)
|
||||
|
||||
|
||||
# Dataclass made only of `Optional[...]` fields: three scalars defaulting to None and two lists
# whose defaults are built through `list_field`.
@dataclass
class OptionalExample:
    foo: Optional[int] = None
    bar: Optional[float] = field(default=None, metadata={"help": "help message"})
    baz: Optional[str] = None
    ces: Optional[List[str]] = list_field(default=[])
    des: Optional[List[int]] = list_field(default=[])
|
||||
|
||||
|
||||
# Dataclass made only of list fields (int, str and float elements) with empty and non-empty defaults,
# all built through `list_field`.
@dataclass
class ListExample:
    foo_int: List[int] = list_field(default=[])
    bar_int: List[int] = list_field(default=[1, 2, 3])
    foo_str: List[str] = list_field(default=["Hallo", "Bonjour", "Hello"])
    foo_float: List[float] = list_field(default=[0.1, 0.2, 0.3])
|
||||
|
||||
|
||||
# Dataclass whose fields are declared with a bare `field()`, i.e. without any default.
@dataclass
class RequiredExample:
    required_list: List[int] = field()
    required_str: str = field()
    required_enum: BasicEnum = field()

    def __post_init__(self):
        # Convert whatever was stored into a `BasicEnum` member.
        self.required_enum = BasicEnum(self.required_enum)
|
||||
|
||||
|
||||
class HfArgumentParserTest(unittest.TestCase):
    """Compares the parsers `HfArgumentParser` builds from dataclasses with hand-written argparse parsers."""

    def argparsersEqual(self, a: argparse.ArgumentParser, b: argparse.ArgumentParser) -> None:
        """
        Small helper to check pseudo-equality of parsed arguments on `ArgumentParser` instances.

        Asserts that both parsers hold the same number of actions and that each pair of actions has
        identical attributes, ignoring `container`. Nothing is returned: a mismatch fails the test
        through `assertEqual`.
        """
        self.assertEqual(len(a._actions), len(b._actions))
        for x, y in zip(a._actions, b._actions):
            xx = {k: v for k, v in vars(x).items() if k != "container"}
            yy = {k: v for k, v in vars(y).items() if k != "container"}
            self.assertEqual(xx, yy)

    def test_basic(self):
        """Fields without defaults become required options; the bool field becomes an optional flag."""
        parser = HfArgumentParser(BasicExample)

        expected = argparse.ArgumentParser()
        expected.add_argument("--foo", type=int, required=True)
        expected.add_argument("--bar", type=float, required=True)
        expected.add_argument("--baz", type=str, required=True)
        expected.add_argument("--flag", type=string_to_bool, default=False, const=True, nargs="?")
        self.argparsersEqual(parser, expected)

        args = ["--foo", "1", "--baz", "quux", "--bar", "0.5"]
        (example,) = parser.parse_args_into_dataclasses(args, look_for_args_file=False)
        self.assertFalse(example.flag)

    def test_with_default(self):
        """Dataclass defaults and the "help" metadata are carried over to the argparse options."""
        parser = HfArgumentParser(WithDefaultExample)

        expected = argparse.ArgumentParser()
        expected.add_argument("--foo", default=42, type=int)
        expected.add_argument("--baz", default="toto", type=str, help="help message")
        self.argparsersEqual(parser, expected)

    def test_with_default_bool(self):
        """Boolean fields accept bare flags, explicit True/False values and a `--no_*` counterpart."""
        parser = HfArgumentParser(WithDefaultBoolExample)

        expected = argparse.ArgumentParser()
        expected.add_argument("--foo", type=string_to_bool, default=False, const=True, nargs="?")
        expected.add_argument("--baz", type=string_to_bool, default=True, const=True, nargs="?")
        # A boolean no_* argument always has to come after its "default: True" regular counter-part
        # and its default must be set to False
        expected.add_argument("--no_baz", action="store_false", default=False, dest="baz")
        expected.add_argument("--opt", type=string_to_bool, default=None)
        self.argparsersEqual(parser, expected)

        args = parser.parse_args([])
        self.assertEqual(args, Namespace(foo=False, baz=True, opt=None))

        args = parser.parse_args(["--foo", "--no_baz"])
        self.assertEqual(args, Namespace(foo=True, baz=False, opt=None))

        args = parser.parse_args(["--foo", "--baz"])
        self.assertEqual(args, Namespace(foo=True, baz=True, opt=None))

        args = parser.parse_args(["--foo", "True", "--baz", "True", "--opt", "True"])
        self.assertEqual(args, Namespace(foo=True, baz=True, opt=True))

        args = parser.parse_args(["--foo", "False", "--baz", "False", "--opt", "False"])
        self.assertEqual(args, Namespace(foo=False, baz=False, opt=False))

    def test_with_enum(self):
        """Enum fields are parsed as strings limited to the enum values, then converted by the dataclass."""
        parser = HfArgumentParser(EnumExample)

        expected = argparse.ArgumentParser()
        expected.add_argument("--foo", default="toto", choices=["titi", "toto"], type=str)
        self.argparsersEqual(parser, expected)

        args = parser.parse_args([])
        self.assertEqual(args.foo, "toto")
        enum_ex = parser.parse_args_into_dataclasses([])[0]
        self.assertEqual(enum_ex.foo, BasicEnum.toto)

        args = parser.parse_args(["--foo", "titi"])
        self.assertEqual(args.foo, "titi")
        enum_ex = parser.parse_args_into_dataclasses(["--foo", "titi"])[0]
        self.assertEqual(enum_ex.foo, BasicEnum.titi)

    def test_with_list(self):
        """List fields become `nargs="+"` options typed by their element type."""
        parser = HfArgumentParser(ListExample)

        expected = argparse.ArgumentParser()
        expected.add_argument("--foo_int", nargs="+", default=[], type=int)
        expected.add_argument("--bar_int", nargs="+", default=[1, 2, 3], type=int)
        expected.add_argument("--foo_str", nargs="+", default=["Hallo", "Bonjour", "Hello"], type=str)
        expected.add_argument("--foo_float", nargs="+", default=[0.1, 0.2, 0.3], type=float)

        self.argparsersEqual(parser, expected)

        args = parser.parse_args([])
        self.assertEqual(
            args,
            Namespace(foo_int=[], bar_int=[1, 2, 3], foo_str=["Hallo", "Bonjour", "Hello"], foo_float=[0.1, 0.2, 0.3]),
        )

        args = parser.parse_args("--foo_int 1 --bar_int 2 3 --foo_str a b c --foo_float 0.1 0.7".split())
        self.assertEqual(args, Namespace(foo_int=[1], bar_int=[2, 3], foo_str=["a", "b", "c"], foo_float=[0.1, 0.7]))

    def test_with_optional(self):
        """`Optional[...]` fields are typed by their inner type and keep their declared defaults."""
        parser = HfArgumentParser(OptionalExample)

        expected = argparse.ArgumentParser()
        expected.add_argument("--foo", default=None, type=int)
        expected.add_argument("--bar", default=None, type=float, help="help message")
        expected.add_argument("--baz", default=None, type=str)
        expected.add_argument("--ces", nargs="+", default=[], type=str)
        expected.add_argument("--des", nargs="+", default=[], type=int)
        self.argparsersEqual(parser, expected)

        args = parser.parse_args([])
        self.assertEqual(args, Namespace(foo=None, bar=None, baz=None, ces=[], des=[]))

        args = parser.parse_args("--foo 12 --bar 3.14 --baz 42 --ces a b c --des 1 2 3".split())
        self.assertEqual(args, Namespace(foo=12, bar=3.14, baz="42", ces=["a", "b", "c"], des=[1, 2, 3]))

    def test_with_required(self):
        """Fields declared with a bare `field()` become required options."""
        parser = HfArgumentParser(RequiredExample)

        expected = argparse.ArgumentParser()
        expected.add_argument("--required_list", nargs="+", type=int, required=True)
        expected.add_argument("--required_str", type=str, required=True)
        expected.add_argument("--required_enum", type=str, choices=["titi", "toto"], required=True)
        self.argparsersEqual(parser, expected)

    def test_parse_dict(self):
        """`parse_dict` must build the same dataclass instance as calling the constructor directly."""
        parser = HfArgumentParser(BasicExample)

        args_dict = {
            "foo": 12,
            "bar": 3.14,
            "baz": "42",
            "flag": True,
        }

        parsed_args = parser.parse_dict(args_dict)[0]
        args = BasicExample(**args_dict)
        self.assertEqual(parsed_args, args)

    def test_integration_training_args(self):
        """Building a parser for `TrainingArguments` must succeed."""
        parser = HfArgumentParser(TrainingArguments)
        self.assertIsNotNone(parser)
|
||||
487
tests/utils/test_image_utils.py
Normal file
487
tests/utils/test_image_utils.py
Normal file
@@ -0,0 +1,487 @@
|
||||
# coding=utf-8
|
||||
# Copyright 2021 HuggingFace Inc.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
import unittest
|
||||
|
||||
import datasets
|
||||
import numpy as np
|
||||
|
||||
from transformers import is_torch_available, is_vision_available
|
||||
from transformers.testing_utils import require_torch, require_vision
|
||||
|
||||
|
||||
if is_torch_available():
|
||||
import torch
|
||||
|
||||
if is_vision_available():
|
||||
import PIL.Image
|
||||
|
||||
from transformers import ImageFeatureExtractionMixin
|
||||
from transformers.image_utils import load_image
|
||||
|
||||
|
||||
def get_random_image(height, width):
    """Return a PIL image built from a random `uint8` array of shape `(height, width, 3)`."""
    shape = (height, width, 3)
    pixels = np.random.randint(0, 256, shape, dtype=np.uint8)
    return PIL.Image.fromarray(pixels)
|
||||
|
||||
|
||||
@require_vision
|
||||
class ImageFeatureExtractionTester(unittest.TestCase):
|
||||
def test_conversion_image_to_array(self):
    """Check dtype, shape and values of `to_numpy_array` on a PIL image for every rescale/channel combination."""
    feature_extractor = ImageFeatureExtractionMixin()
    image = get_random_image(16, 32)

    # NOTE: dtypes are checked with assertEqual. `assertTrue(dtype, expected)` treats `expected`
    # as the failure message and passes for any dtype.

    # Conversion with defaults (rescale + channel first)
    array1 = feature_extractor.to_numpy_array(image)
    self.assertEqual(array1.dtype, np.float32)
    self.assertEqual(array1.shape, (3, 16, 32))

    # Conversion with rescale and not channel first
    array2 = feature_extractor.to_numpy_array(image, channel_first=False)
    self.assertEqual(array2.dtype, np.float32)
    self.assertEqual(array2.shape, (16, 32, 3))
    self.assertTrue(np.array_equal(array1, array2.transpose(2, 0, 1)))

    # Conversion with no rescale and channel first
    array3 = feature_extractor.to_numpy_array(image, rescale=False)
    self.assertEqual(array3.dtype, np.uint8)
    self.assertEqual(array3.shape, (3, 16, 32))
    self.assertTrue(np.array_equal(array1, array3.astype(np.float32) / 255.0))

    # Conversion with no rescale and not channel first
    array4 = feature_extractor.to_numpy_array(image, rescale=False, channel_first=False)
    self.assertEqual(array4.dtype, np.uint8)
    self.assertEqual(array4.shape, (16, 32, 3))
    self.assertTrue(np.array_equal(array2, array4.astype(np.float32) / 255.0))
|
||||
|
||||
def test_conversion_array_to_array(self):
|
||||
feature_extractor = ImageFeatureExtractionMixin()
|
||||
array = np.random.randint(0, 256, (16, 32, 3), dtype=np.uint8)
|
||||
|
||||
# By default, rescale (for an array of ints) and channel permute
|
||||
array1 = feature_extractor.to_numpy_array(array)
|
||||
self.assertTrue(array1.dtype, np.float32)
|
||||
self.assertEqual(array1.shape, (3, 16, 32))
|
||||
self.assertTrue(np.array_equal(array1, array.transpose(2, 0, 1).astype(np.float32) / 255.0))
|
||||
|
||||
# Same with no permute
|
||||
array2 = feature_extractor.to_numpy_array(array, channel_first=False)
|
||||
self.assertTrue(array2.dtype, np.float32)
|
||||
self.assertEqual(array2.shape, (16, 32, 3))
|
||||
self.assertTrue(np.array_equal(array2, array.astype(np.float32) / 255.0))
|
||||
|
||||
# Force rescale to False
|
||||
array3 = feature_extractor.to_numpy_array(array, rescale=False)
|
||||
self.assertTrue(array3.dtype, np.uint8)
|
||||
self.assertEqual(array3.shape, (3, 16, 32))
|
||||
self.assertTrue(np.array_equal(array3, array.transpose(2, 0, 1)))
|
||||
|
||||
# Force rescale to False and no channel permute
|
||||
array4 = feature_extractor.to_numpy_array(array, rescale=False, channel_first=False)
|
||||
self.assertTrue(array4.dtype, np.uint8)
|
||||
self.assertEqual(array4.shape, (16, 32, 3))
|
||||
self.assertTrue(np.array_equal(array4, array))
|
||||
|
||||
# Now test the default rescale for a float array (defaults to False)
|
||||
array5 = feature_extractor.to_numpy_array(array2)
|
||||
self.assertTrue(array5.dtype, np.float32)
|
||||
self.assertEqual(array5.shape, (3, 16, 32))
|
||||
self.assertTrue(np.array_equal(array5, array1))
|
||||
|
||||
@require_torch
|
||||
def test_conversion_torch_to_array(self):
|
||||
feature_extractor = ImageFeatureExtractionMixin()
|
||||
tensor = torch.randint(0, 256, (16, 32, 3))
|
||||
array = tensor.numpy()
|
||||
|
||||
# By default, rescale (for a tensor of ints) and channel permute
|
||||
array1 = feature_extractor.to_numpy_array(array)
|
||||
self.assertTrue(array1.dtype, np.float32)
|
||||
self.assertEqual(array1.shape, (3, 16, 32))
|
||||
self.assertTrue(np.array_equal(array1, array.transpose(2, 0, 1).astype(np.float32) / 255.0))
|
||||
|
||||
# Same with no permute
|
||||
array2 = feature_extractor.to_numpy_array(array, channel_first=False)
|
||||
self.assertTrue(array2.dtype, np.float32)
|
||||
self.assertEqual(array2.shape, (16, 32, 3))
|
||||
self.assertTrue(np.array_equal(array2, array.astype(np.float32) / 255.0))
|
||||
|
||||
# Force rescale to False
|
||||
array3 = feature_extractor.to_numpy_array(array, rescale=False)
|
||||
self.assertTrue(array3.dtype, np.uint8)
|
||||
self.assertEqual(array3.shape, (3, 16, 32))
|
||||
self.assertTrue(np.array_equal(array3, array.transpose(2, 0, 1)))
|
||||
|
||||
# Force rescale to False and no channel permute
|
||||
array4 = feature_extractor.to_numpy_array(array, rescale=False, channel_first=False)
|
||||
self.assertTrue(array4.dtype, np.uint8)
|
||||
self.assertEqual(array4.shape, (16, 32, 3))
|
||||
self.assertTrue(np.array_equal(array4, array))
|
||||
|
||||
# Now test the default rescale for a float tensor (defaults to False)
|
||||
array5 = feature_extractor.to_numpy_array(array2)
|
||||
self.assertTrue(array5.dtype, np.float32)
|
||||
self.assertEqual(array5.shape, (3, 16, 32))
|
||||
self.assertTrue(np.array_equal(array5, array1))
|
||||
|
||||
def test_conversion_image_to_image(self):
|
||||
feature_extractor = ImageFeatureExtractionMixin()
|
||||
image = get_random_image(16, 32)
|
||||
|
||||
# On an image, `to_pil_image1` is a noop.
|
||||
image1 = feature_extractor.to_pil_image(image)
|
||||
self.assertTrue(isinstance(image, PIL.Image.Image))
|
||||
self.assertTrue(np.array_equal(np.array(image), np.array(image1)))
|
||||
|
||||
def test_conversion_array_to_image(self):
|
||||
feature_extractor = ImageFeatureExtractionMixin()
|
||||
array = np.random.randint(0, 256, (16, 32, 3), dtype=np.uint8)
|
||||
|
||||
# By default, no rescale (for an array of ints)
|
||||
image1 = feature_extractor.to_pil_image(array)
|
||||
self.assertTrue(isinstance(image1, PIL.Image.Image))
|
||||
self.assertTrue(np.array_equal(np.array(image1), array))
|
||||
|
||||
# If the array is channel-first, proper reordering of the channels is done.
|
||||
image2 = feature_extractor.to_pil_image(array.transpose(2, 0, 1))
|
||||
self.assertTrue(isinstance(image2, PIL.Image.Image))
|
||||
self.assertTrue(np.array_equal(np.array(image2), array))
|
||||
|
||||
# If the array has floating type, it's rescaled by default.
|
||||
image3 = feature_extractor.to_pil_image(array.astype(np.float32) / 255.0)
|
||||
self.assertTrue(isinstance(image3, PIL.Image.Image))
|
||||
self.assertTrue(np.array_equal(np.array(image3), array))
|
||||
|
||||
# You can override the default to rescale.
|
||||
image4 = feature_extractor.to_pil_image(array.astype(np.float32), rescale=False)
|
||||
self.assertTrue(isinstance(image4, PIL.Image.Image))
|
||||
self.assertTrue(np.array_equal(np.array(image4), array))
|
||||
|
||||
# And with floats + channel first.
|
||||
image5 = feature_extractor.to_pil_image(array.transpose(2, 0, 1).astype(np.float32) / 255.0)
|
||||
self.assertTrue(isinstance(image5, PIL.Image.Image))
|
||||
self.assertTrue(np.array_equal(np.array(image5), array))
|
||||
|
||||
@require_torch
|
||||
def test_conversion_tensor_to_image(self):
|
||||
feature_extractor = ImageFeatureExtractionMixin()
|
||||
tensor = torch.randint(0, 256, (16, 32, 3))
|
||||
array = tensor.numpy()
|
||||
|
||||
# By default, no rescale (for a tensor of ints)
|
||||
image1 = feature_extractor.to_pil_image(tensor)
|
||||
self.assertTrue(isinstance(image1, PIL.Image.Image))
|
||||
self.assertTrue(np.array_equal(np.array(image1), array))
|
||||
|
||||
# If the tensor is channel-first, proper reordering of the channels is done.
|
||||
image2 = feature_extractor.to_pil_image(tensor.permute(2, 0, 1))
|
||||
self.assertTrue(isinstance(image2, PIL.Image.Image))
|
||||
self.assertTrue(np.array_equal(np.array(image2), array))
|
||||
|
||||
# If the tensor has floating type, it's rescaled by default.
|
||||
image3 = feature_extractor.to_pil_image(tensor.float() / 255.0)
|
||||
self.assertTrue(isinstance(image3, PIL.Image.Image))
|
||||
self.assertTrue(np.array_equal(np.array(image3), array))
|
||||
|
||||
# You can override the default to rescale.
|
||||
image4 = feature_extractor.to_pil_image(tensor.float(), rescale=False)
|
||||
self.assertTrue(isinstance(image4, PIL.Image.Image))
|
||||
self.assertTrue(np.array_equal(np.array(image4), array))
|
||||
|
||||
# And with floats + channel first.
|
||||
image5 = feature_extractor.to_pil_image(tensor.permute(2, 0, 1).float() / 255.0)
|
||||
self.assertTrue(isinstance(image5, PIL.Image.Image))
|
||||
self.assertTrue(np.array_equal(np.array(image5), array))
|
||||
|
||||
def test_resize_image_and_array(self):
|
||||
feature_extractor = ImageFeatureExtractionMixin()
|
||||
image = get_random_image(16, 32)
|
||||
array = np.array(image)
|
||||
|
||||
# Size can be an int or a tuple of ints.
|
||||
resized_image = feature_extractor.resize(image, 8)
|
||||
self.assertTrue(isinstance(resized_image, PIL.Image.Image))
|
||||
self.assertEqual(resized_image.size, (8, 8))
|
||||
|
||||
resized_image1 = feature_extractor.resize(image, (8, 16))
|
||||
self.assertTrue(isinstance(resized_image1, PIL.Image.Image))
|
||||
self.assertEqual(resized_image1.size, (8, 16))
|
||||
|
||||
# Passing an array converts it to a PIL Image.
|
||||
resized_image2 = feature_extractor.resize(array, 8)
|
||||
self.assertTrue(isinstance(resized_image2, PIL.Image.Image))
|
||||
self.assertEqual(resized_image2.size, (8, 8))
|
||||
self.assertTrue(np.array_equal(np.array(resized_image), np.array(resized_image2)))
|
||||
|
||||
resized_image3 = feature_extractor.resize(image, (8, 16))
|
||||
self.assertTrue(isinstance(resized_image3, PIL.Image.Image))
|
||||
self.assertEqual(resized_image3.size, (8, 16))
|
||||
self.assertTrue(np.array_equal(np.array(resized_image1), np.array(resized_image3)))
|
||||
|
||||
def test_resize_image_and_array_non_default_to_square(self):
|
||||
feature_extractor = ImageFeatureExtractionMixin()
|
||||
|
||||
heights_widths = [
|
||||
# height, width
|
||||
# square image
|
||||
(28, 28),
|
||||
(27, 27),
|
||||
# rectangular image: h < w
|
||||
(28, 34),
|
||||
(29, 35),
|
||||
# rectangular image: h > w
|
||||
(34, 28),
|
||||
(35, 29),
|
||||
]
|
||||
|
||||
# single integer or single integer in tuple/list
|
||||
sizes = [22, 27, 28, 36, [22], (27,)]
|
||||
|
||||
for (height, width), size in zip(heights_widths, sizes):
|
||||
for max_size in (None, 37, 1000):
|
||||
image = get_random_image(height, width)
|
||||
array = np.array(image)
|
||||
|
||||
size = size[0] if isinstance(size, (list, tuple)) else size
|
||||
# Size can be an int or a tuple of ints.
|
||||
# If size is an int, smaller edge of the image will be matched to this number.
|
||||
# i.e, if height > width, then image will be rescaled to (size * height / width, size).
|
||||
if height < width:
|
||||
exp_w, exp_h = (int(size * width / height), size)
|
||||
if max_size is not None and max_size < exp_w:
|
||||
exp_w, exp_h = max_size, int(max_size * exp_h / exp_w)
|
||||
elif width < height:
|
||||
exp_w, exp_h = (size, int(size * height / width))
|
||||
if max_size is not None and max_size < exp_h:
|
||||
exp_w, exp_h = int(max_size * exp_w / exp_h), max_size
|
||||
else:
|
||||
exp_w, exp_h = (size, size)
|
||||
if max_size is not None and max_size < size:
|
||||
exp_w, exp_h = max_size, max_size
|
||||
|
||||
resized_image = feature_extractor.resize(image, size=size, default_to_square=False, max_size=max_size)
|
||||
self.assertTrue(isinstance(resized_image, PIL.Image.Image))
|
||||
self.assertEqual(resized_image.size, (exp_w, exp_h))
|
||||
|
||||
# Passing an array converts it to a PIL Image.
|
||||
resized_image2 = feature_extractor.resize(array, size=size, default_to_square=False, max_size=max_size)
|
||||
self.assertTrue(isinstance(resized_image2, PIL.Image.Image))
|
||||
self.assertEqual(resized_image2.size, (exp_w, exp_h))
|
||||
self.assertTrue(np.array_equal(np.array(resized_image), np.array(resized_image2)))
|
||||
|
||||
@require_torch
|
||||
def test_resize_tensor(self):
|
||||
feature_extractor = ImageFeatureExtractionMixin()
|
||||
tensor = torch.randint(0, 256, (16, 32, 3))
|
||||
array = tensor.numpy()
|
||||
|
||||
# Size can be an int or a tuple of ints.
|
||||
resized_image = feature_extractor.resize(tensor, 8)
|
||||
self.assertTrue(isinstance(resized_image, PIL.Image.Image))
|
||||
self.assertEqual(resized_image.size, (8, 8))
|
||||
|
||||
resized_image1 = feature_extractor.resize(tensor, (8, 16))
|
||||
self.assertTrue(isinstance(resized_image1, PIL.Image.Image))
|
||||
self.assertEqual(resized_image1.size, (8, 16))
|
||||
|
||||
# Check we get the same results as with NumPy arrays.
|
||||
resized_image2 = feature_extractor.resize(array, 8)
|
||||
self.assertTrue(np.array_equal(np.array(resized_image), np.array(resized_image2)))
|
||||
|
||||
resized_image3 = feature_extractor.resize(array, (8, 16))
|
||||
self.assertTrue(np.array_equal(np.array(resized_image1), np.array(resized_image3)))
|
||||
|
||||
def test_normalize_image(self):
|
||||
feature_extractor = ImageFeatureExtractionMixin()
|
||||
image = get_random_image(16, 32)
|
||||
array = np.array(image)
|
||||
mean = [0.1, 0.5, 0.9]
|
||||
std = [0.2, 0.4, 0.6]
|
||||
|
||||
# PIL Image are converted to NumPy arrays for the normalization
|
||||
normalized_image = feature_extractor.normalize(image, mean, std)
|
||||
self.assertTrue(isinstance(normalized_image, np.ndarray))
|
||||
self.assertEqual(normalized_image.shape, (3, 16, 32))
|
||||
|
||||
# During the conversion rescale and channel first will be applied.
|
||||
expected = array.transpose(2, 0, 1).astype(np.float32) / 255.0
|
||||
np_mean = np.array(mean).astype(np.float32)[:, None, None]
|
||||
np_std = np.array(std).astype(np.float32)[:, None, None]
|
||||
expected = (expected - np_mean) / np_std
|
||||
self.assertTrue(np.array_equal(normalized_image, expected))
|
||||
|
||||
def test_normalize_array(self):
|
||||
feature_extractor = ImageFeatureExtractionMixin()
|
||||
array = np.random.random((16, 32, 3))
|
||||
mean = [0.1, 0.5, 0.9]
|
||||
std = [0.2, 0.4, 0.6]
|
||||
|
||||
# mean and std can be passed as lists or NumPy arrays.
|
||||
expected = (array - np.array(mean)) / np.array(std)
|
||||
normalized_array = feature_extractor.normalize(array, mean, std)
|
||||
self.assertTrue(np.array_equal(normalized_array, expected))
|
||||
|
||||
normalized_array = feature_extractor.normalize(array, np.array(mean), np.array(std))
|
||||
self.assertTrue(np.array_equal(normalized_array, expected))
|
||||
|
||||
# Normalize will detect automatically if channel first or channel last is used.
|
||||
array = np.random.random((3, 16, 32))
|
||||
expected = (array - np.array(mean)[:, None, None]) / np.array(std)[:, None, None]
|
||||
normalized_array = feature_extractor.normalize(array, mean, std)
|
||||
self.assertTrue(np.array_equal(normalized_array, expected))
|
||||
|
||||
normalized_array = feature_extractor.normalize(array, np.array(mean), np.array(std))
|
||||
self.assertTrue(np.array_equal(normalized_array, expected))
|
||||
|
||||
@require_torch
|
||||
def test_normalize_tensor(self):
|
||||
feature_extractor = ImageFeatureExtractionMixin()
|
||||
tensor = torch.rand(16, 32, 3)
|
||||
mean = [0.1, 0.5, 0.9]
|
||||
std = [0.2, 0.4, 0.6]
|
||||
|
||||
# mean and std can be passed as lists or tensors.
|
||||
expected = (tensor - torch.tensor(mean)) / torch.tensor(std)
|
||||
normalized_tensor = feature_extractor.normalize(tensor, mean, std)
|
||||
self.assertTrue(torch.equal(normalized_tensor, expected))
|
||||
|
||||
normalized_tensor = feature_extractor.normalize(tensor, torch.tensor(mean), torch.tensor(std))
|
||||
self.assertTrue(torch.equal(normalized_tensor, expected))
|
||||
|
||||
# Normalize will detect automatically if channel first or channel last is used.
|
||||
tensor = torch.rand(3, 16, 32)
|
||||
expected = (tensor - torch.tensor(mean)[:, None, None]) / torch.tensor(std)[:, None, None]
|
||||
normalized_tensor = feature_extractor.normalize(tensor, mean, std)
|
||||
self.assertTrue(torch.equal(normalized_tensor, expected))
|
||||
|
||||
normalized_tensor = feature_extractor.normalize(tensor, torch.tensor(mean), torch.tensor(std))
|
||||
self.assertTrue(torch.equal(normalized_tensor, expected))
|
||||
|
||||
def test_center_crop_image(self):
|
||||
feature_extractor = ImageFeatureExtractionMixin()
|
||||
image = get_random_image(16, 32)
|
||||
|
||||
# Test various crop sizes: bigger on all dimensions, on one of the dimensions only and on both dimensions.
|
||||
crop_sizes = [8, (8, 64), 20, (32, 64)]
|
||||
for size in crop_sizes:
|
||||
cropped_image = feature_extractor.center_crop(image, size)
|
||||
self.assertTrue(isinstance(cropped_image, PIL.Image.Image))
|
||||
|
||||
# PIL Image.size is transposed compared to NumPy or PyTorch (width first instead of height first).
|
||||
expected_size = (size, size) if isinstance(size, int) else (size[1], size[0])
|
||||
self.assertEqual(cropped_image.size, expected_size)
|
||||
|
||||
def test_center_crop_array(self):
|
||||
feature_extractor = ImageFeatureExtractionMixin()
|
||||
image = get_random_image(16, 32)
|
||||
array = feature_extractor.to_numpy_array(image)
|
||||
|
||||
# Test various crop sizes: bigger on all dimensions, on one of the dimensions only and on both dimensions.
|
||||
crop_sizes = [8, (8, 64), 20, (32, 64)]
|
||||
for size in crop_sizes:
|
||||
cropped_array = feature_extractor.center_crop(array, size)
|
||||
self.assertTrue(isinstance(cropped_array, np.ndarray))
|
||||
|
||||
expected_size = (size, size) if isinstance(size, int) else size
|
||||
self.assertEqual(cropped_array.shape[-2:], expected_size)
|
||||
|
||||
# Check result is consistent with PIL.Image.crop
|
||||
cropped_image = feature_extractor.center_crop(image, size)
|
||||
self.assertTrue(np.array_equal(cropped_array, feature_extractor.to_numpy_array(cropped_image)))
|
||||
|
||||
@require_torch
|
||||
def test_center_crop_tensor(self):
|
||||
feature_extractor = ImageFeatureExtractionMixin()
|
||||
image = get_random_image(16, 32)
|
||||
array = feature_extractor.to_numpy_array(image)
|
||||
tensor = torch.tensor(array)
|
||||
|
||||
# Test various crop sizes: bigger on all dimensions, on one of the dimensions only and on both dimensions.
|
||||
crop_sizes = [8, (8, 64), 20, (32, 64)]
|
||||
for size in crop_sizes:
|
||||
cropped_tensor = feature_extractor.center_crop(tensor, size)
|
||||
self.assertTrue(isinstance(cropped_tensor, torch.Tensor))
|
||||
|
||||
expected_size = (size, size) if isinstance(size, int) else size
|
||||
self.assertEqual(cropped_tensor.shape[-2:], expected_size)
|
||||
|
||||
# Check result is consistent with PIL.Image.crop
|
||||
cropped_image = feature_extractor.center_crop(image, size)
|
||||
self.assertTrue(torch.equal(cropped_tensor, torch.tensor(feature_extractor.to_numpy_array(cropped_image))))
|
||||
|
||||
|
||||
@require_vision
class LoadImageTester(unittest.TestCase):
    """Checks the shape of the arrays obtained from images opened with `load_image`."""

    def test_load_img_local(self):
        # The fixture is addressed through a relative path.
        loaded = load_image("./tests/fixtures/tests_samples/COCO/000000039769.png")
        self.assertEqual(np.array(loaded).shape, (480, 640, 3))

    def test_load_img_rgba(self):
        fixtures = datasets.load_dataset("hf-internal-testing/fixtures_image_utils", "image", split="test")

        # img with mode RGBA
        loaded = load_image(fixtures[0]["file"])
        self.assertEqual(np.array(loaded).shape, (512, 512, 3))

    def test_load_img_la(self):
        fixtures = datasets.load_dataset("hf-internal-testing/fixtures_image_utils", "image", split="test")

        # img with mode LA
        loaded = load_image(fixtures[1]["file"])
        self.assertEqual(np.array(loaded).shape, (512, 768, 3))

    def test_load_img_l(self):
        fixtures = datasets.load_dataset("hf-internal-testing/fixtures_image_utils", "image", split="test")

        # img with mode L
        loaded = load_image(fixtures[2]["file"])
        self.assertEqual(np.array(loaded).shape, (381, 225, 3))

    def test_load_img_exif_transpose(self):
        fixtures = datasets.load_dataset("hf-internal-testing/fixtures_image_utils", "image", split="test")
        path = fixtures[3]["file"]

        # Opened directly with PIL, the array has 333 rows and 500 columns ...
        raw_shape = np.array(PIL.Image.open(path)).shape
        self.assertEqual(raw_shape, (333, 500, 3))

        # ... whereas `load_image` yields the two spatial axes swapped.
        loaded_shape = np.array(load_image(path)).shape
        self.assertEqual(loaded_shape, (500, 333, 3))
|
||||
139
tests/utils/test_logging.py
Normal file
139
tests/utils/test_logging.py
Normal file
@@ -0,0 +1,139 @@
|
||||
# Copyright 2020 The HuggingFace Team. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
import os
|
||||
import unittest
|
||||
from unittest.mock import patch
|
||||
|
||||
import transformers.models.bart.tokenization_bart
|
||||
from transformers import AutoConfig, logging
|
||||
from transformers.testing_utils import CaptureLogger, mockenv, mockenv_context
|
||||
from transformers.utils.logging import disable_progress_bar, enable_progress_bar
|
||||
|
||||
|
||||
class HfArgumentParserTest(unittest.TestCase):
    # NOTE: despite its name, every test in this class exercises the `transformers` logging utilities.

    def test_set_level(self):
        logger = logging.get_logger()

        # the current default level is logging.WARNING
        level_origin = logging.get_verbosity()
        # restore to the original level, even when one of the assertions below fails
        self.addCleanup(logging.set_verbosity, level_origin)

        logging.set_verbosity_error()
        self.assertEqual(logger.getEffectiveLevel(), logging.get_verbosity())

        logging.set_verbosity_warning()
        self.assertEqual(logger.getEffectiveLevel(), logging.get_verbosity())

        logging.set_verbosity_info()
        self.assertEqual(logger.getEffectiveLevel(), logging.get_verbosity())

        logging.set_verbosity_debug()
        self.assertEqual(logger.getEffectiveLevel(), logging.get_verbosity())

    def test_integration(self):
        level_origin = logging.get_verbosity()
        # restore to the original level, even when one of the assertions below fails
        self.addCleanup(logging.set_verbosity, level_origin)

        logger = logging.get_logger("transformers.models.bart.tokenization_bart")
        msg = "Testing 1, 2, 3"

        # should be able to log warnings (if default settings weren't overridden by `pytest --log-level-all`)
        if level_origin <= logging.WARNING:
            with CaptureLogger(logger) as cl:
                logger.warning(msg)
            self.assertEqual(cl.out, msg + "\n")

        # this is setting the level for all of `transformers.*` loggers
        logging.set_verbosity_error()

        # should not be able to log warnings
        with CaptureLogger(logger) as cl:
            logger.warning(msg)
        self.assertEqual(cl.out, "")

        # should be able to log warnings again
        logging.set_verbosity_warning()
        with CaptureLogger(logger) as cl:
            logger.warning(msg)
        self.assertEqual(cl.out, msg + "\n")

    @mockenv(TRANSFORMERS_VERBOSITY="error")
    def test_env_override(self):
        # reset for the env var to take effect, next time some logger call is made
        transformers.utils.logging._reset_library_root_logger()
        try:
            # this action activates the env var
            _ = logging.get_logger("transformers.models.bart.tokenization_bart")

            env_level_str = os.getenv("TRANSFORMERS_VERBOSITY", None)
            env_level = logging.log_levels[env_level_str]

            current_level = logging.get_verbosity()
            self.assertEqual(
                env_level,
                current_level,
                f"TRANSFORMERS_VERBOSITY={env_level_str}/{env_level}, but internal verbosity is {current_level}",
            )
        finally:
            # restore to the original level; done in `finally` so that a failed assertion does not leave the
            # env-driven verbosity in place for the tests that run afterwards
            os.environ["TRANSFORMERS_VERBOSITY"] = ""
            transformers.utils.logging._reset_library_root_logger()

    @mockenv(TRANSFORMERS_VERBOSITY="super-error")
    def test_env_invalid_override(self):
        # reset for the env var to take effect, next time some logger call is made
        transformers.utils.logging._reset_library_root_logger()
        # The message is captured on the standard library's root logger.
        logger = logging.logging.getLogger()
        with CaptureLogger(logger) as cl:
            # this action activates the env var
            logging.get_logger("transformers.models.bart.tokenization_bart")
        self.assertIn("Unknown option TRANSFORMERS_VERBOSITY=super-error", cl.out)

        # no need to restore as nothing was changed

    def test_advisory_warnings(self):
        # testing `logger.warning_advice()`

        logger = logging.get_logger("transformers.models.bart.tokenization_bart")
        msg = "Testing 1, 2, 3"

        with mockenv_context(TRANSFORMERS_NO_ADVISORY_WARNINGS="1"):
            # nothing should be logged as env var disables this method
            with CaptureLogger(logger) as cl:
                logger.warning_advice(msg)
            self.assertEqual(cl.out, "")

        with mockenv_context(TRANSFORMERS_NO_ADVISORY_WARNINGS=""):
            # should log normally as TRANSFORMERS_NO_ADVISORY_WARNINGS is unset
            with CaptureLogger(logger) as cl:
                logger.warning_advice(msg)
            self.assertEqual(cl.out, msg + "\n")
|
||||
|
||||
|
||||
def test_set_progress_bar_enabled():
    # Checks that `disable_progress_bar()` / `enable_progress_bar()` control whether `tqdm` is invoked while a
    # config is fetched with `force_download=True`.
    TINY_MODEL = "hf-internal-testing/tiny-random-distilbert"
    with patch("tqdm.auto.tqdm") as mock_tqdm:
        disable_progress_bar()
        try:
            _ = AutoConfig.from_pretrained(TINY_MODEL, force_download=True)
            mock_tqdm.assert_not_called()
        finally:
            # Re-enable in `finally`: if the download or the assertion above fails, progress bars must not be
            # left switched off for whatever runs after this test.
            enable_progress_bar()

        mock_tqdm.reset_mock()

        _ = AutoConfig.from_pretrained(TINY_MODEL, force_download=True)
        mock_tqdm.assert_called()
|
||||
81
tests/utils/test_model_card.py
Normal file
81
tests/utils/test_model_card.py
Normal file
@@ -0,0 +1,81 @@
|
||||
# coding=utf-8
|
||||
# Copyright 2019 HuggingFace Inc.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
|
||||
import json
|
||||
import os
|
||||
import tempfile
|
||||
import unittest
|
||||
|
||||
from transformers.modelcard import ModelCard
|
||||
|
||||
|
||||
class ModelCardTester(unittest.TestCase):
    """Round-trip tests for `ModelCard`: dict, JSON string, JSON file and `save_pretrained` directory."""

    def setUp(self):
        # Sample card content shared by all the tests below.
        model_details = {
            "Organization": "testing",
            "Model date": "today",
            "Model version": "v2.1, Developed by Test Corp in 2019.",
            "Architecture": "Convolutional Neural Network.",
        }
        evaluation_data = {
            "Datasets": {"BLEU": "My-great-dataset-v1", "ROUGE-1": "My-short-dataset-v2.1"},
            "Preprocessing": "See details on https://arxiv.org/pdf/1810.03993.pdf",
        }
        training_data = {
            "Dataset": "English Wikipedia dump dated 2018-12-01",
            "Preprocessing": "Using SentencePiece vocabulary of size 52k tokens. See details on https://arxiv.org/pdf/1810.03993.pdf",
        }
        self.inputs_dict = {
            "model_details": model_details,
            "metrics": "BLEU and ROUGE-1",
            "evaluation_data": evaluation_data,
            "training_data": training_data,
            "quantitative_analyses": {"BLEU": 55.1, "ROUGE-1": 76},
        }

    def test_model_card_common_properties(self):
        # Every section is expected as an attribute, including those absent from `inputs_dict`.
        card = ModelCard.from_dict(self.inputs_dict)
        expected_sections = (
            "model_details",
            "intended_use",
            "factors",
            "metrics",
            "evaluation_data",
            "training_data",
            "quantitative_analyses",
            "ethical_considerations",
            "caveats_and_recommendations",
        )
        for section in expected_sections:
            self.assertTrue(hasattr(card, section))

    def test_model_card_to_json_string(self):
        # Each input entry must come back unchanged from the JSON serialization.
        serialized = ModelCard.from_dict(self.inputs_dict).to_json_string()
        decoded = json.loads(serialized)
        for name in self.inputs_dict:
            self.assertEqual(decoded[name], self.inputs_dict[name])

    def test_model_card_to_json_file(self):
        # Write the card to a JSON file, read it back and compare the dict forms.
        original = ModelCard.from_dict(self.inputs_dict)

        with tempfile.TemporaryDirectory() as workdir:
            json_path = os.path.join(workdir, "modelcard.json")
            original.to_json_file(json_path)
            reloaded = ModelCard.from_json_file(json_path)

        self.assertEqual(reloaded.to_dict(), original.to_dict())

    def test_model_card_from_and_save_pretrained(self):
        # Same round trip, through `save_pretrained` / `from_pretrained` on a directory.
        original = ModelCard.from_dict(self.inputs_dict)

        with tempfile.TemporaryDirectory() as workdir:
            original.save_pretrained(workdir)
            reloaded = ModelCard.from_pretrained(workdir)

        self.assertEqual(reloaded.to_dict(), original.to_dict())
|
||||
109
tests/utils/test_model_output.py
Normal file
109
tests/utils/test_model_output.py
Normal file
@@ -0,0 +1,109 @@
|
||||
# coding=utf-8
|
||||
# Copyright 2020 The Hugging Face Team.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
import unittest
|
||||
from dataclasses import dataclass
|
||||
from typing import Optional
|
||||
|
||||
from transformers.file_utils import ModelOutput
|
||||
|
||||
|
||||
@dataclass
class ModelOutputTest(ModelOutput):
    # Small `ModelOutput` subclass used as the fixture for the tests in this file.
    # `a` has no default and must be supplied; `b` and `c` default to None.
    a: float
    b: Optional[float] = None
    c: Optional[float] = None
|
||||
|
||||
|
||||
class ModelOutputTester(unittest.TestCase):
|
||||
def test_get_attributes(self):
|
||||
x = ModelOutputTest(a=30)
|
||||
self.assertEqual(x.a, 30)
|
||||
self.assertIsNone(x.b)
|
||||
self.assertIsNone(x.c)
|
||||
with self.assertRaises(AttributeError):
|
||||
_ = x.d
|
||||
|
||||
def test_index_with_ints_and_slices(self):
|
||||
x = ModelOutputTest(a=30, b=10)
|
||||
self.assertEqual(x[0], 30)
|
||||
self.assertEqual(x[1], 10)
|
||||
self.assertEqual(x[:2], (30, 10))
|
||||
self.assertEqual(x[:], (30, 10))
|
||||
|
||||
x = ModelOutputTest(a=30, c=10)
|
||||
self.assertEqual(x[0], 30)
|
||||
self.assertEqual(x[1], 10)
|
||||
self.assertEqual(x[:2], (30, 10))
|
||||
self.assertEqual(x[:], (30, 10))
|
||||
|
||||
def test_index_with_strings(self):
|
||||
x = ModelOutputTest(a=30, b=10)
|
||||
self.assertEqual(x["a"], 30)
|
||||
self.assertEqual(x["b"], 10)
|
||||
with self.assertRaises(KeyError):
|
||||
_ = x["c"]
|
||||
|
||||
x = ModelOutputTest(a=30, c=10)
|
||||
self.assertEqual(x["a"], 30)
|
||||
self.assertEqual(x["c"], 10)
|
||||
with self.assertRaises(KeyError):
|
||||
_ = x["b"]
|
||||
|
||||
def test_dict_like_properties(self):
|
||||
x = ModelOutputTest(a=30)
|
||||
self.assertEqual(list(x.keys()), ["a"])
|
||||
self.assertEqual(list(x.values()), [30])
|
||||
self.assertEqual(list(x.items()), [("a", 30)])
|
||||
self.assertEqual(list(x), ["a"])
|
||||
|
||||
x = ModelOutputTest(a=30, b=10)
|
||||
self.assertEqual(list(x.keys()), ["a", "b"])
|
||||
self.assertEqual(list(x.values()), [30, 10])
|
||||
self.assertEqual(list(x.items()), [("a", 30), ("b", 10)])
|
||||
self.assertEqual(list(x), ["a", "b"])
|
||||
|
||||
x = ModelOutputTest(a=30, c=10)
|
||||
self.assertEqual(list(x.keys()), ["a", "c"])
|
||||
self.assertEqual(list(x.values()), [30, 10])
|
||||
self.assertEqual(list(x.items()), [("a", 30), ("c", 10)])
|
||||
self.assertEqual(list(x), ["a", "c"])
|
||||
|
||||
with self.assertRaises(Exception):
|
||||
x = x.update({"d": 20})
|
||||
with self.assertRaises(Exception):
|
||||
del x["a"]
|
||||
with self.assertRaises(Exception):
|
||||
_ = x.pop("a")
|
||||
with self.assertRaises(Exception):
|
||||
_ = x.setdefault("d", 32)
|
||||
|
||||
def test_set_attributes(self):
|
||||
x = ModelOutputTest(a=30)
|
||||
x.a = 10
|
||||
self.assertEqual(x.a, 10)
|
||||
self.assertEqual(x["a"], 10)
|
||||
|
||||
def test_set_keys(self):
    """A value assigned through item syntax must be visible via attribute and key access alike."""
    replacement = 10
    output = ModelOutputTest(a=30)
    output["a"] = replacement
    self.assertEqual(output.a, replacement)
    self.assertEqual(output["a"], replacement)
|
||||
|
||||
def test_instantiate_from_dict(self):
    """Build a ``ModelOutputTest`` from a single positional dict and check keys and attribute values."""
    initial_fields = {"a": 30, "b": 10}
    output = ModelOutputTest(initial_fields)
    self.assertEqual(list(output.keys()), ["a", "b"])
    self.assertEqual(output.a, 30)
    self.assertEqual(output.b, 10)
|
||||
357
tests/utils/test_modeling_tf_core.py
Normal file
357
tests/utils/test_modeling_tf_core.py
Normal file
@@ -0,0 +1,357 @@
|
||||
# coding=utf-8
|
||||
# Copyright 2019 HuggingFace Inc.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
|
||||
import copy
|
||||
import os
|
||||
import tempfile
|
||||
from importlib import import_module
|
||||
|
||||
from transformers import is_tf_available
|
||||
from transformers.models.auto import get_values
|
||||
from transformers.testing_utils import _tf_gpu_memory_limit, require_tf, slow
|
||||
|
||||
from ..test_modeling_tf_common import ids_tensor
|
||||
|
||||
|
||||
# Everything that needs TensorFlow is imported (and the optional GPU memory cap is applied) only when TF is
# installed, so that this module can still be imported without it.
if is_tf_available():
    import numpy as np
    import tensorflow as tf

    from transformers import (
        TF_MODEL_FOR_CAUSAL_LM_MAPPING,
        TF_MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING,
        TF_MODEL_FOR_MASKED_LM_MAPPING,
        TF_MODEL_FOR_MULTIPLE_CHOICE_MAPPING,
        TF_MODEL_FOR_NEXT_SENTENCE_PREDICTION_MAPPING,
        TF_MODEL_FOR_PRETRAINING_MAPPING,
        TF_MODEL_FOR_QUESTION_ANSWERING_MAPPING,
        TF_MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING,
        TF_MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING,
        TF_MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING,
        TFSharedEmbeddings,
    )

    # `_tf_gpu_memory_limit` comes from transformers.testing_utils; `None` means "no cap requested".
    if _tf_gpu_memory_limit is not None:
        gpus = tf.config.list_physical_devices("GPU")
        for gpu in gpus:
            # Restrict TensorFlow to only allocate x GB of memory on the GPUs
            # NOTE(review): the unit expected by `memory_limit` is not visible here - confirm against the TF docs.
            try:
                tf.config.set_logical_device_configuration(
                    gpu, [tf.config.LogicalDeviceConfiguration(memory_limit=_tf_gpu_memory_limit)]
                )
                logical_gpus = tf.config.list_logical_devices("GPU")
                print("Logical GPUs", logical_gpus)
            except RuntimeError as e:
                # Virtual devices must be set before GPUs have been initialized
                # Best effort: the error is only reported, the tests then run with the default configuration.
                print(e)
|
||||
|
||||
|
||||
@require_tf
class TFCoreModelTesterMixin:
    """Mixin with TensorFlow model tests: graph/XLA execution, SavedModel export, mixed precision, Keras training.

    The tests read `model_tester`, `all_model_classes` and `is_encoder_decoder`; the values below are defaults.
    """

    # Object exposing `prepare_config_and_inputs_for_common()` plus size attributes such as `batch_size`,
    # `seq_length`, `vocab_size`, `hidden_size` and `num_hidden_layers`, all of which the tests read.
    model_tester = None
    # Model classes every test iterates over.
    all_model_classes = ()
    # NOTE(review): the next four attributes are not read by the tests of this mixin - presumably they are
    # consumed by other test mixins or kept for parity with them; confirm before removing.
    all_generative_model_classes = ()
    test_mismatched_shapes = True
    test_resize_embeddings = True
    test_head_masking = True
    # When true the tests use the `encoder_*` output keys and also feed `decoder_inputs_embeds`.
    is_encoder_decoder = False
|
||||
|
||||
def _prepare_for_class(self, inputs_dict, model_class, return_labels=False) -> dict:
    """Return a deep copy of `inputs_dict` adapted to `model_class`.

    For multiple-choice classes every `tf.Tensor` of rank >= 1 gets a new axis 1 that is tiled
    `model_tester.num_choices` times. When `return_labels` is true, int32 label tensors (ones for multiple
    choice, zeros otherwise) matching the task family of `model_class` are added. The caller's dictionary is
    never modified.
    """
    tester = self.model_tester
    prepared = copy.deepcopy(inputs_dict)

    def registered_in(*mappings):
        # True when `model_class` appears in at least one of the given auto-model mappings.
        return any(model_class in get_values(mapping) for mapping in mappings)

    def add_choice_axis(value):
        # Only real tensors with at least one dimension are expanded; everything else passes through.
        if not (isinstance(value, tf.Tensor) and value.ndim > 0):
            return value
        multiples = (1, tester.num_choices) + (1,) * (value.ndim - 1)
        return tf.tile(tf.expand_dims(value, 1), multiples)

    multiple_choice = registered_in(TF_MODEL_FOR_MULTIPLE_CHOICE_MAPPING)
    if multiple_choice:
        prepared = {name: add_choice_axis(value) for name, value in prepared.items()}

    if not return_labels:
        return prepared

    if multiple_choice:
        prepared["labels"] = tf.ones(tester.batch_size, dtype=tf.int32)
    elif registered_in(TF_MODEL_FOR_QUESTION_ANSWERING_MAPPING):
        prepared["start_positions"] = tf.zeros(tester.batch_size, dtype=tf.int32)
        prepared["end_positions"] = tf.zeros(tester.batch_size, dtype=tf.int32)
    elif registered_in(
        TF_MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING,
        TF_MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING,
    ):
        prepared["labels"] = tf.zeros(tester.batch_size, dtype=tf.int32)
    elif registered_in(TF_MODEL_FOR_NEXT_SENTENCE_PREDICTION_MAPPING):
        prepared["next_sentence_label"] = tf.zeros(tester.batch_size, dtype=tf.int32)
    elif registered_in(
        TF_MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING,
        TF_MODEL_FOR_CAUSAL_LM_MAPPING,
        TF_MODEL_FOR_MASKED_LM_MAPPING,
        TF_MODEL_FOR_PRETRAINING_MAPPING,
        TF_MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING,
    ):
        # Token-level tasks get one label per position.
        prepared["labels"] = tf.zeros((tester.batch_size, tester.seq_length), dtype=tf.int32)
    return prepared
|
||||
|
||||
@slow
def test_graph_mode(self):
    """Run every model class once inside a `tf.function` and check that it returns something."""
    config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
    for model_class in self.all_model_classes:
        inputs = self._prepare_for_class(inputs_dict, model_class)
        model = model_class(config)

        def forward_pass():
            return model(inputs)

        # Wrapping explicitly is equivalent to decorating `forward_pass` with `@tf.function`.
        graph_forward = tf.function(forward_pass)
        self.assertIsNotNone(graph_forward())
|
||||
|
||||
@slow
def test_xla_mode(self):
    """Run every model class once inside an XLA-compiled `tf.function` and check that it returns something."""
    config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
    for model_class in self.all_model_classes:
        inputs = self._prepare_for_class(inputs_dict, model_class)
        model = model_class(config)

        def forward_pass():
            return model(inputs)

        # NOTE(review): newer TensorFlow releases spell this flag `jit_compile` - confirm against the minimum
        # TensorFlow version the test-suite supports before renaming it.
        compiled_forward = tf.function(forward_pass, experimental_compile=True)
        self.assertIsNotNone(compiled_forward())
|
||||
|
||||
@slow
def test_saved_model_creation(self):
    """Export the first model class with `saved_model=True` and check that the SavedModel directory exists."""
    config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()

    # Switch every optional output off before the model is built.
    config.output_hidden_states = False
    config.output_attentions = False
    if hasattr(config, "use_cache"):
        config.use_cache = False

    first_class = self.all_model_classes[0]
    prepared_inputs = self._prepare_for_class(inputs_dict, first_class)
    model = first_class(config)

    # One forward pass before exporting (presumably so that the weights are built - confirm).
    model(prepared_inputs)

    with tempfile.TemporaryDirectory() as export_root:
        model.save_pretrained(export_root, saved_model=True)
        self.assertTrue(os.path.exists(os.path.join(export_root, "saved_model", "1")))
|
||||
|
||||
@slow
def test_saved_model_creation_extended(self):
    """Export each model class as a SavedModel with all optional outputs on, reload it and check the outputs.

    The reloaded model must return as many outputs as the original one, and its hidden states and
    attentions must have the expected count and trailing dimensions.
    """
    tester = self.model_tester
    config, inputs_dict = tester.prepare_config_and_inputs_for_common()

    config.output_hidden_states = True
    config.output_attentions = True
    if hasattr(config, "use_cache"):
        config.use_cache = True

    encoder_seq_length = getattr(tester, "encoder_seq_length", tester.seq_length)
    encoder_key_length = getattr(tester, "key_length", encoder_seq_length)

    # Output keys differ between encoder-decoder and single-stack models.
    if self.is_encoder_decoder:
        hidden_key, attention_key = "encoder_hidden_states", "encoder_attentions"
    else:
        hidden_key, attention_key = "hidden_states", "attentions"

    for model_class in self.all_model_classes:
        class_inputs_dict = self._prepare_for_class(inputs_dict, model_class)
        model = model_class(config)
        expected_output_count = len(model(class_inputs_dict))

        with tempfile.TemporaryDirectory() as export_root:
            model.save_pretrained(export_root, saved_model=True)
            reloaded = tf.keras.models.load_model(os.path.join(export_root, "saved_model", "1"))
            outputs = reloaded(class_inputs_dict)

            hidden_states = outputs[hidden_key]
            attentions = outputs[attention_key]

            self.assertEqual(len(outputs), expected_output_count)

            expected_num_layers = getattr(tester, "expected_num_hidden_layers", tester.num_hidden_layers + 1)

            self.assertEqual(len(hidden_states), expected_num_layers)
            self.assertListEqual(
                list(hidden_states[0].shape[-2:]),
                [tester.seq_length, tester.hidden_size],
            )

            self.assertEqual(len(attentions), tester.num_hidden_layers)
            self.assertListEqual(
                list(attentions[0].shape[-3:]),
                [tester.num_attention_heads, encoder_seq_length, encoder_key_length],
            )
|
||||
|
||||
@slow
def test_mixed_precision(self):
    """Run a forward pass of every model class under the Keras "mixed_float16" policy.

    The policy is process-global, so it is restored to "float32" in a `finally` block: a failing model must
    not leave mixed precision switched on for the tests that run afterwards.
    """
    # NOTE(review): `mixed_precision.experimental.set_policy` is the legacy spelling of this API - confirm
    # against the minimum supported TensorFlow version before migrating to the non-experimental call.
    tf.keras.mixed_precision.experimental.set_policy("mixed_float16")
    try:
        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()

        for model_class in self.all_model_classes:
            class_inputs_dict = self._prepare_for_class(inputs_dict, model_class)
            model = model_class(config)
            outputs = model(class_inputs_dict)

            self.assertIsNotNone(outputs)
    finally:
        # Always undo the global policy change, even when an assertion or a model call raised.
        tf.keras.mixed_precision.experimental.set_policy("float32")
|
||||
|
||||
@slow
def test_train_pipeline_custom_model(self):
    """Train, save to HDF5 and reload a small Keras model built around each `*MainLayer` of the tested models.

    For every Keras-serializable main layer found in the modules of `all_model_classes`, a functional model
    (main layer followed by a softmax `Dense` head) is fitted for one epoch, saved, reloaded with the
    required `custom_objects` and finally called on the original inputs.
    """
    config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
    # head_mask, decoder_head_mask and cross_attn_head_mask have different shapes than the other input args
    for mask_name in ("head_mask", "decoder_head_mask", "cross_attn_head_mask"):
        if mask_name in inputs_dict:
            del inputs_dict[mask_name]

    # Collect every class named `*MainLayer` that directly subclasses `tf.keras.layers.Layer` and is flagged
    # `_keras_serializable`. The single-element tuples only bind a name inside the comprehension.
    tf_main_layer_classes = {
        module_member
        for model_class in self.all_model_classes
        for module in (import_module(model_class.__module__),)
        for module_member_name in dir(module)
        if module_member_name.endswith("MainLayer")
        for module_member in (getattr(module, module_member_name),)
        if isinstance(module_member, type)
        and tf.keras.layers.Layer in module_member.__bases__
        and getattr(module_member, "_keras_serializable", False)
    }

    for main_layer_class in tf_main_layer_classes:
        is_t5 = "T5" in main_layer_class.__name__

        # T5MainLayer needs an embed_tokens parameter when called without the inputs_embeds parameter
        if is_t5:
            # Take the same values as in TFT5ModelTester for this shared layer
            shared = TFSharedEmbeddings(self.model_tester.vocab_size, self.model_tester.hidden_size, name="shared")
            config.use_cache = False
            main_layer = main_layer_class(config, embed_tokens=shared)
        else:
            main_layer = main_layer_class(config)

        # Symbolic placeholders mirror each real input without its leading (batch) dimension.
        symbolic_inputs = {
            name: tf.keras.Input(tensor.shape[1:], dtype=tensor.dtype) for name, tensor in inputs_dict.items()
        }

        num_labels = getattr(self.model_tester, "num_labels", 2)

        X = tf.data.Dataset.from_tensor_slices(
            (inputs_dict, np.ones((self.model_tester.batch_size, self.model_tester.seq_length, num_labels, 1)))
        ).batch(1)

        hidden_states = main_layer(symbolic_inputs)[0]
        outputs = tf.keras.layers.Dense(num_labels, activation="softmax", name="outputs")(hidden_states)
        model = tf.keras.models.Model(inputs=symbolic_inputs, outputs=[outputs])

        model.compile(loss="binary_crossentropy", optimizer="adam", metrics=["binary_accuracy"])
        model.fit(X, epochs=1)

        with tempfile.TemporaryDirectory() as tmpdirname:
            filepath = os.path.join(tmpdirname, "keras_model.h5")
            model.save(filepath)

            # Keras needs to be told about the custom layer classes to rebuild the model from the file.
            custom_objects = {main_layer_class.__name__: main_layer_class}
            if is_t5:
                custom_objects["TFSharedEmbeddings"] = TFSharedEmbeddings
            model = tf.keras.models.load_model(filepath, custom_objects=custom_objects)

            # A unittest assertion (instead of a bare `assert`) still runs under `python -O`.
            self.assertIsInstance(model, tf.keras.Model)
            model(inputs_dict)
|
||||
|
||||
@slow
def test_graph_mode_with_inputs_embeds(self):
    """Feed pre-computed embeddings instead of token ids to every model class inside a `tf.function`."""
    config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()

    for model_class in self.all_model_classes:
        model = model_class(config)
        inputs = copy.deepcopy(inputs_dict)

        # Replace the id tensors by the output of the model's own input-embedding layer.
        if self.is_encoder_decoder:
            encoder_input_ids = inputs.pop("input_ids")
            # Without explicit decoder ids the encoder ids are embedded a second time for the decoder.
            decoder_input_ids = inputs.pop("decoder_input_ids", encoder_input_ids)
            inputs["inputs_embeds"] = model.get_input_embeddings()(encoder_input_ids)
            inputs["decoder_inputs_embeds"] = model.get_input_embeddings()(decoder_input_ids)
        else:
            input_ids = inputs.pop("input_ids")
            inputs["inputs_embeds"] = model.get_input_embeddings()(input_ids)

        inputs = self._prepare_for_class(inputs, model_class)

        def forward_pass():
            return model(inputs)

        graph_forward = tf.function(forward_pass)
        self.assertIsNotNone(graph_forward())
|
||||
|
||||
def _generate_random_bad_tokens(self, num_bad_tokens, model):
    """Draw `num_bad_tokens` random token ids, never picking the model's bos/pad/eos ids.

    Ids are drawn one at a time with `ids_tensor` over `model_tester.vocab_size`; the same id may appear
    more than once in the returned list.
    """
    # special tokens cannot be bad tokens
    candidates = (model.config.bos_token_id, model.config.pad_token_id, model.config.eos_token_id)
    special_tokens = [token_id for token_id in candidates if token_id is not None]

    # keep sampling until enough non-special tokens have been collected
    bad_tokens = []
    while len(bad_tokens) < num_bad_tokens:
        sampled = ids_tensor((1, 1), self.model_tester.vocab_size)
        token = tf.squeeze(sampled, 0).numpy()[0]
        if token in special_tokens:
            continue
        bad_tokens.append(token)
    return bad_tokens
|
||||
|
||||
def _check_generated_ids(self, output_ids):
    """Assert that every token id in the first row of `output_ids` lies in ``[0, vocab_size)``."""
    first_sequence = output_ids[0].numpy().tolist()
    for token_id in first_sequence:
        self.assertGreaterEqual(token_id, 0)
        self.assertLess(token_id, self.model_tester.vocab_size)
|
||||
|
||||
def _check_match_tokens(self, generated_ids, bad_words_ids):
    """Return True if any sequence of `bad_words_ids` occurs contiguously in any row of `generated_ids`.

    Args:
        generated_ids: iterable of token-id sequences (one per batch element) that support slicing and
            compare equal to a bad-word sequence with ``==`` (e.g. lists of ints).
        bad_words_ids: iterable of token-id sequences to search for.

    Returns:
        bool: True on the first match, False when no bad word occurs anywhere.
    """
    # for all bad word tokens
    for bad_word_ids in bad_words_ids:
        width = len(bad_word_ids)
        # for all slices in batch
        for generated_ids_slice in generated_ids:
            # `end` is the exclusive end of the candidate window. It has to reach
            # len(generated_ids_slice) itself, otherwise a bad word sitting at the very end of the
            # generated sequence is never compared.
            for end in range(width, len(generated_ids_slice) + 1):
                # if tokens match
                if generated_ids_slice[end - width : end] == bad_word_ids:
                    return True
    return False
|
||||
71
tests/utils/test_offline.py
Normal file
71
tests/utils/test_offline.py
Normal file
@@ -0,0 +1,71 @@
|
||||
# Copyright 2020 The HuggingFace Team. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
import subprocess
|
||||
import sys
|
||||
|
||||
from transformers.testing_utils import TestCasePlus, require_torch
|
||||
|
||||
|
||||
class OfflineTests(TestCasePlus):
    """Checks `from_pretrained` behaviour with and without TRANSFORMERS_OFFLINE when the network is unusable."""

    @require_torch
    def test_offline_mode(self):
        """Run small Python programs in subprocesses to exercise TRANSFORMERS_OFFLINE.

        The variable can only be changed before `transformers` is loaded, which is too late inside pytest,
        so each scenario is executed as an external `python -c` program built from the segments below.
        """

        # this must be loaded before socket.socket is monkey-patched
        load = """
from transformers import BertConfig, BertModel, BertTokenizer
"""

        run = """
mname = "lysandre/tiny-bert-random"
BertConfig.from_pretrained(mname)
BertModel.from_pretrained(mname)
BertTokenizer.from_pretrained(mname)
print("success")
"""

        mock = """
import socket
def offline_socket(*args, **kwargs): raise socket.error("Offline mode is enabled")
socket.socket = offline_socket
"""

        def execute(segments, environment):
            # Join the one-liner segments into a single program and run it with the current interpreter.
            program = "\n".join(segments)
            return subprocess.run(
                [sys.executable, "-c", program], env=environment, check=False, capture_output=True
            )

        env = self.get_env()

        # baseline - just load from_pretrained with normal network; should succeed
        baseline = execute([load, run], env)
        self.assertEqual(baseline.returncode, 0, baseline.stderr)
        self.assertIn("success", baseline.stdout.decode())

        # next emulate no network
        without_network = [load, mock, run]

        # should normally fail as it will fail to lookup the model files w/o the network
        env["TRANSFORMERS_OFFLINE"] = "0"
        online_attempt = execute(without_network, env)
        self.assertEqual(online_attempt.returncode, 1, online_attempt.stderr)

        # should succeed as TRANSFORMERS_OFFLINE=1 tells it to use local files
        env["TRANSFORMERS_OFFLINE"] = "1"
        offline_attempt = execute(without_network, env)
        self.assertEqual(offline_attempt.returncode, 0, offline_attempt.stderr)
        self.assertIn("success", offline_attempt.stdout.decode())
|
||||
120
tests/utils/test_skip_decorators.py
Normal file
120
tests/utils/test_skip_decorators.py
Normal file
@@ -0,0 +1,120 @@
|
||||
# coding=utf-8
|
||||
# Copyright 2019-present, the HuggingFace Inc. team.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
#
|
||||
#
|
||||
# this test validates that we can stack skip decorators in groups and whether
|
||||
# they work correctly with other decorators
|
||||
#
|
||||
# since the decorators have already built their decision params (like checking
|
||||
# env[], we can't mock the env and test each of the combinations), so ideally
|
||||
# the following 4 should be run. But since we have different CI jobs running
|
||||
# different configs, all combinations should get covered
|
||||
#
|
||||
# RUN_SLOW=1 pytest -rA tests/utils/test_skip_decorators.py
# RUN_SLOW=1 CUDA_VISIBLE_DEVICES="" pytest -rA tests/utils/test_skip_decorators.py
# RUN_SLOW=0 pytest -rA tests/utils/test_skip_decorators.py
# RUN_SLOW=0 CUDA_VISIBLE_DEVICES="" pytest -rA tests/utils/test_skip_decorators.py
|
||||
|
||||
import os
|
||||
import unittest
|
||||
|
||||
import pytest
|
||||
|
||||
from parameterized import parameterized
|
||||
from transformers.testing_utils import require_torch, require_torch_gpu, slow, torch_device
|
||||
|
||||
|
||||
# skipping in unittest tests
|
||||
|
||||
params = [(1,)]
|
||||
|
||||
|
||||
# test that we can stack our skip decorators with 3rd party decorators
|
||||
# Spellings of an environment flag that mean "enabled" (compared case-insensitively).
# NOTE(review): presumably this should match how the `slow` decorator parses RUN_SLOW - confirm against
# transformers.testing_utils.
_TRUTHY_FLAG_VALUES = frozenset({"1", "y", "yes", "t", "true", "on"})


def _run_slow_requested():
    """Return True when the RUN_SLOW environment variable holds a truthy flag value.

    `bool(os.getenv("RUN_SLOW", 0))` is not enough: any non-empty string, including "0" and "false", is
    truthy, so `RUN_SLOW=0` would be read as "slow tests enabled".
    """
    return os.getenv("RUN_SLOW", "0").strip().lower() in _TRUTHY_FLAG_VALUES


def check_slow():
    """Fail unless slow tests are enabled.

    Called from tests decorated with `@slow`: if the body runs although RUN_SLOW is off, the skip
    decorator did not do its job.

    Raises:
        AssertionError: when RUN_SLOW is unset or holds a falsy value.
    """
    if not _run_slow_requested():
        raise AssertionError("should have been skipped")


# test that we can stack our skip decorators
def check_slow_torch_cuda():
    """Fail unless slow tests are enabled and `torch_device` is "cuda".

    Raises:
        AssertionError: when either condition does not hold, i.e. one of the stacked skip decorators
            should have skipped the calling test.
    """
    if not (_run_slow_requested() and torch_device == "cuda"):
        raise AssertionError("should have been skipped")
|
||||
|
||||
|
||||
@require_torch
class SkipTester(unittest.TestCase):
    """unittest-style tests checking that skip decorators can be stacked in either order.

    Each test body calls a `check_*` helper that fails if the test runs although one of its decorators
    should have skipped it.
    """

    # `slow` outermost, `require_torch_gpu` innermost
    @slow
    @require_torch_gpu
    def test_2_skips_slow_first(self):
        check_slow_torch_cuda()

    # same two decorators, opposite order
    @require_torch_gpu
    @slow
    def test_2_skips_slow_last(self):
        check_slow_torch_cuda()

    # The combination of any skip decorator, followed by parameterized fails to skip the tests
    # 1. @slow manages to correctly skip `test_param_slow_first`
    # 2. but then `parameterized` creates new tests, with a unique name for each parameter group.
    #    It has no idea that they are to be skipped and so they all run, ignoring @slow
    # Therefore skip decorators must come after `parameterized`
    #
    # @slow
    # @parameterized.expand(params)
    # def test_param_slow_first(self, param=None):
    #     check_slow()

    # This works as expected:
    # 1. `parameterized` creates new tests with unique names
    # 2. each of them gets an opportunity to be skipped
    @parameterized.expand(params)
    @slow
    def test_param_slow_last(self, param=None):
        check_slow()
|
||||
|
||||
|
||||
# skipping in non-unittest tests
|
||||
# no problem at all here
|
||||
|
||||
|
||||
@slow
@require_torch_gpu
def test_pytest_2_skips_slow_first():
    """Plain pytest function with `slow` applied outermost and `require_torch_gpu` innermost."""
    check_slow_torch_cuda()
|
||||
|
||||
|
||||
@require_torch_gpu
@slow
def test_pytest_2_skips_slow_last():
    """Plain pytest function with `require_torch_gpu` applied outermost and `slow` innermost."""
    check_slow_torch_cuda()
|
||||
|
||||
|
||||
@slow
@pytest.mark.parametrize("param", [1])
def test_pytest_param_slow_first(param):
    """Parametrized pytest function with `slow` listed before (outside) `pytest.mark.parametrize`."""
    check_slow()
|
||||
|
||||
|
||||
@pytest.mark.parametrize("param", [1])
@slow
def test_pytest_param_slow_last(param):
    """Parametrized pytest function with `slow` listed after (inside) `pytest.mark.parametrize`."""
    check_slow()
|
||||
155
tests/utils/test_utils_check_copies.py
Normal file
155
tests/utils/test_utils_check_copies.py
Normal file
@@ -0,0 +1,155 @@
|
||||
# Copyright 2020 The HuggingFace Team. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
import os
|
||||
import re
|
||||
import shutil
|
||||
import sys
|
||||
import tempfile
|
||||
import unittest
|
||||
|
||||
import black
|
||||
|
||||
|
||||
# Three directory levels above this file; the code below treats it as the repository root.
git_repo_path = os.path.abspath(os.path.dirname(os.path.dirname(os.path.dirname(__file__))))
# `check_copies` lives in the repository's `utils` directory, which is not a package: make it importable.
sys.path.append(os.path.join(git_repo_path, "utils"))

import check_copies  # noqa: E402
|
||||
|
||||
|
||||
# This is the reference code that will be used in the tests.
|
||||
# If BertLMPredictionHead is changed in modeling_bert.py, this code needs to be manually updated.
|
||||
REFERENCE_CODE = """ def __init__(self, config):
|
||||
super().__init__()
|
||||
self.transform = BertPredictionHeadTransform(config)
|
||||
|
||||
# The output weights are the same as the input embeddings, but there is
|
||||
# an output-only bias for each token.
|
||||
self.decoder = nn.Linear(config.hidden_size, config.vocab_size, bias=False)
|
||||
|
||||
self.bias = nn.Parameter(torch.zeros(config.vocab_size))
|
||||
|
||||
# Need a link between the two variables so that the bias is correctly resized with `resize_token_embeddings`
|
||||
self.decoder.bias = self.bias
|
||||
|
||||
def forward(self, hidden_states):
|
||||
hidden_states = self.transform(hidden_states)
|
||||
hidden_states = self.decoder(hidden_states)
|
||||
return hidden_states
|
||||
"""
|
||||
|
||||
|
||||
class CopyCheckTester(unittest.TestCase):
|
||||
def setUp(self):
    """Create a scratch tree holding a copy of `modeling_bert.py` and point `check_copies` at it."""
    scratch_root = tempfile.mkdtemp()
    self.transformer_dir = scratch_root
    os.makedirs(os.path.join(scratch_root, "models/bert/"))
    # Redirect the module-level path used by check_copies; tearDown puts it back.
    check_copies.TRANSFORMER_PATH = scratch_root
    original_bert = os.path.join(git_repo_path, "src/transformers/models/bert/modeling_bert.py")
    scratch_bert = os.path.join(scratch_root, "models/bert/modeling_bert.py")
    shutil.copy(original_bert, scratch_bert)
|
||||
|
||||
def tearDown(self):
    """Restore `check_copies.TRANSFORMER_PATH` and delete the scratch tree created in `setUp`."""
    # Reset the path first so it is restored even if removing the directory fails.
    check_copies.TRANSFORMER_PATH = "src/transformers"
    shutil.rmtree(self.transformer_dir)
|
||||
|
||||
def check_copy_consistency(self, comment, class_name, class_code, overwrite_result=None):
    """Write a class carrying a "# Copied from" comment to the scratch tree and run `check_copies` on it.

    Args:
        comment: the "# Copied from ..." line placed above the class.
        class_name: name of the generated `nn.Module` subclass.
        class_code: class body written to the file (formatted with black first).
        overwrite_result: when `None`, the file must already be consistent. Otherwise
            `is_copy_consistent` is run with `overwrite=True` and the rewritten file must contain
            `comment`, the class line and this body.
    """
    class_header = f"\nclass {class_name}(nn.Module):\n"
    code = comment + class_header + class_code
    mode = black.Mode(target_versions={black.TargetVersion.PY35}, line_length=119)
    code = black.format_str(code, mode=mode)

    fname = os.path.join(self.transformer_dir, "new_code.py")
    with open(fname, "w", newline="\n") as f:
        f.write(code)

    if overwrite_result is None:
        # No inconsistency may be reported for a faithful copy.
        self.assertEqual(len(check_copies.is_copy_consistent(fname)), 0)
        return

    expected = comment + class_header + overwrite_result
    check_copies.is_copy_consistent(fname, overwrite=True)
    with open(fname, "r") as f:
        # `assertTrue(actual, expected)` would treat `expected` as the failure message and pass for any
        # non-empty file; the contents have to be compared.
        self.assertEqual(f.read(), expected)
|
||||
|
||||
def test_find_code_in_transformers(self):
    """`find_code_in_transformers` must return exactly the reference body of `BertLMPredictionHead`."""
    object_path = "models.bert.modeling_bert.BertLMPredictionHead"
    self.assertEqual(check_copies.find_code_in_transformers(object_path), REFERENCE_CODE)
|
||||
|
||||
def test_is_copy_consistent(self):
    """Exercise `is_copy_consistent` on plain copies, renamed copies and an overwrite of a stale copy."""
    copied_from = "# Copied from transformers.models.bert.modeling_bert.BertLMPredictionHead"
    renamed_comment = copied_from + " with Bert->TestModel"
    renamed_code = re.sub("Bert", "TestModel", REFERENCE_CODE)

    # Base copy consistency
    self.check_copy_consistency(copied_from, "BertLMPredictionHead", REFERENCE_CODE + "\n")

    # With no empty line at the end
    self.check_copy_consistency(copied_from, "BertLMPredictionHead", REFERENCE_CODE)

    # Copy consistency with rename
    self.check_copy_consistency(renamed_comment, "TestModelLMPredictionHead", renamed_code)

    # Copy consistency with a really long name
    long_class_name = "TestModelWithAReallyLongNameBecauseSomePeopleLikeThatForSomeReason"
    self.check_copy_consistency(
        f"{copied_from} with Bert->{long_class_name}",
        f"{long_class_name}LMPredictionHead",
        re.sub("Bert", long_class_name, REFERENCE_CODE),
    )

    # Copy consistency with overwrite: the un-renamed body is written, the renamed one is expected back
    self.check_copy_consistency(
        renamed_comment,
        "TestModelLMPredictionHead",
        REFERENCE_CODE,
        overwrite_result=renamed_code,
    )
|
||||
|
||||
def test_convert_to_localized_md(self):
    """Check `convert_to_localized_md` with the simplified-Chinese README format.

    Covers three things: models missing from the localized list are added in localized form, a second
    pass over the converted list reports equal model counts, and an outdated model link is synchronized
    with the English list.
    """
    localized_readme = check_copies.LOCALIZED_READMES["README_zh-hans.md"]

    # English list with three models; the localized list only knows the first one.
    md_list = "1. **[ALBERT](https://huggingface.co/transformers/model_doc/albert.html)** (from Google Research and the Toyota Technological Institute at Chicago) released with the paper [ALBERT: A Lite BERT for Self-supervised Learning of Language Representations](https://arxiv.org/abs/1909.11942), by Zhenzhong Lan, Mingda Chen, Sebastian Goodman, Kevin Gimpel, Piyush Sharma, Radu Soricut.\n1. **[DistilBERT](https://huggingface.co/transformers/model_doc/distilbert.html)** (from HuggingFace), released together with the paper [DistilBERT, a distilled version of BERT: smaller, faster, cheaper and lighter](https://arxiv.org/abs/1910.01108) by Victor Sanh, Lysandre Debut and Thomas Wolf. The same method has been applied to compress GPT2 into [DistilGPT2](https://github.com/huggingface/transformers/tree/master/examples/distillation), RoBERTa into [DistilRoBERTa](https://github.com/huggingface/transformers/tree/master/examples/distillation), Multilingual BERT into [DistilmBERT](https://github.com/huggingface/transformers/tree/master/examples/distillation) and a German version of DistilBERT.\n1. **[ELECTRA](https://huggingface.co/transformers/model_doc/electra.html)** (from Google Research/Stanford University) released with the paper [ELECTRA: Pre-training text encoders as discriminators rather than generators](https://arxiv.org/abs/2003.10555) by Kevin Clark, Minh-Thang Luong, Quoc V. Le, Christopher D. Manning."
    localized_md_list = "1. **[ALBERT](https://huggingface.co/transformers/model_doc/albert.html)** (来自 Google Research and the Toyota Technological Institute at Chicago) 伴随论文 [ALBERT: A Lite BERT for Self-supervised Learning of Language Representations](https://arxiv.org/abs/1909.11942), 由 Zhenzhong Lan, Mingda Chen, Sebastian Goodman, Kevin Gimpel, Piyush Sharma, Radu Soricut 发布。\n"
    # Expected result of the conversion: all three models, in localized form.
    converted_md_list_sample = "1. **[ALBERT](https://huggingface.co/transformers/model_doc/albert.html)** (来自 Google Research and the Toyota Technological Institute at Chicago) 伴随论文 [ALBERT: A Lite BERT for Self-supervised Learning of Language Representations](https://arxiv.org/abs/1909.11942), 由 Zhenzhong Lan, Mingda Chen, Sebastian Goodman, Kevin Gimpel, Piyush Sharma, Radu Soricut 发布。\n1. **[DistilBERT](https://huggingface.co/transformers/model_doc/distilbert.html)** (来自 HuggingFace) 伴随论文 [DistilBERT, a distilled version of BERT: smaller, faster, cheaper and lighter](https://arxiv.org/abs/1910.01108) 由 Victor Sanh, Lysandre Debut and Thomas Wolf 发布。 The same method has been applied to compress GPT2 into [DistilGPT2](https://github.com/huggingface/transformers/tree/master/examples/distillation), RoBERTa into [DistilRoBERTa](https://github.com/huggingface/transformers/tree/master/examples/distillation), Multilingual BERT into [DistilmBERT](https://github.com/huggingface/transformers/tree/master/examples/distillation) and a German version of DistilBERT.\n1. **[ELECTRA](https://huggingface.co/transformers/model_doc/electra.html)** (来自 Google Research/Stanford University) 伴随论文 [ELECTRA: Pre-training text encoders as discriminators rather than generators](https://arxiv.org/abs/2003.10555) 由 Kevin Clark, Minh-Thang Luong, Quoc V. Le, Christopher D. Manning 发布。\n"

    num_models_equal, converted_md_list = check_copies.convert_to_localized_md(
        md_list, localized_md_list, localized_readme["format_model_list"]
    )

    # The localized list was missing two models, so the counts differ before conversion.
    self.assertFalse(num_models_equal)
    self.assertEqual(converted_md_list, converted_md_list_sample)

    num_models_equal, converted_md_list = check_copies.convert_to_localized_md(
        md_list, converted_md_list, localized_readme["format_model_list"]
    )

    # Check whether the number of models is equal to README.md after conversion.
    self.assertTrue(num_models_equal)

    # The English entry links to `.../transformers/model_doc/...` while the localized entry still carries
    # a `.../transformers/master/model_doc/...` link.
    link_changed_md_list = "1. **[ALBERT](https://huggingface.co/transformers/model_doc/albert.html)** (from Google Research and the Toyota Technological Institute at Chicago) released with the paper [ALBERT: A Lite BERT for Self-supervised Learning of Language Representations](https://arxiv.org/abs/1909.11942), by Zhenzhong Lan, Mingda Chen, Sebastian Goodman, Kevin Gimpel, Piyush Sharma, Radu Soricut."
    link_unchanged_md_list = "1. **[ALBERT](https://huggingface.co/transformers/master/model_doc/albert.html)** (来自 Google Research and the Toyota Technological Institute at Chicago) 伴随论文 [ALBERT: A Lite BERT for Self-supervised Learning of Language Representations](https://arxiv.org/abs/1909.11942), 由 Zhenzhong Lan, Mingda Chen, Sebastian Goodman, Kevin Gimpel, Piyush Sharma, Radu Soricut 发布。\n"
    converted_md_list_sample = "1. **[ALBERT](https://huggingface.co/transformers/model_doc/albert.html)** (来自 Google Research and the Toyota Technological Institute at Chicago) 伴随论文 [ALBERT: A Lite BERT for Self-supervised Learning of Language Representations](https://arxiv.org/abs/1909.11942), 由 Zhenzhong Lan, Mingda Chen, Sebastian Goodman, Kevin Gimpel, Piyush Sharma, Radu Soricut 发布。\n"

    num_models_equal, converted_md_list = check_copies.convert_to_localized_md(
        link_changed_md_list, link_unchanged_md_list, localized_readme["format_model_list"]
    )

    # Check if the model link is synchronized.
    self.assertEqual(converted_md_list, converted_md_list_sample)
|
||||
97
tests/utils/test_versions_utils.py
Normal file
97
tests/utils/test_versions_utils.py
Normal file
@@ -0,0 +1,97 @@
|
||||
# Copyright 2020 The HuggingFace Team. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
import sys
|
||||
|
||||
from transformers.testing_utils import TestCasePlus
|
||||
from transformers.utils.versions import importlib_metadata, require_version, require_version_core
|
||||
|
||||
|
||||
# Version string of the installed numpy distribution; interpolated into the
# requirement strings checked by the tests below.
numpy_ver = importlib_metadata.version("numpy")

# "major.minor.micro" of the running interpreter, compared against the
# "but found python==..." part of the error message in the tests below.
python_ver = ".".join(map(str, sys.version_info[:3]))
|
||||
|
||||
|
||||
class DependencyVersionCheckTest(TestCasePlus):
    """Tests for the `require_version` / `require_version_core` helpers.

    Satisfied requirements are expected to return without raising. Unsatisfied
    or malformed requirements are expected to raise, and the negative checks use
    `assertRaises` so that a call which silently succeeds fails the test instead
    of skipping the message assertions.
    """

    def test_core(self):
        """Check `require_version_core` against the installed numpy distribution."""
        # lt + different version strings
        require_version_core("numpy<1000.4.5")
        require_version_core("numpy<1000.4")
        require_version_core("numpy<1000")

        # le
        require_version_core("numpy<=1000.4.5")
        require_version_core(f"numpy<={numpy_ver}")

        # eq
        require_version_core(f"numpy=={numpy_ver}")

        # ne
        require_version_core("numpy!=1000.4.5")

        # ge
        require_version_core("numpy>=1.0")
        require_version_core("numpy>=1.0.0")
        require_version_core(f"numpy>={numpy_ver}")

        # gt
        require_version_core("numpy>1.0.0")

        # mix
        require_version_core("numpy>1.0.0,<1000")

        # requirement w/o version
        require_version_core("numpy")

        # unmet requirements due to version conflict
        for req in ["numpy==1.0.0", "numpy>=1000.0.0", f"numpy<{numpy_ver}"]:
            with self.subTest(req=req):
                with self.assertRaises(ImportError) as ctx:
                    require_version_core(req)
                message = str(ctx.exception)
                self.assertIn(f"{req} is required", message)
                self.assertIn("but found", message)

        # unmet requirements due to missing module
        for req in ["numpipypie>1", "numpipypie2"]:
            with self.subTest(req=req):
                with self.assertRaises(importlib_metadata.PackageNotFoundError) as ctx:
                    require_version_core(req)
                message = str(ctx.exception)
                self.assertIn(f"The '{req}' distribution was not found and is required by this application", message)
                self.assertIn("Try: pip install transformers -U", message)

        # bogus requirements formats:
        # 1. whole thing
        for req in ["numpy??1.0.0", "numpy1.0.0"]:
            with self.subTest(req=req):
                with self.assertRaises(ValueError) as ctx:
                    require_version_core(req)
                self.assertIn("requirement needs to be in the pip package format", str(ctx.exception))

        # 2. only operators
        for req in ["numpy=1.0.0", "numpy == 1.00", "numpy<>1.0.0", "numpy><1.00", "numpy>>1.0.0"]:
            with self.subTest(req=req):
                with self.assertRaises(ValueError) as ctx:
                    require_version_core(req)
                self.assertIn("need one of ", str(ctx.exception))

    def test_python(self):
        """Check requirements expressed against the running Python interpreter."""
        # matching requirement
        require_version("python>=3.6.0")

        # not matching requirements
        for req in ["python>9.9.9", "python<3.0.0"]:
            with self.subTest(req=req):
                with self.assertRaises(ImportError) as ctx:
                    require_version_core(req)
                message = str(ctx.exception)
                self.assertIn(f"{req} is required", message)
                self.assertIn(f"but found python=={python_ver}", message)
|
||||
Reference in New Issue
Block a user