[testing] port test_trainer_distributed to distributed pytest + TestCasePlus enhancements (#8107)

* move the helper code into testing_utils

* port test_trainer_distributed to work with pytest

* improve docs

* simplify notes

* doc

* doc

* style

* doc

* further improvements

* torch might not be available

* real fix

* Apply suggestions from code review

Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com>

Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com>
This commit is contained in:
Stas Bekman
2020-10-28 08:51:32 -07:00
committed by GitHub
parent 47dfa65b0c
commit 5423f2a9d4
6 changed files with 308 additions and 148 deletions

View File

@@ -476,7 +476,31 @@ class TestCasePlus(unittest.TestCase):
"""
This class extends `unittest.TestCase` with additional features.
Feature 1: Flexible auto-removable temp dirs which are guaranteed to get removed at the end of test.
Feature 1: A set of fully resolved important file and dir path accessors.
In tests often we need to know where things are relative to the current test file, and it's not trivial since the
test could be invoked from more than one directory or could reside in sub-directories with different depths. This
class solves this problem by sorting out all the basic paths and provides easy accessors to them:
* ``pathlib`` objects (all fully resolved):
- ``test_file_path`` - the current test file path (=``__file__``)
- ``test_file_dir`` - the directory containing the current test file
- ``tests_dir`` - the directory of the ``tests`` test suite
- ``examples_dir`` - the directory of the ``examples`` test suite
- ``repo_root_dir`` - the directory of the repository
- ``src_dir`` - the directory of ``src`` (i.e. where the ``transformers`` sub-dir resides)
* stringified paths---same as above but these return paths as strings, rather than ``pathlib`` objects:
- ``test_file_path_str``
- ``test_file_dir_str``
- ``tests_dir_str``
- ``examples_dir_str``
- ``repo_root_dir_str``
- ``src_dir_str``
Feature 2: Flexible auto-removable temp dirs which are guaranteed to get removed at the end of test.
In all the following scenarios the temp dir will be auto-removed at the end of test, unless `after=False`.
@@ -499,7 +523,6 @@ class TestCasePlus(unittest.TestCase):
temp results
::
def test_whatever(self):
tmp_dir = self.get_auto_remove_tmp_dir(tmp_dir="./tmp/run/test", after=False)
@@ -517,11 +540,104 @@ class TestCasePlus(unittest.TestCase):
Note 2: Each test can register multiple temp dirs and they all will get auto-removed, unless requested otherwise.
Feature 3: Get a copy of the ``os.environ`` object that sets up ``PYTHONPATH`` specific to the current test suite.
This is useful for invoking external programs from the test suite - e.g. distributed training.
::
def test_whatever(self):
env = self.get_env()
"""
def setUp(self):
self.teardown_tmp_dirs = []
# figure out the resolved paths for repo_root, tests, examples, etc.
self._test_file_path = inspect.getfile(self.__class__)
path = Path(self._test_file_path).resolve()
self._test_file_dir = path.parents[0]
for up in [1, 2, 3]:
tmp_dir = path.parents[up]
if (tmp_dir / "src").is_dir() and (tmp_dir / "tests").is_dir():
break
if tmp_dir:
self._repo_root_dir = tmp_dir
else:
raise ValueError(f"can't figure out the root of the repo from {self._test_file_path}")
self._tests_dir = self._repo_root_dir / "tests"
self._examples_dir = self._repo_root_dir / "examples"
self._src_dir = self._repo_root_dir / "src"
@property
def test_file_path(self):
return self._test_file_path
@property
def test_file_path_str(self):
return str(self._test_file_path)
@property
def test_file_dir(self):
return self._test_file_dir
@property
def test_file_dir_str(self):
return str(self._test_file_dir)
@property
def tests_dir(self):
return self._tests_dir
@property
def tests_dir_str(self):
return str(self._tests_dir)
@property
def examples_dir(self):
return self._examples_dir
@property
def examples_dir_str(self):
return str(self._examples_dir)
@property
def repo_root_dir(self):
return self._repo_root_dir
@property
def repo_root_dir_str(self):
return str(self._repo_root_dir)
@property
def src_dir(self):
return self._src_dir
@property
def src_dir_str(self):
return str(self._src_dir)
def get_env(self):
"""
Return a copy of the ``os.environ`` object that sets up ``PYTHONPATH`` correctly, depending on the test suite
it's invoked from. This is useful for invoking external programs from the test suite - e.g. distributed
training.
It always inserts ``./src`` first, then ``./tests`` or ``./examples`` depending on the test suite type and
finally the preset ``PYTHONPATH`` if any (all full resolved paths).
"""
env = os.environ.copy()
paths = [self.src_dir_str]
if "/examples" in self.test_file_dir_str:
paths.append(self.examples_dir_str)
else:
paths.append(self.tests_dir_str)
paths.append(env.get("PYTHONPATH", ""))
env["PYTHONPATH"] = ":".join(paths)
return env
def get_auto_remove_tmp_dir(self, tmp_dir=None, after=True, before=False):
"""
Args:
@@ -676,3 +792,84 @@ def pytest_terminal_summary_main(tr, id):
tr._tw = orig_writer
tr.reportchars = orig_reportchars
config.option.tbstyle = orig_tbstyle
# the following code deals with async io between processes
# adapted from https://stackoverflow.com/a/59041913/9201239
import asyncio # noqa
class _RunOutput:
def __init__(self, returncode, stdout, stderr):
self.returncode = returncode
self.stdout = stdout
self.stderr = stderr
async def _read_stream(stream, callback):
while True:
line = await stream.readline()
if line:
callback(line)
else:
break
async def _stream_subprocess(cmd, env=None, stdin=None, timeout=None, quiet=False, echo=False) -> _RunOutput:
if echo:
print("\nRunning: ", " ".join(cmd))
p = await asyncio.create_subprocess_exec(
cmd[0],
*cmd[1:],
stdin=stdin,
stdout=asyncio.subprocess.PIPE,
stderr=asyncio.subprocess.PIPE,
env=env,
)
# note: there is a warning for a possible deadlock when using `wait` with huge amounts of data in the pipe
# https://docs.python.org/3/library/asyncio-subprocess.html#asyncio.asyncio.subprocess.Process.wait
#
# If it starts hanging, will need to switch to the following code. The problem is that no data
# will be seen until it's done and if it hangs for example there will be no debug info.
# out, err = await p.communicate()
# return _RunOutput(p.returncode, out, err)
out = []
err = []
def tee(line, sink, pipe, label=""):
line = line.decode("utf-8").rstrip()
sink.append(line)
if not quiet:
print(label, line, file=pipe)
# XXX: the timeout doesn't seem to make any difference here
await asyncio.wait(
[
_read_stream(p.stdout, lambda l: tee(l, out, sys.stdout)),
_read_stream(p.stderr, lambda l: tee(l, err, sys.stderr, label="stderr:")),
],
timeout=timeout,
)
return _RunOutput(await p.wait(), out, err)
def execute_subprocess_async(cmd, env=None, stdin=None, timeout=180, quiet=False, echo=True) -> _RunOutput:
loop = asyncio.get_event_loop()
result = loop.run_until_complete(
_stream_subprocess(cmd, env=env, stdin=stdin, timeout=timeout, quiet=quiet, echo=echo)
)
cmd_str = " ".join(cmd)
if result.returncode > 0:
raise RuntimeError(
f"'{cmd_str}' failed with returncode {result.returncode} - see the `stderr:` messages from above for details."
)
if not result.stdout:
raise RuntimeError(f"'{cmd_str}' produced no output.")
return result