[testing] port test_trainer_distributed to distributed pytest + TestCasePlus enhancements (#8107)
* move the helper code into testing_utils * port test_trainer_distributed to work with pytest * improve docs * simplify notes * doc * doc * style * doc * further improvements * torch might not be available * real fix * Apply suggestions from code review Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com> Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com>
This commit is contained in:
@@ -476,7 +476,31 @@ class TestCasePlus(unittest.TestCase):
|
||||
"""
|
||||
This class extends `unittest.TestCase` with additional features.
|
||||
|
||||
Feature 1: Flexible auto-removable temp dirs which are guaranteed to get removed at the end of test.
|
||||
Feature 1: A set of fully resolved important file and dir path accessors.
|
||||
|
||||
In tests often we need to know where things are relative to the current test file, and it's not trivial since the
|
||||
test could be invoked from more than one directory or could reside in sub-directories with different depths. This
|
||||
class solves this problem by sorting out all the basic paths and provides easy accessors to them:
|
||||
|
||||
* ``pathlib`` objects (all fully resolved):
|
||||
|
||||
- ``test_file_path`` - the current test file path (=``__file__``)
|
||||
- ``test_file_dir`` - the directory containing the current test file
|
||||
- ``tests_dir`` - the directory of the ``tests`` test suite
|
||||
- ``examples_dir`` - the directory of the ``examples`` test suite
|
||||
- ``repo_root_dir`` - the directory of the repository
|
||||
- ``src_dir`` - the directory of ``src`` (i.e. where the ``transformers`` sub-dir resides)
|
||||
|
||||
* stringified paths---same as above but these return paths as strings, rather than ``pathlib`` objects:
|
||||
|
||||
- ``test_file_path_str``
|
||||
- ``test_file_dir_str``
|
||||
- ``tests_dir_str``
|
||||
- ``examples_dir_str``
|
||||
- ``repo_root_dir_str``
|
||||
- ``src_dir_str``
|
||||
|
||||
Feature 2: Flexible auto-removable temp dirs which are guaranteed to get removed at the end of test.
|
||||
|
||||
In all the following scenarios the temp dir will be auto-removed at the end of test, unless `after=False`.
|
||||
|
||||
@@ -499,7 +523,6 @@ class TestCasePlus(unittest.TestCase):
|
||||
temp results
|
||||
|
||||
::
|
||||
|
||||
def test_whatever(self):
|
||||
tmp_dir = self.get_auto_remove_tmp_dir(tmp_dir="./tmp/run/test", after=False)
|
||||
|
||||
@@ -517,11 +540,104 @@ class TestCasePlus(unittest.TestCase):
|
||||
|
||||
Note 2: Each test can register multiple temp dirs and they all will get auto-removed, unless requested otherwise.
|
||||
|
||||
Feature 3: Get a copy of the ``os.environ`` object that sets up ``PYTHONPATH`` specific to the current test suite.
|
||||
This is useful for invoking external programs from the test suite - e.g. distributed training.
|
||||
|
||||
|
||||
::
|
||||
def test_whatever(self):
|
||||
env = self.get_env()
|
||||
|
||||
"""
|
||||
|
||||
def setUp(self):
    """
    Initialize the per-test state: the temp dir registry and the resolved path attributes.

    The repository root is searched for among the 2nd, 3rd and 4th ancestors of the test file (the directory
    containing the test file itself is not checked): the first one that contains both a ``src`` and a ``tests``
    sub-directory is used.

    Raises:
        ValueError: if none of the inspected ancestors contains both ``src`` and ``tests``.
    """
    self.teardown_tmp_dirs = []

    # figure out the resolved paths for repo_root, tests, examples, etc.
    # note: ``_test_file_path`` is kept exactly as returned by ``inspect.getfile`` (a string)
    self._test_file_path = inspect.getfile(self.__class__)
    path = Path(self._test_file_path).resolve()
    self._test_file_dir = path.parents[0]

    # Slice the materialized ancestors instead of indexing ``path.parents[up]`` so that a test file
    # living close to the filesystem root doesn't trigger an ``IndexError``.
    for candidate in list(path.parents)[1:4]:
        if (candidate / "src").is_dir() and (candidate / "tests").is_dir():
            self._repo_root_dir = candidate
            break
    else:
        # the loop ran out of candidates without a match: fail loudly rather than silently
        # treating the last inspected ancestor as the repo root
        raise ValueError(f"can't figure out the root of the repo from {self._test_file_path}")

    self._tests_dir = self._repo_root_dir / "tests"
    self._examples_dir = self._repo_root_dir / "examples"
    self._src_dir = self._repo_root_dir / "src"
|
||||
|
||||
@property
def test_file_path(self):
    """The current test file path, as stored in ``_test_file_path`` by ``setUp``."""
    location = self._test_file_path
    return location
|
||||
|
||||
@property
def test_file_path_str(self):
    """The current test file path (``_test_file_path``) converted to a string."""
    location = self._test_file_path
    return str(location)
|
||||
|
||||
@property
def test_file_dir(self):
    """The directory containing the current test file, as stored in ``_test_file_dir`` by ``setUp``."""
    directory = self._test_file_dir
    return directory
|
||||
|
||||
@property
def test_file_dir_str(self):
    """The directory containing the current test file (``_test_file_dir``) converted to a string."""
    directory = self._test_file_dir
    return str(directory)
|
||||
|
||||
@property
def tests_dir(self):
    """The ``tests`` directory under the repository root, as stored in ``_tests_dir`` by ``setUp``."""
    directory = self._tests_dir
    return directory
|
||||
|
||||
@property
def tests_dir_str(self):
    """The ``tests`` directory (``_tests_dir``) converted to a string."""
    directory = self._tests_dir
    return str(directory)
|
||||
|
||||
@property
def examples_dir(self):
    """The ``examples`` directory under the repository root, as stored in ``_examples_dir`` by ``setUp``."""
    directory = self._examples_dir
    return directory
|
||||
|
||||
@property
def examples_dir_str(self):
    """The ``examples`` directory (``_examples_dir``) converted to a string."""
    directory = self._examples_dir
    return str(directory)
|
||||
|
||||
@property
def repo_root_dir(self):
    """The repository root directory, as stored in ``_repo_root_dir`` by ``setUp``."""
    directory = self._repo_root_dir
    return directory
|
||||
|
||||
@property
def repo_root_dir_str(self):
    """The repository root directory (``_repo_root_dir``) converted to a string."""
    directory = self._repo_root_dir
    return str(directory)
|
||||
|
||||
@property
def src_dir(self):
    """The ``src`` directory under the repository root, as stored in ``_src_dir`` by ``setUp``."""
    directory = self._src_dir
    return directory
|
||||
|
||||
@property
def src_dir_str(self):
    """The ``src`` directory (``_src_dir``) converted to a string."""
    directory = self._src_dir
    return str(directory)
|
||||
|
||||
def get_env(self):
    """
    Return a copy of the ``os.environ`` object that sets up ``PYTHONPATH`` correctly, depending on the test suite
    it's invoked from. This is useful for invoking external programs from the test suite - e.g. distributed
    training.

    It always inserts ``./src`` first, then ``./tests`` or ``./examples`` depending on the test suite type and
    finally the preset ``PYTHONPATH`` if any (all fully resolved paths).

    Returns:
        A copy of ``os.environ`` with ``PYTHONPATH`` replaced; ``os.environ`` itself is not modified.
    """
    env = os.environ.copy()
    paths = [self.src_dir_str]

    # compare against the forward-slash form so the check also works with backslash-separated paths
    if "/examples" in Path(self.test_file_dir_str).as_posix():
        paths.append(self.examples_dir_str)
    else:
        paths.append(self.tests_dir_str)

    # only append a preset ``PYTHONPATH`` when there is one: an empty trailing entry would otherwise
    # sneak the current working directory into the child's module search path
    preset = env.get("PYTHONPATH", "")
    if preset:
        paths.append(preset)

    # ``os.pathsep`` is ":" on POSIX and ";" on Windows
    env["PYTHONPATH"] = os.pathsep.join(paths)
    return env
|
||||
|
||||
def get_auto_remove_tmp_dir(self, tmp_dir=None, after=True, before=False):
|
||||
"""
|
||||
Args:
|
||||
@@ -676,3 +792,84 @@ def pytest_terminal_summary_main(tr, id):
|
||||
tr._tw = orig_writer
|
||||
tr.reportchars = orig_reportchars
|
||||
config.option.tbstyle = orig_tbstyle
|
||||
|
||||
|
||||
# the following code deals with async io between processes
|
||||
|
||||
# adapted from https://stackoverflow.com/a/59041913/9201239
|
||||
import asyncio # noqa
|
||||
|
||||
|
||||
class _RunOutput:
|
||||
def __init__(self, returncode, stdout, stderr):
|
||||
self.returncode = returncode
|
||||
self.stdout = stdout
|
||||
self.stderr = stderr
|
||||
|
||||
|
||||
async def _read_stream(stream, callback):
|
||||
while True:
|
||||
line = await stream.readline()
|
||||
if line:
|
||||
callback(line)
|
||||
else:
|
||||
break
|
||||
|
||||
|
||||
async def _stream_subprocess(cmd, env=None, stdin=None, timeout=None, quiet=False, echo=False) -> _RunOutput:
    """
    Run ``cmd`` as a subprocess, capturing its stdout and stderr line by line while it runs.

    Args:
        cmd: the program and its arguments, as a list of strings.
        env: passed through as the ``env`` of the subprocess.
        stdin: passed through as the ``stdin`` of the subprocess.
        timeout: passed to ``asyncio.wait`` while the two pipes are being read (see the note in the body).
        quiet: if ``True``, captured lines are not echoed to ``sys.stdout`` / ``sys.stderr``.
        echo: if ``True``, print the command before running it.

    Returns:
        A ``_RunOutput`` holding the return code and the captured stdout / stderr as lists of decoded lines
        with trailing whitespace stripped.
    """
    if echo:
        print("\nRunning: ", " ".join(cmd))

    p = await asyncio.create_subprocess_exec(
        cmd[0],
        *cmd[1:],
        stdin=stdin,
        stdout=asyncio.subprocess.PIPE,
        stderr=asyncio.subprocess.PIPE,
        env=env,
    )

    # note: there is a warning for a possible deadlock when using `wait` with huge amounts of data in the pipe
    # https://docs.python.org/3/library/asyncio-subprocess.html#asyncio.asyncio.subprocess.Process.wait
    #
    # If it starts hanging, will need to switch to the following code. The problem is that no data
    # will be seen until it's done and if it hangs for example there will be no debug info.
    # out, err = await p.communicate()
    # return _RunOutput(p.returncode, out, err)

    out = []
    err = []

    def tee(line, sink, pipe, label=""):
        # record the decoded line and, unless quiet, mirror it to the matching std stream
        line = line.decode("utf-8").rstrip()
        sink.append(line)
        if not quiet:
            print(label, line, file=pipe)

    # `asyncio.wait` no longer accepts bare coroutines (deprecated in 3.8, an error since 3.11), so the
    # readers are explicitly scheduled as tasks; `ensure_future` is used as it's available on every version.
    readers = [
        asyncio.ensure_future(_read_stream(p.stdout, lambda line: tee(line, out, sys.stdout))),
        asyncio.ensure_future(_read_stream(p.stderr, lambda line: tee(line, err, sys.stderr, label="stderr:"))),
    ]

    # XXX: the timeout doesn't seem to make any difference here - `asyncio.wait` neither raises nor cancels
    # the readers when it expires, and `p.wait()` below still waits for the process to exit
    await asyncio.wait(readers, timeout=timeout)
    return _RunOutput(await p.wait(), out, err)
|
||||
|
||||
|
||||
def execute_subprocess_async(cmd, env=None, stdin=None, timeout=180, quiet=False, echo=True) -> _RunOutput:
    """
    Synchronously run ``cmd`` via ``_stream_subprocess`` and validate its outcome.

    Args:
        cmd: the program and its arguments, as a list of strings.
        env, stdin, timeout, quiet, echo: forwarded unchanged to ``_stream_subprocess``.

    Returns:
        The ``_RunOutput`` produced by ``_stream_subprocess``.

    Raises:
        RuntimeError: if the return code is non-zero, or if nothing was captured from stdout.
    """
    # `get_event_loop` may raise when there is no current loop, or hand back a loop that was already closed;
    # in either case start from a fresh one
    try:
        loop = asyncio.get_event_loop()
    except RuntimeError:
        loop = None
    if loop is None or loop.is_closed():
        loop = asyncio.new_event_loop()
        asyncio.set_event_loop(loop)

    result = loop.run_until_complete(
        _stream_subprocess(cmd, env=env, stdin=stdin, timeout=timeout, quiet=quiet, echo=echo)
    )

    cmd_str = " ".join(cmd)
    # any non-zero code is a failure: a child terminated by a signal reports a negative return code
    if result.returncode != 0:
        raise RuntimeError(
            f"'{cmd_str}' failed with returncode {result.returncode} - see the `stderr:` messages from above for details."
        )
    if not result.stdout:
        raise RuntimeError(f"'{cmd_str}' produced no output.")

    return result
|
||||
|
||||
Reference in New Issue
Block a user