Mass conversion of documentation from rst to Markdown (#14866)

* Convert docstrings of all configurations and tokenizers

* Processors and fixes

* Last modeling files and fixes to models

* Pipeline modules

* Utils files

* Data submodule

* All the other files

* Style

* Missing examples

* Style again

* Fix copies

* Say bye bye to rst docstrings forever
This commit is contained in:
Sylvain Gugger
2021-12-21 15:06:33 -05:00
committed by GitHub
parent 185876392c
commit 27b3031de2
349 changed files with 14049 additions and 13622 deletions

View File

@@ -688,49 +688,49 @@ class CaptureStd:
"""
Context manager to capture:
- stdout: replay it, clean it up and make it available via ``obj.out``
- stderr: replay it and make it available via ``obj.err``
- stdout: replay it, clean it up and make it available via `obj.out`
- stderr: replay it and make it available via `obj.err`
init arguments:
Args:
out (`bool`, *optional*, defaults to `True`): Whether to capture stdout or not.
err (`bool`, *optional*, defaults to `True`): Whether to capture stderr or not.
replay (`bool`, *optional*, defaults to `True`): Whether to replay or not.
By default each captured stream gets replayed back on context's exit, so that one can see what the test was
doing. If this is a not wanted behavior and the captured data shouldn't be replayed, pass `replay=False`
to disable this feature.
- out - capture stdout:`` True``/``False``, default ``True``
- err - capture stdout: ``True``/``False``, default ``True``
- replay - whether to replay or not: ``True``/``False``, default ``True``. By default each
captured stream gets replayed back on context's exit, so that one can see what the test was doing. If this is a
not wanted behavior and the captured data shouldn't be replayed, pass ``replay=False`` to disable this feature.
Examples:
Examples::
```python
# to capture stdout only with auto-replay
with CaptureStdout() as cs:
print("Secret message")
assert "message" in cs.out
# to capture stdout only with auto-replay
with CaptureStdout() as cs:
print("Secret message")
assert "message" in cs.out
# to capture stderr only with auto-replay
import sys
with CaptureStderr() as cs:
print("Warning: ", file=sys.stderr)
assert "Warning" in cs.err
# to capture stderr only with auto-replay
import sys
with CaptureStderr() as cs:
print("Warning: ", file=sys.stderr)
assert "Warning" in cs.err
# to capture both streams with auto-replay
with CaptureStd() as cs:
print("Secret message")
print("Warning: ", file=sys.stderr)
assert "message" in cs.out
assert "Warning" in cs.err
# to capture both streams with auto-replay
with CaptureStd() as cs:
print("Secret message")
print("Warning: ", file=sys.stderr)
assert "message" in cs.out
assert "Warning" in cs.err
# to capture just one of the streams, and not the other, with auto-replay
with CaptureStd(err=False) as cs:
print("Secret message")
assert "message" in cs.out
# but best use the stream-specific subclasses
# to capture just one of the streams, and not the other, with auto-replay
with CaptureStd(err=False) as cs:
print("Secret message")
assert "message" in cs.out
# but best use the stream-specific subclasses
# to capture without auto-replay
with CaptureStd(replay=False) as cs:
print("Secret message")
assert "message" in cs.out
"""
# to capture without auto-replay
with CaptureStd(replay=False) as cs:
print("Secret message")
assert "message" in cs.out
```"""
def __init__(self, out=True, err=True, replay=True):
@@ -810,23 +810,24 @@ class CaptureLogger:
Context manager to capture `logging` streams
Args:
logger: 'logging` logger object
- logger: 'logging` logger object
Results:
Returns:
The captured output is available via `self.out`
Example::
Example:
>>> from transformers import logging
>>> from transformers.testing_utils import CaptureLogger
```python
>>> from transformers import logging
>>> from transformers.testing_utils import CaptureLogger
>>> msg = "Testing 1, 2, 3"
>>> logging.set_verbosity_info()
>>> logger = logging.get_logger("transformers.models.bart.tokenization_bart")
>>> with CaptureLogger(logger) as cl:
... logger.info(msg)
>>> assert cl.out, msg+"\n"
>>> msg = "Testing 1, 2, 3"
>>> logging.set_verbosity_info()
>>> logger = logging.get_logger("transformers.models.bart.tokenization_bart")
>>> with CaptureLogger(logger) as cl:
... logger.info(msg)
>>> assert cl.out, msg+"\n"
```
"""
def __init__(self, logger):
@@ -853,11 +854,12 @@ def LoggingLevel(level):
This is a context manager to temporarily change transformers modules logging level to the desired value and have it
restored to the original setting at the end of the scope.
For example ::
with LoggingLevel(logging.INFO):
AutoModel.from_pretrained("gpt2") # calls logger.info() several times
Example:
```python
with LoggingLevel(logging.INFO):
AutoModel.from_pretrained("gpt2") # calls logger.info() several times
```
"""
orig_level = transformers_logging.get_verbosity()
try:
@@ -873,11 +875,12 @@ def ExtendSysPath(path: Union[str, os.PathLike]) -> Iterator[None]:
"""
Temporary add given path to `sys.path`.
Usage ::
with ExtendSysPath('/path/to/dir'):
mymodule = importlib.import_module('mymodule')
Usage :
```python
with ExtendSysPath('/path/to/dir'):
mymodule = importlib.import_module('mymodule')
```
"""
path = os.fspath(path)
@@ -890,7 +893,7 @@ def ExtendSysPath(path: Union[str, os.PathLike]) -> Iterator[None]:
class TestCasePlus(unittest.TestCase):
"""
This class extends `unittest.TestCase` with additional features.
This class extends *unittest.TestCase* with additional features.
Feature 1: A set of fully resolved important file and dir path accessors.
@@ -898,75 +901,74 @@ class TestCasePlus(unittest.TestCase):
test could be invoked from more than one directory or could reside in sub-directories with different depths. This
class solves this problem by sorting out all the basic paths and provides easy accessors to them:
* ``pathlib`` objects (all fully resolved):
- `pathlib` objects (all fully resolved):
- ``test_file_path`` - the current test file path (=``__file__``)
- ``test_file_dir`` - the directory containing the current test file
- ``tests_dir`` - the directory of the ``tests`` test suite
- ``examples_dir`` - the directory of the ``examples`` test suite
- ``repo_root_dir`` - the directory of the repository
- ``src_dir`` - the directory of ``src`` (i.e. where the ``transformers`` sub-dir resides)
- `test_file_path` - the current test file path (=`__file__`)
- `test_file_dir` - the directory containing the current test file
- `tests_dir` - the directory of the `tests` test suite
- `examples_dir` - the directory of the `examples` test suite
- `repo_root_dir` - the directory of the repository
- `src_dir` - the directory of `src` (i.e. where the `transformers` sub-dir resides)
* stringified paths---same as above but these return paths as strings, rather than ``pathlib`` objects:
- stringified paths---same as above but these return paths as strings, rather than `pathlib` objects:
- ``test_file_path_str``
- ``test_file_dir_str``
- ``tests_dir_str``
- ``examples_dir_str``
- ``repo_root_dir_str``
- ``src_dir_str``
- `test_file_path_str`
- `test_file_dir_str`
- `tests_dir_str`
- `examples_dir_str`
- `repo_root_dir_str`
- `src_dir_str`
Feature 2: Flexible auto-removable temporary dirs which are guaranteed to get removed at the end of test.
1. Create a unique temporary dir:
::
```python
def test_whatever(self):
tmp_dir = self.get_auto_remove_tmp_dir()
```
def test_whatever(self):
tmp_dir = self.get_auto_remove_tmp_dir()
``tmp_dir`` will contain the path to the created temporary dir. It will be automatically removed at the end of the
`tmp_dir` will contain the path to the created temporary dir. It will be automatically removed at the end of the
test.
2. Create a temporary dir of my choice, ensure it's empty before the test starts and don't
empty it after the test.
::
def test_whatever(self):
tmp_dir = self.get_auto_remove_tmp_dir("./xxx")
```python
def test_whatever(self):
tmp_dir = self.get_auto_remove_tmp_dir("./xxx")
```
This is useful for debug when you want to monitor a specific directory and want to make sure the previous tests
didn't leave any data in there.
3. You can override the first two options by directly overriding the ``before`` and ``after`` args, leading to the
following behavior:
3. You can override the first two options by directly overriding the `before` and `after` args, leading to the
following behavior:
``before=True``: the temporary dir will always be cleared at the beginning of the test.
`before=True`: the temporary dir will always be cleared at the beginning of the test.
``before=False``: if the temporary dir already existed, any existing files will remain there.
`before=False`: if the temporary dir already existed, any existing files will remain there.
``after=True``: the temporary dir will always be deleted at the end of the test.
`after=True`: the temporary dir will always be deleted at the end of the test.
``after=False``: the temporary dir will always be left intact at the end of the test.
`after=False`: the temporary dir will always be left intact at the end of the test.
Note 1: In order to run the equivalent of ``rm -r`` safely, only subdirs of the project repository checkout are
allowed if an explicit ``tmp_dir`` is used, so that by mistake no ``/tmp`` or similar important part of the
filesystem will get nuked. i.e. please always pass paths that start with ``./``
Note 1: In order to run the equivalent of `rm -r` safely, only subdirs of the project repository checkout are
allowed if an explicit `tmp_dir` is used, so that by mistake no `/tmp` or similar important part of the
filesystem will get nuked. i.e. please always pass paths that start with `./`
Note 2: Each test can register multiple temporary dirs and they all will get auto-removed, unless requested
otherwise.
Feature 3: Get a copy of the ``os.environ`` object that sets up ``PYTHONPATH`` specific to the current test suite.
Feature 3: Get a copy of the `os.environ` object that sets up `PYTHONPATH` specific to the current test suite.
This is useful for invoking external programs from the test suite - e.g. distributed training.
::
def test_whatever(self):
env = self.get_env()
"""
```python
def test_whatever(self):
env = self.get_env()
```"""
def setUp(self):
# get_auto_remove_tmp_dir feature:
@@ -1038,12 +1040,12 @@ class TestCasePlus(unittest.TestCase):
def get_env(self):
"""
Return a copy of the ``os.environ`` object that sets up ``PYTHONPATH`` correctly, depending on the test suite
Return a copy of the `os.environ` object that sets up `PYTHONPATH` correctly, depending on the test suite
it's invoked from. This is useful for invoking external programs from the test suite - e.g. distributed
training.
It always inserts ``./src`` first, then ``./tests`` or ``./examples`` depending on the test suite type and
finally the preset ``PYTHONPATH`` if any (all full resolved paths).
It always inserts `./src` first, then `./tests` or `./examples` depending on the test suite type and
finally the preset `PYTHONPATH` if any (all full resolved paths).
"""
env = os.environ.copy()
@@ -1060,26 +1062,26 @@ class TestCasePlus(unittest.TestCase):
def get_auto_remove_tmp_dir(self, tmp_dir=None, before=None, after=None):
"""
Args:
tmp_dir (:obj:`string`, `optional`):
if :obj:`None`:
tmp_dir (`string`, *optional*):
if `None`:
- a unique temporary path will be created
- sets ``before=True`` if ``before`` is :obj:`None`
- sets ``after=True`` if ``after`` is :obj:`None`
- sets `before=True` if `before` is `None`
- sets `after=True` if `after` is `None`
else:
- :obj:`tmp_dir` will be created
- sets ``before=True`` if ``before`` is :obj:`None`
- sets ``after=False`` if ``after`` is :obj:`None`
before (:obj:`bool`, `optional`):
If :obj:`True` and the :obj:`tmp_dir` already exists, make sure to empty it right away if :obj:`False`
and the :obj:`tmp_dir` already exists, any existing files will remain there.
after (:obj:`bool`, `optional`):
If :obj:`True`, delete the :obj:`tmp_dir` at the end of the test if :obj:`False`, leave the
:obj:`tmp_dir` and its contents intact at the end of the test.
- `tmp_dir` will be created
- sets `before=True` if `before` is `None`
- sets `after=False` if `after` is `None`
before (`bool`, *optional*):
If `True` and the `tmp_dir` already exists, make sure to empty it right away if `False`
and the `tmp_dir` already exists, any existing files will remain there.
after (`bool`, *optional*):
If `True`, delete the `tmp_dir` at the end of the test if `False`, leave the
`tmp_dir` and its contents intact at the end of the test.
Returns:
tmp_dir(:obj:`string`): either the same value as passed via `tmp_dir` or the path to the auto-selected tmp
tmp_dir(`string`): either the same value as passed via *tmp_dir* or the path to the auto-selected tmp
dir
"""
if tmp_dir is not None:
@@ -1152,9 +1154,9 @@ def mockenv(**kwargs):
@contextlib.contextmanager
def mockenv_context(*remove, **update):
"""
Temporarily updates the ``os.environ`` dictionary in-place. Similar to mockenv
Temporarily updates the `os.environ` dictionary in-place. Similar to mockenv
The ``os.environ`` dictionary is updated in-place so that the modification is sure to work in all situations.
The `os.environ` dictionary is updated in-place so that the modification is sure to work in all situations.
Args:
remove: Environment variables to remove.
@@ -1423,8 +1425,8 @@ def execute_subprocess_async(cmd, env=None, stdin=None, timeout=180, quiet=False
def pytest_xdist_worker_id():
"""
Returns an int value of worker's numerical id under ``pytest-xdist``'s concurrent workers ``pytest -n N`` regime,
or 0 if ``-n 1`` or ``pytest-xdist`` isn't being used.
Returns an int value of worker's numerical id under `pytest-xdist`'s concurrent workers `pytest -n N` regime,
or 0 if `-n 1` or `pytest-xdist` isn't being used.
"""
worker = os.environ.get("PYTEST_XDIST_WORKER", "gw0")
worker = re.sub(r"^gw", "", worker, 0, re.M)
@@ -1433,9 +1435,9 @@ def pytest_xdist_worker_id():
def get_torch_dist_unique_port():
"""
Returns a port number that can be fed to ``torch.distributed.launch``'s ``--master_port`` argument.
Returns a port number that can be fed to `torch.distributed.launch`'s `--master_port` argument.
Under ``pytest-xdist`` it adds a delta number based on a worker id so that concurrent tests don't try to use the
Under `pytest-xdist` it adds a delta number based on a worker id so that concurrent tests don't try to use the
same port at once.
"""
port = 29500