Indent code block in the documentation (#11233)
* Indent code block * Indent code blocks version 2 * Quality
This commit is contained in:
@@ -388,7 +388,7 @@ Next, you can finally start adding new code to 🤗 Transformers. Go into the cl
|
|||||||
|
|
||||||
::
|
::
|
||||||
|
|
||||||
cd transformers
|
cd transformers
|
||||||
|
|
||||||
In the special case that you are adding a model whose architecture exactly matches the model architecture of an
|
In the special case that you are adding a model whose architecture exactly matches the model architecture of an
|
||||||
existing model, you only have to add a conversion script as described in `this section <#write-a-conversion-script>`__.
|
existing model, you only have to add a conversion script as described in `this section <#write-a-conversion-script>`__.
|
||||||
@@ -417,27 +417,27 @@ You should do the following:
|
|||||||
|
|
||||||
::
|
::
|
||||||
|
|
||||||
git checkout -b add_brand_new_bert
|
git checkout -b add_brand_new_bert
|
||||||
|
|
||||||
2. Commit the automatically generated code:
|
2. Commit the automatically generated code:
|
||||||
|
|
||||||
::
|
::
|
||||||
|
|
||||||
git add .
|
git add .
|
||||||
git commit
|
git commit
|
||||||
|
|
||||||
3. Fetch and rebase to current master
|
3. Fetch and rebase to current master
|
||||||
|
|
||||||
::
|
::
|
||||||
|
|
||||||
git fetch upstream
|
git fetch upstream
|
||||||
git rebase upstream/master
|
git rebase upstream/master
|
||||||
|
|
||||||
4. Push the changes to your account using:
|
4. Push the changes to your account using:
|
||||||
|
|
||||||
::
|
::
|
||||||
|
|
||||||
git push -u origin a-descriptive-name-for-my-changes
|
git push -u origin a-descriptive-name-for-my-changes
|
||||||
|
|
||||||
5. Once you are satisfied, go to the webpage of your fork on GitHub. Click on “Pull request”. Make sure to add the
|
5. Once you are satisfied, go to the webpage of your fork on GitHub. Click on “Pull request”. Make sure to add the
|
||||||
GitHub handle of some members of the Hugging Face team as reviewers, so that the Hugging Face team gets notified for
|
GitHub handle of some members of the Hugging Face team as reviewers, so that the Hugging Face team gets notified for
|
||||||
@@ -451,8 +451,8 @@ time to time by doing:
|
|||||||
|
|
||||||
::
|
::
|
||||||
|
|
||||||
git fetch upstream
|
git fetch upstream
|
||||||
git merge upstream/master
|
git merge upstream/master
|
||||||
|
|
||||||
In general, all questions you might have regarding the model or your implementation should be asked in your PR and
|
In general, all questions you might have regarding the model or your implementation should be asked in your PR and
|
||||||
discussed/solved in the PR. This way, the Hugging Face team will always be notified when you are committing new code or
|
discussed/solved in the PR. This way, the Hugging Face team will always be notified when you are committing new code or
|
||||||
|
|||||||
@@ -47,12 +47,12 @@ Here is an example of the conversion process for a pre-trained ``BERT-Base Uncas
|
|||||||
|
|
||||||
.. code-block:: shell
|
.. code-block:: shell
|
||||||
|
|
||||||
export BERT_BASE_DIR=/path/to/bert/uncased_L-12_H-768_A-12
|
export BERT_BASE_DIR=/path/to/bert/uncased_L-12_H-768_A-12
|
||||||
|
|
||||||
transformers-cli convert --model_type bert \
|
transformers-cli convert --model_type bert \
|
||||||
--tf_checkpoint $BERT_BASE_DIR/bert_model.ckpt \
|
--tf_checkpoint $BERT_BASE_DIR/bert_model.ckpt \
|
||||||
--config $BERT_BASE_DIR/bert_config.json \
|
--config $BERT_BASE_DIR/bert_config.json \
|
||||||
--pytorch_dump_output $BERT_BASE_DIR/pytorch_model.bin
|
--pytorch_dump_output $BERT_BASE_DIR/pytorch_model.bin
|
||||||
|
|
||||||
You can download Google's pre-trained models for the conversion `here
|
You can download Google's pre-trained models for the conversion `here
|
||||||
<https://github.com/google-research/bert#pre-trained-models>`__.
|
<https://github.com/google-research/bert#pre-trained-models>`__.
|
||||||
@@ -72,12 +72,12 @@ Here is an example of the conversion process for the pre-trained ``ALBERT Base``
|
|||||||
|
|
||||||
.. code-block:: shell
|
.. code-block:: shell
|
||||||
|
|
||||||
export ALBERT_BASE_DIR=/path/to/albert/albert_base
|
export ALBERT_BASE_DIR=/path/to/albert/albert_base
|
||||||
|
|
||||||
transformers-cli convert --model_type albert \
|
transformers-cli convert --model_type albert \
|
||||||
--tf_checkpoint $ALBERT_BASE_DIR/model.ckpt-best \
|
--tf_checkpoint $ALBERT_BASE_DIR/model.ckpt-best \
|
||||||
--config $ALBERT_BASE_DIR/albert_config.json \
|
--config $ALBERT_BASE_DIR/albert_config.json \
|
||||||
--pytorch_dump_output $ALBERT_BASE_DIR/pytorch_model.bin
|
--pytorch_dump_output $ALBERT_BASE_DIR/pytorch_model.bin
|
||||||
|
|
||||||
You can download Google's pre-trained models for the conversion `here
|
You can download Google's pre-trained models for the conversion `here
|
||||||
<https://github.com/google-research/albert#pre-trained-models>`__.
|
<https://github.com/google-research/albert#pre-trained-models>`__.
|
||||||
@@ -91,13 +91,13 @@ save as the same format than OpenAI pretrained model (see `here <https://github.
|
|||||||
|
|
||||||
.. code-block:: shell
|
.. code-block:: shell
|
||||||
|
|
||||||
export OPENAI_GPT_CHECKPOINT_FOLDER_PATH=/path/to/openai/pretrained/numpy/weights
|
export OPENAI_GPT_CHECKPOINT_FOLDER_PATH=/path/to/openai/pretrained/numpy/weights
|
||||||
|
|
||||||
transformers-cli convert --model_type gpt \
|
transformers-cli convert --model_type gpt \
|
||||||
--tf_checkpoint $OPENAI_GPT_CHECKPOINT_FOLDER_PATH \
|
--tf_checkpoint $OPENAI_GPT_CHECKPOINT_FOLDER_PATH \
|
||||||
--pytorch_dump_output $PYTORCH_DUMP_OUTPUT \
|
--pytorch_dump_output $PYTORCH_DUMP_OUTPUT \
|
||||||
[--config OPENAI_GPT_CONFIG] \
|
[--config OPENAI_GPT_CONFIG] \
|
||||||
[--finetuning_task_name OPENAI_GPT_FINETUNED_TASK]
|
[--finetuning_task_name OPENAI_GPT_FINETUNED_TASK]
|
||||||
|
|
||||||
|
|
||||||
OpenAI GPT-2
|
OpenAI GPT-2
|
||||||
@@ -108,13 +108,13 @@ Here is an example of the conversion process for a pre-trained OpenAI GPT-2 mode
|
|||||||
|
|
||||||
.. code-block:: shell
|
.. code-block:: shell
|
||||||
|
|
||||||
export OPENAI_GPT2_CHECKPOINT_PATH=/path/to/gpt2/pretrained/weights
|
export OPENAI_GPT2_CHECKPOINT_PATH=/path/to/gpt2/pretrained/weights
|
||||||
|
|
||||||
transformers-cli convert --model_type gpt2 \
|
transformers-cli convert --model_type gpt2 \
|
||||||
--tf_checkpoint $OPENAI_GPT2_CHECKPOINT_PATH \
|
--tf_checkpoint $OPENAI_GPT2_CHECKPOINT_PATH \
|
||||||
--pytorch_dump_output $PYTORCH_DUMP_OUTPUT \
|
--pytorch_dump_output $PYTORCH_DUMP_OUTPUT \
|
||||||
[--config OPENAI_GPT2_CONFIG] \
|
[--config OPENAI_GPT2_CONFIG] \
|
||||||
[--finetuning_task_name OPENAI_GPT2_FINETUNED_TASK]
|
[--finetuning_task_name OPENAI_GPT2_FINETUNED_TASK]
|
||||||
|
|
||||||
Transformer-XL
|
Transformer-XL
|
||||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||||
@@ -124,13 +124,13 @@ Here is an example of the conversion process for a pre-trained Transformer-XL mo
|
|||||||
|
|
||||||
.. code-block:: shell
|
.. code-block:: shell
|
||||||
|
|
||||||
export TRANSFO_XL_CHECKPOINT_FOLDER_PATH=/path/to/transfo/xl/checkpoint
|
export TRANSFO_XL_CHECKPOINT_FOLDER_PATH=/path/to/transfo/xl/checkpoint
|
||||||
|
|
||||||
transformers-cli convert --model_type transfo_xl \
|
transformers-cli convert --model_type transfo_xl \
|
||||||
--tf_checkpoint $TRANSFO_XL_CHECKPOINT_FOLDER_PATH \
|
--tf_checkpoint $TRANSFO_XL_CHECKPOINT_FOLDER_PATH \
|
||||||
--pytorch_dump_output $PYTORCH_DUMP_OUTPUT \
|
--pytorch_dump_output $PYTORCH_DUMP_OUTPUT \
|
||||||
[--config TRANSFO_XL_CONFIG] \
|
[--config TRANSFO_XL_CONFIG] \
|
||||||
[--finetuning_task_name TRANSFO_XL_FINETUNED_TASK]
|
[--finetuning_task_name TRANSFO_XL_FINETUNED_TASK]
|
||||||
|
|
||||||
|
|
||||||
XLNet
|
XLNet
|
||||||
@@ -140,14 +140,14 @@ Here is an example of the conversion process for a pre-trained XLNet model:
|
|||||||
|
|
||||||
.. code-block:: shell
|
.. code-block:: shell
|
||||||
|
|
||||||
export TRANSFO_XL_CHECKPOINT_PATH=/path/to/xlnet/checkpoint
|
export TRANSFO_XL_CHECKPOINT_PATH=/path/to/xlnet/checkpoint
|
||||||
export TRANSFO_XL_CONFIG_PATH=/path/to/xlnet/config
|
export TRANSFO_XL_CONFIG_PATH=/path/to/xlnet/config
|
||||||
|
|
||||||
transformers-cli convert --model_type xlnet \
|
transformers-cli convert --model_type xlnet \
|
||||||
--tf_checkpoint $TRANSFO_XL_CHECKPOINT_PATH \
|
--tf_checkpoint $TRANSFO_XL_CHECKPOINT_PATH \
|
||||||
--config $TRANSFO_XL_CONFIG_PATH \
|
--config $TRANSFO_XL_CONFIG_PATH \
|
||||||
--pytorch_dump_output $PYTORCH_DUMP_OUTPUT \
|
--pytorch_dump_output $PYTORCH_DUMP_OUTPUT \
|
||||||
[--finetuning_task_name XLNET_FINETUNED_TASK]
|
[--finetuning_task_name XLNET_FINETUNED_TASK]
|
||||||
|
|
||||||
|
|
||||||
XLM
|
XLM
|
||||||
@@ -157,13 +157,13 @@ Here is an example of the conversion process for a pre-trained XLM model:
|
|||||||
|
|
||||||
.. code-block:: shell
|
.. code-block:: shell
|
||||||
|
|
||||||
export XLM_CHECKPOINT_PATH=/path/to/xlm/checkpoint
|
export XLM_CHECKPOINT_PATH=/path/to/xlm/checkpoint
|
||||||
|
|
||||||
transformers-cli convert --model_type xlm \
|
transformers-cli convert --model_type xlm \
|
||||||
--tf_checkpoint $XLM_CHECKPOINT_PATH \
|
--tf_checkpoint $XLM_CHECKPOINT_PATH \
|
||||||
--pytorch_dump_output $PYTORCH_DUMP_OUTPUT \
|
--pytorch_dump_output $PYTORCH_DUMP_OUTPUT \
|
||||||
[--config XLM_CONFIG] \
|
[--config XLM_CONFIG] \
|
||||||
[--finetuning_task_name XLM_FINETUNED_TASK]
|
[--finetuning_task_name XLM_FINETUNED_TASK]
|
||||||
|
|
||||||
|
|
||||||
T5
|
T5
|
||||||
@@ -173,9 +173,9 @@ Here is an example of the conversion process for a pre-trained T5 model:
|
|||||||
|
|
||||||
.. code-block:: shell
|
.. code-block:: shell
|
||||||
|
|
||||||
export T5=/path/to/t5/uncased_L-12_H-768_A-12
|
export T5=/path/to/t5/uncased_L-12_H-768_A-12
|
||||||
|
|
||||||
transformers-cli convert --model_type t5 \
|
transformers-cli convert --model_type t5 \
|
||||||
--tf_checkpoint $T5/t5_model.ckpt \
|
--tf_checkpoint $T5/t5_model.ckpt \
|
||||||
--config $T5/t5_config.json \
|
--config $T5/t5_config.json \
|
||||||
--pytorch_dump_output $T5/pytorch_model.bin
|
--pytorch_dump_output $T5/pytorch_model.bin
|
||||||
|
|||||||
@@ -182,7 +182,7 @@ such:
|
|||||||
|
|
||||||
.. code-block::
|
.. code-block::
|
||||||
|
|
||||||
>>> # [CLS] SEQUENCE_A [SEP] SEQUENCE_B [SEP]
|
>>> # [CLS] SEQUENCE_A [SEP] SEQUENCE_B [SEP]
|
||||||
|
|
||||||
We can use our tokenizer to automatically generate such a sentence by passing the two sequences to ``tokenizer`` as two
|
We can use our tokenizer to automatically generate such a sentence by passing the two sequences to ``tokenizer`` as two
|
||||||
arguments (and not a list, like before) like this:
|
arguments (and not a list, like before) like this:
|
||||||
|
|||||||
@@ -293,33 +293,33 @@ with it, you may want to try one of:
|
|||||||
|
|
||||||
.. code-block:: bash
|
.. code-block:: bash
|
||||||
|
|
||||||
pip install fairscale --no-build-isolation .
|
pip install fairscale --no-build-isolation .
|
||||||
|
|
||||||
or:
|
or:
|
||||||
|
|
||||||
.. code-block:: bash
|
.. code-block:: bash
|
||||||
|
|
||||||
git clone https://github.com/facebookresearch/fairscale/
|
git clone https://github.com/facebookresearch/fairscale/
|
||||||
cd fairscale
|
cd fairscale
|
||||||
rm -r dist build
|
rm -r dist build
|
||||||
python setup.py bdist_wheel
|
python setup.py bdist_wheel
|
||||||
pip uninstall -y fairscale
|
pip uninstall -y fairscale
|
||||||
pip install dist/fairscale-*.whl
|
pip install dist/fairscale-*.whl
|
||||||
|
|
||||||
``fairscale`` also has issues with building against pytorch-nightly, so if you use it you may have to try one of:
|
``fairscale`` also has issues with building against pytorch-nightly, so if you use it you may have to try one of:
|
||||||
|
|
||||||
.. code-block:: bash
|
.. code-block:: bash
|
||||||
|
|
||||||
pip uninstall -y fairscale; pip install fairscale --pre \
|
pip uninstall -y fairscale; pip install fairscale --pre \
|
||||||
-f https://download.pytorch.org/whl/nightly/cu110/torch_nightly.html \
|
-f https://download.pytorch.org/whl/nightly/cu110/torch_nightly.html \
|
||||||
--no-cache --no-build-isolation
|
--no-cache --no-build-isolation
|
||||||
|
|
||||||
or:
|
or:
|
||||||
|
|
||||||
.. code-block:: bash
|
.. code-block:: bash
|
||||||
|
|
||||||
pip install -v --disable-pip-version-check . \
|
pip install -v --disable-pip-version-check . \
|
||||||
-f https://download.pytorch.org/whl/nightly/cu110/torch_nightly.html --pre
|
-f https://download.pytorch.org/whl/nightly/cu110/torch_nightly.html --pre
|
||||||
|
|
||||||
Of course, adjust the urls to match the cuda version you use.
|
Of course, adjust the urls to match the cuda version you use.
|
||||||
|
|
||||||
@@ -447,12 +447,12 @@ To make a local build for DeepSpeed:
|
|||||||
|
|
||||||
.. code-block:: bash
|
.. code-block:: bash
|
||||||
|
|
||||||
git clone https://github.com/microsoft/DeepSpeed/
|
git clone https://github.com/microsoft/DeepSpeed/
|
||||||
cd DeepSpeed
|
cd DeepSpeed
|
||||||
rm -rf build
|
rm -rf build
|
||||||
TORCH_CUDA_ARCH_LIST="6.1;8.6" DS_BUILD_OPS=1 pip install . \
|
TORCH_CUDA_ARCH_LIST="6.1;8.6" DS_BUILD_OPS=1 pip install . \
|
||||||
--global-option="build_ext" --global-option="-j8" --no-cache -v \
|
--global-option="build_ext" --global-option="-j8" --no-cache -v \
|
||||||
--disable-pip-version-check 2>&1 | tee build.log
|
--disable-pip-version-check 2>&1 | tee build.log
|
||||||
|
|
||||||
Edit ``TORCH_CUDA_ARCH_LIST`` to insert the code for the architectures of the GPU cards you intend to use.
|
Edit ``TORCH_CUDA_ARCH_LIST`` to insert the code for the architectures of the GPU cards you intend to use.
|
||||||
|
|
||||||
@@ -460,11 +460,11 @@ Or if you need to use the same setup on multiple machines, make a binary wheel:
|
|||||||
|
|
||||||
.. code-block:: bash
|
.. code-block:: bash
|
||||||
|
|
||||||
git clone https://github.com/microsoft/DeepSpeed/
|
git clone https://github.com/microsoft/DeepSpeed/
|
||||||
cd DeepSpeed
|
cd DeepSpeed
|
||||||
rm -rf build
|
rm -rf build
|
||||||
TORCH_CUDA_ARCH_LIST="6.1;8.6" DS_BUILD_OPS=1 \
|
TORCH_CUDA_ARCH_LIST="6.1;8.6" DS_BUILD_OPS=1 \
|
||||||
python setup.py build_ext -j8 bdist_wheel
|
python setup.py build_ext -j8 bdist_wheel
|
||||||
|
|
||||||
it will generate something like ``dist/deepspeed-0.3.13+8cd046f-cp38-cp38-linux_x86_64.whl`` which now you can install
|
it will generate something like ``dist/deepspeed-0.3.13+8cd046f-cp38-cp38-linux_x86_64.whl`` which now you can install
|
||||||
as ``pip install deepspeed-0.3.13+8cd046f-cp38-cp38-linux_x86_64.whl`` locally or on any other machine.
|
as ``pip install deepspeed-0.3.13+8cd046f-cp38-cp38-linux_x86_64.whl`` locally or on any other machine.
|
||||||
@@ -478,20 +478,20 @@ You can check the archs pytorch was built with using:
|
|||||||
|
|
||||||
.. code-block:: bash
|
.. code-block:: bash
|
||||||
|
|
||||||
python -c "import torch; print(torch.cuda.get_arch_list())"
|
python -c "import torch; print(torch.cuda.get_arch_list())"
|
||||||
|
|
||||||
Here is how to find out the arch for one of the installed GPUs. For example, for GPU 0:
|
Here is how to find out the arch for one of the installed GPUs. For example, for GPU 0:
|
||||||
|
|
||||||
.. code-block:: bash
|
.. code-block:: bash
|
||||||
|
|
||||||
CUDA_VISIBLE_DEVICES=0 python -c "import torch; \
|
CUDA_VISIBLE_DEVICES=0 python -c "import torch; \
|
||||||
print(torch.cuda.get_device_properties(torch.device('cuda')))"
|
print(torch.cuda.get_device_properties(torch.device('cuda')))"
|
||||||
|
|
||||||
If the output is:
|
If the output is:
|
||||||
|
|
||||||
.. code-block:: bash
|
.. code-block:: bash
|
||||||
|
|
||||||
_CudaDeviceProperties(name='GeForce RTX 3090', major=8, minor=6, total_memory=24268MB, multi_processor_count=82)
|
_CudaDeviceProperties(name='GeForce RTX 3090', major=8, minor=6, total_memory=24268MB, multi_processor_count=82)
|
||||||
|
|
||||||
then you know that this card's arch is ``8.6``.
|
then you know that this card's arch is ``8.6``.
|
||||||
|
|
||||||
@@ -591,18 +591,18 @@ with DeepSpeed is to have at least the following configuration in the configurat
|
|||||||
|
|
||||||
.. code-block:: json
|
.. code-block:: json
|
||||||
|
|
||||||
{
|
{
|
||||||
"zero_optimization": {
|
"zero_optimization": {
|
||||||
"stage": 2,
|
"stage": 2,
|
||||||
"allgather_partitions": true,
|
"allgather_partitions": true,
|
||||||
"allgather_bucket_size": 2e8,
|
"allgather_bucket_size": 2e8,
|
||||||
"reduce_scatter": true,
|
"reduce_scatter": true,
|
||||||
"reduce_bucket_size": 2e8,
|
"reduce_bucket_size": 2e8,
|
||||||
"overlap_comm": true,
|
"overlap_comm": true,
|
||||||
"contiguous_gradients": true,
|
"contiguous_gradients": true,
|
||||||
"cpu_offload": true
|
"cpu_offload": true
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
which enables ``cpu_offload`` and some other important features. You may experiment with the buffer sizes, you will
|
which enables ``cpu_offload`` and some other important features. You may experiment with the buffer sizes, you will
|
||||||
find more details in the discussion below.
|
find more details in the discussion below.
|
||||||
@@ -710,18 +710,18 @@ shell from a cell. For example, to use ``run_translation.py`` you would launch i
|
|||||||
|
|
||||||
.. code-block::
|
.. code-block::
|
||||||
|
|
||||||
!git clone https://github.com/huggingface/transformers
|
!git clone https://github.com/huggingface/transformers
|
||||||
!cd transformers; deepspeed examples/seq2seq/run_translation.py ...
|
!cd transformers; deepspeed examples/seq2seq/run_translation.py ...
|
||||||
|
|
||||||
or with ``%%bash`` magic, where you can write multi-line code for the shell program to run:
|
or with ``%%bash`` magic, where you can write multi-line code for the shell program to run:
|
||||||
|
|
||||||
.. code-block::
|
.. code-block::
|
||||||
|
|
||||||
%%bash
|
%%bash
|
||||||
|
|
||||||
git clone https://github.com/huggingface/transformers
|
git clone https://github.com/huggingface/transformers
|
||||||
cd transformers
|
cd transformers
|
||||||
deepspeed examples/seq2seq/run_translation.py ...
|
deepspeed examples/seq2seq/run_translation.py ...
|
||||||
|
|
||||||
In such a case you don't need any of the code presented at the beginning of this section.
|
In such a case you don't need any of the code presented at the beginning of this section.
|
||||||
|
|
||||||
@@ -743,16 +743,16 @@ repo <https://github.com/microsoft/DeepSpeedExamples>`__:
|
|||||||
|
|
||||||
.. code-block:: bash
|
.. code-block:: bash
|
||||||
|
|
||||||
git clone https://github.com/microsoft/DeepSpeedExamples
|
git clone https://github.com/microsoft/DeepSpeedExamples
|
||||||
cd DeepSpeedExamples
|
cd DeepSpeedExamples
|
||||||
find . -name '*json'
|
find . -name '*json'
|
||||||
|
|
||||||
Continuing the code from above, let's say you're looking to configure the Lamb optimizer. So you can search through the
|
Continuing the code from above, let's say you're looking to configure the Lamb optimizer. So you can search through the
|
||||||
example ``.json`` files with:
|
example ``.json`` files with:
|
||||||
|
|
||||||
.. code-block:: bash
|
.. code-block:: bash
|
||||||
|
|
||||||
grep -i Lamb $(find . -name '*json')
|
grep -i Lamb $(find . -name '*json')
|
||||||
|
|
||||||
Some more examples are to be found in the `main repo <https://github.com/microsoft/DeepSpeed>`__ as well.
|
Some more examples are to be found in the `main repo <https://github.com/microsoft/DeepSpeed>`__ as well.
|
||||||
|
|
||||||
@@ -1020,49 +1020,49 @@ Here is a full ZeRO-2 all-enabled configuration file ``ds_config_zero2.json``:
|
|||||||
|
|
||||||
.. code-block:: json
|
.. code-block:: json
|
||||||
|
|
||||||
{
|
{
|
||||||
"fp16": {
|
"fp16": {
|
||||||
"enabled": true,
|
"enabled": true,
|
||||||
"loss_scale": 0,
|
"loss_scale": 0,
|
||||||
"loss_scale_window": 1000,
|
"loss_scale_window": 1000,
|
||||||
"initial_scale_power": 16,
|
"initial_scale_power": 16,
|
||||||
"hysteresis": 2,
|
"hysteresis": 2,
|
||||||
"min_loss_scale": 1
|
"min_loss_scale": 1
|
||||||
},
|
},
|
||||||
|
|
||||||
"zero_optimization": {
|
"zero_optimization": {
|
||||||
"stage": 2,
|
"stage": 2,
|
||||||
"allgather_partitions": true,
|
"allgather_partitions": true,
|
||||||
"allgather_bucket_size": 2e8,
|
"allgather_bucket_size": 2e8,
|
||||||
"overlap_comm": true,
|
"overlap_comm": true,
|
||||||
"reduce_scatter": true,
|
"reduce_scatter": true,
|
||||||
"reduce_bucket_size": 2e8,
|
"reduce_bucket_size": 2e8,
|
||||||
"contiguous_gradients": true,
|
"contiguous_gradients": true,
|
||||||
"cpu_offload": true
|
"cpu_offload": true
|
||||||
},
|
},
|
||||||
|
|
||||||
"optimizer": {
|
"optimizer": {
|
||||||
"type": "AdamW",
|
"type": "AdamW",
|
||||||
"params": {
|
"params": {
|
||||||
"lr": 3e-5,
|
"lr": 3e-5,
|
||||||
"betas": [0.8, 0.999],
|
"betas": [0.8, 0.999],
|
||||||
"eps": 1e-8,
|
"eps": 1e-8,
|
||||||
"weight_decay": 3e-7
|
"weight_decay": 3e-7
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
|
||||||
"scheduler": {
|
"scheduler": {
|
||||||
"type": "WarmupLR",
|
"type": "WarmupLR",
|
||||||
"params": {
|
"params": {
|
||||||
"warmup_min_lr": 0,
|
"warmup_min_lr": 0,
|
||||||
"warmup_max_lr": 3e-5,
|
"warmup_max_lr": 3e-5,
|
||||||
"warmup_num_steps": 500
|
"warmup_num_steps": 500
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
|
||||||
"steps_per_print": 2000,
|
"steps_per_print": 2000,
|
||||||
"wall_clock_breakdown": false
|
"wall_clock_breakdown": false
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@@ -1073,54 +1073,54 @@ Here is a full ZeRO-3 all-enabled configuration file ``ds_config_zero3.json``:
|
|||||||
|
|
||||||
.. code-block:: json
|
.. code-block:: json
|
||||||
|
|
||||||
{
|
{
|
||||||
"fp16": {
|
"fp16": {
|
||||||
"enabled": true,
|
"enabled": true,
|
||||||
"loss_scale": 0,
|
"loss_scale": 0,
|
||||||
"loss_scale_window": 1000,
|
"loss_scale_window": 1000,
|
||||||
"initial_scale_power": 16,
|
"initial_scale_power": 16,
|
||||||
"hysteresis": 2,
|
"hysteresis": 2,
|
||||||
"min_loss_scale": 1
|
"min_loss_scale": 1
|
||||||
},
|
},
|
||||||
|
|
||||||
"zero_optimization": {
|
"zero_optimization": {
|
||||||
"stage": 3,
|
"stage": 3,
|
||||||
"cpu_offload": true,
|
"cpu_offload": true,
|
||||||
"cpu_offload_params": true,
|
"cpu_offload_params": true,
|
||||||
"cpu_offload_use_pin_memory" : true,
|
"cpu_offload_use_pin_memory" : true,
|
||||||
"overlap_comm": true,
|
"overlap_comm": true,
|
||||||
"contiguous_gradients": true,
|
"contiguous_gradients": true,
|
||||||
"sub_group_size": 1e14,
|
"sub_group_size": 1e14,
|
||||||
"reduce_bucket_size": 1e6,
|
"reduce_bucket_size": 1e6,
|
||||||
"stage3_prefetch_bucket_size": 0.94e6,
|
"stage3_prefetch_bucket_size": 0.94e6,
|
||||||
"stage3_param_persistence_threshold": 1e4,
|
"stage3_param_persistence_threshold": 1e4,
|
||||||
"stage3_max_live_parameters": 1e9,
|
"stage3_max_live_parameters": 1e9,
|
||||||
"stage3_max_reuse_distance": 1e9,
|
"stage3_max_reuse_distance": 1e9,
|
||||||
"stage3_gather_fp16_weights_on_model_save": true
|
"stage3_gather_fp16_weights_on_model_save": true
|
||||||
},
|
},
|
||||||
|
|
||||||
"optimizer": {
|
"optimizer": {
|
||||||
"type": "AdamW",
|
"type": "AdamW",
|
||||||
"params": {
|
"params": {
|
||||||
"lr": 3e-5,
|
"lr": 3e-5,
|
||||||
"betas": [0.8, 0.999],
|
"betas": [0.8, 0.999],
|
||||||
"eps": 1e-8,
|
"eps": 1e-8,
|
||||||
"weight_decay": 3e-7
|
"weight_decay": 3e-7
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
|
||||||
"scheduler": {
|
"scheduler": {
|
||||||
"type": "WarmupLR",
|
"type": "WarmupLR",
|
||||||
"params": {
|
"params": {
|
||||||
"warmup_min_lr": 0,
|
"warmup_min_lr": 0,
|
||||||
"warmup_max_lr": 3e-5,
|
"warmup_max_lr": 3e-5,
|
||||||
"warmup_num_steps": 500
|
"warmup_num_steps": 500
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
|
||||||
"steps_per_print": 2000,
|
"steps_per_print": 2000,
|
||||||
"wall_clock_breakdown": false
|
"wall_clock_breakdown": false
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
Optimizer and Scheduler
|
Optimizer and Scheduler
|
||||||
@@ -1367,26 +1367,26 @@ Let's say your checkpoint folder looks like this:
|
|||||||
|
|
||||||
.. code-block:: bash
|
.. code-block:: bash
|
||||||
|
|
||||||
$ ls -l output_dir/checkpoint-1/
|
$ ls -l output_dir/checkpoint-1/
|
||||||
-rw-rw-r-- 1 stas stas 1.4K Mar 27 20:42 config.json
|
-rw-rw-r-- 1 stas stas 1.4K Mar 27 20:42 config.json
|
||||||
drwxrwxr-x 2 stas stas 4.0K Mar 25 19:52 global_step1/
|
drwxrwxr-x 2 stas stas 4.0K Mar 25 19:52 global_step1/
|
||||||
-rw-rw-r-- 1 stas stas 12 Mar 27 13:16 latest
|
-rw-rw-r-- 1 stas stas 12 Mar 27 13:16 latest
|
||||||
-rw-rw-r-- 1 stas stas 827K Mar 27 20:42 optimizer.pt
|
-rw-rw-r-- 1 stas stas 827K Mar 27 20:42 optimizer.pt
|
||||||
-rw-rw-r-- 1 stas stas 231M Mar 27 20:42 pytorch_model.bin
|
-rw-rw-r-- 1 stas stas 231M Mar 27 20:42 pytorch_model.bin
|
||||||
-rw-rw-r-- 1 stas stas 623 Mar 27 20:42 scheduler.pt
|
-rw-rw-r-- 1 stas stas 623 Mar 27 20:42 scheduler.pt
|
||||||
-rw-rw-r-- 1 stas stas 1.8K Mar 27 20:42 special_tokens_map.json
|
-rw-rw-r-- 1 stas stas 1.8K Mar 27 20:42 special_tokens_map.json
|
||||||
-rw-rw-r-- 1 stas stas 774K Mar 27 20:42 spiece.model
|
-rw-rw-r-- 1 stas stas 774K Mar 27 20:42 spiece.model
|
||||||
-rw-rw-r-- 1 stas stas 1.9K Mar 27 20:42 tokenizer_config.json
|
-rw-rw-r-- 1 stas stas 1.9K Mar 27 20:42 tokenizer_config.json
|
||||||
-rw-rw-r-- 1 stas stas 339 Mar 27 20:42 trainer_state.json
|
-rw-rw-r-- 1 stas stas 339 Mar 27 20:42 trainer_state.json
|
||||||
-rw-rw-r-- 1 stas stas 2.3K Mar 27 20:42 training_args.bin
|
-rw-rw-r-- 1 stas stas 2.3K Mar 27 20:42 training_args.bin
|
||||||
-rwxrw-r-- 1 stas stas 5.5K Mar 27 13:16 zero_to_fp32.py*
|
-rwxrw-r-- 1 stas stas 5.5K Mar 27 13:16 zero_to_fp32.py*
|
||||||
|
|
||||||
In this example there is just one DeepSpeed checkpoint sub-folder ``global_step1``. Therefore, to reconstruct the fp32
|
In this example there is just one DeepSpeed checkpoint sub-folder ``global_step1``. Therefore, to reconstruct the fp32
|
||||||
weights just run:
|
weights just run:
|
||||||
|
|
||||||
.. code-block:: bash
|
.. code-block:: bash
|
||||||
|
|
||||||
python zero_to_fp32.py global_step1 pytorch_model.bin
|
python zero_to_fp32.py global_step1 pytorch_model.bin
|
||||||
|
|
||||||
The script will automatically handle either a ZeRO-2 or a ZeRO-3 checkpoint.
|
The script will automatically handle either a ZeRO-2 or a ZeRO-3 checkpoint.
|
||||||
|
|
||||||
@@ -1416,18 +1416,18 @@ be seen in the following example:
|
|||||||
|
|
||||||
.. code-block:: python
|
.. code-block:: python
|
||||||
|
|
||||||
class ModuleZ3(torch.nn.Module):
|
class ModuleZ3(torch.nn.Module):
|
||||||
def __init__(self, *args):
|
def __init__(self, *args):
|
||||||
super().__init__(*args)
|
super().__init__(*args)
|
||||||
self.layer1 = SomeLayer()
|
self.layer1 = SomeLayer()
|
||||||
self.layer2 = OtherLayer()
|
self.layer2 = OtherLayer()
|
||||||
deepspeed.zero.register_external_parameter(self, self.layer1.weight)
|
deepspeed.zero.register_external_parameter(self, self.layer1.weight)
|
||||||
|
|
||||||
def forward(self, input):
|
def forward(self, input):
|
||||||
x = self.layer1(input)
|
x = self.layer1(input)
|
||||||
# self.layer1.weight is needed in ModuleZ3.forward
|
# self.layer1.weight is needed in ModuleZ3.forward
|
||||||
y = self.layer2(x, self.layer1.weight)
|
y = self.layer2(x, self.layer1.weight)
|
||||||
return y
|
return y
|
||||||
|
|
||||||
In general ``transformers`` models don't use this style of referring to other layer's weights so most likely you won't
|
In general ``transformers`` models don't use this style of referring to other layer's weights so most likely you won't
|
||||||
need to use it.
|
need to use it.
|
||||||
@@ -1494,7 +1494,7 @@ Also under ZeRO-3, if you write your own code and run into a model parameter wei
|
|||||||
|
|
||||||
.. code-block:: python
|
.. code-block:: python
|
||||||
|
|
||||||
tensor([1.], device='cuda:0', dtype=torch.float16, requires_grad=True)
|
tensor([1.], device='cuda:0', dtype=torch.float16, requires_grad=True)
|
||||||
|
|
||||||
stress on ``tensor([1.])``, or if you get an error where it says the parameter is of size ``1``, instead of some much
|
stress on ``tensor([1.])``, or if you get an error where it says the parameter is of size ``1``, instead of some much
|
||||||
larger multi-dimensional shape, this means that the parameter is partitioned and what you see is a ZeRO-3 placeholder.
|
larger multi-dimensional shape, this means that the parameter is partitioned and what you see is a ZeRO-3 placeholder.
|
||||||
|
|||||||
@@ -33,38 +33,38 @@ Example of using a model with MeCab and WordPiece tokenization:
|
|||||||
|
|
||||||
.. code-block::
|
.. code-block::
|
||||||
|
|
||||||
>>> import torch
|
>>> import torch
|
||||||
>>> from transformers import AutoModel, AutoTokenizer
|
>>> from transformers import AutoModel, AutoTokenizer
|
||||||
|
|
||||||
>>> bertjapanese = AutoModel.from_pretrained("cl-tohoku/bert-base-japanese")
|
>>> bertjapanese = AutoModel.from_pretrained("cl-tohoku/bert-base-japanese")
|
||||||
>>> tokenizer = AutoTokenizer.from_pretrained("cl-tohoku/bert-base-japanese")
|
>>> tokenizer = AutoTokenizer.from_pretrained("cl-tohoku/bert-base-japanese")
|
||||||
|
|
||||||
>>> ## Input Japanese Text
|
>>> ## Input Japanese Text
|
||||||
>>> line = "吾輩は猫である。"
|
>>> line = "吾輩は猫である。"
|
||||||
|
|
||||||
>>> inputs = tokenizer(line, return_tensors="pt")
|
>>> inputs = tokenizer(line, return_tensors="pt")
|
||||||
|
|
||||||
>>> print(tokenizer.decode(inputs['input_ids'][0]))
|
>>> print(tokenizer.decode(inputs['input_ids'][0]))
|
||||||
[CLS] 吾輩 は 猫 で ある 。 [SEP]
|
[CLS] 吾輩 は 猫 で ある 。 [SEP]
|
||||||
|
|
||||||
>>> outputs = bertjapanese(**inputs)
|
>>> outputs = bertjapanese(**inputs)
|
||||||
|
|
||||||
Example of using a model with Character tokenization:
|
Example of using a model with Character tokenization:
|
||||||
|
|
||||||
.. code-block::
|
.. code-block::
|
||||||
|
|
||||||
>>> bertjapanese = AutoModel.from_pretrained("cl-tohoku/bert-base-japanese-char")
|
>>> bertjapanese = AutoModel.from_pretrained("cl-tohoku/bert-base-japanese-char")
|
||||||
>>> tokenizer = AutoTokenizer.from_pretrained("cl-tohoku/bert-base-japanese-char")
|
>>> tokenizer = AutoTokenizer.from_pretrained("cl-tohoku/bert-base-japanese-char")
|
||||||
|
|
||||||
>>> ## Input Japanese Text
|
>>> ## Input Japanese Text
|
||||||
>>> line = "吾輩は猫である。"
|
>>> line = "吾輩は猫である。"
|
||||||
|
|
||||||
>>> inputs = tokenizer(line, return_tensors="pt")
|
>>> inputs = tokenizer(line, return_tensors="pt")
|
||||||
|
|
||||||
>>> print(tokenizer.decode(inputs['input_ids'][0]))
|
>>> print(tokenizer.decode(inputs['input_ids'][0]))
|
||||||
[CLS] 吾 輩 は 猫 で あ る 。 [SEP]
|
[CLS] 吾 輩 は 猫 で あ る 。 [SEP]
|
||||||
|
|
||||||
>>> outputs = bertjapanese(**inputs)
|
>>> outputs = bertjapanese(**inputs)
|
||||||
|
|
||||||
Tips:
|
Tips:
|
||||||
|
|
||||||
|
|||||||
@@ -38,22 +38,22 @@ Usage:
|
|||||||
|
|
||||||
.. code-block::
|
.. code-block::
|
||||||
|
|
||||||
# leverage checkpoints for Bert2Bert model...
|
# leverage checkpoints for Bert2Bert model...
|
||||||
# use BERT's cls token as BOS token and sep token as EOS token
|
# use BERT's cls token as BOS token and sep token as EOS token
|
||||||
encoder = BertGenerationEncoder.from_pretrained("bert-large-uncased", bos_token_id=101, eos_token_id=102)
|
encoder = BertGenerationEncoder.from_pretrained("bert-large-uncased", bos_token_id=101, eos_token_id=102)
|
||||||
# add cross attention layers and use BERT's cls token as BOS token and sep token as EOS token
|
# add cross attention layers and use BERT's cls token as BOS token and sep token as EOS token
|
||||||
decoder = BertGenerationDecoder.from_pretrained("bert-large-uncased", add_cross_attention=True, is_decoder=True, bos_token_id=101, eos_token_id=102)
|
decoder = BertGenerationDecoder.from_pretrained("bert-large-uncased", add_cross_attention=True, is_decoder=True, bos_token_id=101, eos_token_id=102)
|
||||||
bert2bert = EncoderDecoderModel(encoder=encoder, decoder=decoder)
|
bert2bert = EncoderDecoderModel(encoder=encoder, decoder=decoder)
|
||||||
|
|
||||||
# create tokenizer...
|
# create tokenizer...
|
||||||
tokenizer = BertTokenizer.from_pretrained("bert-large-uncased")
|
tokenizer = BertTokenizer.from_pretrained("bert-large-uncased")
|
||||||
|
|
||||||
input_ids = tokenizer('This is a long article to summarize', add_special_tokens=False, return_tensors="pt").input_ids
|
input_ids = tokenizer('This is a long article to summarize', add_special_tokens=False, return_tensors="pt").input_ids
|
||||||
labels = tokenizer('This is a short summary', return_tensors="pt").input_ids
|
labels = tokenizer('This is a short summary', return_tensors="pt").input_ids
|
||||||
|
|
||||||
# train...
|
# train...
|
||||||
loss = bert2bert(input_ids=input_ids, decoder_input_ids=labels, labels=labels).loss
|
loss = bert2bert(input_ids=input_ids, decoder_input_ids=labels, labels=labels).loss
|
||||||
loss.backward()
|
loss.backward()
|
||||||
|
|
||||||
|
|
||||||
- Pretrained :class:`~transformers.EncoderDecoderModel` are also directly available in the model hub, e.g.,
|
- Pretrained :class:`~transformers.EncoderDecoderModel` are also directly available in the model hub, e.g.,
|
||||||
@@ -61,15 +61,15 @@ Usage:
|
|||||||
|
|
||||||
.. code-block::
|
.. code-block::
|
||||||
|
|
||||||
# instantiate sentence fusion model
|
# instantiate sentence fusion model
|
||||||
sentence_fuser = EncoderDecoderModel.from_pretrained("google/roberta2roberta_L-24_discofuse")
|
sentence_fuser = EncoderDecoderModel.from_pretrained("google/roberta2roberta_L-24_discofuse")
|
||||||
tokenizer = AutoTokenizer.from_pretrained("google/roberta2roberta_L-24_discofuse")
|
tokenizer = AutoTokenizer.from_pretrained("google/roberta2roberta_L-24_discofuse")
|
||||||
|
|
||||||
input_ids = tokenizer('This is the first sentence. This is the second sentence.', add_special_tokens=False, return_tensors="pt").input_ids
|
input_ids = tokenizer('This is the first sentence. This is the second sentence.', add_special_tokens=False, return_tensors="pt").input_ids
|
||||||
|
|
||||||
outputs = sentence_fuser.generate(input_ids)
|
outputs = sentence_fuser.generate(input_ids)
|
||||||
|
|
||||||
print(tokenizer.decode(outputs[0]))
|
print(tokenizer.decode(outputs[0]))
|
||||||
|
|
||||||
|
|
||||||
Tips:
|
Tips:
|
||||||
|
|||||||
@@ -31,28 +31,28 @@ Example of use:
|
|||||||
|
|
||||||
.. code-block::
|
.. code-block::
|
||||||
|
|
||||||
import torch
|
import torch
|
||||||
from transformers import AutoModel, AutoTokenizer
|
from transformers import AutoModel, AutoTokenizer
|
||||||
|
|
||||||
bertweet = AutoModel.from_pretrained("vinai/bertweet-base")
|
bertweet = AutoModel.from_pretrained("vinai/bertweet-base")
|
||||||
|
|
||||||
# For transformers v4.x+:
|
# For transformers v4.x+:
|
||||||
tokenizer = AutoTokenizer.from_pretrained("vinai/bertweet-base", use_fast=False)
|
tokenizer = AutoTokenizer.from_pretrained("vinai/bertweet-base", use_fast=False)
|
||||||
|
|
||||||
# For transformers v3.x:
|
# For transformers v3.x:
|
||||||
# tokenizer = AutoTokenizer.from_pretrained("vinai/bertweet-base")
|
# tokenizer = AutoTokenizer.from_pretrained("vinai/bertweet-base")
|
||||||
|
|
||||||
# INPUT TWEET IS ALREADY NORMALIZED!
|
# INPUT TWEET IS ALREADY NORMALIZED!
|
||||||
line = "SC has first two presumptive cases of coronavirus , DHEC confirms HTTPURL via @USER :cry:"
|
line = "SC has first two presumptive cases of coronavirus , DHEC confirms HTTPURL via @USER :cry:"
|
||||||
|
|
||||||
input_ids = torch.tensor([tokenizer.encode(line)])
|
input_ids = torch.tensor([tokenizer.encode(line)])
|
||||||
|
|
||||||
with torch.no_grad():
|
with torch.no_grad():
|
||||||
features = bertweet(input_ids) # Model outputs are now tuples
|
features = bertweet(input_ids) # Model outputs are now tuples
|
||||||
|
|
||||||
## With TensorFlow 2.0+:
|
## With TensorFlow 2.0+:
|
||||||
# from transformers import TFAutoModel
|
# from transformers import TFAutoModel
|
||||||
# bertweet = TFAutoModel.from_pretrained("vinai/bertweet-base")
|
# bertweet = TFAutoModel.from_pretrained("vinai/bertweet-base")
|
||||||
|
|
||||||
|
|
||||||
The original code can be found `here <https://github.com/VinAIResearch/BERTweet>`__.
|
The original code can be found `here <https://github.com/VinAIResearch/BERTweet>`__.
|
||||||
|
|||||||
@@ -40,20 +40,20 @@ Examples of use:
|
|||||||
|
|
||||||
.. code-block::
|
.. code-block::
|
||||||
|
|
||||||
from transformers import HerbertTokenizer, RobertaModel
|
from transformers import HerbertTokenizer, RobertaModel
|
||||||
|
|
||||||
tokenizer = HerbertTokenizer.from_pretrained("allegro/herbert-klej-cased-tokenizer-v1")
|
tokenizer = HerbertTokenizer.from_pretrained("allegro/herbert-klej-cased-tokenizer-v1")
|
||||||
model = RobertaModel.from_pretrained("allegro/herbert-klej-cased-v1")
|
model = RobertaModel.from_pretrained("allegro/herbert-klej-cased-v1")
|
||||||
|
|
||||||
encoded_input = tokenizer.encode("Kto ma lepszą sztukę, ma lepszy rząd – to jasne.", return_tensors='pt')
|
encoded_input = tokenizer.encode("Kto ma lepszą sztukę, ma lepszy rząd – to jasne.", return_tensors='pt')
|
||||||
outputs = model(encoded_input)
|
outputs = model(encoded_input)
|
||||||
|
|
||||||
# HerBERT can also be loaded using AutoTokenizer and AutoModel:
|
# HerBERT can also be loaded using AutoTokenizer and AutoModel:
|
||||||
import torch
|
import torch
|
||||||
from transformers import AutoModel, AutoTokenizer
|
from transformers import AutoModel, AutoTokenizer
|
||||||
|
|
||||||
tokenizer = AutoTokenizer.from_pretrained("allegro/herbert-klej-cased-tokenizer-v1")
|
tokenizer = AutoTokenizer.from_pretrained("allegro/herbert-klej-cased-tokenizer-v1")
|
||||||
model = AutoModel.from_pretrained("allegro/herbert-klej-cased-v1")
|
model = AutoModel.from_pretrained("allegro/herbert-klej-cased-v1")
|
||||||
|
|
||||||
|
|
||||||
The original code can be found `here <https://github.com/allegro/HerBERT>`__.
|
The original code can be found `here <https://github.com/allegro/HerBERT>`__.
|
||||||
|
|||||||
@@ -56,24 +56,24 @@ Tips:
|
|||||||
|
|
||||||
.. code-block::
|
.. code-block::
|
||||||
|
|
||||||
def normalize_bbox(bbox, width, height):
|
def normalize_bbox(bbox, width, height):
|
||||||
return [
|
return [
|
||||||
int(1000 * (bbox[0] / width)),
|
int(1000 * (bbox[0] / width)),
|
||||||
int(1000 * (bbox[1] / height)),
|
int(1000 * (bbox[1] / height)),
|
||||||
int(1000 * (bbox[2] / width)),
|
int(1000 * (bbox[2] / width)),
|
||||||
int(1000 * (bbox[3] / height)),
|
int(1000 * (bbox[3] / height)),
|
||||||
]
|
]
|
||||||
|
|
||||||
Here, :obj:`width` and :obj:`height` correspond to the width and height of the original document in which the token
|
Here, :obj:`width` and :obj:`height` correspond to the width and height of the original document in which the token
|
||||||
occurs. Those can be obtained using the Python Imaging Library (PIL), for example, as follows:
|
occurs. Those can be obtained using the Python Imaging Library (PIL), for example, as follows:
|
||||||
|
|
||||||
.. code-block::
|
.. code-block::
|
||||||
|
|
||||||
from PIL import Image
|
from PIL import Image
|
||||||
|
|
||||||
image = Image.open("name_of_your_document - can be a png file, pdf, etc.")
|
image = Image.open("name_of_your_document - can be a png file, pdf, etc.")
|
||||||
|
|
||||||
width, height = image.size
|
width, height = image.size
|
||||||
|
|
||||||
- For a demo which shows how to fine-tune :class:`LayoutLMForTokenClassification` on the `FUNSD dataset
|
- For a demo which shows how to fine-tune :class:`LayoutLMForTokenClassification` on the `FUNSD dataset
|
||||||
<https://guillaumejaume.github.io/FUNSD/>`__ (a collection of annotated forms), see `this notebook
|
<https://guillaumejaume.github.io/FUNSD/>`__ (a collection of annotated forms), see `this notebook
|
||||||
|
|||||||
@@ -53,15 +53,15 @@ BERT-345M-uncased::
|
|||||||
|
|
||||||
.. code-block:: bash
|
.. code-block:: bash
|
||||||
|
|
||||||
wget --content-disposition https://api.ngc.nvidia.com/v2/models/nvidia/megatron_bert_345m/versions/v0.1_uncased/zip
|
wget --content-disposition https://api.ngc.nvidia.com/v2/models/nvidia/megatron_bert_345m/versions/v0.1_uncased/zip
|
||||||
-O megatron_bert_345m_v0_1_uncased.zip
|
-O megatron_bert_345m_v0_1_uncased.zip
|
||||||
|
|
||||||
BERT-345M-cased::
|
BERT-345M-cased::
|
||||||
|
|
||||||
.. code-block:: bash
|
.. code-block:: bash
|
||||||
|
|
||||||
wget --content-disposition https://api.ngc.nvidia.com/v2/models/nvidia/megatron_bert_345m/versions/v0.1_cased/zip -O
|
wget --content-disposition https://api.ngc.nvidia.com/v2/models/nvidia/megatron_bert_345m/versions/v0.1_cased/zip -O
|
||||||
megatron_bert_345m_v0_1_cased.zip
|
megatron_bert_345m_v0_1_cased.zip
|
||||||
|
|
||||||
Once you have obtained the checkpoints from NVIDIA GPU Cloud (NGC), you have to convert them to a format that will
|
Once you have obtained the checkpoints from NVIDIA GPU Cloud (NGC), you have to convert them to a format that will
|
||||||
easily be loaded by Hugging Face Transformers and our port of the BERT code.
|
easily be loaded by Hugging Face Transformers and our port of the BERT code.
|
||||||
@@ -71,11 +71,11 @@ The following commands allow you to do the conversion. We assume that the folder
|
|||||||
|
|
||||||
.. code-block:: bash
|
.. code-block:: bash
|
||||||
|
|
||||||
python3 $PATH_TO_TRANSFORMERS/models/megatron_bert/convert_megatron_bert_checkpoint.py megatron_bert_345m_v0_1_uncased.zip
|
python3 $PATH_TO_TRANSFORMERS/models/megatron_bert/convert_megatron_bert_checkpoint.py megatron_bert_345m_v0_1_uncased.zip
|
||||||
|
|
||||||
.. code-block:: bash
|
.. code-block:: bash
|
||||||
|
|
||||||
python3 $PATH_TO_TRANSFORMERS/models/megatron_bert/convert_megatron_bert_checkpoint.py megatron_bert_345m_v0_1_cased.zip
|
python3 $PATH_TO_TRANSFORMERS/models/megatron_bert/convert_megatron_bert_checkpoint.py megatron_bert_345m_v0_1_cased.zip
|
||||||
|
|
||||||
The original code can be found `here <https://github.com/NVIDIA/Megatron-LM>`__. That repository contains a multi-GPU
|
The original code can be found `here <https://github.com/NVIDIA/Megatron-LM>`__. That repository contains a multi-GPU
|
||||||
and multi-node implementation of the Megatron Language models. In particular, it contains a hybrid model parallel
|
and multi-node implementation of the Megatron Language models. In particular, it contains a hybrid model parallel
|
||||||
|
|||||||
@@ -51,8 +51,8 @@ Alternatively, you can directly download the checkpoints using::
|
|||||||
|
|
||||||
.. code-block:: bash
|
.. code-block:: bash
|
||||||
|
|
||||||
wget --content-disposition https://api.ngc.nvidia.com/v2/models/nvidia/megatron_lm_345m/versions/v0.0/zip -O
|
wget --content-disposition https://api.ngc.nvidia.com/v2/models/nvidia/megatron_lm_345m/versions/v0.0/zip -O
|
||||||
megatron_gpt2_345m_v0_0.zip
|
megatron_gpt2_345m_v0_0.zip
|
||||||
|
|
||||||
Once you have obtained the checkpoint from NVIDIA GPU Cloud (NGC), you have to convert it to a format that will easily
|
Once you have obtained the checkpoint from NVIDIA GPU Cloud (NGC), you have to convert it to a format that will easily
|
||||||
be loaded by Hugging Face Transformers GPT2 implementation.
|
be loaded by Hugging Face Transformers GPT2 implementation.
|
||||||
@@ -62,7 +62,7 @@ The following command allows you to do the conversion. We assume that the folder
|
|||||||
|
|
||||||
.. code-block:: bash
|
.. code-block:: bash
|
||||||
|
|
||||||
python3 $PATH_TO_TRANSFORMERS/models/megatron_gpt2/convert_megatron_gpt2_checkpoint.py megatron_gpt2_345m_v0_0.zip
|
python3 $PATH_TO_TRANSFORMERS/models/megatron_gpt2/convert_megatron_gpt2_checkpoint.py megatron_gpt2_345m_v0_0.zip
|
||||||
|
|
||||||
The original code can be found `here <https://github.com/NVIDIA/Megatron-LM>`__. That repository contains a multi-GPU
|
The original code can be found `here <https://github.com/NVIDIA/Megatron-LM>`__. That repository contains a multi-GPU
|
||||||
and multi-node implementation of the Megatron Language models. In particular, it contains a hybrid model parallel
|
and multi-node implementation of the Megatron Language models. In particular, it contains a hybrid model parallel
|
||||||
|
|||||||
@@ -31,23 +31,23 @@ Example of use:
|
|||||||
|
|
||||||
.. code-block::
|
.. code-block::
|
||||||
|
|
||||||
import torch
|
import torch
|
||||||
from transformers import AutoModel, AutoTokenizer
|
from transformers import AutoModel, AutoTokenizer
|
||||||
|
|
||||||
phobert = AutoModel.from_pretrained("vinai/phobert-base")
|
phobert = AutoModel.from_pretrained("vinai/phobert-base")
|
||||||
tokenizer = AutoTokenizer.from_pretrained("vinai/phobert-base")
|
tokenizer = AutoTokenizer.from_pretrained("vinai/phobert-base")
|
||||||
|
|
||||||
# INPUT TEXT MUST BE ALREADY WORD-SEGMENTED!
|
# INPUT TEXT MUST BE ALREADY WORD-SEGMENTED!
|
||||||
line = "Tôi là sinh_viên trường đại_học Công_nghệ ."
|
line = "Tôi là sinh_viên trường đại_học Công_nghệ ."
|
||||||
|
|
||||||
input_ids = torch.tensor([tokenizer.encode(line)])
|
input_ids = torch.tensor([tokenizer.encode(line)])
|
||||||
|
|
||||||
with torch.no_grad():
|
with torch.no_grad():
|
||||||
features = phobert(input_ids) # Model outputs are now tuples
|
features = phobert(input_ids) # Model outputs are now tuples
|
||||||
|
|
||||||
## With TensorFlow 2.0+:
|
## With TensorFlow 2.0+:
|
||||||
# from transformers import TFAutoModel
|
# from transformers import TFAutoModel
|
||||||
# phobert = TFAutoModel.from_pretrained("vinai/phobert-base")
|
# phobert = TFAutoModel.from_pretrained("vinai/phobert-base")
|
||||||
|
|
||||||
|
|
||||||
The original code can be found `here <https://github.com/VinAIResearch/PhoBERT>`__.
|
The original code can be found `here <https://github.com/VinAIResearch/PhoBERT>`__.
|
||||||
|
|||||||
@@ -145,8 +145,8 @@ For training, the :class:`~transformers.ReformerModelWithLMHead` should be used
|
|||||||
|
|
||||||
.. code-block::
|
.. code-block::
|
||||||
|
|
||||||
input_ids = tokenizer.encode('This is a sentence from the training data', return_tensors='pt')
|
input_ids = tokenizer.encode('This is a sentence from the training data', return_tensors='pt')
|
||||||
loss = model(input_ids, labels=input_ids)[0]
|
loss = model(input_ids, labels=input_ids)[0]
|
||||||
|
|
||||||
|
|
||||||
ReformerConfig
|
ReformerConfig
|
||||||
|
|||||||
@@ -73,10 +73,10 @@ token. T5 can be trained / fine-tuned both in a supervised and unsupervised fash
|
|||||||
|
|
||||||
.. code-block::
|
.. code-block::
|
||||||
|
|
||||||
input_ids = tokenizer('The <extra_id_0> walks in <extra_id_1> park', return_tensors='pt').input_ids
|
input_ids = tokenizer('The <extra_id_0> walks in <extra_id_1> park', return_tensors='pt').input_ids
|
||||||
labels = tokenizer('<extra_id_0> cute dog <extra_id_1> the <extra_id_2>', return_tensors='pt').input_ids
|
labels = tokenizer('<extra_id_0> cute dog <extra_id_1> the <extra_id_2>', return_tensors='pt').input_ids
|
||||||
# the forward function automatically creates the correct decoder_input_ids
|
# the forward function automatically creates the correct decoder_input_ids
|
||||||
loss = model(input_ids=input_ids, labels=labels).loss
|
loss = model(input_ids=input_ids, labels=labels).loss
|
||||||
|
|
||||||
- Supervised training
|
- Supervised training
|
||||||
|
|
||||||
@@ -86,10 +86,10 @@ token. T5 can be trained / fine-tuned both in a supervised and unsupervised fash
|
|||||||
|
|
||||||
.. code-block::
|
.. code-block::
|
||||||
|
|
||||||
input_ids = tokenizer('translate English to German: The house is wonderful.', return_tensors='pt').input_ids
|
input_ids = tokenizer('translate English to German: The house is wonderful.', return_tensors='pt').input_ids
|
||||||
labels = tokenizer('Das Haus ist wunderbar.', return_tensors='pt').input_ids
|
labels = tokenizer('Das Haus ist wunderbar.', return_tensors='pt').input_ids
|
||||||
# the forward function automatically creates the correct decoder_input_ids
|
# the forward function automatically creates the correct decoder_input_ids
|
||||||
loss = model(input_ids=input_ids, labels=labels).loss
|
loss = model(input_ids=input_ids, labels=labels).loss
|
||||||
|
|
||||||
|
|
||||||
T5Config
|
T5Config
|
||||||
|
|||||||
@@ -70,19 +70,19 @@ Run all:
|
|||||||
|
|
||||||
.. code-block:: console
|
.. code-block:: console
|
||||||
|
|
||||||
pytest
|
pytest
|
||||||
|
|
||||||
or:
|
or:
|
||||||
|
|
||||||
.. code-block:: bash
|
.. code-block:: bash
|
||||||
|
|
||||||
make test
|
make test
|
||||||
|
|
||||||
Note that the latter is defined as:
|
Note that the latter is defined as:
|
||||||
|
|
||||||
.. code-block:: bash
|
.. code-block:: bash
|
||||||
|
|
||||||
python -m pytest -n auto --dist=loadfile -s -v ./tests/
|
python -m pytest -n auto --dist=loadfile -s -v ./tests/
|
||||||
|
|
||||||
which tells pytest to:
|
which tells pytest to:
|
||||||
|
|
||||||
@@ -100,13 +100,13 @@ All tests of the test suite:
|
|||||||
|
|
||||||
.. code-block:: bash
|
.. code-block:: bash
|
||||||
|
|
||||||
pytest --collect-only -q
|
pytest --collect-only -q
|
||||||
|
|
||||||
All tests of a given test file:
|
All tests of a given test file:
|
||||||
|
|
||||||
.. code-block:: bash
|
.. code-block:: bash
|
||||||
|
|
||||||
pytest tests/test_optimization.py --collect-only -q
|
pytest tests/test_optimization.py --collect-only -q
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@@ -117,7 +117,7 @@ To run an individual test module:
|
|||||||
|
|
||||||
.. code-block:: bash
|
.. code-block:: bash
|
||||||
|
|
||||||
pytest tests/test_logging.py
|
pytest tests/test_logging.py
|
||||||
|
|
||||||
|
|
||||||
Run specific tests
|
Run specific tests
|
||||||
@@ -128,7 +128,7 @@ class containing those tests. For example, it could be:
|
|||||||
|
|
||||||
.. code-block:: bash
|
.. code-block:: bash
|
||||||
|
|
||||||
pytest tests/test_optimization.py::OptimizationTest::test_adam_w
|
pytest tests/test_optimization.py::OptimizationTest::test_adam_w
|
||||||
|
|
||||||
Here:
|
Here:
|
||||||
|
|
||||||
@@ -140,7 +140,7 @@ If the file contains multiple classes, you can choose to run only tests of a giv
|
|||||||
|
|
||||||
.. code-block:: bash
|
.. code-block:: bash
|
||||||
|
|
||||||
pytest tests/test_optimization.py::OptimizationTest
|
pytest tests/test_optimization.py::OptimizationTest
|
||||||
|
|
||||||
|
|
||||||
will run all the tests inside that class.
|
will run all the tests inside that class.
|
||||||
@@ -149,7 +149,7 @@ As mentioned earlier you can see what tests are contained inside the ``Optimizat
|
|||||||
|
|
||||||
.. code-block:: bash
|
.. code-block:: bash
|
||||||
|
|
||||||
pytest tests/test_optimization.py::OptimizationTest --collect-only -q
|
pytest tests/test_optimization.py::OptimizationTest --collect-only -q
|
||||||
|
|
||||||
You can run tests by keyword expressions.
|
You can run tests by keyword expressions.
|
||||||
|
|
||||||
@@ -157,7 +157,7 @@ To run only tests whose name contains ``adam``:
|
|||||||
|
|
||||||
.. code-block:: bash
|
.. code-block:: bash
|
||||||
|
|
||||||
pytest -k adam tests/test_optimization.py
|
pytest -k adam tests/test_optimization.py
|
||||||
|
|
||||||
Logical ``and`` and ``or`` can be used to indicate whether all keywords should match or either. ``not`` can be used to
|
Logical ``and`` and ``or`` can be used to indicate whether all keywords should match or either. ``not`` can be used to
|
||||||
negate.
|
negate.
|
||||||
@@ -166,19 +166,19 @@ To run all tests except those whose name contains ``adam``:
|
|||||||
|
|
||||||
.. code-block:: bash
|
.. code-block:: bash
|
||||||
|
|
||||||
pytest -k "not adam" tests/test_optimization.py
|
pytest -k "not adam" tests/test_optimization.py
|
||||||
|
|
||||||
And you can combine the two patterns in one:
|
And you can combine the two patterns in one:
|
||||||
|
|
||||||
.. code-block:: bash
|
.. code-block:: bash
|
||||||
|
|
||||||
pytest -k "ada and not adam" tests/test_optimization.py
|
pytest -k "ada and not adam" tests/test_optimization.py
|
||||||
|
|
||||||
For example to run both ``test_adafactor`` and ``test_adam_w`` you can use:
|
For example to run both ``test_adafactor`` and ``test_adam_w`` you can use:
|
||||||
|
|
||||||
.. code-block:: bash
|
.. code-block:: bash
|
||||||
|
|
||||||
pytest -k "test_adafactor or test_adam_w" tests/test_optimization.py
|
pytest -k "test_adafactor or test_adam_w" tests/test_optimization.py
|
||||||
|
|
||||||
Note that we use ``or`` here, since we want either of the keywords to match to include both.
|
Note that we use ``or`` here, since we want either of the keywords to match to include both.
|
||||||
|
|
||||||
@@ -186,7 +186,7 @@ If you want to include only tests that include both patterns, ``and`` is to be u
|
|||||||
|
|
||||||
.. code-block:: bash
|
.. code-block:: bash
|
||||||
|
|
||||||
pytest -k "test and ada" tests/test_optimization.py
|
pytest -k "test and ada" tests/test_optimization.py
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@@ -251,7 +251,7 @@ example, to run all except ``test_modeling_*.py`` tests:
|
|||||||
|
|
||||||
.. code-block:: bash
|
.. code-block:: bash
|
||||||
|
|
||||||
pytest `ls -1 tests/*py | grep -v test_modeling`
|
pytest `ls -1 tests/*py | grep -v test_modeling`
|
||||||
|
|
||||||
|
|
||||||
Clearing state
|
Clearing state
|
||||||
@@ -292,13 +292,13 @@ Repeat tests
|
|||||||
|
|
||||||
.. code-block:: bash
|
.. code-block:: bash
|
||||||
|
|
||||||
pip install pytest-flakefinder
|
pip install pytest-flakefinder
|
||||||
|
|
||||||
And then run every test multiple times (50 by default):
|
And then run every test multiple times (50 by default):
|
||||||
|
|
||||||
.. code-block:: bash
|
.. code-block:: bash
|
||||||
|
|
||||||
pytest --flake-finder --flake-runs=5 tests/test_failing_test.py
|
pytest --flake-finder --flake-runs=5 tests/test_failing_test.py
|
||||||
|
|
||||||
.. note::
|
.. note::
|
||||||
This plugin doesn't work with ``-n`` flag from ``pytest-xdist``.
|
This plugin doesn't work with ``-n`` flag from ``pytest-xdist``.
|
||||||
@@ -322,19 +322,19 @@ As explained earlier this allows detection of coupled tests - where one test's s
|
|||||||
|
|
||||||
.. code-block:: bash
|
.. code-block:: bash
|
||||||
|
|
||||||
pytest tests
|
pytest tests
|
||||||
[...]
|
[...]
|
||||||
Using --random-order-bucket=module
|
Using --random-order-bucket=module
|
||||||
Using --random-order-seed=573663
|
Using --random-order-seed=573663
|
||||||
|
|
||||||
So that if the given particular sequence fails, you can reproduce it by adding that exact seed, e.g.:
|
So that if the given particular sequence fails, you can reproduce it by adding that exact seed, e.g.:
|
||||||
|
|
||||||
.. code-block:: bash
|
.. code-block:: bash
|
||||||
|
|
||||||
pytest --random-order-seed=573663
|
pytest --random-order-seed=573663
|
||||||
[...]
|
[...]
|
||||||
Using --random-order-bucket=module
|
Using --random-order-bucket=module
|
||||||
Using --random-order-seed=573663
|
Using --random-order-seed=573663
|
||||||
|
|
||||||
It will only reproduce the exact order if you use the exact same list of tests (or no list at all). Once you start to
|
It will only reproduce the exact order if you use the exact same list of tests (or no list at all). Once you start to
|
||||||
manually narrow down the list you can no longer rely on the seed, but have to list them manually in the exact order
|
manually narrow down the list you can no longer rely on the seed, but have to list them manually in the exact order
|
||||||
@@ -342,7 +342,7 @@ they failed and tell pytest to not randomize them instead using ``--random-order
|
|||||||
|
|
||||||
.. code-block:: bash
|
.. code-block:: bash
|
||||||
|
|
||||||
pytest --random-order-bucket=none tests/test_a.py tests/test_c.py tests/test_b.py
|
pytest --random-order-bucket=none tests/test_a.py tests/test_c.py tests/test_b.py
|
||||||
|
|
||||||
To disable the shuffling for all tests:
|
To disable the shuffling for all tests:
|
||||||
|
|
||||||
@@ -369,7 +369,7 @@ progressbar, and show tests that fail and the assert instantly. It gets activate
|
|||||||
|
|
||||||
.. code-block:: bash
|
.. code-block:: bash
|
||||||
|
|
||||||
pip install pytest-sugar
|
pip install pytest-sugar
|
||||||
|
|
||||||
To run tests without it, run:
|
To run tests without it, run:
|
||||||
|
|
||||||
@@ -388,7 +388,7 @@ For a single or a group of tests via ``pytest`` (after ``pip install pytest-pspe
|
|||||||
|
|
||||||
.. code-block:: bash
|
.. code-block:: bash
|
||||||
|
|
||||||
pytest --pspec tests/test_optimization.py
|
pytest --pspec tests/test_optimization.py
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@@ -490,8 +490,8 @@ Inside tests:
|
|||||||
|
|
||||||
.. code-block:: python
|
.. code-block:: python
|
||||||
|
|
||||||
from transformers.testing_utils import get_gpu_count
|
from transformers.testing_utils import get_gpu_count
|
||||||
n_gpu = get_gpu_count() # works with torch and tf
|
n_gpu = get_gpu_count() # works with torch and tf
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@@ -514,8 +514,8 @@ You will need at least 2 GPUs to see these tests in action:
|
|||||||
|
|
||||||
.. code-block:: bash
|
.. code-block:: bash
|
||||||
|
|
||||||
CUDA_VISIBLE_DEVICES="0,1" RUN_SLOW=1 pytest -sv examples/seq2seq/test_finetune_trainer.py \
|
CUDA_VISIBLE_DEVICES="0,1" RUN_SLOW=1 pytest -sv examples/seq2seq/test_finetune_trainer.py \
|
||||||
examples/seq2seq/test_seq2seq_examples_multi_gpu.py
|
examples/seq2seq/test_seq2seq_examples_multi_gpu.py
|
||||||
|
|
||||||
|
|
||||||
Output capture
|
Output capture
|
||||||
@@ -528,13 +528,13 @@ To disable output capturing and to get the ``stdout`` and ``stderr`` normally, u
|
|||||||
|
|
||||||
.. code-block:: bash
|
.. code-block:: bash
|
||||||
|
|
||||||
pytest -s tests/test_logging.py
|
pytest -s tests/test_logging.py
|
||||||
|
|
||||||
To send test results to JUnit format output:
|
To send test results to JUnit format output:
|
||||||
|
|
||||||
.. code-block:: bash
|
.. code-block:: bash
|
||||||
|
|
||||||
py.test tests --junitxml=result.xml
|
py.test tests --junitxml=result.xml
|
||||||
|
|
||||||
|
|
||||||
Color control
|
Color control
|
||||||
@@ -544,7 +544,7 @@ To have no color (e.g., yellow on white background is not readable):
|
|||||||
|
|
||||||
.. code-block:: bash
|
.. code-block:: bash
|
||||||
|
|
||||||
pytest --color=no tests/test_logging.py
|
pytest --color=no tests/test_logging.py
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@@ -555,7 +555,7 @@ Creating a URL for each test failure:
|
|||||||
|
|
||||||
.. code-block:: bash
|
.. code-block:: bash
|
||||||
|
|
||||||
pytest --pastebin=failed tests/test_logging.py
|
pytest --pastebin=failed tests/test_logging.py
|
||||||
|
|
||||||
This will submit test run information to a remote Paste service and provide a URL for each failure. You may select
|
This will submit test run information to a remote Paste service and provide a URL for each failure. You may select
|
||||||
tests as usual or add for example -x if you only want to send one particular failure.
|
tests as usual or add for example -x if you only want to send one particular failure.
|
||||||
@@ -564,7 +564,7 @@ Creating a URL for a whole test session log:
|
|||||||
|
|
||||||
.. code-block:: bash
|
.. code-block:: bash
|
||||||
|
|
||||||
pytest --pastebin=all tests/test_logging.py
|
pytest --pastebin=all tests/test_logging.py
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@@ -606,13 +606,13 @@ and you could run just the ``negative`` and ``integer`` sets of params with:
|
|||||||
|
|
||||||
.. code-block:: bash
|
.. code-block:: bash
|
||||||
|
|
||||||
pytest -k "negative and integer" tests/test_mytest.py
|
pytest -k "negative and integer" tests/test_mytest.py
|
||||||
|
|
||||||
or all but ``negative`` sub-tests, with:
|
or all but ``negative`` sub-tests, with:
|
||||||
|
|
||||||
.. code-block:: bash
|
.. code-block:: bash
|
||||||
|
|
||||||
pytest -k "not negative" tests/test_mytest.py
|
pytest -k "not negative" tests/test_mytest.py
|
||||||
|
|
||||||
Besides using the ``-k`` filter that was just mentioned, you can find out the exact name of each sub-test and run any
|
Besides using the ``-k`` filter that was just mentioned, you can find out the exact name of each sub-test and run any
|
||||||
or all of them using their exact names.
|
or all of them using their exact names.
|
||||||
|
|||||||
@@ -49,6 +49,7 @@ _re_indent = re.compile(r"^(\s*)\S")
|
|||||||
_re_table = re.compile(r"(\+-+)+\+\s*$")
|
_re_table = re.compile(r"(\+-+)+\+\s*$")
|
||||||
# Matches a code block in rst `:: `.
|
# Matches a code block in rst `:: `.
|
||||||
_re_code_block = re.compile(r"^\s*::\s*$")
|
_re_code_block = re.compile(r"^\s*::\s*$")
|
||||||
|
_re_code_block_explicit = re.compile(r"^\.\.\s+code\-block::")
|
||||||
# Matches any block of the form `.. something::` or `.. something:: bla`.
|
# Matches any block of the form `.. something::` or `.. something:: bla`.
|
||||||
_re_ignore = re.compile(r"^\s*\.\.\s+(.*?)\s*::\s*\S*\s*$")
|
_re_ignore = re.compile(r"^\s*\.\.\s+(.*?)\s*::\s*\S*\s*$")
|
||||||
# Matches comment introduction in rst.
|
# Matches comment introduction in rst.
|
||||||
@@ -374,6 +375,28 @@ rst_styler = CodeStyler()
|
|||||||
doc_styler = DocstringStyler()
|
doc_styler = DocstringStyler()
|
||||||
|
|
||||||
|
|
||||||
|
def _reindent_code_blocks(text):
    """Re-indent code blocks whose content is indented by fewer than four spaces.

    The text is scanned line by line. When a line matches ``_re_code_block`` (a bare ``::``) or
    ``_re_code_block_explicit`` (``.. code-block::``), the scan moves forward to the first line with a non-empty
    indent and takes that indent as the indent of the block. If it is smaller than four, every non-empty line of the
    block has its first ``indent`` characters replaced by four spaces. The block ends at the first non-empty line
    that has no indent, or at the end of the text.

    Args:
        text (:obj:`str`): The full content of the document to fix.

    Returns:
        :obj:`str`: The document with the under-indented code blocks re-indented; other lines are left untouched.
    """
    lines = text.split("\n")
    idx = 0
    while idx < len(lines):
        # Detect if the line is the start of a new code-block.
        if _re_code_block.search(lines[idx]) is not None or _re_code_block_explicit.search(lines[idx]) is not None:
            # Move to the first indented line. The bound check matters: a marker with no indented line after it
            # (for instance at the very end of the text) would otherwise index past the end of `lines`.
            while idx < len(lines) and len(get_indent(lines[idx])) == 0:
                idx += 1
            if idx >= len(lines):
                # No indented content was found before the end of the text, nothing left to re-indent.
                break
            # The indent of the first indented line is used for the whole block.
            indent = len(get_indent(lines[idx]))
            should_continue = True
            while should_continue:
                # Empty lines are kept as is; they do not end the block either (see `should_continue` below).
                if len(lines[idx]) > 0 and indent < 4:
                    lines[idx] = " " * 4 + lines[idx][indent:]
                idx += 1
                should_continue = (idx < len(lines)) and (len(lines[idx]) == 0 or len(get_indent(lines[idx])) > 0)
        else:
            idx += 1

    return "\n".join(lines)
|
||||||
|
|
||||||
|
|
||||||
def _add_new_lines_before_list(text):
|
def _add_new_lines_before_list(text):
|
||||||
"""Add a new empty line before a list begins."""
|
"""Add a new empty line before a list begins."""
|
||||||
lines = text.split("\n")
|
lines = text.split("\n")
|
||||||
@@ -412,8 +435,10 @@ def style_rst_file(doc_file, max_len=119, check_only=False):
|
|||||||
with open(doc_file, "r", encoding="utf-8", newline="\n") as f:
|
with open(doc_file, "r", encoding="utf-8", newline="\n") as f:
|
||||||
doc = f.read()
|
doc = f.read()
|
||||||
|
|
||||||
|
# Make sure code blocks are indented at 4
|
||||||
|
clean_doc = _reindent_code_blocks(doc)
|
||||||
# Add missing new lines before lists
|
# Add missing new lines before lists
|
||||||
clean_doc = _add_new_lines_before_list(doc)
|
clean_doc = _add_new_lines_before_list(clean_doc)
|
||||||
# Style
|
# Style
|
||||||
clean_doc = rst_styler.style(clean_doc, max_len=max_len)
|
clean_doc = rst_styler.style(clean_doc, max_len=max_len)
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user