Merge branch 'master' of github.com:huggingface/transformers
This commit is contained in:
@@ -348,7 +348,7 @@ jobs:
|
|||||||
- v0.4-{{ checksum "setup.py" }}
|
- v0.4-{{ checksum "setup.py" }}
|
||||||
- run: sudo apt-get -y update && sudo apt-get install -y libsndfile1-dev
|
- run: sudo apt-get -y update && sudo apt-get install -y libsndfile1-dev
|
||||||
- run: pip install --upgrade pip
|
- run: pip install --upgrade pip
|
||||||
- run: pip install ."[all, docs]"
|
- run: pip install ."[docs]"
|
||||||
- save_cache:
|
- save_cache:
|
||||||
key: v0.4-build_doc-{{ checksum "setup.py" }}
|
key: v0.4-build_doc-{{ checksum "setup.py" }}
|
||||||
paths:
|
paths:
|
||||||
@@ -370,7 +370,7 @@ jobs:
|
|||||||
keys:
|
keys:
|
||||||
- v0.4-deploy_doc-{{ checksum "setup.py" }}
|
- v0.4-deploy_doc-{{ checksum "setup.py" }}
|
||||||
- v0.4-{{ checksum "setup.py" }}
|
- v0.4-{{ checksum "setup.py" }}
|
||||||
- run: pip install ."[all,docs]"
|
- run: pip install ."[docs]"
|
||||||
- save_cache:
|
- save_cache:
|
||||||
key: v0.4-deploy_doc-{{ checksum "setup.py" }}
|
key: v0.4-deploy_doc-{{ checksum "setup.py" }}
|
||||||
paths:
|
paths:
|
||||||
|
|||||||
4
.github/workflows/self-scheduled.yml
vendored
4
.github/workflows/self-scheduled.yml
vendored
@@ -33,7 +33,7 @@ jobs:
|
|||||||
run: |
|
run: |
|
||||||
apt -y update && apt install -y libsndfile1-dev
|
apt -y update && apt install -y libsndfile1-dev
|
||||||
pip install --upgrade pip
|
pip install --upgrade pip
|
||||||
pip install .[sklearn,testing,onnxruntime,sentencepiece,speech]
|
pip install .[sklearn,testing,onnxruntime,sentencepiece,speech,deepspeed]
|
||||||
|
|
||||||
- name: Are GPUs recognized by our DL frameworks
|
- name: Are GPUs recognized by our DL frameworks
|
||||||
run: |
|
run: |
|
||||||
@@ -155,7 +155,7 @@ jobs:
|
|||||||
run: |
|
run: |
|
||||||
apt -y update && apt install -y libsndfile1-dev
|
apt -y update && apt install -y libsndfile1-dev
|
||||||
pip install --upgrade pip
|
pip install --upgrade pip
|
||||||
pip install .[sklearn,testing,onnxruntime,sentencepiece,speech]
|
pip install .[sklearn,testing,onnxruntime,sentencepiece,speech,deepspeed,fairscale]
|
||||||
|
|
||||||
- name: Are GPUs recognized by our DL frameworks
|
- name: Are GPUs recognized by our DL frameworks
|
||||||
run: |
|
run: |
|
||||||
|
|||||||
@@ -274,6 +274,14 @@ Install the library via pypi:
|
|||||||
|
|
||||||
pip install fairscale
|
pip install fairscale
|
||||||
|
|
||||||
|
or via ``transformers``' ``extras``:
|
||||||
|
|
||||||
|
.. code-block:: bash
|
||||||
|
|
||||||
|
pip install transformers[fairscale]
|
||||||
|
|
||||||
|
(will become available starting from ``transformers==4.6.0``)
|
||||||
|
|
||||||
or find more details on `FairScale's GitHub page <https://github.com/facebookresearch/fairscale/#installation>`__.
|
or find more details on `FairScale's GitHub page <https://github.com/facebookresearch/fairscale/#installation>`__.
|
||||||
|
|
||||||
If you're still struggling with the build, first make sure to read :ref:`zero-install-notes`.
|
If you're still struggling with the build, first make sure to read :ref:`zero-install-notes`.
|
||||||
@@ -419,6 +427,14 @@ Install the library via pypi:
|
|||||||
|
|
||||||
pip install deepspeed
|
pip install deepspeed
|
||||||
|
|
||||||
|
or via ``transformers``' ``extras``:
|
||||||
|
|
||||||
|
.. code-block:: bash
|
||||||
|
|
||||||
|
pip install transformers[deepspeed]
|
||||||
|
|
||||||
|
(will become available starting from ``transformers==4.6.0``)
|
||||||
|
|
||||||
or find more details on `DeepSpeed's GitHub page <https://github.com/microsoft/deepspeed#installation>`__ and
|
or find more details on `DeepSpeed's GitHub page <https://github.com/microsoft/deepspeed#installation>`__ and
|
||||||
`advanced install <https://www.deepspeed.ai/tutorials/advanced-install/>`__.
|
`advanced install <https://www.deepspeed.ai/tutorials/advanced-install/>`__.
|
||||||
|
|
||||||
@@ -525,7 +541,7 @@ Here is an example of running ``run_translation.py`` under DeepSpeed deploying a
|
|||||||
.. code-block:: bash
|
.. code-block:: bash
|
||||||
|
|
||||||
deepspeed examples/seq2seq/run_translation.py \
|
deepspeed examples/seq2seq/run_translation.py \
|
||||||
--deepspeed examples/tests/deepspeed/ds_config.json \
|
--deepspeed tests/deepspeed/ds_config.json \
|
||||||
--model_name_or_path t5-small --per_device_train_batch_size 1 \
|
--model_name_or_path t5-small --per_device_train_batch_size 1 \
|
||||||
--output_dir output_dir --overwrite_output_dir --fp16 \
|
--output_dir output_dir --overwrite_output_dir --fp16 \
|
||||||
--do_train --max_train_samples 500 --num_train_epochs 1 \
|
--do_train --max_train_samples 500 --num_train_epochs 1 \
|
||||||
@@ -550,7 +566,7 @@ To deploy DeepSpeed with one GPU adjust the :class:`~transformers.Trainer` comma
|
|||||||
.. code-block:: bash
|
.. code-block:: bash
|
||||||
|
|
||||||
deepspeed --num_gpus=1 examples/seq2seq/run_translation.py \
|
deepspeed --num_gpus=1 examples/seq2seq/run_translation.py \
|
||||||
--deepspeed examples/tests/deepspeed/ds_config.json \
|
--deepspeed tests/deepspeed/ds_config.json \
|
||||||
--model_name_or_path t5-small --per_device_train_batch_size 1 \
|
--model_name_or_path t5-small --per_device_train_batch_size 1 \
|
||||||
--output_dir output_dir --overwrite_output_dir --fp16 \
|
--output_dir output_dir --overwrite_output_dir --fp16 \
|
||||||
--do_train --max_train_samples 500 --num_train_epochs 1 \
|
--do_train --max_train_samples 500 --num_train_epochs 1 \
|
||||||
|
|||||||
@@ -795,6 +795,23 @@ leave any data in there.
|
|||||||
otherwise.
|
otherwise.
|
||||||
|
|
||||||
|
|
||||||
|
Temporary sys.path override
|
||||||
|
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||||
|
|
||||||
|
If you need to temporarily override ``sys.path``, for example to import from another test, you can use the
|
||||||
|
``ExtendSysPath`` context manager. Example:
|
||||||
|
|
||||||
|
|
||||||
|
.. code-block:: python
|
||||||
|
|
||||||
|
import os
|
||||||
|
from transformers.testing_utils import ExtendSysPath
|
||||||
|
bindir = os.path.abspath(os.path.dirname(__file__))
|
||||||
|
with ExtendSysPath(f"{bindir}/.."):
|
||||||
|
from test_trainer import TrainerIntegrationCommon # noqa
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
Skipping tests
|
Skipping tests
|
||||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||||
|
|
||||||
|
|||||||
@@ -422,7 +422,12 @@ def main():
|
|||||||
|
|
||||||
# Data collator
|
# Data collator
|
||||||
# This one will take care of randomly masking the tokens.
|
# This one will take care of randomly masking the tokens.
|
||||||
data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm_probability=data_args.mlm_probability)
|
pad_to_multiple_of_8 = data_args.line_by_line and training_args.fp16 and not data_args.pad_to_max_length
|
||||||
|
data_collator = DataCollatorForLanguageModeling(
|
||||||
|
tokenizer=tokenizer,
|
||||||
|
mlm_probability=data_args.mlm_probability,
|
||||||
|
pad_to_multiple_of=8 if pad_to_multiple_of_8 else None,
|
||||||
|
)
|
||||||
|
|
||||||
# Initialize our Trainer
|
# Initialize our Trainer
|
||||||
trainer = Trainer(
|
trainer = Trainer(
|
||||||
|
|||||||
39
setup.py
39
setup.py
@@ -85,11 +85,14 @@ if stale_egg_info.exists():
|
|||||||
# 1. all dependencies should be listed here with their version requirements if any
|
# 1. all dependencies should be listed here with their version requirements if any
|
||||||
# 2. once modified, run: `make deps_table_update` to update src/transformers/dependency_versions_table.py
|
# 2. once modified, run: `make deps_table_update` to update src/transformers/dependency_versions_table.py
|
||||||
_deps = [
|
_deps = [
|
||||||
|
"Pillow",
|
||||||
"black>=20.8b1",
|
"black>=20.8b1",
|
||||||
"cookiecutter==1.7.2",
|
"cookiecutter==1.7.2",
|
||||||
"dataclasses",
|
"dataclasses",
|
||||||
"datasets",
|
"datasets",
|
||||||
|
"deepspeed>0.3.13",
|
||||||
"docutils==0.16.0",
|
"docutils==0.16.0",
|
||||||
|
"fairscale>0.3",
|
||||||
"faiss-cpu",
|
"faiss-cpu",
|
||||||
"fastapi",
|
"fastapi",
|
||||||
"filelock",
|
"filelock",
|
||||||
@@ -102,13 +105,13 @@ _deps = [
|
|||||||
"jax>=0.2.8",
|
"jax>=0.2.8",
|
||||||
"jaxlib>=0.1.59",
|
"jaxlib>=0.1.59",
|
||||||
"keras2onnx",
|
"keras2onnx",
|
||||||
|
"nltk",
|
||||||
"numpy>=1.17",
|
"numpy>=1.17",
|
||||||
"onnxconverter-common",
|
"onnxconverter-common",
|
||||||
"onnxruntime-tools>=1.4.2",
|
"onnxruntime-tools>=1.4.2",
|
||||||
"onnxruntime>=1.4.0",
|
"onnxruntime>=1.4.0",
|
||||||
"packaging",
|
"packaging",
|
||||||
"parameterized",
|
"parameterized",
|
||||||
"Pillow",
|
|
||||||
"protobuf",
|
"protobuf",
|
||||||
"psutil",
|
"psutil",
|
||||||
"pydantic",
|
"pydantic",
|
||||||
@@ -119,15 +122,18 @@ _deps = [
|
|||||||
"recommonmark",
|
"recommonmark",
|
||||||
"regex!=2019.12.17",
|
"regex!=2019.12.17",
|
||||||
"requests",
|
"requests",
|
||||||
|
"rouge-score",
|
||||||
|
"sacrebleu>=1.4.12",
|
||||||
"sacremoses",
|
"sacremoses",
|
||||||
|
"sagemaker>=2.31.0",
|
||||||
"scikit-learn",
|
"scikit-learn",
|
||||||
"sentencepiece==0.1.91",
|
"sentencepiece==0.1.91",
|
||||||
"soundfile",
|
"soundfile",
|
||||||
"sphinx-copybutton",
|
"sphinx-copybutton",
|
||||||
"sphinx-markdown-tables",
|
"sphinx-markdown-tables",
|
||||||
"sphinx-rtd-theme==0.4.3", # sphinx-rtd-theme==0.5.0 introduced big changes in the style.
|
"sphinx-rtd-theme==0.4.3", # sphinx-rtd-theme==0.5.0 introduced big changes in the style.
|
||||||
"sphinxext-opengraph==0.4.1",
|
|
||||||
"sphinx==3.2.1",
|
"sphinx==3.2.1",
|
||||||
|
"sphinxext-opengraph==0.4.1",
|
||||||
"starlette",
|
"starlette",
|
||||||
"tensorflow-cpu>=2.3",
|
"tensorflow-cpu>=2.3",
|
||||||
"tensorflow>=2.3",
|
"tensorflow>=2.3",
|
||||||
@@ -139,7 +145,6 @@ _deps = [
|
|||||||
"unidic>=1.0.2",
|
"unidic>=1.0.2",
|
||||||
"unidic_lite>=1.0.7",
|
"unidic_lite>=1.0.7",
|
||||||
"uvicorn",
|
"uvicorn",
|
||||||
"sagemaker>=2.31.0",
|
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
@@ -230,6 +235,8 @@ extras["onnx"] = deps_list("onnxconverter-common", "keras2onnx") + extras["onnxr
|
|||||||
extras["modelcreation"] = deps_list("cookiecutter")
|
extras["modelcreation"] = deps_list("cookiecutter")
|
||||||
|
|
||||||
extras["sagemaker"] = deps_list("sagemaker")
|
extras["sagemaker"] = deps_list("sagemaker")
|
||||||
|
extras["deepspeed"] = deps_list("deepspeed")
|
||||||
|
extras["fairscale"] = deps_list("fairscale")
|
||||||
|
|
||||||
extras["serving"] = deps_list("pydantic", "uvicorn", "fastapi", "starlette")
|
extras["serving"] = deps_list("pydantic", "uvicorn", "fastapi", "starlette")
|
||||||
extras["speech"] = deps_list("soundfile", "torchaudio")
|
extras["speech"] = deps_list("soundfile", "torchaudio")
|
||||||
@@ -238,20 +245,12 @@ extras["vision"] = deps_list("Pillow")
|
|||||||
extras["sentencepiece"] = deps_list("sentencepiece", "protobuf")
|
extras["sentencepiece"] = deps_list("sentencepiece", "protobuf")
|
||||||
extras["testing"] = (
|
extras["testing"] = (
|
||||||
deps_list(
|
deps_list(
|
||||||
"pytest", "pytest-xdist", "timeout-decorator", "parameterized", "psutil", "datasets", "pytest-sugar", "black"
|
"pytest", "pytest-xdist", "timeout-decorator", "parameterized", "psutil", "datasets", "pytest-sugar", "black", "sacrebleu", "rouge-score", "nltk"
|
||||||
)
|
)
|
||||||
+ extras["retrieval"]
|
+ extras["retrieval"]
|
||||||
+ extras["modelcreation"]
|
+ extras["modelcreation"]
|
||||||
)
|
)
|
||||||
extras["docs"] = deps_list(
|
|
||||||
"docutils",
|
|
||||||
"recommonmark",
|
|
||||||
"sphinx",
|
|
||||||
"sphinx-markdown-tables",
|
|
||||||
"sphinx-rtd-theme",
|
|
||||||
"sphinx-copybutton",
|
|
||||||
"sphinxext-opengraph",
|
|
||||||
)
|
|
||||||
extras["quality"] = deps_list("black", "isort", "flake8")
|
extras["quality"] = deps_list("black", "isort", "flake8")
|
||||||
|
|
||||||
extras["all"] = (
|
extras["all"] = (
|
||||||
@@ -264,12 +263,24 @@ extras["all"] = (
|
|||||||
+ extras["vision"]
|
+ extras["vision"]
|
||||||
)
|
)
|
||||||
|
|
||||||
|
extras["docs_specific"] = deps_list(
|
||||||
|
"docutils",
|
||||||
|
"recommonmark",
|
||||||
|
"sphinx",
|
||||||
|
"sphinx-markdown-tables",
|
||||||
|
"sphinx-rtd-theme",
|
||||||
|
"sphinx-copybutton",
|
||||||
|
"sphinxext-opengraph",
|
||||||
|
)
|
||||||
|
# "docs" needs "all" to resolve all the references
|
||||||
|
extras["docs"] = extras["all"] + extras["docs_specific"]
|
||||||
|
|
||||||
extras["dev"] = (
|
extras["dev"] = (
|
||||||
extras["all"]
|
extras["all"]
|
||||||
+ extras["testing"]
|
+ extras["testing"]
|
||||||
+ extras["quality"]
|
+ extras["quality"]
|
||||||
+ extras["ja"]
|
+ extras["ja"]
|
||||||
+ extras["docs"]
|
+ extras["docs_specific"]
|
||||||
+ extras["sklearn"]
|
+ extras["sklearn"]
|
||||||
+ extras["modelcreation"]
|
+ extras["modelcreation"]
|
||||||
)
|
)
|
||||||
|
|||||||
@@ -192,7 +192,7 @@ class DataCollatorForTokenClassification:
|
|||||||
return batch
|
return batch
|
||||||
|
|
||||||
|
|
||||||
def _collate_batch(examples, tokenizer):
|
def _collate_batch(examples, tokenizer, pad_to_multiple_of: Optional[int] = None):
|
||||||
"""Collate `examples` into a batch, using the information in `tokenizer` for padding if necessary."""
|
"""Collate `examples` into a batch, using the information in `tokenizer` for padding if necessary."""
|
||||||
# Tensorize if necessary.
|
# Tensorize if necessary.
|
||||||
if isinstance(examples[0], (list, tuple)):
|
if isinstance(examples[0], (list, tuple)):
|
||||||
@@ -201,7 +201,7 @@ def _collate_batch(examples, tokenizer):
|
|||||||
# Check if padding is necessary.
|
# Check if padding is necessary.
|
||||||
length_of_first = examples[0].size(0)
|
length_of_first = examples[0].size(0)
|
||||||
are_tensors_same_length = all(x.size(0) == length_of_first for x in examples)
|
are_tensors_same_length = all(x.size(0) == length_of_first for x in examples)
|
||||||
if are_tensors_same_length:
|
if are_tensors_same_length and (pad_to_multiple_of is None or length_of_first % pad_to_multiple_of == 0):
|
||||||
return torch.stack(examples, dim=0)
|
return torch.stack(examples, dim=0)
|
||||||
|
|
||||||
# If yes, check if we have a `pad_token`.
|
# If yes, check if we have a `pad_token`.
|
||||||
@@ -213,6 +213,8 @@ def _collate_batch(examples, tokenizer):
|
|||||||
|
|
||||||
# Creating the full tensor and filling it with our data.
|
# Creating the full tensor and filling it with our data.
|
||||||
max_length = max(x.size(0) for x in examples)
|
max_length = max(x.size(0) for x in examples)
|
||||||
|
if pad_to_multiple_of is not None and (max_length % pad_to_multiple_of != 0):
|
||||||
|
max_length = ((max_length // pad_to_multiple_of) + 1) * pad_to_multiple_of
|
||||||
result = examples[0].new_full([len(examples), max_length], tokenizer.pad_token_id)
|
result = examples[0].new_full([len(examples), max_length], tokenizer.pad_token_id)
|
||||||
for i, example in enumerate(examples):
|
for i, example in enumerate(examples):
|
||||||
if tokenizer.padding_side == "right":
|
if tokenizer.padding_side == "right":
|
||||||
@@ -311,6 +313,8 @@ class DataCollatorForLanguageModeling:
|
|||||||
non-masked tokens and the value to predict for the masked token.
|
non-masked tokens and the value to predict for the masked token.
|
||||||
mlm_probability (:obj:`float`, `optional`, defaults to 0.15):
|
mlm_probability (:obj:`float`, `optional`, defaults to 0.15):
|
||||||
The probability with which to (randomly) mask tokens in the input, when :obj:`mlm` is set to :obj:`True`.
|
The probability with which to (randomly) mask tokens in the input, when :obj:`mlm` is set to :obj:`True`.
|
||||||
|
pad_to_multiple_of (:obj:`int`, `optional`):
|
||||||
|
If set, will pad the sequence to a multiple of the provided value.
|
||||||
|
|
||||||
.. note::
|
.. note::
|
||||||
|
|
||||||
@@ -323,6 +327,7 @@ class DataCollatorForLanguageModeling:
|
|||||||
tokenizer: PreTrainedTokenizerBase
|
tokenizer: PreTrainedTokenizerBase
|
||||||
mlm: bool = True
|
mlm: bool = True
|
||||||
mlm_probability: float = 0.15
|
mlm_probability: float = 0.15
|
||||||
|
pad_to_multiple_of: Optional[int] = None
|
||||||
|
|
||||||
def __post_init__(self):
|
def __post_init__(self):
|
||||||
if self.mlm and self.tokenizer.mask_token is None:
|
if self.mlm and self.tokenizer.mask_token is None:
|
||||||
@@ -336,9 +341,9 @@ class DataCollatorForLanguageModeling:
|
|||||||
) -> Dict[str, torch.Tensor]:
|
) -> Dict[str, torch.Tensor]:
|
||||||
# Handle dict or lists with proper padding and conversion to tensor.
|
# Handle dict or lists with proper padding and conversion to tensor.
|
||||||
if isinstance(examples[0], (dict, BatchEncoding)):
|
if isinstance(examples[0], (dict, BatchEncoding)):
|
||||||
batch = self.tokenizer.pad(examples, return_tensors="pt")
|
batch = self.tokenizer.pad(examples, return_tensors="pt", pad_to_multiple_of=self.pad_to_multiple_of)
|
||||||
else:
|
else:
|
||||||
batch = {"input_ids": _collate_batch(examples, self.tokenizer)}
|
batch = {"input_ids": _collate_batch(examples, self.tokenizer, pad_to_multiple_of=self.pad_to_multiple_of)}
|
||||||
|
|
||||||
# If special token mask has been preprocessed, pop it from the dict.
|
# If special token mask has been preprocessed, pop it from the dict.
|
||||||
special_tokens_mask = batch.pop("special_tokens_mask", None)
|
special_tokens_mask = batch.pop("special_tokens_mask", None)
|
||||||
|
|||||||
@@ -14,7 +14,7 @@
|
|||||||
import sys
|
import sys
|
||||||
|
|
||||||
from .dependency_versions_table import deps
|
from .dependency_versions_table import deps
|
||||||
from .utils.versions import require_version_core
|
from .utils.versions import require_version, require_version_core
|
||||||
|
|
||||||
|
|
||||||
# define which module versions we always want to check at run time
|
# define which module versions we always want to check at run time
|
||||||
@@ -41,3 +41,7 @@ for pkg in pkgs_to_check_at_runtime:
|
|||||||
require_version_core(deps[pkg])
|
require_version_core(deps[pkg])
|
||||||
else:
|
else:
|
||||||
raise ValueError(f"can't find {pkg} in {deps.keys()}, check dependency_versions_table.py")
|
raise ValueError(f"can't find {pkg} in {deps.keys()}, check dependency_versions_table.py")
|
||||||
|
|
||||||
|
|
||||||
|
def dep_version_check(pkg, hint=None):
|
||||||
|
require_version(deps[pkg], hint)
|
||||||
|
|||||||
@@ -2,11 +2,14 @@
|
|||||||
# 1. modify the `_deps` list in setup.py
|
# 1. modify the `_deps` list in setup.py
|
||||||
# 2. run `make deps_table_update`
|
# 2. run `make deps_table_update`
|
||||||
deps = {
|
deps = {
|
||||||
|
"Pillow": "Pillow",
|
||||||
"black": "black>=20.8b1",
|
"black": "black>=20.8b1",
|
||||||
"cookiecutter": "cookiecutter==1.7.2",
|
"cookiecutter": "cookiecutter==1.7.2",
|
||||||
"dataclasses": "dataclasses",
|
"dataclasses": "dataclasses",
|
||||||
"datasets": "datasets",
|
"datasets": "datasets",
|
||||||
|
"deepspeed": "deepspeed>0.3.13",
|
||||||
"docutils": "docutils==0.16.0",
|
"docutils": "docutils==0.16.0",
|
||||||
|
"fairscale": "fairscale>0.3",
|
||||||
"faiss-cpu": "faiss-cpu",
|
"faiss-cpu": "faiss-cpu",
|
||||||
"fastapi": "fastapi",
|
"fastapi": "fastapi",
|
||||||
"filelock": "filelock",
|
"filelock": "filelock",
|
||||||
@@ -19,13 +22,13 @@ deps = {
|
|||||||
"jax": "jax>=0.2.8",
|
"jax": "jax>=0.2.8",
|
||||||
"jaxlib": "jaxlib>=0.1.59",
|
"jaxlib": "jaxlib>=0.1.59",
|
||||||
"keras2onnx": "keras2onnx",
|
"keras2onnx": "keras2onnx",
|
||||||
|
"nltk": "nltk",
|
||||||
"numpy": "numpy>=1.17",
|
"numpy": "numpy>=1.17",
|
||||||
"onnxconverter-common": "onnxconverter-common",
|
"onnxconverter-common": "onnxconverter-common",
|
||||||
"onnxruntime-tools": "onnxruntime-tools>=1.4.2",
|
"onnxruntime-tools": "onnxruntime-tools>=1.4.2",
|
||||||
"onnxruntime": "onnxruntime>=1.4.0",
|
"onnxruntime": "onnxruntime>=1.4.0",
|
||||||
"packaging": "packaging",
|
"packaging": "packaging",
|
||||||
"parameterized": "parameterized",
|
"parameterized": "parameterized",
|
||||||
"Pillow": "Pillow",
|
|
||||||
"protobuf": "protobuf",
|
"protobuf": "protobuf",
|
||||||
"psutil": "psutil",
|
"psutil": "psutil",
|
||||||
"pydantic": "pydantic",
|
"pydantic": "pydantic",
|
||||||
@@ -36,15 +39,18 @@ deps = {
|
|||||||
"recommonmark": "recommonmark",
|
"recommonmark": "recommonmark",
|
||||||
"regex": "regex!=2019.12.17",
|
"regex": "regex!=2019.12.17",
|
||||||
"requests": "requests",
|
"requests": "requests",
|
||||||
|
"rouge-score": "rouge-score",
|
||||||
|
"sacrebleu": "sacrebleu>=1.4.12",
|
||||||
"sacremoses": "sacremoses",
|
"sacremoses": "sacremoses",
|
||||||
|
"sagemaker": "sagemaker>=2.31.0",
|
||||||
"scikit-learn": "scikit-learn",
|
"scikit-learn": "scikit-learn",
|
||||||
"sentencepiece": "sentencepiece==0.1.91",
|
"sentencepiece": "sentencepiece==0.1.91",
|
||||||
"soundfile": "soundfile",
|
"soundfile": "soundfile",
|
||||||
"sphinx-copybutton": "sphinx-copybutton",
|
"sphinx-copybutton": "sphinx-copybutton",
|
||||||
"sphinx-markdown-tables": "sphinx-markdown-tables",
|
"sphinx-markdown-tables": "sphinx-markdown-tables",
|
||||||
"sphinx-rtd-theme": "sphinx-rtd-theme==0.4.3",
|
"sphinx-rtd-theme": "sphinx-rtd-theme==0.4.3",
|
||||||
"sphinxext-opengraph": "sphinxext-opengraph==0.4.1",
|
|
||||||
"sphinx": "sphinx==3.2.1",
|
"sphinx": "sphinx==3.2.1",
|
||||||
|
"sphinxext-opengraph": "sphinxext-opengraph==0.4.1",
|
||||||
"starlette": "starlette",
|
"starlette": "starlette",
|
||||||
"tensorflow-cpu": "tensorflow-cpu>=2.3",
|
"tensorflow-cpu": "tensorflow-cpu>=2.3",
|
||||||
"tensorflow": "tensorflow>=2.3",
|
"tensorflow": "tensorflow>=2.3",
|
||||||
@@ -56,5 +62,4 @@ deps = {
|
|||||||
"unidic": "unidic>=1.0.2",
|
"unidic": "unidic>=1.0.2",
|
||||||
"unidic_lite": "unidic_lite>=1.0.7",
|
"unidic_lite": "unidic_lite>=1.0.7",
|
||||||
"uvicorn": "uvicorn",
|
"uvicorn": "uvicorn",
|
||||||
"sagemaker": "sagemaker>=2.31.0",
|
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -24,8 +24,8 @@ import tempfile
|
|||||||
from copy import deepcopy
|
from copy import deepcopy
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
|
from .dependency_versions_check import dep_version_check
|
||||||
from .utils import logging
|
from .utils import logging
|
||||||
from .utils.versions import require_version
|
|
||||||
|
|
||||||
|
|
||||||
logger = logging.get_logger(__name__)
|
logger = logging.get_logger(__name__)
|
||||||
@@ -324,7 +324,7 @@ def deepspeed_parse_config(ds_config):
|
|||||||
|
|
||||||
If it's already a dict, return a copy of it, so that we can freely modify it.
|
If it's already a dict, return a copy of it, so that we can freely modify it.
|
||||||
"""
|
"""
|
||||||
require_version("deepspeed>0.3.13")
|
dep_version_check("deepspeed")
|
||||||
|
|
||||||
if isinstance(ds_config, dict):
|
if isinstance(ds_config, dict):
|
||||||
# Don't modify user's data should they want to reuse it (e.g. in tests), because once we
|
# Don't modify user's data should they want to reuse it (e.g. in tests), because once we
|
||||||
@@ -604,7 +604,9 @@ class TensorBoardCallback(TrainerCallback):
|
|||||||
self.tb_writer.add_hparams(args.to_sanitized_dict(), metric_dict={})
|
self.tb_writer.add_hparams(args.to_sanitized_dict(), metric_dict={})
|
||||||
|
|
||||||
def on_log(self, args, state, control, logs=None, **kwargs):
|
def on_log(self, args, state, control, logs=None, **kwargs):
|
||||||
if state.is_world_process_zero:
|
if not state.is_world_process_zero:
|
||||||
|
return
|
||||||
|
|
||||||
if self.tb_writer is None:
|
if self.tb_writer is None:
|
||||||
self._init_summary_writer(args)
|
self._init_summary_writer(args)
|
||||||
|
|
||||||
|
|||||||
@@ -387,6 +387,7 @@ class FlaxPreTrainedModel(ABC):
|
|||||||
# get abs dir
|
# get abs dir
|
||||||
save_directory = os.path.abspath(save_directory)
|
save_directory = os.path.abspath(save_directory)
|
||||||
# save config as well
|
# save config as well
|
||||||
|
self.config.architectures = [self.__class__.__name__[4:]]
|
||||||
self.config.save_pretrained(save_directory)
|
self.config.save_pretrained(save_directory)
|
||||||
|
|
||||||
# save model
|
# save model
|
||||||
|
|||||||
@@ -1037,6 +1037,7 @@ class TFPreTrainedModel(tf.keras.Model, TFModelUtilsMixin, TFGenerationMixin):
|
|||||||
logger.info(f"Saved model created in {saved_model_dir}")
|
logger.info(f"Saved model created in {saved_model_dir}")
|
||||||
|
|
||||||
# Save configuration file
|
# Save configuration file
|
||||||
|
self.config.architectures = [self.__class__.__name__[2:]]
|
||||||
self.config.save_pretrained(save_directory)
|
self.config.save_pretrained(save_directory)
|
||||||
|
|
||||||
# If we save using the predefined names, we can load using `from_pretrained`
|
# If we save using the predefined names, we can load using `from_pretrained`
|
||||||
|
|||||||
@@ -22,6 +22,7 @@ from ...file_utils import _BaseLazyModule, is_flax_available, is_tf_available, i
|
|||||||
|
|
||||||
|
|
||||||
_import_structure = {
|
_import_structure = {
|
||||||
|
"auto_factory": ["get_values"],
|
||||||
"configuration_auto": ["ALL_PRETRAINED_CONFIG_ARCHIVE_MAP", "CONFIG_MAPPING", "MODEL_NAMES_MAPPING", "AutoConfig"],
|
"configuration_auto": ["ALL_PRETRAINED_CONFIG_ARCHIVE_MAP", "CONFIG_MAPPING", "MODEL_NAMES_MAPPING", "AutoConfig"],
|
||||||
"feature_extraction_auto": ["FEATURE_EXTRACTOR_MAPPING", "AutoFeatureExtractor"],
|
"feature_extraction_auto": ["FEATURE_EXTRACTOR_MAPPING", "AutoFeatureExtractor"],
|
||||||
"tokenization_auto": ["TOKENIZER_MAPPING", "AutoTokenizer"],
|
"tokenization_auto": ["TOKENIZER_MAPPING", "AutoTokenizer"],
|
||||||
@@ -104,6 +105,7 @@ if is_flax_available():
|
|||||||
|
|
||||||
|
|
||||||
if TYPE_CHECKING:
|
if TYPE_CHECKING:
|
||||||
|
from .auto_factory import get_values
|
||||||
from .configuration_auto import ALL_PRETRAINED_CONFIG_ARCHIVE_MAP, CONFIG_MAPPING, MODEL_NAMES_MAPPING, AutoConfig
|
from .configuration_auto import ALL_PRETRAINED_CONFIG_ARCHIVE_MAP, CONFIG_MAPPING, MODEL_NAMES_MAPPING, AutoConfig
|
||||||
from .feature_extraction_auto import FEATURE_EXTRACTOR_MAPPING, AutoFeatureExtractor
|
from .feature_extraction_auto import FEATURE_EXTRACTOR_MAPPING, AutoFeatureExtractor
|
||||||
from .tokenization_auto import TOKENIZER_MAPPING, AutoTokenizer
|
from .tokenization_auto import TOKENIZER_MAPPING, AutoTokenizer
|
||||||
|
|||||||
@@ -328,6 +328,26 @@ FROM_PRETRAINED_FLAX_DOCSTRING = """
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
|
|
||||||
|
def _get_model_class(config, model_mapping):
|
||||||
|
supported_models = model_mapping[type(config)]
|
||||||
|
if not isinstance(supported_models, (list, tuple)):
|
||||||
|
return supported_models
|
||||||
|
|
||||||
|
name_to_model = {model.__name__: model for model in supported_models}
|
||||||
|
architectures = getattr(config, "architectures", [])
|
||||||
|
for arch in architectures:
|
||||||
|
if arch in name_to_model:
|
||||||
|
return name_to_model[arch]
|
||||||
|
elif f"TF{arch}" in name_to_model:
|
||||||
|
return name_to_model[f"TF{arch}"]
|
||||||
|
elif f"Flax{arch}" in name_to_model:
|
||||||
|
return name_to_model[f"Flax{arch}"]
|
||||||
|
|
||||||
|
# If no architecture is set in the config, or none matches the supported models, the first element of the tuple is the
|
||||||
|
# default.
|
||||||
|
return supported_models[0]
|
||||||
|
|
||||||
|
|
||||||
class _BaseAutoModelClass:
|
class _BaseAutoModelClass:
|
||||||
# Base class for auto models.
|
# Base class for auto models.
|
||||||
_model_mapping = None
|
_model_mapping = None
|
||||||
@@ -341,7 +361,8 @@ class _BaseAutoModelClass:
|
|||||||
|
|
||||||
def from_config(cls, config, **kwargs):
|
def from_config(cls, config, **kwargs):
|
||||||
if type(config) in cls._model_mapping.keys():
|
if type(config) in cls._model_mapping.keys():
|
||||||
return cls._model_mapping[type(config)](config, **kwargs)
|
model_class = _get_model_class(config, cls._model_mapping)
|
||||||
|
return model_class(config, **kwargs)
|
||||||
raise ValueError(
|
raise ValueError(
|
||||||
f"Unrecognized configuration class {config.__class__} for this kind of AutoModel: {cls.__name__}.\n"
|
f"Unrecognized configuration class {config.__class__} for this kind of AutoModel: {cls.__name__}.\n"
|
||||||
f"Model type should be one of {', '.join(c.__name__ for c in cls._model_mapping.keys())}."
|
f"Model type should be one of {', '.join(c.__name__ for c in cls._model_mapping.keys())}."
|
||||||
@@ -356,9 +377,8 @@ class _BaseAutoModelClass:
|
|||||||
)
|
)
|
||||||
|
|
||||||
if type(config) in cls._model_mapping.keys():
|
if type(config) in cls._model_mapping.keys():
|
||||||
return cls._model_mapping[type(config)].from_pretrained(
|
model_class = _get_model_class(config, cls._model_mapping)
|
||||||
pretrained_model_name_or_path, *model_args, config=config, **kwargs
|
return model_class.from_pretrained(pretrained_model_name_or_path, *model_args, config=config, **kwargs)
|
||||||
)
|
|
||||||
raise ValueError(
|
raise ValueError(
|
||||||
f"Unrecognized configuration class {config.__class__} for this kind of AutoModel: {cls.__name__}.\n"
|
f"Unrecognized configuration class {config.__class__} for this kind of AutoModel: {cls.__name__}.\n"
|
||||||
f"Model type should be one of {', '.join(c.__name__ for c in cls._model_mapping.keys())}."
|
f"Model type should be one of {', '.join(c.__name__ for c in cls._model_mapping.keys())}."
|
||||||
@@ -418,3 +438,14 @@ def auto_class_factory(name, model_mapping, checkpoint_for_example="bert-base-ca
|
|||||||
from_pretrained = replace_list_option_in_docstrings(model_mapping)(from_pretrained)
|
from_pretrained = replace_list_option_in_docstrings(model_mapping)(from_pretrained)
|
||||||
new_class.from_pretrained = classmethod(from_pretrained)
|
new_class.from_pretrained = classmethod(from_pretrained)
|
||||||
return new_class
|
return new_class
|
||||||
|
|
||||||
|
|
||||||
|
def get_values(model_mapping):
|
||||||
|
result = []
|
||||||
|
for model in model_mapping.values():
|
||||||
|
if isinstance(model, (list, tuple)):
|
||||||
|
result += list(model)
|
||||||
|
else:
|
||||||
|
result.append(model)
|
||||||
|
|
||||||
|
return result
|
||||||
|
|||||||
@@ -247,29 +247,38 @@ MODEL_NAMES_MAPPING = OrderedDict(
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def _get_class_name(model_class):
|
||||||
|
if isinstance(model_class, (list, tuple)):
|
||||||
|
return " or ".join([f":class:`~transformers.{c.__name__}`" for c in model_class])
|
||||||
|
return f":class:`~transformers.{model_class.__name__}`"
|
||||||
|
|
||||||
|
|
||||||
def _list_model_options(indent, config_to_class=None, use_model_types=True):
    """
    Build the bullet list of supported models that is inserted into docstrings.

    Args:
        indent (:obj:`str`): prefix put in front of every generated line.
        config_to_class (:obj:`dict`, `optional`): maps configuration classes to a class or a list/tuple of classes.
            When omitted, the configuration classes of ``CONFIG_MAPPING`` themselves are listed.
        use_model_types (:obj:`bool`, `optional`, defaults to :obj:`True`): list entries by model type when
            :obj:`True`, by configuration class name otherwise.

    Returns:
        :obj:`str`: the generated lines, sorted by their key and joined with newlines.

    Raises:
        ValueError: if ``use_model_types`` is :obj:`False` and no ``config_to_class`` is given.
    """
    if config_to_class is None and not use_model_types:
        raise ValueError("Using `use_model_types=False` requires a `config_to_class` dictionary.")
    if use_model_types:
        if config_to_class is None:
            model_type_to_name = {
                model_type: f":class:`~transformers.{config.__name__}`"
                for model_type, config in CONFIG_MAPPING.items()
            }
        else:
            # Only model types whose configuration has an entry in `config_to_class` are listed.
            model_type_to_name = {
                model_type: _get_class_name(config_to_class[config])
                for model_type, config in CONFIG_MAPPING.items()
                if config in config_to_class
            }
        lines = [
            f"{indent}- **{model_type}** -- {model_type_to_name[model_type]} ({MODEL_NAMES_MAPPING[model_type]} model)"
            for model_type in sorted(model_type_to_name)
        ]
    else:
        config_to_name = {config.__name__: _get_class_name(clas) for config, clas in config_to_class.items()}
        config_to_model_name = {
            config.__name__: MODEL_NAMES_MAPPING[model_type] for model_type, config in CONFIG_MAPPING.items()
        }
        lines = [
            f"{indent}- :class:`~transformers.{config_name}` configuration class: {config_to_name[config_name]} ({config_to_model_name[config_name]} model)"
            for config_name in sorted(config_to_name)
        ]
    return "\n".join(lines)
|
||||||
|
|||||||
@@ -124,6 +124,7 @@ from ..flaubert.modeling_flaubert import (
|
|||||||
)
|
)
|
||||||
from ..fsmt.modeling_fsmt import FSMTForConditionalGeneration, FSMTModel
|
from ..fsmt.modeling_fsmt import FSMTForConditionalGeneration, FSMTModel
|
||||||
from ..funnel.modeling_funnel import (
|
from ..funnel.modeling_funnel import (
|
||||||
|
FunnelBaseModel,
|
||||||
FunnelForMaskedLM,
|
FunnelForMaskedLM,
|
||||||
FunnelForMultipleChoice,
|
FunnelForMultipleChoice,
|
||||||
FunnelForPreTraining,
|
FunnelForPreTraining,
|
||||||
@@ -377,7 +378,7 @@ MODEL_MAPPING = OrderedDict(
|
|||||||
(CTRLConfig, CTRLModel),
|
(CTRLConfig, CTRLModel),
|
||||||
(ElectraConfig, ElectraModel),
|
(ElectraConfig, ElectraModel),
|
||||||
(ReformerConfig, ReformerModel),
|
(ReformerConfig, ReformerModel),
|
||||||
(FunnelConfig, FunnelModel),
|
(FunnelConfig, (FunnelModel, FunnelBaseModel)),
|
||||||
(LxmertConfig, LxmertModel),
|
(LxmertConfig, LxmertModel),
|
||||||
(BertGenerationConfig, BertGenerationEncoder),
|
(BertGenerationConfig, BertGenerationEncoder),
|
||||||
(DebertaConfig, DebertaModel),
|
(DebertaConfig, DebertaModel),
|
||||||
|
|||||||
@@ -91,6 +91,7 @@ from ..flaubert.modeling_tf_flaubert import (
|
|||||||
TFFlaubertWithLMHeadModel,
|
TFFlaubertWithLMHeadModel,
|
||||||
)
|
)
|
||||||
from ..funnel.modeling_tf_funnel import (
|
from ..funnel.modeling_tf_funnel import (
|
||||||
|
TFFunnelBaseModel,
|
||||||
TFFunnelForMaskedLM,
|
TFFunnelForMaskedLM,
|
||||||
TFFunnelForMultipleChoice,
|
TFFunnelForMultipleChoice,
|
||||||
TFFunnelForPreTraining,
|
TFFunnelForPreTraining,
|
||||||
@@ -242,7 +243,7 @@ TF_MODEL_MAPPING = OrderedDict(
|
|||||||
(XLMConfig, TFXLMModel),
|
(XLMConfig, TFXLMModel),
|
||||||
(CTRLConfig, TFCTRLModel),
|
(CTRLConfig, TFCTRLModel),
|
||||||
(ElectraConfig, TFElectraModel),
|
(ElectraConfig, TFElectraModel),
|
||||||
(FunnelConfig, TFFunnelModel),
|
(FunnelConfig, (TFFunnelModel, TFFunnelBaseModel)),
|
||||||
(DPRConfig, TFDPRQuestionEncoder),
|
(DPRConfig, TFDPRQuestionEncoder),
|
||||||
(MPNetConfig, TFMPNetModel),
|
(MPNetConfig, TFMPNetModel),
|
||||||
(BartConfig, TFBartModel),
|
(BartConfig, TFBartModel),
|
||||||
|
|||||||
@@ -24,6 +24,7 @@ import unittest
|
|||||||
from distutils.util import strtobool
|
from distutils.util import strtobool
|
||||||
from io import StringIO
|
from io import StringIO
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
from typing import Iterator, Union
|
||||||
|
|
||||||
from .file_utils import (
|
from .file_utils import (
|
||||||
is_datasets_available,
|
is_datasets_available,
|
||||||
@@ -621,6 +622,27 @@ class CaptureLogger:
|
|||||||
return f"captured: {self.out}\n"
|
return f"captured: {self.out}\n"
|
||||||
|
|
||||||
|
|
||||||
|
@contextlib.contextmanager
# adapted from https://stackoverflow.com/a/64789046/9201239
def ExtendSysPath(path: Union[str, os.PathLike]) -> Iterator[None]:
    """
    Temporarily add the given path to the front of `sys.path`.

    The path is inserted at index 0 on entry and removed again on exit, whether or not the body raised.

    Args:
        path (:obj:`str` or :obj:`os.PathLike`): the directory to make importable for the duration of the block.

    Usage ::

       with ExtendSysPath('/path/to/dir'):
           mymodule = importlib.import_module('mymodule')

    """

    path = os.fspath(path)
    # Insert before entering the `try` so the cleanup below only runs once the entry really exists.
    sys.path.insert(0, path)
    try:
        yield
    finally:
        # The body may already have removed the entry itself; a ValueError raised here would mask whatever
        # exception the body raised.
        with contextlib.suppress(ValueError):
            sys.path.remove(path)
|
||||||
|
|
||||||
|
|
||||||
class TestCasePlus(unittest.TestCase):
|
class TestCasePlus(unittest.TestCase):
|
||||||
"""
|
"""
|
||||||
This class extends `unittest.TestCase` with additional features.
|
This class extends `unittest.TestCase` with additional features.
|
||||||
|
|||||||
@@ -54,6 +54,7 @@ from torch.utils.data.distributed import DistributedSampler
|
|||||||
from torch.utils.data.sampler import RandomSampler, SequentialSampler
|
from torch.utils.data.sampler import RandomSampler, SequentialSampler
|
||||||
|
|
||||||
from .data.data_collator import DataCollator, DataCollatorWithPadding, default_data_collator
|
from .data.data_collator import DataCollator, DataCollatorWithPadding, default_data_collator
|
||||||
|
from .dependency_versions_check import dep_version_check
|
||||||
from .file_utils import (
|
from .file_utils import (
|
||||||
WEIGHTS_NAME,
|
WEIGHTS_NAME,
|
||||||
is_apex_available,
|
is_apex_available,
|
||||||
@@ -139,17 +140,14 @@ if is_torch_tpu_available():
|
|||||||
import torch_xla.distributed.parallel_loader as pl
|
import torch_xla.distributed.parallel_loader as pl
|
||||||
|
|
||||||
if is_fairscale_available():
|
if is_fairscale_available():
|
||||||
|
dep_version_check("fairscale")
|
||||||
import fairscale
|
import fairscale
|
||||||
|
from fairscale.nn.data_parallel import FullyShardedDataParallel as FullyShardedDDP
|
||||||
from fairscale.nn.data_parallel import ShardedDataParallel as ShardedDDP
|
from fairscale.nn.data_parallel import ShardedDataParallel as ShardedDDP
|
||||||
|
from fairscale.nn.wrap import auto_wrap
|
||||||
from fairscale.optim import OSS
|
from fairscale.optim import OSS
|
||||||
from fairscale.optim.grad_scaler import ShardedGradScaler
|
from fairscale.optim.grad_scaler import ShardedGradScaler
|
||||||
|
|
||||||
if version.parse(fairscale.__version__) >= version.parse("0.3"):
|
|
||||||
from fairscale.nn.data_parallel import FullyShardedDataParallel as FullyShardedDDP
|
|
||||||
from fairscale.nn.wrap import auto_wrap
|
|
||||||
else:
|
|
||||||
FullyShardedDDP = None
|
|
||||||
|
|
||||||
if is_sagemaker_dp_enabled():
|
if is_sagemaker_dp_enabled():
|
||||||
import smdistributed.dataparallel.torch.distributed as dist
|
import smdistributed.dataparallel.torch.distributed as dist
|
||||||
from smdistributed.dataparallel.torch.parallel.distributed import DistributedDataParallel as DDP
|
from smdistributed.dataparallel.torch.parallel.distributed import DistributedDataParallel as DDP
|
||||||
|
|||||||
@@ -531,6 +531,12 @@ class TrainingArguments:
|
|||||||
)
|
)
|
||||||
|
|
||||||
def __post_init__(self):
|
def __post_init__(self):
|
||||||
|
# Handle --use_env option in torch.distributed.launch (local_rank not passed as an arg then).
|
||||||
|
# This needs to happen before any call to self.device or self.n_gpu.
|
||||||
|
env_local_rank = int(os.environ.get("LOCAL_RANK", -1))
|
||||||
|
if env_local_rank != -1 and env_local_rank != self.local_rank:
|
||||||
|
self.local_rank = env_local_rank
|
||||||
|
|
||||||
# expand paths, if not os.makedirs("~/bar") will make directory
|
# expand paths, if not os.makedirs("~/bar") will make directory
|
||||||
# in the current directory instead of the actual home
|
# in the current directory instead of the actual home
|
||||||
# see https://github.com/huggingface/transformers/issues/10628
|
# see https://github.com/huggingface/transformers/issues/10628
|
||||||
|
|||||||
@@ -60,6 +60,12 @@ def require_version(requirement: str, hint: Optional[str] = None) -> None:
|
|||||||
Args:
|
Args:
|
||||||
requirement (:obj:`str`): pip style definition, e.g., "tokenizers==0.9.4", "tqdm>=4.27", "numpy"
|
requirement (:obj:`str`): pip style definition, e.g., "tokenizers==0.9.4", "tqdm>=4.27", "numpy"
|
||||||
hint (:obj:`str`, `optional`): what suggestion to print in case of requirements not being met
|
hint (:obj:`str`, `optional`): what suggestion to print in case of requirements not being met
|
||||||
|
|
||||||
|
Example::
|
||||||
|
|
||||||
|
require_version("pandas>1.1.2")
|
||||||
|
require_version("numpy>1.18.5", "this is important to have for whatever reason")
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
hint = f"\n{hint}" if hint is not None else ""
|
hint = f"\n{hint}" if hint is not None else ""
|
||||||
|
|||||||
@@ -16,16 +16,16 @@ import dataclasses
|
|||||||
import io
|
import io
|
||||||
import json
|
import json
|
||||||
import os
|
import os
|
||||||
import sys
|
|
||||||
import unittest
|
import unittest
|
||||||
from copy import deepcopy
|
from copy import deepcopy
|
||||||
|
|
||||||
from parameterized import parameterized
|
from parameterized import parameterized
|
||||||
from transformers import TrainingArguments
|
from transformers import TrainingArguments, is_torch_available
|
||||||
from transformers.file_utils import WEIGHTS_NAME
|
from transformers.file_utils import WEIGHTS_NAME
|
||||||
from transformers.integrations import is_deepspeed_available
|
from transformers.integrations import is_deepspeed_available
|
||||||
from transformers.testing_utils import (
|
from transformers.testing_utils import (
|
||||||
CaptureLogger,
|
CaptureLogger,
|
||||||
|
ExtendSysPath,
|
||||||
TestCasePlus,
|
TestCasePlus,
|
||||||
execute_subprocess_async,
|
execute_subprocess_async,
|
||||||
get_gpu_count,
|
get_gpu_count,
|
||||||
@@ -38,8 +38,11 @@ from transformers.trainer_utils import set_seed
|
|||||||
|
|
||||||
|
|
||||||
bindir = os.path.abspath(os.path.dirname(__file__))
|
bindir = os.path.abspath(os.path.dirname(__file__))
|
||||||
sys.path.append(f"{bindir}/../../../tests")
|
with ExtendSysPath(f"{bindir}/.."):
|
||||||
from test_trainer import TrainerIntegrationCommon, get_regression_trainer # noqa
|
from test_trainer import TrainerIntegrationCommon # noqa
|
||||||
|
|
||||||
|
if is_torch_available():
|
||||||
|
from test_trainer import get_regression_trainer # noqa
|
||||||
|
|
||||||
|
|
||||||
set_seed(42)
|
set_seed(42)
|
||||||
@@ -21,6 +21,7 @@ from unittest.mock import patch
|
|||||||
from transformers.file_utils import is_apex_available
|
from transformers.file_utils import is_apex_available
|
||||||
from transformers.integrations import is_fairscale_available
|
from transformers.integrations import is_fairscale_available
|
||||||
from transformers.testing_utils import (
|
from transformers.testing_utils import (
|
||||||
|
ExtendSysPath,
|
||||||
TestCasePlus,
|
TestCasePlus,
|
||||||
execute_subprocess_async,
|
execute_subprocess_async,
|
||||||
get_gpu_count,
|
get_gpu_count,
|
||||||
@@ -34,8 +35,8 @@ from transformers.trainer_utils import set_seed
|
|||||||
|
|
||||||
|
|
||||||
bindir = os.path.abspath(os.path.dirname(__file__))
|
bindir = os.path.abspath(os.path.dirname(__file__))
|
||||||
sys.path.append(f"{bindir}/../../seq2seq")
|
with ExtendSysPath(f"{bindir}/../../examples/seq2seq"):
|
||||||
from run_translation import main # noqa
|
from run_translation import main # noqa
|
||||||
|
|
||||||
|
|
||||||
set_seed(42)
|
set_seed(42)
|
||||||
@@ -136,10 +136,7 @@ images:
|
|||||||
docker_file: !join [ docker/, *SHORT_VERSION, /, *DOCKER_PYTHON_VERSION, /,
|
docker_file: !join [ docker/, *SHORT_VERSION, /, *DOCKER_PYTHON_VERSION, /,
|
||||||
*CUDA_VERSION, /Dockerfile., *DEVICE_TYPE ]
|
*CUDA_VERSION, /Dockerfile., *DEVICE_TYPE ]
|
||||||
```
|
```
|
||||||
2. In the PR comment describe what test we ran and with which framework versions. Here you can copy the table from [Current Tests](#current-tests).
|
2. In the PR comment, describe which tests we ran and with which framework versions. Here you can copy the table from [Current Tests](#current-tests). Take a look at this [PR](https://github.com/aws/deep-learning-containers/pull/1016) to see what information is needed.
|
||||||
|
|
||||||
TODO: Add a screenshot of PR + Text template to make it easy to open.
|
|
||||||
|
|
||||||
|
|
||||||
## Current Tests
|
## Current Tests
|
||||||
|
|
||||||
|
|||||||
@@ -146,11 +146,8 @@ class DataCollatorIntegrationTest(unittest.TestCase):
|
|||||||
self.assertEqual(batch["labels"].shape, torch.Size([2, 6]))
|
self.assertEqual(batch["labels"].shape, torch.Size([2, 6]))
|
||||||
self.assertEqual(batch["labels"][0].tolist(), [0, 1, 2] + [-1] * 3)
|
self.assertEqual(batch["labels"][0].tolist(), [0, 1, 2] + [-1] * 3)
|
||||||
|
|
||||||
def test_data_collator_for_language_modeling(self):
|
def _test_no_pad_and_pad(self, no_pad_features, pad_features):
|
||||||
tokenizer = BertTokenizer(self.vocab_file)
|
tokenizer = BertTokenizer(self.vocab_file)
|
||||||
no_pad_features = [{"input_ids": list(range(10))}, {"input_ids": list(range(10))}]
|
|
||||||
pad_features = [{"input_ids": list(range(5))}, {"input_ids": list(range(10))}]
|
|
||||||
|
|
||||||
data_collator = DataCollatorForLanguageModeling(tokenizer, mlm=False)
|
data_collator = DataCollatorForLanguageModeling(tokenizer, mlm=False)
|
||||||
batch = data_collator(no_pad_features)
|
batch = data_collator(no_pad_features)
|
||||||
self.assertEqual(batch["input_ids"].shape, torch.Size((2, 10)))
|
self.assertEqual(batch["input_ids"].shape, torch.Size((2, 10)))
|
||||||
@@ -160,6 +157,15 @@ class DataCollatorIntegrationTest(unittest.TestCase):
|
|||||||
self.assertEqual(batch["input_ids"].shape, torch.Size((2, 10)))
|
self.assertEqual(batch["input_ids"].shape, torch.Size((2, 10)))
|
||||||
self.assertEqual(batch["labels"].shape, torch.Size((2, 10)))
|
self.assertEqual(batch["labels"].shape, torch.Size((2, 10)))
|
||||||
|
|
||||||
|
data_collator = DataCollatorForLanguageModeling(tokenizer, mlm=False, pad_to_multiple_of=8)
|
||||||
|
batch = data_collator(no_pad_features)
|
||||||
|
self.assertEqual(batch["input_ids"].shape, torch.Size((2, 16)))
|
||||||
|
self.assertEqual(batch["labels"].shape, torch.Size((2, 16)))
|
||||||
|
|
||||||
|
batch = data_collator(pad_features)
|
||||||
|
self.assertEqual(batch["input_ids"].shape, torch.Size((2, 16)))
|
||||||
|
self.assertEqual(batch["labels"].shape, torch.Size((2, 16)))
|
||||||
|
|
||||||
tokenizer._pad_token = None
|
tokenizer._pad_token = None
|
||||||
data_collator = DataCollatorForLanguageModeling(tokenizer, mlm=False)
|
data_collator = DataCollatorForLanguageModeling(tokenizer, mlm=False)
|
||||||
with self.assertRaises(ValueError):
|
with self.assertRaises(ValueError):
|
||||||
@@ -185,6 +191,32 @@ class DataCollatorIntegrationTest(unittest.TestCase):
|
|||||||
self.assertTrue(torch.any(masked_tokens))
|
self.assertTrue(torch.any(masked_tokens))
|
||||||
self.assertTrue(all(x == -100 for x in batch["labels"][~masked_tokens].tolist()))
|
self.assertTrue(all(x == -100 for x in batch["labels"][~masked_tokens].tolist()))
|
||||||
|
|
||||||
|
data_collator = DataCollatorForLanguageModeling(tokenizer, pad_to_multiple_of=8)
|
||||||
|
batch = data_collator(no_pad_features)
|
||||||
|
self.assertEqual(batch["input_ids"].shape, torch.Size((2, 16)))
|
||||||
|
self.assertEqual(batch["labels"].shape, torch.Size((2, 16)))
|
||||||
|
|
||||||
|
masked_tokens = batch["input_ids"] == tokenizer.mask_token_id
|
||||||
|
self.assertTrue(torch.any(masked_tokens))
|
||||||
|
self.assertTrue(all(x == -100 for x in batch["labels"][~masked_tokens].tolist()))
|
||||||
|
|
||||||
|
batch = data_collator(pad_features)
|
||||||
|
self.assertEqual(batch["input_ids"].shape, torch.Size((2, 16)))
|
||||||
|
self.assertEqual(batch["labels"].shape, torch.Size((2, 16)))
|
||||||
|
|
||||||
|
masked_tokens = batch["input_ids"] == tokenizer.mask_token_id
|
||||||
|
self.assertTrue(torch.any(masked_tokens))
|
||||||
|
self.assertTrue(all(x == -100 for x in batch["labels"][~masked_tokens].tolist()))
|
||||||
|
|
||||||
|
def test_data_collator_for_language_modeling(self):
|
||||||
|
no_pad_features = [{"input_ids": list(range(10))}, {"input_ids": list(range(10))}]
|
||||||
|
pad_features = [{"input_ids": list(range(5))}, {"input_ids": list(range(10))}]
|
||||||
|
self._test_no_pad_and_pad(no_pad_features, pad_features)
|
||||||
|
|
||||||
|
no_pad_features = [list(range(10)), list(range(10))]
|
||||||
|
pad_features = [list(range(5)), list(range(10))]
|
||||||
|
self._test_no_pad_and_pad(no_pad_features, pad_features)
|
||||||
|
|
||||||
def test_plm(self):
|
def test_plm(self):
|
||||||
tokenizer = BertTokenizer(self.vocab_file)
|
tokenizer = BertTokenizer(self.vocab_file)
|
||||||
no_pad_features = [{"input_ids": list(range(10))}, {"input_ids": list(range(10))}]
|
no_pad_features = [{"input_ids": list(range(10))}, {"input_ids": list(range(10))}]
|
||||||
@@ -225,6 +257,14 @@ class DataCollatorIntegrationTest(unittest.TestCase):
|
|||||||
self.assertEqual(batch["labels"].shape, torch.Size((2, 5)))
|
self.assertEqual(batch["labels"].shape, torch.Size((2, 5)))
|
||||||
self.assertEqual(batch["next_sentence_label"].shape, torch.Size((2,)))
|
self.assertEqual(batch["next_sentence_label"].shape, torch.Size((2,)))
|
||||||
|
|
||||||
|
data_collator = DataCollatorForLanguageModeling(tokenizer, pad_to_multiple_of=8)
|
||||||
|
batch = data_collator(features)
|
||||||
|
|
||||||
|
self.assertEqual(batch["input_ids"].shape, torch.Size((2, 8)))
|
||||||
|
self.assertEqual(batch["token_type_ids"].shape, torch.Size((2, 8)))
|
||||||
|
self.assertEqual(batch["labels"].shape, torch.Size((2, 8)))
|
||||||
|
self.assertEqual(batch["next_sentence_label"].shape, torch.Size((2,)))
|
||||||
|
|
||||||
def test_sop(self):
|
def test_sop(self):
|
||||||
tokenizer = BertTokenizer(self.vocab_file)
|
tokenizer = BertTokenizer(self.vocab_file)
|
||||||
features = [
|
features = [
|
||||||
@@ -242,3 +282,11 @@ class DataCollatorIntegrationTest(unittest.TestCase):
|
|||||||
self.assertEqual(batch["token_type_ids"].shape, torch.Size((2, 5)))
|
self.assertEqual(batch["token_type_ids"].shape, torch.Size((2, 5)))
|
||||||
self.assertEqual(batch["labels"].shape, torch.Size((2, 5)))
|
self.assertEqual(batch["labels"].shape, torch.Size((2, 5)))
|
||||||
self.assertEqual(batch["sentence_order_label"].shape, torch.Size((2,)))
|
self.assertEqual(batch["sentence_order_label"].shape, torch.Size((2,)))
|
||||||
|
|
||||||
|
data_collator = DataCollatorForLanguageModeling(tokenizer, pad_to_multiple_of=8)
|
||||||
|
batch = data_collator(features)
|
||||||
|
|
||||||
|
self.assertEqual(batch["input_ids"].shape, torch.Size((2, 8)))
|
||||||
|
self.assertEqual(batch["token_type_ids"].shape, torch.Size((2, 8)))
|
||||||
|
self.assertEqual(batch["labels"].shape, torch.Size((2, 8)))
|
||||||
|
self.assertEqual(batch["sentence_order_label"].shape, torch.Size((2,)))
|
||||||
|
|||||||
@@ -17,6 +17,7 @@
|
|||||||
import unittest
|
import unittest
|
||||||
|
|
||||||
from transformers import is_torch_available
|
from transformers import is_torch_available
|
||||||
|
from transformers.models.auto import get_values
|
||||||
from transformers.testing_utils import require_torch, slow, torch_device
|
from transformers.testing_utils import require_torch, slow, torch_device
|
||||||
|
|
||||||
from .test_configuration_common import ConfigTester
|
from .test_configuration_common import ConfigTester
|
||||||
@@ -234,7 +235,7 @@ class AlbertModelTest(ModelTesterMixin, unittest.TestCase):
|
|||||||
inputs_dict = super()._prepare_for_class(inputs_dict, model_class, return_labels=return_labels)
|
inputs_dict = super()._prepare_for_class(inputs_dict, model_class, return_labels=return_labels)
|
||||||
|
|
||||||
if return_labels:
|
if return_labels:
|
||||||
if model_class in MODEL_FOR_PRETRAINING_MAPPING.values():
|
if model_class in get_values(MODEL_FOR_PRETRAINING_MAPPING):
|
||||||
inputs_dict["labels"] = torch.zeros(
|
inputs_dict["labels"] = torch.zeros(
|
||||||
(self.model_tester.batch_size, self.model_tester.seq_length), dtype=torch.long, device=torch_device
|
(self.model_tester.batch_size, self.model_tester.seq_length), dtype=torch.long, device=torch_device
|
||||||
)
|
)
|
||||||
|
|||||||
@@ -13,7 +13,8 @@
|
|||||||
# See the License for the specific language governing permissions and
|
# See the License for the specific language governing permissions and
|
||||||
# limitations under the License.
|
# limitations under the License.
|
||||||
|
|
||||||
|
import copy
|
||||||
|
import tempfile
|
||||||
import unittest
|
import unittest
|
||||||
|
|
||||||
from transformers import is_torch_available
|
from transformers import is_torch_available
|
||||||
@@ -46,6 +47,8 @@ if is_torch_available():
|
|||||||
BertForSequenceClassification,
|
BertForSequenceClassification,
|
||||||
BertForTokenClassification,
|
BertForTokenClassification,
|
||||||
BertModel,
|
BertModel,
|
||||||
|
FunnelBaseModel,
|
||||||
|
FunnelModel,
|
||||||
GPT2Config,
|
GPT2Config,
|
||||||
GPT2LMHeadModel,
|
GPT2LMHeadModel,
|
||||||
RobertaForMaskedLM,
|
RobertaForMaskedLM,
|
||||||
@@ -218,6 +221,21 @@ class AutoModelTest(unittest.TestCase):
|
|||||||
self.assertEqual(model.num_parameters(), 14410)
|
self.assertEqual(model.num_parameters(), 14410)
|
||||||
self.assertEqual(model.num_parameters(only_trainable=True), 14410)
|
self.assertEqual(model.num_parameters(only_trainable=True), 14410)
|
||||||
|
|
||||||
|
def test_from_pretrained_with_tuple_values(self):
|
||||||
|
# For the auto model mapping, FunnelConfig has two models: FunnelModel and FunnelBaseModel
|
||||||
|
model = AutoModel.from_pretrained("sgugger/funnel-random-tiny")
|
||||||
|
self.assertIsInstance(model, FunnelModel)
|
||||||
|
|
||||||
|
config = copy.deepcopy(model.config)
|
||||||
|
config.architectures = ["FunnelBaseModel"]
|
||||||
|
model = AutoModel.from_config(config)
|
||||||
|
self.assertIsInstance(model, FunnelBaseModel)
|
||||||
|
|
||||||
|
with tempfile.TemporaryDirectory() as tmp_dir:
|
||||||
|
model.save_pretrained(tmp_dir)
|
||||||
|
model = AutoModel.from_pretrained(tmp_dir)
|
||||||
|
self.assertIsInstance(model, FunnelBaseModel)
|
||||||
|
|
||||||
def test_parents_and_children_in_mappings(self):
|
def test_parents_and_children_in_mappings(self):
|
||||||
# Test that the children are placed before the parents in the mappings, as the `instanceof` will be triggered
|
# Test that the children are placed before the parents in the mappings, as the `instanceof` will be triggered
|
||||||
# by the parents and will return the wrong configuration type when using auto models
|
# by the parents and will return the wrong configuration type when using auto models
|
||||||
@@ -242,6 +260,12 @@ class AutoModelTest(unittest.TestCase):
|
|||||||
assert not issubclass(
|
assert not issubclass(
|
||||||
child_config, parent_config
|
child_config, parent_config
|
||||||
), f"{child_config.__name__} is child of {parent_config.__name__}"
|
), f"{child_config.__name__} is child of {parent_config.__name__}"
|
||||||
assert not issubclass(
|
|
||||||
child_model, parent_model
|
# Tuplify child_model and parent_model since some of them could be tuples.
|
||||||
), f"{child_config.__name__} is child of {parent_config.__name__}"
|
if not isinstance(child_model, (list, tuple)):
|
||||||
|
child_model = (child_model,)
|
||||||
|
if not isinstance(parent_model, (list, tuple)):
|
||||||
|
parent_model = (parent_model,)
|
||||||
|
|
||||||
|
for child, parent in [(a, b) for a in child_model for b in parent_model]:
|
||||||
|
assert not issubclass(child, parent), f"{child.__name__} is child of {parent.__name__}"
|
||||||
|
|||||||
@@ -17,6 +17,7 @@
|
|||||||
import unittest
|
import unittest
|
||||||
|
|
||||||
from transformers import is_torch_available
|
from transformers import is_torch_available
|
||||||
|
from transformers.models.auto import get_values
|
||||||
from transformers.testing_utils import require_torch, slow, torch_device
|
from transformers.testing_utils import require_torch, slow, torch_device
|
||||||
|
|
||||||
from .test_configuration_common import ConfigTester
|
from .test_configuration_common import ConfigTester
|
||||||
@@ -444,7 +445,7 @@ class BertModelTest(ModelTesterMixin, GenerationTesterMixin, unittest.TestCase):
|
|||||||
inputs_dict = super()._prepare_for_class(inputs_dict, model_class, return_labels=return_labels)
|
inputs_dict = super()._prepare_for_class(inputs_dict, model_class, return_labels=return_labels)
|
||||||
|
|
||||||
if return_labels:
|
if return_labels:
|
||||||
if model_class in MODEL_FOR_PRETRAINING_MAPPING.values():
|
if model_class in get_values(MODEL_FOR_PRETRAINING_MAPPING):
|
||||||
inputs_dict["labels"] = torch.zeros(
|
inputs_dict["labels"] = torch.zeros(
|
||||||
(self.model_tester.batch_size, self.model_tester.seq_length), dtype=torch.long, device=torch_device
|
(self.model_tester.batch_size, self.model_tester.seq_length), dtype=torch.long, device=torch_device
|
||||||
)
|
)
|
||||||
|
|||||||
@@ -19,6 +19,7 @@ import unittest
|
|||||||
|
|
||||||
from tests.test_modeling_common import floats_tensor
|
from tests.test_modeling_common import floats_tensor
|
||||||
from transformers import is_torch_available
|
from transformers import is_torch_available
|
||||||
|
from transformers.models.auto import get_values
|
||||||
from transformers.models.big_bird.tokenization_big_bird import BigBirdTokenizer
|
from transformers.models.big_bird.tokenization_big_bird import BigBirdTokenizer
|
||||||
from transformers.testing_utils import require_torch, slow, torch_device
|
from transformers.testing_utils import require_torch, slow, torch_device
|
||||||
|
|
||||||
@@ -458,7 +459,7 @@ class BigBirdModelTest(ModelTesterMixin, unittest.TestCase):
|
|||||||
inputs_dict = super()._prepare_for_class(inputs_dict, model_class, return_labels=return_labels)
|
inputs_dict = super()._prepare_for_class(inputs_dict, model_class, return_labels=return_labels)
|
||||||
|
|
||||||
if return_labels:
|
if return_labels:
|
||||||
if model_class in MODEL_FOR_PRETRAINING_MAPPING.values():
|
if model_class in get_values(MODEL_FOR_PRETRAINING_MAPPING):
|
||||||
inputs_dict["labels"] = torch.zeros(
|
inputs_dict["labels"] = torch.zeros(
|
||||||
(self.model_tester.batch_size, self.model_tester.seq_length), dtype=torch.long, device=torch_device
|
(self.model_tester.batch_size, self.model_tester.seq_length), dtype=torch.long, device=torch_device
|
||||||
)
|
)
|
||||||
|
|||||||
@@ -24,6 +24,7 @@ from typing import List, Tuple
|
|||||||
|
|
||||||
from transformers import is_torch_available
|
from transformers import is_torch_available
|
||||||
from transformers.file_utils import WEIGHTS_NAME
|
from transformers.file_utils import WEIGHTS_NAME
|
||||||
|
from transformers.models.auto import get_values
|
||||||
from transformers.testing_utils import require_torch, require_torch_multi_gpu, slow, torch_device
|
from transformers.testing_utils import require_torch, require_torch_multi_gpu, slow, torch_device
|
||||||
|
|
||||||
|
|
||||||
@@ -79,7 +80,7 @@ class ModelTesterMixin:
|
|||||||
|
|
||||||
def _prepare_for_class(self, inputs_dict, model_class, return_labels=False):
|
def _prepare_for_class(self, inputs_dict, model_class, return_labels=False):
|
||||||
inputs_dict = copy.deepcopy(inputs_dict)
|
inputs_dict = copy.deepcopy(inputs_dict)
|
||||||
if model_class in MODEL_FOR_MULTIPLE_CHOICE_MAPPING.values():
|
if model_class in get_values(MODEL_FOR_MULTIPLE_CHOICE_MAPPING):
|
||||||
inputs_dict = {
|
inputs_dict = {
|
||||||
k: v.unsqueeze(1).expand(-1, self.model_tester.num_choices, -1).contiguous()
|
k: v.unsqueeze(1).expand(-1, self.model_tester.num_choices, -1).contiguous()
|
||||||
if isinstance(v, torch.Tensor) and v.ndim > 1
|
if isinstance(v, torch.Tensor) and v.ndim > 1
|
||||||
@@ -88,9 +89,9 @@ class ModelTesterMixin:
|
|||||||
}
|
}
|
||||||
|
|
||||||
if return_labels:
|
if return_labels:
|
||||||
if model_class in MODEL_FOR_MULTIPLE_CHOICE_MAPPING.values():
|
if model_class in get_values(MODEL_FOR_MULTIPLE_CHOICE_MAPPING):
|
||||||
inputs_dict["labels"] = torch.ones(self.model_tester.batch_size, dtype=torch.long, device=torch_device)
|
inputs_dict["labels"] = torch.ones(self.model_tester.batch_size, dtype=torch.long, device=torch_device)
|
||||||
elif model_class in MODEL_FOR_QUESTION_ANSWERING_MAPPING.values():
|
elif model_class in get_values(MODEL_FOR_QUESTION_ANSWERING_MAPPING):
|
||||||
inputs_dict["start_positions"] = torch.zeros(
|
inputs_dict["start_positions"] = torch.zeros(
|
||||||
self.model_tester.batch_size, dtype=torch.long, device=torch_device
|
self.model_tester.batch_size, dtype=torch.long, device=torch_device
|
||||||
)
|
)
|
||||||
@@ -98,18 +99,18 @@ class ModelTesterMixin:
|
|||||||
self.model_tester.batch_size, dtype=torch.long, device=torch_device
|
self.model_tester.batch_size, dtype=torch.long, device=torch_device
|
||||||
)
|
)
|
||||||
elif model_class in [
|
elif model_class in [
|
||||||
*MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING.values(),
|
*get_values(MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING),
|
||||||
*MODEL_FOR_NEXT_SENTENCE_PREDICTION_MAPPING.values(),
|
*get_values(MODEL_FOR_NEXT_SENTENCE_PREDICTION_MAPPING),
|
||||||
*MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING.values(),
|
*get_values(MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING),
|
||||||
]:
|
]:
|
||||||
inputs_dict["labels"] = torch.zeros(
|
inputs_dict["labels"] = torch.zeros(
|
||||||
self.model_tester.batch_size, dtype=torch.long, device=torch_device
|
self.model_tester.batch_size, dtype=torch.long, device=torch_device
|
||||||
)
|
)
|
||||||
elif model_class in [
|
elif model_class in [
|
||||||
*MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING.values(),
|
*get_values(MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING),
|
||||||
*MODEL_FOR_CAUSAL_LM_MAPPING.values(),
|
*get_values(MODEL_FOR_CAUSAL_LM_MAPPING),
|
||||||
*MODEL_FOR_MASKED_LM_MAPPING.values(),
|
*get_values(MODEL_FOR_MASKED_LM_MAPPING),
|
||||||
*MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING.values(),
|
*get_values(MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING),
|
||||||
]:
|
]:
|
||||||
inputs_dict["labels"] = torch.zeros(
|
inputs_dict["labels"] = torch.zeros(
|
||||||
(self.model_tester.batch_size, self.model_tester.seq_length), dtype=torch.long, device=torch_device
|
(self.model_tester.batch_size, self.model_tester.seq_length), dtype=torch.long, device=torch_device
|
||||||
@@ -229,7 +230,7 @@ class ModelTesterMixin:
|
|||||||
config.return_dict = True
|
config.return_dict = True
|
||||||
|
|
||||||
for model_class in self.all_model_classes:
|
for model_class in self.all_model_classes:
|
||||||
if model_class in MODEL_MAPPING.values():
|
if model_class in get_values(MODEL_MAPPING):
|
||||||
continue
|
continue
|
||||||
model = model_class(config)
|
model = model_class(config)
|
||||||
model.to(torch_device)
|
model.to(torch_device)
|
||||||
@@ -248,7 +249,7 @@ class ModelTesterMixin:
|
|||||||
config.return_dict = True
|
config.return_dict = True
|
||||||
|
|
||||||
for model_class in self.all_model_classes:
|
for model_class in self.all_model_classes:
|
||||||
if model_class in MODEL_MAPPING.values():
|
if model_class in get_values(MODEL_MAPPING):
|
||||||
continue
|
continue
|
||||||
model = model_class(config)
|
model = model_class(config)
|
||||||
model.to(torch_device)
|
model.to(torch_device)
|
||||||
@@ -312,7 +313,7 @@ class ModelTesterMixin:
|
|||||||
if "labels" in inputs_dict:
|
if "labels" in inputs_dict:
|
||||||
correct_outlen += 1 # loss is added to beginning
|
correct_outlen += 1 # loss is added to beginning
|
||||||
# Question Answering model returns start_logits and end_logits
|
# Question Answering model returns start_logits and end_logits
|
||||||
if model_class in MODEL_FOR_QUESTION_ANSWERING_MAPPING.values():
|
if model_class in get_values(MODEL_FOR_QUESTION_ANSWERING_MAPPING):
|
||||||
correct_outlen += 1 # start_logits and end_logits instead of only 1 output
|
correct_outlen += 1 # start_logits and end_logits instead of only 1 output
|
||||||
if "past_key_values" in outputs:
|
if "past_key_values" in outputs:
|
||||||
correct_outlen += 1 # past_key_values have been returned
|
correct_outlen += 1 # past_key_values have been returned
|
||||||
|
|||||||
@@ -19,6 +19,7 @@ import unittest
|
|||||||
|
|
||||||
from tests.test_modeling_common import floats_tensor
|
from tests.test_modeling_common import floats_tensor
|
||||||
from transformers import is_torch_available
|
from transformers import is_torch_available
|
||||||
|
from transformers.models.auto import get_values
|
||||||
from transformers.testing_utils import require_torch, slow, torch_device
|
from transformers.testing_utils import require_torch, slow, torch_device
|
||||||
|
|
||||||
from .test_configuration_common import ConfigTester
|
from .test_configuration_common import ConfigTester
|
||||||
@@ -352,7 +353,7 @@ class ConvBertModelTest(ModelTesterMixin, unittest.TestCase):
|
|||||||
if "labels" in inputs_dict:
|
if "labels" in inputs_dict:
|
||||||
correct_outlen += 1 # loss is added to beginning
|
correct_outlen += 1 # loss is added to beginning
|
||||||
# Question Answering model returns start_logits and end_logits
|
# Question Answering model returns start_logits and end_logits
|
||||||
if model_class in MODEL_FOR_QUESTION_ANSWERING_MAPPING.values():
|
if model_class in get_values(MODEL_FOR_QUESTION_ANSWERING_MAPPING):
|
||||||
correct_outlen += 1 # start_logits and end_logits instead of only 1 output
|
correct_outlen += 1 # start_logits and end_logits instead of only 1 output
|
||||||
if "past_key_values" in outputs:
|
if "past_key_values" in outputs:
|
||||||
correct_outlen += 1 # past_key_values have been returned
|
correct_outlen += 1 # past_key_values have been returned
|
||||||
|
|||||||
@@ -17,6 +17,7 @@
|
|||||||
import unittest
|
import unittest
|
||||||
|
|
||||||
from transformers import is_torch_available
|
from transformers import is_torch_available
|
||||||
|
from transformers.models.auto import get_values
|
||||||
from transformers.testing_utils import require_torch, slow, torch_device
|
from transformers.testing_utils import require_torch, slow, torch_device
|
||||||
|
|
||||||
from .test_configuration_common import ConfigTester
|
from .test_configuration_common import ConfigTester
|
||||||
@@ -292,7 +293,7 @@ class ElectraModelTest(ModelTesterMixin, unittest.TestCase):
|
|||||||
inputs_dict = super()._prepare_for_class(inputs_dict, model_class, return_labels=return_labels)
|
inputs_dict = super()._prepare_for_class(inputs_dict, model_class, return_labels=return_labels)
|
||||||
|
|
||||||
if return_labels:
|
if return_labels:
|
||||||
if model_class in MODEL_FOR_PRETRAINING_MAPPING.values():
|
if model_class in get_values(MODEL_FOR_PRETRAINING_MAPPING):
|
||||||
inputs_dict["labels"] = torch.zeros(
|
inputs_dict["labels"] = torch.zeros(
|
||||||
(self.model_tester.batch_size, self.model_tester.seq_length), dtype=torch.long, device=torch_device
|
(self.model_tester.batch_size, self.model_tester.seq_length), dtype=torch.long, device=torch_device
|
||||||
)
|
)
|
||||||
|
|||||||
@@ -29,6 +29,7 @@ if is_flax_available():
|
|||||||
FlaxBertForNextSentencePrediction,
|
FlaxBertForNextSentencePrediction,
|
||||||
FlaxBertForPreTraining,
|
FlaxBertForPreTraining,
|
||||||
FlaxBertForQuestionAnswering,
|
FlaxBertForQuestionAnswering,
|
||||||
|
FlaxBertForSequenceClassification,
|
||||||
FlaxBertForTokenClassification,
|
FlaxBertForTokenClassification,
|
||||||
FlaxBertModel,
|
FlaxBertModel,
|
||||||
)
|
)
|
||||||
@@ -125,6 +126,7 @@ class FlaxBertModelTest(FlaxModelTesterMixin, unittest.TestCase):
|
|||||||
FlaxBertForMultipleChoice,
|
FlaxBertForMultipleChoice,
|
||||||
FlaxBertForQuestionAnswering,
|
FlaxBertForQuestionAnswering,
|
||||||
FlaxBertForNextSentencePrediction,
|
FlaxBertForNextSentencePrediction,
|
||||||
|
FlaxBertForSequenceClassification,
|
||||||
FlaxBertForTokenClassification,
|
FlaxBertForTokenClassification,
|
||||||
FlaxBertForQuestionAnswering,
|
FlaxBertForQuestionAnswering,
|
||||||
)
|
)
|
||||||
|
|||||||
@@ -17,6 +17,7 @@
|
|||||||
import unittest
|
import unittest
|
||||||
|
|
||||||
from transformers import FunnelTokenizer, is_torch_available
|
from transformers import FunnelTokenizer, is_torch_available
|
||||||
|
from transformers.models.auto import get_values
|
||||||
from transformers.testing_utils import require_sentencepiece, require_tokenizers, require_torch, slow, torch_device
|
from transformers.testing_utils import require_sentencepiece, require_tokenizers, require_torch, slow, torch_device
|
||||||
|
|
||||||
from .test_configuration_common import ConfigTester
|
from .test_configuration_common import ConfigTester
|
||||||
@@ -365,7 +366,7 @@ class FunnelModelTest(ModelTesterMixin, unittest.TestCase):
|
|||||||
inputs_dict = super()._prepare_for_class(inputs_dict, model_class, return_labels=return_labels)
|
inputs_dict = super()._prepare_for_class(inputs_dict, model_class, return_labels=return_labels)
|
||||||
|
|
||||||
if return_labels:
|
if return_labels:
|
||||||
if model_class in MODEL_FOR_PRETRAINING_MAPPING.values():
|
if model_class in get_values(MODEL_FOR_PRETRAINING_MAPPING):
|
||||||
inputs_dict["labels"] = torch.zeros(
|
inputs_dict["labels"] = torch.zeros(
|
||||||
(self.model_tester.batch_size, self.model_tester.seq_length), dtype=torch.long, device=torch_device
|
(self.model_tester.batch_size, self.model_tester.seq_length), dtype=torch.long, device=torch_device
|
||||||
)
|
)
|
||||||
|
|||||||
@@ -21,6 +21,7 @@ import unittest
|
|||||||
|
|
||||||
from transformers import is_torch_available
|
from transformers import is_torch_available
|
||||||
from transformers.file_utils import cached_property
|
from transformers.file_utils import cached_property
|
||||||
|
from transformers.models.auto import get_values
|
||||||
from transformers.testing_utils import require_sentencepiece, require_tokenizers, require_torch, slow, torch_device
|
from transformers.testing_utils import require_sentencepiece, require_tokenizers, require_torch, slow, torch_device
|
||||||
|
|
||||||
from .test_configuration_common import ConfigTester
|
from .test_configuration_common import ConfigTester
|
||||||
@@ -412,7 +413,7 @@ class LEDModelTest(ModelTesterMixin, GenerationTesterMixin, unittest.TestCase):
|
|||||||
if "labels" in inputs_dict:
|
if "labels" in inputs_dict:
|
||||||
correct_outlen += 1 # loss is added to beginning
|
correct_outlen += 1 # loss is added to beginning
|
||||||
# Question Answering model returns start_logits and end_logits
|
# Question Answering model returns start_logits and end_logits
|
||||||
if model_class in MODEL_FOR_QUESTION_ANSWERING_MAPPING.values():
|
if model_class in get_values(MODEL_FOR_QUESTION_ANSWERING_MAPPING):
|
||||||
correct_outlen += 1 # start_logits and end_logits instead of only 1 output
|
correct_outlen += 1 # start_logits and end_logits instead of only 1 output
|
||||||
if "past_key_values" in outputs:
|
if "past_key_values" in outputs:
|
||||||
correct_outlen += 1 # past_key_values have been returned
|
correct_outlen += 1 # past_key_values have been returned
|
||||||
|
|||||||
@@ -18,6 +18,7 @@ import copy
|
|||||||
import unittest
|
import unittest
|
||||||
|
|
||||||
from transformers import is_torch_available
|
from transformers import is_torch_available
|
||||||
|
from transformers.models.auto import get_values
|
||||||
from transformers.testing_utils import require_torch, slow, torch_device
|
from transformers.testing_utils import require_torch, slow, torch_device
|
||||||
|
|
||||||
from .test_configuration_common import ConfigTester
|
from .test_configuration_common import ConfigTester
|
||||||
@@ -532,11 +533,11 @@ class LxmertModelTest(ModelTesterMixin, unittest.TestCase):
|
|||||||
inputs_dict = copy.deepcopy(inputs_dict)
|
inputs_dict = copy.deepcopy(inputs_dict)
|
||||||
|
|
||||||
if return_labels:
|
if return_labels:
|
||||||
if model_class in MODEL_FOR_QUESTION_ANSWERING_MAPPING.values():
|
if model_class in get_values(MODEL_FOR_QUESTION_ANSWERING_MAPPING):
|
||||||
inputs_dict["labels"] = torch.zeros(
|
inputs_dict["labels"] = torch.zeros(
|
||||||
self.model_tester.batch_size, dtype=torch.long, device=torch_device
|
self.model_tester.batch_size, dtype=torch.long, device=torch_device
|
||||||
)
|
)
|
||||||
elif model_class in MODEL_FOR_PRETRAINING_MAPPING.values():
|
elif model_class in get_values(MODEL_FOR_PRETRAINING_MAPPING):
|
||||||
# special case for models like BERT that use multi-loss training for PreTraining
|
# special case for models like BERT that use multi-loss training for PreTraining
|
||||||
inputs_dict["labels"] = torch.zeros(
|
inputs_dict["labels"] = torch.zeros(
|
||||||
(self.model_tester.batch_size, self.model_tester.seq_length), dtype=torch.long, device=torch_device
|
(self.model_tester.batch_size, self.model_tester.seq_length), dtype=torch.long, device=torch_device
|
||||||
|
|||||||
@@ -21,6 +21,7 @@ import os
|
|||||||
import unittest
|
import unittest
|
||||||
|
|
||||||
from transformers import is_torch_available
|
from transformers import is_torch_available
|
||||||
|
from transformers.models.auto import get_values
|
||||||
from transformers.testing_utils import require_sentencepiece, require_tokenizers, require_torch, slow, torch_device
|
from transformers.testing_utils import require_sentencepiece, require_tokenizers, require_torch, slow, torch_device
|
||||||
|
|
||||||
from .test_configuration_common import ConfigTester
|
from .test_configuration_common import ConfigTester
|
||||||
@@ -291,7 +292,7 @@ class MegatronBertModelTest(ModelTesterMixin, unittest.TestCase):
|
|||||||
inputs_dict = super()._prepare_for_class(inputs_dict, model_class, return_labels=return_labels)
|
inputs_dict = super()._prepare_for_class(inputs_dict, model_class, return_labels=return_labels)
|
||||||
|
|
||||||
if return_labels:
|
if return_labels:
|
||||||
if model_class in MODEL_FOR_PRETRAINING_MAPPING.values():
|
if model_class in get_values(MODEL_FOR_PRETRAINING_MAPPING):
|
||||||
inputs_dict["labels"] = torch.zeros(
|
inputs_dict["labels"] = torch.zeros(
|
||||||
(self.model_tester.batch_size, self.model_tester.seq_length), dtype=torch.long, device=torch_device
|
(self.model_tester.batch_size, self.model_tester.seq_length), dtype=torch.long, device=torch_device
|
||||||
)
|
)
|
||||||
|
|||||||
@@ -17,6 +17,7 @@
|
|||||||
import unittest
|
import unittest
|
||||||
|
|
||||||
from transformers import is_torch_available
|
from transformers import is_torch_available
|
||||||
|
from transformers.models.auto import get_values
|
||||||
from transformers.testing_utils import require_sentencepiece, require_tokenizers, require_torch, slow, torch_device
|
from transformers.testing_utils import require_sentencepiece, require_tokenizers, require_torch, slow, torch_device
|
||||||
|
|
||||||
from .test_configuration_common import ConfigTester
|
from .test_configuration_common import ConfigTester
|
||||||
@@ -272,7 +273,7 @@ class MobileBertModelTest(ModelTesterMixin, unittest.TestCase):
|
|||||||
inputs_dict = super()._prepare_for_class(inputs_dict, model_class, return_labels=return_labels)
|
inputs_dict = super()._prepare_for_class(inputs_dict, model_class, return_labels=return_labels)
|
||||||
|
|
||||||
if return_labels:
|
if return_labels:
|
||||||
if model_class in MODEL_FOR_PRETRAINING_MAPPING.values():
|
if model_class in get_values(MODEL_FOR_PRETRAINING_MAPPING):
|
||||||
inputs_dict["labels"] = torch.zeros(
|
inputs_dict["labels"] = torch.zeros(
|
||||||
(self.model_tester.batch_size, self.model_tester.seq_length), dtype=torch.long, device=torch_device
|
(self.model_tester.batch_size, self.model_tester.seq_length), dtype=torch.long, device=torch_device
|
||||||
)
|
)
|
||||||
|
|||||||
@@ -32,6 +32,7 @@ from transformers import (
|
|||||||
is_torch_available,
|
is_torch_available,
|
||||||
)
|
)
|
||||||
from transformers.file_utils import cached_property
|
from transformers.file_utils import cached_property
|
||||||
|
from transformers.models.auto import get_values
|
||||||
from transformers.testing_utils import require_scatter, require_torch, slow, torch_device
|
from transformers.testing_utils import require_scatter, require_torch, slow, torch_device
|
||||||
|
|
||||||
from .test_configuration_common import ConfigTester
|
from .test_configuration_common import ConfigTester
|
||||||
@@ -425,7 +426,7 @@ class TapasModelTest(ModelTesterMixin, unittest.TestCase):
|
|||||||
|
|
||||||
def _prepare_for_class(self, inputs_dict, model_class, return_labels=False):
|
def _prepare_for_class(self, inputs_dict, model_class, return_labels=False):
|
||||||
inputs_dict = copy.deepcopy(inputs_dict)
|
inputs_dict = copy.deepcopy(inputs_dict)
|
||||||
if model_class in MODEL_FOR_MULTIPLE_CHOICE_MAPPING.values():
|
if model_class in get_values(MODEL_FOR_MULTIPLE_CHOICE_MAPPING):
|
||||||
inputs_dict = {
|
inputs_dict = {
|
||||||
k: v.unsqueeze(1).expand(-1, self.model_tester.num_choices, -1).contiguous()
|
k: v.unsqueeze(1).expand(-1, self.model_tester.num_choices, -1).contiguous()
|
||||||
if isinstance(v, torch.Tensor) and v.ndim > 1
|
if isinstance(v, torch.Tensor) and v.ndim > 1
|
||||||
@@ -434,9 +435,9 @@ class TapasModelTest(ModelTesterMixin, unittest.TestCase):
|
|||||||
}
|
}
|
||||||
|
|
||||||
if return_labels:
|
if return_labels:
|
||||||
if model_class in MODEL_FOR_MULTIPLE_CHOICE_MAPPING.values():
|
if model_class in get_values(MODEL_FOR_MULTIPLE_CHOICE_MAPPING):
|
||||||
inputs_dict["labels"] = torch.ones(self.model_tester.batch_size, dtype=torch.long, device=torch_device)
|
inputs_dict["labels"] = torch.ones(self.model_tester.batch_size, dtype=torch.long, device=torch_device)
|
||||||
elif model_class in MODEL_FOR_TABLE_QUESTION_ANSWERING_MAPPING.values():
|
elif model_class in get_values(MODEL_FOR_TABLE_QUESTION_ANSWERING_MAPPING):
|
||||||
inputs_dict["labels"] = torch.zeros(
|
inputs_dict["labels"] = torch.zeros(
|
||||||
(self.model_tester.batch_size, self.model_tester.seq_length), dtype=torch.long, device=torch_device
|
(self.model_tester.batch_size, self.model_tester.seq_length), dtype=torch.long, device=torch_device
|
||||||
)
|
)
|
||||||
@@ -457,17 +458,17 @@ class TapasModelTest(ModelTesterMixin, unittest.TestCase):
|
|||||||
self.model_tester.batch_size, dtype=torch.float, device=torch_device
|
self.model_tester.batch_size, dtype=torch.float, device=torch_device
|
||||||
)
|
)
|
||||||
elif model_class in [
|
elif model_class in [
|
||||||
*MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING.values(),
|
*get_values(MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING),
|
||||||
*MODEL_FOR_NEXT_SENTENCE_PREDICTION_MAPPING.values(),
|
*get_values(MODEL_FOR_NEXT_SENTENCE_PREDICTION_MAPPING),
|
||||||
]:
|
]:
|
||||||
inputs_dict["labels"] = torch.zeros(
|
inputs_dict["labels"] = torch.zeros(
|
||||||
self.model_tester.batch_size, dtype=torch.long, device=torch_device
|
self.model_tester.batch_size, dtype=torch.long, device=torch_device
|
||||||
)
|
)
|
||||||
elif model_class in [
|
elif model_class in [
|
||||||
*MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING.values(),
|
*get_values(MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING),
|
||||||
*MODEL_FOR_CAUSAL_LM_MAPPING.values(),
|
*get_values(MODEL_FOR_CAUSAL_LM_MAPPING),
|
||||||
*MODEL_FOR_MASKED_LM_MAPPING.values(),
|
*get_values(MODEL_FOR_MASKED_LM_MAPPING),
|
||||||
*MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING.values(),
|
*get_values(MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING),
|
||||||
]:
|
]:
|
||||||
inputs_dict["labels"] = torch.zeros(
|
inputs_dict["labels"] = torch.zeros(
|
||||||
(self.model_tester.batch_size, self.model_tester.seq_length), dtype=torch.long, device=torch_device
|
(self.model_tester.batch_size, self.model_tester.seq_length), dtype=torch.long, device=torch_device
|
||||||
|
|||||||
@@ -17,6 +17,7 @@
|
|||||||
import unittest
|
import unittest
|
||||||
|
|
||||||
from transformers import AlbertConfig, is_tf_available
|
from transformers import AlbertConfig, is_tf_available
|
||||||
|
from transformers.models.auto import get_values
|
||||||
from transformers.testing_utils import require_tf, slow
|
from transformers.testing_utils import require_tf, slow
|
||||||
|
|
||||||
from .test_configuration_common import ConfigTester
|
from .test_configuration_common import ConfigTester
|
||||||
@@ -249,7 +250,7 @@ class TFAlbertModelTest(TFModelTesterMixin, unittest.TestCase):
|
|||||||
inputs_dict = super()._prepare_for_class(inputs_dict, model_class, return_labels=return_labels)
|
inputs_dict = super()._prepare_for_class(inputs_dict, model_class, return_labels=return_labels)
|
||||||
|
|
||||||
if return_labels:
|
if return_labels:
|
||||||
if model_class in TF_MODEL_FOR_PRETRAINING_MAPPING.values():
|
if model_class in get_values(TF_MODEL_FOR_PRETRAINING_MAPPING):
|
||||||
inputs_dict["sentence_order_label"] = tf.zeros(self.model_tester.batch_size, dtype=tf.int32)
|
inputs_dict["sentence_order_label"] = tf.zeros(self.model_tester.batch_size, dtype=tf.int32)
|
||||||
|
|
||||||
return inputs_dict
|
return inputs_dict
|
||||||
|
|||||||
@@ -13,7 +13,8 @@
|
|||||||
# See the License for the specific language governing permissions and
|
# See the License for the specific language governing permissions and
|
||||||
# limitations under the License.
|
# limitations under the License.
|
||||||
|
|
||||||
|
import copy
|
||||||
|
import tempfile
|
||||||
import unittest
|
import unittest
|
||||||
|
|
||||||
from transformers import is_tf_available
|
from transformers import is_tf_available
|
||||||
@@ -39,6 +40,8 @@ if is_tf_available():
|
|||||||
TFBertForQuestionAnswering,
|
TFBertForQuestionAnswering,
|
||||||
TFBertForSequenceClassification,
|
TFBertForSequenceClassification,
|
||||||
TFBertModel,
|
TFBertModel,
|
||||||
|
TFFunnelBaseModel,
|
||||||
|
TFFunnelModel,
|
||||||
TFGPT2LMHeadModel,
|
TFGPT2LMHeadModel,
|
||||||
TFRobertaForMaskedLM,
|
TFRobertaForMaskedLM,
|
||||||
TFT5ForConditionalGeneration,
|
TFT5ForConditionalGeneration,
|
||||||
@@ -176,6 +179,21 @@ class TFAutoModelTest(unittest.TestCase):
|
|||||||
self.assertEqual(model.num_parameters(), 14410)
|
self.assertEqual(model.num_parameters(), 14410)
|
||||||
self.assertEqual(model.num_parameters(only_trainable=True), 14410)
|
self.assertEqual(model.num_parameters(only_trainable=True), 14410)
|
||||||
|
|
||||||
|
def test_from_pretrained_with_tuple_values(self):
|
||||||
|
# For the auto model mapping, FunnelConfig has two models: FunnelModel and FunnelBaseModel
|
||||||
|
model = TFAutoModel.from_pretrained("sgugger/funnel-random-tiny")
|
||||||
|
self.assertIsInstance(model, TFFunnelModel)
|
||||||
|
|
||||||
|
config = copy.deepcopy(model.config)
|
||||||
|
config.architectures = ["FunnelBaseModel"]
|
||||||
|
model = TFAutoModel.from_config(config)
|
||||||
|
self.assertIsInstance(model, TFFunnelBaseModel)
|
||||||
|
|
||||||
|
with tempfile.TemporaryDirectory() as tmp_dir:
|
||||||
|
model.save_pretrained(tmp_dir)
|
||||||
|
model = TFAutoModel.from_pretrained(tmp_dir)
|
||||||
|
self.assertIsInstance(model, TFFunnelBaseModel)
|
||||||
|
|
||||||
def test_parents_and_children_in_mappings(self):
|
def test_parents_and_children_in_mappings(self):
|
||||||
# Test that the children are placed before the parents in the mappings, as the `instanceof` will be triggered
|
# Test that the children are placed before the parents in the mappings, as the `instanceof` will be triggered
|
||||||
# by the parents and will return the wrong configuration type when using auto models
|
# by the parents and will return the wrong configuration type when using auto models
|
||||||
@@ -197,4 +215,12 @@ class TFAutoModelTest(unittest.TestCase):
|
|||||||
for parent_config, parent_model in mapping[: index + 1]:
|
for parent_config, parent_model in mapping[: index + 1]:
|
||||||
with self.subTest(msg=f"Testing if {child_config.__name__} is child of {parent_config.__name__}"):
|
with self.subTest(msg=f"Testing if {child_config.__name__} is child of {parent_config.__name__}"):
|
||||||
self.assertFalse(issubclass(child_config, parent_config))
|
self.assertFalse(issubclass(child_config, parent_config))
|
||||||
self.assertFalse(issubclass(child_model, parent_model))
|
|
||||||
|
# Tuplify child_model and parent_model since some of them could be tuples.
|
||||||
|
if not isinstance(child_model, (list, tuple)):
|
||||||
|
child_model = (child_model,)
|
||||||
|
if not isinstance(parent_model, (list, tuple)):
|
||||||
|
parent_model = (parent_model,)
|
||||||
|
|
||||||
|
for child, parent in [(a, b) for a in child_model for b in parent_model]:
|
||||||
|
assert not issubclass(child, parent), f"{child.__name__} is child of {parent.__name__}"
|
||||||
|
|||||||
@@ -17,6 +17,7 @@
|
|||||||
import unittest
|
import unittest
|
||||||
|
|
||||||
from transformers import BertConfig, is_tf_available
|
from transformers import BertConfig, is_tf_available
|
||||||
|
from transformers.models.auto import get_values
|
||||||
from transformers.testing_utils import require_tf, slow
|
from transformers.testing_utils import require_tf, slow
|
||||||
|
|
||||||
from .test_configuration_common import ConfigTester
|
from .test_configuration_common import ConfigTester
|
||||||
@@ -282,7 +283,7 @@ class TFBertModelTest(TFModelTesterMixin, unittest.TestCase):
|
|||||||
inputs_dict = super()._prepare_for_class(inputs_dict, model_class, return_labels=return_labels)
|
inputs_dict = super()._prepare_for_class(inputs_dict, model_class, return_labels=return_labels)
|
||||||
|
|
||||||
if return_labels:
|
if return_labels:
|
||||||
if model_class in TF_MODEL_FOR_PRETRAINING_MAPPING.values():
|
if model_class in get_values(TF_MODEL_FOR_PRETRAINING_MAPPING):
|
||||||
inputs_dict["next_sentence_label"] = tf.zeros(self.model_tester.batch_size, dtype=tf.int32)
|
inputs_dict["next_sentence_label"] = tf.zeros(self.model_tester.batch_size, dtype=tf.int32)
|
||||||
|
|
||||||
return inputs_dict
|
return inputs_dict
|
||||||
|
|||||||
@@ -25,6 +25,7 @@ from importlib import import_module
|
|||||||
from typing import List, Tuple
|
from typing import List, Tuple
|
||||||
|
|
||||||
from transformers import is_tf_available
|
from transformers import is_tf_available
|
||||||
|
from transformers.models.auto import get_values
|
||||||
from transformers.testing_utils import (
|
from transformers.testing_utils import (
|
||||||
_tf_gpu_memory_limit,
|
_tf_gpu_memory_limit,
|
||||||
is_pt_tf_cross_test,
|
is_pt_tf_cross_test,
|
||||||
@@ -89,7 +90,7 @@ class TFModelTesterMixin:
|
|||||||
def _prepare_for_class(self, inputs_dict, model_class, return_labels=False) -> dict:
|
def _prepare_for_class(self, inputs_dict, model_class, return_labels=False) -> dict:
|
||||||
inputs_dict = copy.deepcopy(inputs_dict)
|
inputs_dict = copy.deepcopy(inputs_dict)
|
||||||
|
|
||||||
if model_class in TF_MODEL_FOR_MULTIPLE_CHOICE_MAPPING.values():
|
if model_class in get_values(TF_MODEL_FOR_MULTIPLE_CHOICE_MAPPING):
|
||||||
inputs_dict = {
|
inputs_dict = {
|
||||||
k: tf.tile(tf.expand_dims(v, 1), (1, self.model_tester.num_choices) + (1,) * (v.ndim - 1))
|
k: tf.tile(tf.expand_dims(v, 1), (1, self.model_tester.num_choices) + (1,) * (v.ndim - 1))
|
||||||
if isinstance(v, tf.Tensor) and v.ndim > 0
|
if isinstance(v, tf.Tensor) and v.ndim > 0
|
||||||
@@ -98,21 +99,21 @@ class TFModelTesterMixin:
|
|||||||
}
|
}
|
||||||
|
|
||||||
if return_labels:
|
if return_labels:
|
||||||
if model_class in TF_MODEL_FOR_MULTIPLE_CHOICE_MAPPING.values():
|
if model_class in get_values(TF_MODEL_FOR_MULTIPLE_CHOICE_MAPPING):
|
||||||
inputs_dict["labels"] = tf.ones(self.model_tester.batch_size, dtype=tf.int32)
|
inputs_dict["labels"] = tf.ones(self.model_tester.batch_size, dtype=tf.int32)
|
||||||
elif model_class in TF_MODEL_FOR_QUESTION_ANSWERING_MAPPING.values():
|
elif model_class in get_values(TF_MODEL_FOR_QUESTION_ANSWERING_MAPPING):
|
||||||
inputs_dict["start_positions"] = tf.zeros(self.model_tester.batch_size, dtype=tf.int32)
|
inputs_dict["start_positions"] = tf.zeros(self.model_tester.batch_size, dtype=tf.int32)
|
||||||
inputs_dict["end_positions"] = tf.zeros(self.model_tester.batch_size, dtype=tf.int32)
|
inputs_dict["end_positions"] = tf.zeros(self.model_tester.batch_size, dtype=tf.int32)
|
||||||
elif model_class in TF_MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING.values():
|
elif model_class in get_values(TF_MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING):
|
||||||
inputs_dict["labels"] = tf.zeros(self.model_tester.batch_size, dtype=tf.int32)
|
inputs_dict["labels"] = tf.zeros(self.model_tester.batch_size, dtype=tf.int32)
|
||||||
elif model_class in TF_MODEL_FOR_NEXT_SENTENCE_PREDICTION_MAPPING.values():
|
elif model_class in get_values(TF_MODEL_FOR_NEXT_SENTENCE_PREDICTION_MAPPING):
|
||||||
inputs_dict["next_sentence_label"] = tf.zeros(self.model_tester.batch_size, dtype=tf.int32)
|
inputs_dict["next_sentence_label"] = tf.zeros(self.model_tester.batch_size, dtype=tf.int32)
|
||||||
elif model_class in [
|
elif model_class in [
|
||||||
*TF_MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING.values(),
|
*get_values(TF_MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING),
|
||||||
*TF_MODEL_FOR_CAUSAL_LM_MAPPING.values(),
|
*get_values(TF_MODEL_FOR_CAUSAL_LM_MAPPING),
|
||||||
*TF_MODEL_FOR_MASKED_LM_MAPPING.values(),
|
*get_values(TF_MODEL_FOR_MASKED_LM_MAPPING),
|
||||||
*TF_MODEL_FOR_PRETRAINING_MAPPING.values(),
|
*get_values(TF_MODEL_FOR_PRETRAINING_MAPPING),
|
||||||
*TF_MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING.values(),
|
*get_values(TF_MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING),
|
||||||
]:
|
]:
|
||||||
inputs_dict["labels"] = tf.zeros(
|
inputs_dict["labels"] = tf.zeros(
|
||||||
(self.model_tester.batch_size, self.model_tester.seq_length), dtype=tf.int32
|
(self.model_tester.batch_size, self.model_tester.seq_length), dtype=tf.int32
|
||||||
@@ -580,7 +581,7 @@ class TFModelTesterMixin:
|
|||||||
),
|
),
|
||||||
"input_ids": tf.keras.Input(batch_shape=(2, max_input), name="input_ids", dtype="int32"),
|
"input_ids": tf.keras.Input(batch_shape=(2, max_input), name="input_ids", dtype="int32"),
|
||||||
}
|
}
|
||||||
elif model_class in TF_MODEL_FOR_MULTIPLE_CHOICE_MAPPING.values():
|
elif model_class in get_values(TF_MODEL_FOR_MULTIPLE_CHOICE_MAPPING):
|
||||||
input_ids = tf.keras.Input(batch_shape=(4, 2, max_input), name="input_ids", dtype="int32")
|
input_ids = tf.keras.Input(batch_shape=(4, 2, max_input), name="input_ids", dtype="int32")
|
||||||
else:
|
else:
|
||||||
input_ids = tf.keras.Input(batch_shape=(2, max_input), name="input_ids", dtype="int32")
|
input_ids = tf.keras.Input(batch_shape=(2, max_input), name="input_ids", dtype="int32")
|
||||||
@@ -796,9 +797,9 @@ class TFModelTesterMixin:
|
|||||||
def test_model_common_attributes(self):
|
def test_model_common_attributes(self):
|
||||||
config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
|
config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
|
||||||
list_lm_models = (
|
list_lm_models = (
|
||||||
list(TF_MODEL_FOR_CAUSAL_LM_MAPPING.values())
|
get_values(TF_MODEL_FOR_CAUSAL_LM_MAPPING)
|
||||||
+ list(TF_MODEL_FOR_MASKED_LM_MAPPING.values())
|
+ get_values(TF_MODEL_FOR_MASKED_LM_MAPPING)
|
||||||
+ list(TF_MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING.values())
|
+ get_values(TF_MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING)
|
||||||
)
|
)
|
||||||
|
|
||||||
for model_class in self.all_model_classes:
|
for model_class in self.all_model_classes:
|
||||||
@@ -1128,7 +1129,7 @@ class TFModelTesterMixin:
|
|||||||
]
|
]
|
||||||
loss_size = tf.size(added_label)
|
loss_size = tf.size(added_label)
|
||||||
|
|
||||||
if model.__class__ in TF_MODEL_FOR_CAUSAL_LM_MAPPING.values():
|
if model.__class__ in get_values(TF_MODEL_FOR_CAUSAL_LM_MAPPING):
|
||||||
# if loss is causal lm loss, labels are shift, so that one label per batch
|
# if loss is causal lm loss, labels are shift, so that one label per batch
|
||||||
# is cut
|
# is cut
|
||||||
loss_size = loss_size - self.model_tester.batch_size
|
loss_size = loss_size - self.model_tester.batch_size
|
||||||
|
|||||||
@@ -19,6 +19,8 @@ import os
|
|||||||
import re
|
import re
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
|
from transformers.models.auto import get_values
|
||||||
|
|
||||||
|
|
||||||
# All paths are set with the intent you should run this script from the root of the repo with the command
|
# All paths are set with the intent you should run this script from the root of the repo with the command
|
||||||
# python utils/check_repo.py
|
# python utils/check_repo.py
|
||||||
@@ -86,7 +88,6 @@ IGNORE_NON_AUTO_CONFIGURED = [
|
|||||||
"DPRReader",
|
"DPRReader",
|
||||||
"DPRSpanPredictor",
|
"DPRSpanPredictor",
|
||||||
"FlaubertForQuestionAnswering",
|
"FlaubertForQuestionAnswering",
|
||||||
"FunnelBaseModel",
|
|
||||||
"GPT2DoubleHeadsModel",
|
"GPT2DoubleHeadsModel",
|
||||||
"OpenAIGPTDoubleHeadsModel",
|
"OpenAIGPTDoubleHeadsModel",
|
||||||
"RagModel",
|
"RagModel",
|
||||||
@@ -95,7 +96,6 @@ IGNORE_NON_AUTO_CONFIGURED = [
|
|||||||
"T5Stack",
|
"T5Stack",
|
||||||
"TFDPRReader",
|
"TFDPRReader",
|
||||||
"TFDPRSpanPredictor",
|
"TFDPRSpanPredictor",
|
||||||
"TFFunnelBaseModel",
|
|
||||||
"TFGPT2DoubleHeadsModel",
|
"TFGPT2DoubleHeadsModel",
|
||||||
"TFOpenAIGPTDoubleHeadsModel",
|
"TFOpenAIGPTDoubleHeadsModel",
|
||||||
"TFRagModel",
|
"TFRagModel",
|
||||||
@@ -153,7 +153,7 @@ def get_model_modules():
|
|||||||
def get_models(module):
|
def get_models(module):
|
||||||
""" Get the objects in module that are models."""
|
""" Get the objects in module that are models."""
|
||||||
models = []
|
models = []
|
||||||
model_classes = (transformers.PreTrainedModel, transformers.TFPreTrainedModel)
|
model_classes = (transformers.PreTrainedModel, transformers.TFPreTrainedModel, transformers.FlaxPreTrainedModel)
|
||||||
for attr_name in dir(module):
|
for attr_name in dir(module):
|
||||||
if "Pretrained" in attr_name or "PreTrained" in attr_name:
|
if "Pretrained" in attr_name or "PreTrained" in attr_name:
|
||||||
continue
|
continue
|
||||||
@@ -249,10 +249,13 @@ def get_all_auto_configured_models():
|
|||||||
result = set() # To avoid duplicates we concatenate all model classes in a set.
|
result = set() # To avoid duplicates we concatenate all model classes in a set.
|
||||||
for attr_name in dir(transformers.models.auto.modeling_auto):
|
for attr_name in dir(transformers.models.auto.modeling_auto):
|
||||||
if attr_name.startswith("MODEL_") and attr_name.endswith("MAPPING"):
|
if attr_name.startswith("MODEL_") and attr_name.endswith("MAPPING"):
|
||||||
result = result | set(getattr(transformers.models.auto.modeling_auto, attr_name).values())
|
result = result | set(get_values(getattr(transformers.models.auto.modeling_auto, attr_name)))
|
||||||
for attr_name in dir(transformers.models.auto.modeling_tf_auto):
|
for attr_name in dir(transformers.models.auto.modeling_tf_auto):
|
||||||
if attr_name.startswith("TF_MODEL_") and attr_name.endswith("MAPPING"):
|
if attr_name.startswith("TF_MODEL_") and attr_name.endswith("MAPPING"):
|
||||||
result = result | set(getattr(transformers.models.auto.modeling_tf_auto, attr_name).values())
|
result = result | set(get_values(getattr(transformers.models.auto.modeling_tf_auto, attr_name)))
|
||||||
|
for attr_name in dir(transformers.models.auto.modeling_flax_auto):
|
||||||
|
if attr_name.startswith("FLAX_MODEL_") and attr_name.endswith("MAPPING"):
|
||||||
|
result = result | set(get_values(getattr(transformers.models.auto.modeling_flax_auto, attr_name)))
|
||||||
return [cls.__name__ for cls in result]
|
return [cls.__name__ for cls in result]
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user