Merge branch 'master' of github.com:huggingface/transformers

This commit is contained in:
Sylvain Gugger
2021-04-08 21:14:56 -04:00
46 changed files with 393 additions and 131 deletions

View File

@@ -348,7 +348,7 @@ jobs:
- v0.4-{{ checksum "setup.py" }} - v0.4-{{ checksum "setup.py" }}
- run: sudo apt-get -y update && sudo apt-get install -y libsndfile1-dev - run: sudo apt-get -y update && sudo apt-get install -y libsndfile1-dev
- run: pip install --upgrade pip - run: pip install --upgrade pip
- run: pip install ."[all, docs]" - run: pip install ."[docs]"
- save_cache: - save_cache:
key: v0.4-build_doc-{{ checksum "setup.py" }} key: v0.4-build_doc-{{ checksum "setup.py" }}
paths: paths:
@@ -370,7 +370,7 @@ jobs:
keys: keys:
- v0.4-deploy_doc-{{ checksum "setup.py" }} - v0.4-deploy_doc-{{ checksum "setup.py" }}
- v0.4-{{ checksum "setup.py" }} - v0.4-{{ checksum "setup.py" }}
- run: pip install ."[all,docs]" - run: pip install ."[docs]"
- save_cache: - save_cache:
key: v0.4-deploy_doc-{{ checksum "setup.py" }} key: v0.4-deploy_doc-{{ checksum "setup.py" }}
paths: paths:

View File

@@ -33,7 +33,7 @@ jobs:
run: | run: |
apt -y update && apt install -y libsndfile1-dev apt -y update && apt install -y libsndfile1-dev
pip install --upgrade pip pip install --upgrade pip
pip install .[sklearn,testing,onnxruntime,sentencepiece,speech] pip install .[sklearn,testing,onnxruntime,sentencepiece,speech,deepspeed]
- name: Are GPUs recognized by our DL frameworks - name: Are GPUs recognized by our DL frameworks
run: | run: |
@@ -155,7 +155,7 @@ jobs:
run: | run: |
apt -y update && apt install -y libsndfile1-dev apt -y update && apt install -y libsndfile1-dev
pip install --upgrade pip pip install --upgrade pip
pip install .[sklearn,testing,onnxruntime,sentencepiece,speech] pip install .[sklearn,testing,onnxruntime,sentencepiece,speech,deepspeed,fairscale]
- name: Are GPUs recognized by our DL frameworks - name: Are GPUs recognized by our DL frameworks
run: | run: |

View File

@@ -274,6 +274,14 @@ Install the library via pypi:
pip install fairscale pip install fairscale
or via ``transformers``' ``extras``:
.. code-block:: bash
pip install transformers[fairscale]
(will become available starting from ``transformers==4.6.0``)
or find more details on `the FairScale's GitHub page <https://github.com/facebookresearch/fairscale/#installation>`__. or find more details on `the FairScale's GitHub page <https://github.com/facebookresearch/fairscale/#installation>`__.
If you're still struggling with the build, first make sure to read :ref:`zero-install-notes`. If you're still struggling with the build, first make sure to read :ref:`zero-install-notes`.
@@ -419,6 +427,14 @@ Install the library via pypi:
pip install deepspeed pip install deepspeed
or via ``transformers``' ``extras``:
.. code-block:: bash
pip install transformers[deepspeed]
(will become available starting from ``transformers==4.6.0``)
or find more details on `the DeepSpeed's GitHub page <https://github.com/microsoft/deepspeed#installation>`__ and or find more details on `the DeepSpeed's GitHub page <https://github.com/microsoft/deepspeed#installation>`__ and
`advanced install <https://www.deepspeed.ai/tutorials/advanced-install/>`__. `advanced install <https://www.deepspeed.ai/tutorials/advanced-install/>`__.
@@ -525,7 +541,7 @@ Here is an example of running ``run_translation.py`` under DeepSpeed deploying a
.. code-block:: bash .. code-block:: bash
deepspeed examples/seq2seq/run_translation.py \ deepspeed examples/seq2seq/run_translation.py \
--deepspeed examples/tests/deepspeed/ds_config.json \ --deepspeed tests/deepspeed/ds_config.json \
--model_name_or_path t5-small --per_device_train_batch_size 1 \ --model_name_or_path t5-small --per_device_train_batch_size 1 \
--output_dir output_dir --overwrite_output_dir --fp16 \ --output_dir output_dir --overwrite_output_dir --fp16 \
--do_train --max_train_samples 500 --num_train_epochs 1 \ --do_train --max_train_samples 500 --num_train_epochs 1 \
@@ -550,7 +566,7 @@ To deploy DeepSpeed with one GPU adjust the :class:`~transformers.Trainer` comma
.. code-block:: bash .. code-block:: bash
deepspeed --num_gpus=1 examples/seq2seq/run_translation.py \ deepspeed --num_gpus=1 examples/seq2seq/run_translation.py \
--deepspeed examples/tests/deepspeed/ds_config.json \ --deepspeed tests/deepspeed/ds_config.json \
--model_name_or_path t5-small --per_device_train_batch_size 1 \ --model_name_or_path t5-small --per_device_train_batch_size 1 \
--output_dir output_dir --overwrite_output_dir --fp16 \ --output_dir output_dir --overwrite_output_dir --fp16 \
--do_train --max_train_samples 500 --num_train_epochs 1 \ --do_train --max_train_samples 500 --num_train_epochs 1 \

View File

@@ -795,6 +795,23 @@ leave any data in there.
otherwise. otherwise.
Temporary sys.path override
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
If you need to temporarily override ``sys.path`` to import from another test for example, you can use the
``ExtendSysPath`` context manager. Example:
.. code-block:: python
import os
from transformers.testing_utils import ExtendSysPath
bindir = os.path.abspath(os.path.dirname(__file__))
with ExtendSysPath(f"{bindir}/.."):
from test_trainer import TrainerIntegrationCommon # noqa
Skipping tests Skipping tests
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

View File

@@ -422,7 +422,12 @@ def main():
# Data collator # Data collator
# This one will take care of randomly masking the tokens. # This one will take care of randomly masking the tokens.
data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm_probability=data_args.mlm_probability) pad_to_multiple_of_8 = data_args.line_by_line and training_args.fp16 and not data_args.pad_to_max_length
data_collator = DataCollatorForLanguageModeling(
tokenizer=tokenizer,
mlm_probability=data_args.mlm_probability,
pad_to_multiple_of=8 if pad_to_multiple_of_8 else None,
)
# Initialize our Trainer # Initialize our Trainer
trainer = Trainer( trainer = Trainer(

View File

@@ -85,11 +85,14 @@ if stale_egg_info.exists():
# 1. all dependencies should be listed here with their version requirements if any # 1. all dependencies should be listed here with their version requirements if any
# 2. once modified, run: `make deps_table_update` to update src/transformers/dependency_versions_table.py # 2. once modified, run: `make deps_table_update` to update src/transformers/dependency_versions_table.py
_deps = [ _deps = [
"Pillow",
"black>=20.8b1", "black>=20.8b1",
"cookiecutter==1.7.2", "cookiecutter==1.7.2",
"dataclasses", "dataclasses",
"datasets", "datasets",
"deepspeed>0.3.13",
"docutils==0.16.0", "docutils==0.16.0",
"fairscale>0.3",
"faiss-cpu", "faiss-cpu",
"fastapi", "fastapi",
"filelock", "filelock",
@@ -102,13 +105,13 @@ _deps = [
"jax>=0.2.8", "jax>=0.2.8",
"jaxlib>=0.1.59", "jaxlib>=0.1.59",
"keras2onnx", "keras2onnx",
"nltk",
"numpy>=1.17", "numpy>=1.17",
"onnxconverter-common", "onnxconverter-common",
"onnxruntime-tools>=1.4.2", "onnxruntime-tools>=1.4.2",
"onnxruntime>=1.4.0", "onnxruntime>=1.4.0",
"packaging", "packaging",
"parameterized", "parameterized",
"Pillow",
"protobuf", "protobuf",
"psutil", "psutil",
"pydantic", "pydantic",
@@ -119,15 +122,18 @@ _deps = [
"recommonmark", "recommonmark",
"regex!=2019.12.17", "regex!=2019.12.17",
"requests", "requests",
"rouge-score",
"sacrebleu>=1.4.12",
"sacremoses", "sacremoses",
"sagemaker>=2.31.0",
"scikit-learn", "scikit-learn",
"sentencepiece==0.1.91", "sentencepiece==0.1.91",
"soundfile", "soundfile",
"sphinx-copybutton", "sphinx-copybutton",
"sphinx-markdown-tables", "sphinx-markdown-tables",
"sphinx-rtd-theme==0.4.3", # sphinx-rtd-theme==0.5.0 introduced big changes in the style. "sphinx-rtd-theme==0.4.3", # sphinx-rtd-theme==0.5.0 introduced big changes in the style.
"sphinxext-opengraph==0.4.1",
"sphinx==3.2.1", "sphinx==3.2.1",
"sphinxext-opengraph==0.4.1",
"starlette", "starlette",
"tensorflow-cpu>=2.3", "tensorflow-cpu>=2.3",
"tensorflow>=2.3", "tensorflow>=2.3",
@@ -139,7 +145,6 @@ _deps = [
"unidic>=1.0.2", "unidic>=1.0.2",
"unidic_lite>=1.0.7", "unidic_lite>=1.0.7",
"uvicorn", "uvicorn",
"sagemaker>=2.31.0",
] ]
@@ -230,6 +235,8 @@ extras["onnx"] = deps_list("onnxconverter-common", "keras2onnx") + extras["onnxr
extras["modelcreation"] = deps_list("cookiecutter") extras["modelcreation"] = deps_list("cookiecutter")
extras["sagemaker"] = deps_list("sagemaker") extras["sagemaker"] = deps_list("sagemaker")
extras["deepspeed"] = deps_list("deepspeed")
extras["fairscale"] = deps_list("fairscale")
extras["serving"] = deps_list("pydantic", "uvicorn", "fastapi", "starlette") extras["serving"] = deps_list("pydantic", "uvicorn", "fastapi", "starlette")
extras["speech"] = deps_list("soundfile", "torchaudio") extras["speech"] = deps_list("soundfile", "torchaudio")
@@ -238,20 +245,12 @@ extras["vision"] = deps_list("Pillow")
extras["sentencepiece"] = deps_list("sentencepiece", "protobuf") extras["sentencepiece"] = deps_list("sentencepiece", "protobuf")
extras["testing"] = ( extras["testing"] = (
deps_list( deps_list(
"pytest", "pytest-xdist", "timeout-decorator", "parameterized", "psutil", "datasets", "pytest-sugar", "black" "pytest", "pytest-xdist", "timeout-decorator", "parameterized", "psutil", "datasets", "pytest-sugar", "black", "sacrebleu", "rouge-score", "nltk"
) )
+ extras["retrieval"] + extras["retrieval"]
+ extras["modelcreation"] + extras["modelcreation"]
) )
extras["docs"] = deps_list(
"docutils",
"recommonmark",
"sphinx",
"sphinx-markdown-tables",
"sphinx-rtd-theme",
"sphinx-copybutton",
"sphinxext-opengraph",
)
extras["quality"] = deps_list("black", "isort", "flake8") extras["quality"] = deps_list("black", "isort", "flake8")
extras["all"] = ( extras["all"] = (
@@ -264,12 +263,24 @@ extras["all"] = (
+ extras["vision"] + extras["vision"]
) )
extras["docs_specific"] = deps_list(
"docutils",
"recommonmark",
"sphinx",
"sphinx-markdown-tables",
"sphinx-rtd-theme",
"sphinx-copybutton",
"sphinxext-opengraph",
)
# "docs" needs "all" to resolve all the references
extras["docs"] = extras["all"] + extras["docs_specific"]
extras["dev"] = ( extras["dev"] = (
extras["all"] extras["all"]
+ extras["testing"] + extras["testing"]
+ extras["quality"] + extras["quality"]
+ extras["ja"] + extras["ja"]
+ extras["docs"] + extras["docs_specific"]
+ extras["sklearn"] + extras["sklearn"]
+ extras["modelcreation"] + extras["modelcreation"]
) )

View File

@@ -192,7 +192,7 @@ class DataCollatorForTokenClassification:
return batch return batch
def _collate_batch(examples, tokenizer): def _collate_batch(examples, tokenizer, pad_to_multiple_of: Optional[int] = None):
"""Collate `examples` into a batch, using the information in `tokenizer` for padding if necessary.""" """Collate `examples` into a batch, using the information in `tokenizer` for padding if necessary."""
# Tensorize if necessary. # Tensorize if necessary.
if isinstance(examples[0], (list, tuple)): if isinstance(examples[0], (list, tuple)):
@@ -201,7 +201,7 @@ def _collate_batch(examples, tokenizer):
# Check if padding is necessary. # Check if padding is necessary.
length_of_first = examples[0].size(0) length_of_first = examples[0].size(0)
are_tensors_same_length = all(x.size(0) == length_of_first for x in examples) are_tensors_same_length = all(x.size(0) == length_of_first for x in examples)
if are_tensors_same_length: if are_tensors_same_length and (pad_to_multiple_of is None or length_of_first % pad_to_multiple_of == 0):
return torch.stack(examples, dim=0) return torch.stack(examples, dim=0)
# If yes, check if we have a `pad_token`. # If yes, check if we have a `pad_token`.
@@ -213,6 +213,8 @@ def _collate_batch(examples, tokenizer):
# Creating the full tensor and filling it with our data. # Creating the full tensor and filling it with our data.
max_length = max(x.size(0) for x in examples) max_length = max(x.size(0) for x in examples)
if pad_to_multiple_of is not None and (max_length % pad_to_multiple_of != 0):
max_length = ((max_length // pad_to_multiple_of) + 1) * pad_to_multiple_of
result = examples[0].new_full([len(examples), max_length], tokenizer.pad_token_id) result = examples[0].new_full([len(examples), max_length], tokenizer.pad_token_id)
for i, example in enumerate(examples): for i, example in enumerate(examples):
if tokenizer.padding_side == "right": if tokenizer.padding_side == "right":
@@ -311,6 +313,8 @@ class DataCollatorForLanguageModeling:
non-masked tokens and the value to predict for the masked token. non-masked tokens and the value to predict for the masked token.
mlm_probability (:obj:`float`, `optional`, defaults to 0.15): mlm_probability (:obj:`float`, `optional`, defaults to 0.15):
The probability with which to (randomly) mask tokens in the input, when :obj:`mlm` is set to :obj:`True`. The probability with which to (randomly) mask tokens in the input, when :obj:`mlm` is set to :obj:`True`.
pad_to_multiple_of (:obj:`int`, `optional`):
If set will pad the sequence to a multiple of the provided value.
.. note:: .. note::
@@ -323,6 +327,7 @@ class DataCollatorForLanguageModeling:
tokenizer: PreTrainedTokenizerBase tokenizer: PreTrainedTokenizerBase
mlm: bool = True mlm: bool = True
mlm_probability: float = 0.15 mlm_probability: float = 0.15
pad_to_multiple_of: Optional[int] = None
def __post_init__(self): def __post_init__(self):
if self.mlm and self.tokenizer.mask_token is None: if self.mlm and self.tokenizer.mask_token is None:
@@ -336,9 +341,9 @@ class DataCollatorForLanguageModeling:
) -> Dict[str, torch.Tensor]: ) -> Dict[str, torch.Tensor]:
# Handle dict or lists with proper padding and conversion to tensor. # Handle dict or lists with proper padding and conversion to tensor.
if isinstance(examples[0], (dict, BatchEncoding)): if isinstance(examples[0], (dict, BatchEncoding)):
batch = self.tokenizer.pad(examples, return_tensors="pt") batch = self.tokenizer.pad(examples, return_tensors="pt", pad_to_multiple_of=self.pad_to_multiple_of)
else: else:
batch = {"input_ids": _collate_batch(examples, self.tokenizer)} batch = {"input_ids": _collate_batch(examples, self.tokenizer, pad_to_multiple_of=self.pad_to_multiple_of)}
# If special token mask has been preprocessed, pop it from the dict. # If special token mask has been preprocessed, pop it from the dict.
special_tokens_mask = batch.pop("special_tokens_mask", None) special_tokens_mask = batch.pop("special_tokens_mask", None)

View File

@@ -14,7 +14,7 @@
import sys import sys
from .dependency_versions_table import deps from .dependency_versions_table import deps
from .utils.versions import require_version_core from .utils.versions import require_version, require_version_core
# define which module versions we always want to check at run time # define which module versions we always want to check at run time
@@ -41,3 +41,7 @@ for pkg in pkgs_to_check_at_runtime:
require_version_core(deps[pkg]) require_version_core(deps[pkg])
else: else:
raise ValueError(f"can't find {pkg} in {deps.keys()}, check dependency_versions_table.py") raise ValueError(f"can't find {pkg} in {deps.keys()}, check dependency_versions_table.py")
def dep_version_check(pkg, hint=None):
    """
    Check the installed version of ``pkg`` against the requirement recorded for it in ``deps``.

    Args:
        pkg: key of the package in the ``deps`` table.
        hint: optional value forwarded unchanged to ``require_version`` as its second argument.
    """
    requirement = deps[pkg]
    require_version(requirement, hint)

View File

@@ -2,11 +2,14 @@
# 1. modify the `_deps` dict in setup.py # 1. modify the `_deps` dict in setup.py
# 2. run `make deps_table_update`` # 2. run `make deps_table_update``
deps = { deps = {
"Pillow": "Pillow",
"black": "black>=20.8b1", "black": "black>=20.8b1",
"cookiecutter": "cookiecutter==1.7.2", "cookiecutter": "cookiecutter==1.7.2",
"dataclasses": "dataclasses", "dataclasses": "dataclasses",
"datasets": "datasets", "datasets": "datasets",
"deepspeed": "deepspeed>0.3.13",
"docutils": "docutils==0.16.0", "docutils": "docutils==0.16.0",
"fairscale": "fairscale>0.3",
"faiss-cpu": "faiss-cpu", "faiss-cpu": "faiss-cpu",
"fastapi": "fastapi", "fastapi": "fastapi",
"filelock": "filelock", "filelock": "filelock",
@@ -19,13 +22,13 @@ deps = {
"jax": "jax>=0.2.8", "jax": "jax>=0.2.8",
"jaxlib": "jaxlib>=0.1.59", "jaxlib": "jaxlib>=0.1.59",
"keras2onnx": "keras2onnx", "keras2onnx": "keras2onnx",
"nltk": "nltk",
"numpy": "numpy>=1.17", "numpy": "numpy>=1.17",
"onnxconverter-common": "onnxconverter-common", "onnxconverter-common": "onnxconverter-common",
"onnxruntime-tools": "onnxruntime-tools>=1.4.2", "onnxruntime-tools": "onnxruntime-tools>=1.4.2",
"onnxruntime": "onnxruntime>=1.4.0", "onnxruntime": "onnxruntime>=1.4.0",
"packaging": "packaging", "packaging": "packaging",
"parameterized": "parameterized", "parameterized": "parameterized",
"Pillow": "Pillow",
"protobuf": "protobuf", "protobuf": "protobuf",
"psutil": "psutil", "psutil": "psutil",
"pydantic": "pydantic", "pydantic": "pydantic",
@@ -36,15 +39,18 @@ deps = {
"recommonmark": "recommonmark", "recommonmark": "recommonmark",
"regex": "regex!=2019.12.17", "regex": "regex!=2019.12.17",
"requests": "requests", "requests": "requests",
"rouge-score": "rouge-score",
"sacrebleu": "sacrebleu>=1.4.12",
"sacremoses": "sacremoses", "sacremoses": "sacremoses",
"sagemaker": "sagemaker>=2.31.0",
"scikit-learn": "scikit-learn", "scikit-learn": "scikit-learn",
"sentencepiece": "sentencepiece==0.1.91", "sentencepiece": "sentencepiece==0.1.91",
"soundfile": "soundfile", "soundfile": "soundfile",
"sphinx-copybutton": "sphinx-copybutton", "sphinx-copybutton": "sphinx-copybutton",
"sphinx-markdown-tables": "sphinx-markdown-tables", "sphinx-markdown-tables": "sphinx-markdown-tables",
"sphinx-rtd-theme": "sphinx-rtd-theme==0.4.3", "sphinx-rtd-theme": "sphinx-rtd-theme==0.4.3",
"sphinxext-opengraph": "sphinxext-opengraph==0.4.1",
"sphinx": "sphinx==3.2.1", "sphinx": "sphinx==3.2.1",
"sphinxext-opengraph": "sphinxext-opengraph==0.4.1",
"starlette": "starlette", "starlette": "starlette",
"tensorflow-cpu": "tensorflow-cpu>=2.3", "tensorflow-cpu": "tensorflow-cpu>=2.3",
"tensorflow": "tensorflow>=2.3", "tensorflow": "tensorflow>=2.3",
@@ -56,5 +62,4 @@ deps = {
"unidic": "unidic>=1.0.2", "unidic": "unidic>=1.0.2",
"unidic_lite": "unidic_lite>=1.0.7", "unidic_lite": "unidic_lite>=1.0.7",
"uvicorn": "uvicorn", "uvicorn": "uvicorn",
"sagemaker": "sagemaker>=2.31.0",
} }

View File

@@ -24,8 +24,8 @@ import tempfile
from copy import deepcopy from copy import deepcopy
from pathlib import Path from pathlib import Path
from .dependency_versions_check import dep_version_check
from .utils import logging from .utils import logging
from .utils.versions import require_version
logger = logging.get_logger(__name__) logger = logging.get_logger(__name__)
@@ -324,7 +324,7 @@ def deepspeed_parse_config(ds_config):
If it's already a dict, return a copy of it, so that we can freely modify it. If it's already a dict, return a copy of it, so that we can freely modify it.
""" """
require_version("deepspeed>0.3.13") dep_version_check("deepspeed")
if isinstance(ds_config, dict): if isinstance(ds_config, dict):
# Don't modify user's data should they want to reuse it (e.g. in tests), because once we # Don't modify user's data should they want to reuse it (e.g. in tests), because once we
@@ -604,7 +604,9 @@ class TensorBoardCallback(TrainerCallback):
self.tb_writer.add_hparams(args.to_sanitized_dict(), metric_dict={}) self.tb_writer.add_hparams(args.to_sanitized_dict(), metric_dict={})
def on_log(self, args, state, control, logs=None, **kwargs): def on_log(self, args, state, control, logs=None, **kwargs):
if state.is_world_process_zero: if not state.is_world_process_zero:
return
if self.tb_writer is None: if self.tb_writer is None:
self._init_summary_writer(args) self._init_summary_writer(args)

View File

@@ -387,6 +387,7 @@ class FlaxPreTrainedModel(ABC):
# get abs dir # get abs dir
save_directory = os.path.abspath(save_directory) save_directory = os.path.abspath(save_directory)
# save config as well # save config as well
self.config.architectures = [self.__class__.__name__[4:]]
self.config.save_pretrained(save_directory) self.config.save_pretrained(save_directory)
# save model # save model

View File

@@ -1037,6 +1037,7 @@ class TFPreTrainedModel(tf.keras.Model, TFModelUtilsMixin, TFGenerationMixin):
logger.info(f"Saved model created in {saved_model_dir}") logger.info(f"Saved model created in {saved_model_dir}")
# Save configuration file # Save configuration file
self.config.architectures = [self.__class__.__name__[2:]]
self.config.save_pretrained(save_directory) self.config.save_pretrained(save_directory)
# If we save using the predefined names, we can load using `from_pretrained` # If we save using the predefined names, we can load using `from_pretrained`

View File

@@ -22,6 +22,7 @@ from ...file_utils import _BaseLazyModule, is_flax_available, is_tf_available, i
_import_structure = { _import_structure = {
"auto_factory": ["get_values"],
"configuration_auto": ["ALL_PRETRAINED_CONFIG_ARCHIVE_MAP", "CONFIG_MAPPING", "MODEL_NAMES_MAPPING", "AutoConfig"], "configuration_auto": ["ALL_PRETRAINED_CONFIG_ARCHIVE_MAP", "CONFIG_MAPPING", "MODEL_NAMES_MAPPING", "AutoConfig"],
"feature_extraction_auto": ["FEATURE_EXTRACTOR_MAPPING", "AutoFeatureExtractor"], "feature_extraction_auto": ["FEATURE_EXTRACTOR_MAPPING", "AutoFeatureExtractor"],
"tokenization_auto": ["TOKENIZER_MAPPING", "AutoTokenizer"], "tokenization_auto": ["TOKENIZER_MAPPING", "AutoTokenizer"],
@@ -104,6 +105,7 @@ if is_flax_available():
if TYPE_CHECKING: if TYPE_CHECKING:
from .auto_factory import get_values
from .configuration_auto import ALL_PRETRAINED_CONFIG_ARCHIVE_MAP, CONFIG_MAPPING, MODEL_NAMES_MAPPING, AutoConfig from .configuration_auto import ALL_PRETRAINED_CONFIG_ARCHIVE_MAP, CONFIG_MAPPING, MODEL_NAMES_MAPPING, AutoConfig
from .feature_extraction_auto import FEATURE_EXTRACTOR_MAPPING, AutoFeatureExtractor from .feature_extraction_auto import FEATURE_EXTRACTOR_MAPPING, AutoFeatureExtractor
from .tokenization_auto import TOKENIZER_MAPPING, AutoTokenizer from .tokenization_auto import TOKENIZER_MAPPING, AutoTokenizer

View File

@@ -328,6 +328,26 @@ FROM_PRETRAINED_FLAX_DOCSTRING = """
""" """
def _get_model_class(config, model_mapping):
supported_models = model_mapping[type(config)]
if not isinstance(supported_models, (list, tuple)):
return supported_models
name_to_model = {model.__name__: model for model in supported_models}
architectures = getattr(config, "architectures", [])
for arch in architectures:
if arch in name_to_model:
return name_to_model[arch]
elif f"TF{arch}" in name_to_model:
return name_to_model[f"TF{arch}"]
elif f"Flax{arch}" in name_to_model:
return name_to_model[f"Flax{arch}"]
# If not architecture is set in the config or match the supported models, the first element of the tuple is the
# defaults.
return supported_models[0]
class _BaseAutoModelClass: class _BaseAutoModelClass:
# Base class for auto models. # Base class for auto models.
_model_mapping = None _model_mapping = None
@@ -341,7 +361,8 @@ class _BaseAutoModelClass:
def from_config(cls, config, **kwargs): def from_config(cls, config, **kwargs):
if type(config) in cls._model_mapping.keys(): if type(config) in cls._model_mapping.keys():
return cls._model_mapping[type(config)](config, **kwargs) model_class = _get_model_class(config, cls._model_mapping)
return model_class(config, **kwargs)
raise ValueError( raise ValueError(
f"Unrecognized configuration class {config.__class__} for this kind of AutoModel: {cls.__name__}.\n" f"Unrecognized configuration class {config.__class__} for this kind of AutoModel: {cls.__name__}.\n"
f"Model type should be one of {', '.join(c.__name__ for c in cls._model_mapping.keys())}." f"Model type should be one of {', '.join(c.__name__ for c in cls._model_mapping.keys())}."
@@ -356,9 +377,8 @@ class _BaseAutoModelClass:
) )
if type(config) in cls._model_mapping.keys(): if type(config) in cls._model_mapping.keys():
return cls._model_mapping[type(config)].from_pretrained( model_class = _get_model_class(config, cls._model_mapping)
pretrained_model_name_or_path, *model_args, config=config, **kwargs return model_class.from_pretrained(pretrained_model_name_or_path, *model_args, config=config, **kwargs)
)
raise ValueError( raise ValueError(
f"Unrecognized configuration class {config.__class__} for this kind of AutoModel: {cls.__name__}.\n" f"Unrecognized configuration class {config.__class__} for this kind of AutoModel: {cls.__name__}.\n"
f"Model type should be one of {', '.join(c.__name__ for c in cls._model_mapping.keys())}." f"Model type should be one of {', '.join(c.__name__ for c in cls._model_mapping.keys())}."
@@ -418,3 +438,14 @@ def auto_class_factory(name, model_mapping, checkpoint_for_example="bert-base-ca
from_pretrained = replace_list_option_in_docstrings(model_mapping)(from_pretrained) from_pretrained = replace_list_option_in_docstrings(model_mapping)(from_pretrained)
new_class.from_pretrained = classmethod(from_pretrained) new_class.from_pretrained = classmethod(from_pretrained)
return new_class return new_class
def get_values(model_mapping):
    """
    Flatten the values of ``model_mapping`` into a single list.

    Values that are lists or tuples contribute each of their elements (in order); any other value is
    appended as-is. The order of the mapping's values is preserved.
    """
    flattened = []
    for entry in model_mapping.values():
        group = entry if isinstance(entry, (list, tuple)) else [entry]
        flattened.extend(group)
    return flattened

View File

@@ -247,29 +247,38 @@ MODEL_NAMES_MAPPING = OrderedDict(
) )
def _get_class_name(model_class):
if isinstance(model_class, (list, tuple)):
return " or ".join([f":class:`~transformers.{c.__name__}`" for c in model_class])
return f":class:`~transformers.{model_class.__name__}`"
def _list_model_options(indent, config_to_class=None, use_model_types=True): def _list_model_options(indent, config_to_class=None, use_model_types=True):
if config_to_class is None and not use_model_types: if config_to_class is None and not use_model_types:
raise ValueError("Using `use_model_types=False` requires a `config_to_class` dictionary.") raise ValueError("Using `use_model_types=False` requires a `config_to_class` dictionary.")
if use_model_types: if use_model_types:
if config_to_class is None: if config_to_class is None:
model_type_to_name = {model_type: config.__name__ for model_type, config in CONFIG_MAPPING.items()} model_type_to_name = {
model_type: f":class:`~transformers.{config.__name__}`"
for model_type, config in CONFIG_MAPPING.items()
}
else: else:
model_type_to_name = { model_type_to_name = {
model_type: config_to_class[config].__name__ model_type: _get_class_name(config_to_class[config])
for model_type, config in CONFIG_MAPPING.items() for model_type, config in CONFIG_MAPPING.items()
if config in config_to_class if config in config_to_class
} }
lines = [ lines = [
f"{indent}- **{model_type}** -- :class:`~transformers.{model_type_to_name[model_type]}` ({MODEL_NAMES_MAPPING[model_type]} model)" f"{indent}- **{model_type}** -- {model_type_to_name[model_type]} ({MODEL_NAMES_MAPPING[model_type]} model)"
for model_type in sorted(model_type_to_name.keys()) for model_type in sorted(model_type_to_name.keys())
] ]
else: else:
config_to_name = {config.__name__: clas.__name__ for config, clas in config_to_class.items()} config_to_name = {config.__name__: _get_class_name(clas) for config, clas in config_to_class.items()}
config_to_model_name = { config_to_model_name = {
config.__name__: MODEL_NAMES_MAPPING[model_type] for model_type, config in CONFIG_MAPPING.items() config.__name__: MODEL_NAMES_MAPPING[model_type] for model_type, config in CONFIG_MAPPING.items()
} }
lines = [ lines = [
f"{indent}- :class:`~transformers.{config_name}` configuration class: :class:`~transformers.{config_to_name[config_name]}` ({config_to_model_name[config_name]} model)" f"{indent}- :class:`~transformers.{config_name}` configuration class: {config_to_name[config_name]} ({config_to_model_name[config_name]} model)"
for config_name in sorted(config_to_name.keys()) for config_name in sorted(config_to_name.keys())
] ]
return "\n".join(lines) return "\n".join(lines)

View File

@@ -124,6 +124,7 @@ from ..flaubert.modeling_flaubert import (
) )
from ..fsmt.modeling_fsmt import FSMTForConditionalGeneration, FSMTModel from ..fsmt.modeling_fsmt import FSMTForConditionalGeneration, FSMTModel
from ..funnel.modeling_funnel import ( from ..funnel.modeling_funnel import (
FunnelBaseModel,
FunnelForMaskedLM, FunnelForMaskedLM,
FunnelForMultipleChoice, FunnelForMultipleChoice,
FunnelForPreTraining, FunnelForPreTraining,
@@ -377,7 +378,7 @@ MODEL_MAPPING = OrderedDict(
(CTRLConfig, CTRLModel), (CTRLConfig, CTRLModel),
(ElectraConfig, ElectraModel), (ElectraConfig, ElectraModel),
(ReformerConfig, ReformerModel), (ReformerConfig, ReformerModel),
(FunnelConfig, FunnelModel), (FunnelConfig, (FunnelModel, FunnelBaseModel)),
(LxmertConfig, LxmertModel), (LxmertConfig, LxmertModel),
(BertGenerationConfig, BertGenerationEncoder), (BertGenerationConfig, BertGenerationEncoder),
(DebertaConfig, DebertaModel), (DebertaConfig, DebertaModel),

View File

@@ -91,6 +91,7 @@ from ..flaubert.modeling_tf_flaubert import (
TFFlaubertWithLMHeadModel, TFFlaubertWithLMHeadModel,
) )
from ..funnel.modeling_tf_funnel import ( from ..funnel.modeling_tf_funnel import (
TFFunnelBaseModel,
TFFunnelForMaskedLM, TFFunnelForMaskedLM,
TFFunnelForMultipleChoice, TFFunnelForMultipleChoice,
TFFunnelForPreTraining, TFFunnelForPreTraining,
@@ -242,7 +243,7 @@ TF_MODEL_MAPPING = OrderedDict(
(XLMConfig, TFXLMModel), (XLMConfig, TFXLMModel),
(CTRLConfig, TFCTRLModel), (CTRLConfig, TFCTRLModel),
(ElectraConfig, TFElectraModel), (ElectraConfig, TFElectraModel),
(FunnelConfig, TFFunnelModel), (FunnelConfig, (TFFunnelModel, TFFunnelBaseModel)),
(DPRConfig, TFDPRQuestionEncoder), (DPRConfig, TFDPRQuestionEncoder),
(MPNetConfig, TFMPNetModel), (MPNetConfig, TFMPNetModel),
(BartConfig, TFBartModel), (BartConfig, TFBartModel),

View File

@@ -24,6 +24,7 @@ import unittest
from distutils.util import strtobool from distutils.util import strtobool
from io import StringIO from io import StringIO
from pathlib import Path from pathlib import Path
from typing import Iterator, Union
from .file_utils import ( from .file_utils import (
is_datasets_available, is_datasets_available,
@@ -621,6 +622,27 @@ class CaptureLogger:
return f"captured: {self.out}\n" return f"captured: {self.out}\n"
# adapted from https://stackoverflow.com/a/64789046/9201239
@contextlib.contextmanager
def ExtendSysPath(path: Union[str, os.PathLike]) -> Iterator[None]:
    """
    Temporarily add the given path to the front of `sys.path`.

    The path is inserted at index 0 on entry and removed again on exit, whether or not the body raised.

    Args:
        path: directory to prepend to `sys.path`; converted to a string with `os.fspath`.

    Usage ::

       with ExtendSysPath('/path/to/dir'):
           mymodule = importlib.import_module('mymodule')

    """

    path = os.fspath(path)
    # Insert before entering the try block so that cleanup only runs once the entry has really been added.
    sys.path.insert(0, path)
    try:
        yield
    finally:
        # The body may already have dropped the entry from sys.path itself; do not let the cleanup raise
        # ValueError in that case, as it would mask any exception propagating out of the body.
        with contextlib.suppress(ValueError):
            sys.path.remove(path)
class TestCasePlus(unittest.TestCase): class TestCasePlus(unittest.TestCase):
""" """
This class extends `unittest.TestCase` with additional features. This class extends `unittest.TestCase` with additional features.

View File

@@ -54,6 +54,7 @@ from torch.utils.data.distributed import DistributedSampler
from torch.utils.data.sampler import RandomSampler, SequentialSampler from torch.utils.data.sampler import RandomSampler, SequentialSampler
from .data.data_collator import DataCollator, DataCollatorWithPadding, default_data_collator from .data.data_collator import DataCollator, DataCollatorWithPadding, default_data_collator
from .dependency_versions_check import dep_version_check
from .file_utils import ( from .file_utils import (
WEIGHTS_NAME, WEIGHTS_NAME,
is_apex_available, is_apex_available,
@@ -139,17 +140,14 @@ if is_torch_tpu_available():
import torch_xla.distributed.parallel_loader as pl import torch_xla.distributed.parallel_loader as pl
if is_fairscale_available(): if is_fairscale_available():
dep_version_check("fairscale")
import fairscale import fairscale
from fairscale.nn.data_parallel import FullyShardedDataParallel as FullyShardedDDP
from fairscale.nn.data_parallel import ShardedDataParallel as ShardedDDP from fairscale.nn.data_parallel import ShardedDataParallel as ShardedDDP
from fairscale.nn.wrap import auto_wrap
from fairscale.optim import OSS from fairscale.optim import OSS
from fairscale.optim.grad_scaler import ShardedGradScaler from fairscale.optim.grad_scaler import ShardedGradScaler
if version.parse(fairscale.__version__) >= version.parse("0.3"):
from fairscale.nn.data_parallel import FullyShardedDataParallel as FullyShardedDDP
from fairscale.nn.wrap import auto_wrap
else:
FullyShardedDDP = None
if is_sagemaker_dp_enabled(): if is_sagemaker_dp_enabled():
import smdistributed.dataparallel.torch.distributed as dist import smdistributed.dataparallel.torch.distributed as dist
from smdistributed.dataparallel.torch.parallel.distributed import DistributedDataParallel as DDP from smdistributed.dataparallel.torch.parallel.distributed import DistributedDataParallel as DDP

View File

@@ -531,6 +531,12 @@ class TrainingArguments:
) )
def __post_init__(self): def __post_init__(self):
# Handle --use_env option in torch.distributed.launch (local_rank not passed as an arg then).
# This needs to happen before any call to self.device or self.n_gpu.
env_local_rank = int(os.environ.get("LOCAL_RANK", -1))
if env_local_rank != -1 and env_local_rank != self.local_rank:
self.local_rank = env_local_rank
# expand paths, if not os.makedirs("~/bar") will make directory # expand paths, if not os.makedirs("~/bar") will make directory
# in the current directory instead of the actual home # in the current directory instead of the actual home
#  see https://github.com/huggingface/transformers/issues/10628 #  see https://github.com/huggingface/transformers/issues/10628

View File

@@ -60,6 +60,12 @@ def require_version(requirement: str, hint: Optional[str] = None) -> None:
Args: Args:
requirement (:obj:`str`): pip style definition, e.g., "tokenizers==0.9.4", "tqdm>=4.27", "numpy" requirement (:obj:`str`): pip style definition, e.g., "tokenizers==0.9.4", "tqdm>=4.27", "numpy"
hint (:obj:`str`, `optional`): what suggestion to print in case of requirements not being met hint (:obj:`str`, `optional`): what suggestion to print in case of requirements not being met
Example::
require_version("pandas>1.1.2")
require_version("numpy>1.18.5", "this is important to have for whatever reason")
""" """
hint = f"\n{hint}" if hint is not None else "" hint = f"\n{hint}" if hint is not None else ""

View File

@@ -16,16 +16,16 @@ import dataclasses
import io import io
import json import json
import os import os
import sys
import unittest import unittest
from copy import deepcopy from copy import deepcopy
from parameterized import parameterized from parameterized import parameterized
from transformers import TrainingArguments from transformers import TrainingArguments, is_torch_available
from transformers.file_utils import WEIGHTS_NAME from transformers.file_utils import WEIGHTS_NAME
from transformers.integrations import is_deepspeed_available from transformers.integrations import is_deepspeed_available
from transformers.testing_utils import ( from transformers.testing_utils import (
CaptureLogger, CaptureLogger,
ExtendSysPath,
TestCasePlus, TestCasePlus,
execute_subprocess_async, execute_subprocess_async,
get_gpu_count, get_gpu_count,
@@ -38,8 +38,11 @@ from transformers.trainer_utils import set_seed
bindir = os.path.abspath(os.path.dirname(__file__)) bindir = os.path.abspath(os.path.dirname(__file__))
sys.path.append(f"{bindir}/../../../tests") with ExtendSysPath(f"{bindir}/.."):
from test_trainer import TrainerIntegrationCommon, get_regression_trainer # noqa from test_trainer import TrainerIntegrationCommon # noqa
if is_torch_available():
from test_trainer import get_regression_trainer # noqa
set_seed(42) set_seed(42)

View File

@@ -21,6 +21,7 @@ from unittest.mock import patch
from transformers.file_utils import is_apex_available from transformers.file_utils import is_apex_available
from transformers.integrations import is_fairscale_available from transformers.integrations import is_fairscale_available
from transformers.testing_utils import ( from transformers.testing_utils import (
ExtendSysPath,
TestCasePlus, TestCasePlus,
execute_subprocess_async, execute_subprocess_async,
get_gpu_count, get_gpu_count,
@@ -34,8 +35,8 @@ from transformers.trainer_utils import set_seed
bindir = os.path.abspath(os.path.dirname(__file__)) bindir = os.path.abspath(os.path.dirname(__file__))
sys.path.append(f"{bindir}/../../seq2seq") with ExtendSysPath(f"{bindir}/../../examples/seq2seq"):
from run_translation import main # noqa from run_translation import main # noqa
set_seed(42) set_seed(42)

View File

@@ -136,10 +136,7 @@ images:
docker_file: !join [ docker/, *SHORT_VERSION, /, *DOCKER_PYTHON_VERSION, /, docker_file: !join [ docker/, *SHORT_VERSION, /, *DOCKER_PYTHON_VERSION, /,
*CUDA_VERSION, /Dockerfile., *DEVICE_TYPE ] *CUDA_VERSION, /Dockerfile., *DEVICE_TYPE ]
``` ```
2. In the PR comment describe what test we ran and with which framework versions. Here you can copy the table from [Current Tests](#current-tests). 2. In the PR comment describe what test we ran and with which framework versions. Here you can copy the table from [Current Tests](#current-tests). You can take a look at this [PR](https://github.com/aws/deep-learning-containers/pull/1016) to see which information is needed.
TODO: Add a screenshot of PR + Text template to make it easy to open.
## Current Tests ## Current Tests

View File

@@ -146,11 +146,8 @@ class DataCollatorIntegrationTest(unittest.TestCase):
self.assertEqual(batch["labels"].shape, torch.Size([2, 6])) self.assertEqual(batch["labels"].shape, torch.Size([2, 6]))
self.assertEqual(batch["labels"][0].tolist(), [0, 1, 2] + [-1] * 3) self.assertEqual(batch["labels"][0].tolist(), [0, 1, 2] + [-1] * 3)
def test_data_collator_for_language_modeling(self): def _test_no_pad_and_pad(self, no_pad_features, pad_features):
tokenizer = BertTokenizer(self.vocab_file) tokenizer = BertTokenizer(self.vocab_file)
no_pad_features = [{"input_ids": list(range(10))}, {"input_ids": list(range(10))}]
pad_features = [{"input_ids": list(range(5))}, {"input_ids": list(range(10))}]
data_collator = DataCollatorForLanguageModeling(tokenizer, mlm=False) data_collator = DataCollatorForLanguageModeling(tokenizer, mlm=False)
batch = data_collator(no_pad_features) batch = data_collator(no_pad_features)
self.assertEqual(batch["input_ids"].shape, torch.Size((2, 10))) self.assertEqual(batch["input_ids"].shape, torch.Size((2, 10)))
@@ -160,6 +157,15 @@ class DataCollatorIntegrationTest(unittest.TestCase):
self.assertEqual(batch["input_ids"].shape, torch.Size((2, 10))) self.assertEqual(batch["input_ids"].shape, torch.Size((2, 10)))
self.assertEqual(batch["labels"].shape, torch.Size((2, 10))) self.assertEqual(batch["labels"].shape, torch.Size((2, 10)))
data_collator = DataCollatorForLanguageModeling(tokenizer, mlm=False, pad_to_multiple_of=8)
batch = data_collator(no_pad_features)
self.assertEqual(batch["input_ids"].shape, torch.Size((2, 16)))
self.assertEqual(batch["labels"].shape, torch.Size((2, 16)))
batch = data_collator(pad_features)
self.assertEqual(batch["input_ids"].shape, torch.Size((2, 16)))
self.assertEqual(batch["labels"].shape, torch.Size((2, 16)))
tokenizer._pad_token = None tokenizer._pad_token = None
data_collator = DataCollatorForLanguageModeling(tokenizer, mlm=False) data_collator = DataCollatorForLanguageModeling(tokenizer, mlm=False)
with self.assertRaises(ValueError): with self.assertRaises(ValueError):
@@ -185,6 +191,32 @@ class DataCollatorIntegrationTest(unittest.TestCase):
self.assertTrue(torch.any(masked_tokens)) self.assertTrue(torch.any(masked_tokens))
self.assertTrue(all(x == -100 for x in batch["labels"][~masked_tokens].tolist())) self.assertTrue(all(x == -100 for x in batch["labels"][~masked_tokens].tolist()))
data_collator = DataCollatorForLanguageModeling(tokenizer, pad_to_multiple_of=8)
batch = data_collator(no_pad_features)
self.assertEqual(batch["input_ids"].shape, torch.Size((2, 16)))
self.assertEqual(batch["labels"].shape, torch.Size((2, 16)))
masked_tokens = batch["input_ids"] == tokenizer.mask_token_id
self.assertTrue(torch.any(masked_tokens))
self.assertTrue(all(x == -100 for x in batch["labels"][~masked_tokens].tolist()))
batch = data_collator(pad_features)
self.assertEqual(batch["input_ids"].shape, torch.Size((2, 16)))
self.assertEqual(batch["labels"].shape, torch.Size((2, 16)))
masked_tokens = batch["input_ids"] == tokenizer.mask_token_id
self.assertTrue(torch.any(masked_tokens))
self.assertTrue(all(x == -100 for x in batch["labels"][~masked_tokens].tolist()))
def test_data_collator_for_language_modeling(self):
    # Run the shared pad / no-pad checks twice: first with features wrapped in
    # {"input_ids": ...} dicts, then with the bare lists of ids.
    def as_dict(ids):
        return {"input_ids": ids}

    def as_list(ids):
        return ids

    for wrap in (as_dict, as_list):
        # Both examples have length 10, so the batch has a single common length.
        equal_length = [wrap(list(range(10))), wrap(list(range(10)))]
        # Lengths 5 and 10: the two examples differ in length.
        mixed_length = [wrap(list(range(5))), wrap(list(range(10)))]
        self._test_no_pad_and_pad(equal_length, mixed_length)
def test_plm(self): def test_plm(self):
tokenizer = BertTokenizer(self.vocab_file) tokenizer = BertTokenizer(self.vocab_file)
no_pad_features = [{"input_ids": list(range(10))}, {"input_ids": list(range(10))}] no_pad_features = [{"input_ids": list(range(10))}, {"input_ids": list(range(10))}]
@@ -225,6 +257,14 @@ class DataCollatorIntegrationTest(unittest.TestCase):
self.assertEqual(batch["labels"].shape, torch.Size((2, 5))) self.assertEqual(batch["labels"].shape, torch.Size((2, 5)))
self.assertEqual(batch["next_sentence_label"].shape, torch.Size((2,))) self.assertEqual(batch["next_sentence_label"].shape, torch.Size((2,)))
data_collator = DataCollatorForLanguageModeling(tokenizer, pad_to_multiple_of=8)
batch = data_collator(features)
self.assertEqual(batch["input_ids"].shape, torch.Size((2, 8)))
self.assertEqual(batch["token_type_ids"].shape, torch.Size((2, 8)))
self.assertEqual(batch["labels"].shape, torch.Size((2, 8)))
self.assertEqual(batch["next_sentence_label"].shape, torch.Size((2,)))
def test_sop(self): def test_sop(self):
tokenizer = BertTokenizer(self.vocab_file) tokenizer = BertTokenizer(self.vocab_file)
features = [ features = [
@@ -242,3 +282,11 @@ class DataCollatorIntegrationTest(unittest.TestCase):
self.assertEqual(batch["token_type_ids"].shape, torch.Size((2, 5))) self.assertEqual(batch["token_type_ids"].shape, torch.Size((2, 5)))
self.assertEqual(batch["labels"].shape, torch.Size((2, 5))) self.assertEqual(batch["labels"].shape, torch.Size((2, 5)))
self.assertEqual(batch["sentence_order_label"].shape, torch.Size((2,))) self.assertEqual(batch["sentence_order_label"].shape, torch.Size((2,)))
data_collator = DataCollatorForLanguageModeling(tokenizer, pad_to_multiple_of=8)
batch = data_collator(features)
self.assertEqual(batch["input_ids"].shape, torch.Size((2, 8)))
self.assertEqual(batch["token_type_ids"].shape, torch.Size((2, 8)))
self.assertEqual(batch["labels"].shape, torch.Size((2, 8)))
self.assertEqual(batch["sentence_order_label"].shape, torch.Size((2,)))

View File

@@ -17,6 +17,7 @@
import unittest import unittest
from transformers import is_torch_available from transformers import is_torch_available
from transformers.models.auto import get_values
from transformers.testing_utils import require_torch, slow, torch_device from transformers.testing_utils import require_torch, slow, torch_device
from .test_configuration_common import ConfigTester from .test_configuration_common import ConfigTester
@@ -234,7 +235,7 @@ class AlbertModelTest(ModelTesterMixin, unittest.TestCase):
inputs_dict = super()._prepare_for_class(inputs_dict, model_class, return_labels=return_labels) inputs_dict = super()._prepare_for_class(inputs_dict, model_class, return_labels=return_labels)
if return_labels: if return_labels:
if model_class in MODEL_FOR_PRETRAINING_MAPPING.values(): if model_class in get_values(MODEL_FOR_PRETRAINING_MAPPING):
inputs_dict["labels"] = torch.zeros( inputs_dict["labels"] = torch.zeros(
(self.model_tester.batch_size, self.model_tester.seq_length), dtype=torch.long, device=torch_device (self.model_tester.batch_size, self.model_tester.seq_length), dtype=torch.long, device=torch_device
) )

View File

@@ -13,7 +13,8 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
import copy
import tempfile
import unittest import unittest
from transformers import is_torch_available from transformers import is_torch_available
@@ -46,6 +47,8 @@ if is_torch_available():
BertForSequenceClassification, BertForSequenceClassification,
BertForTokenClassification, BertForTokenClassification,
BertModel, BertModel,
FunnelBaseModel,
FunnelModel,
GPT2Config, GPT2Config,
GPT2LMHeadModel, GPT2LMHeadModel,
RobertaForMaskedLM, RobertaForMaskedLM,
@@ -218,6 +221,21 @@ class AutoModelTest(unittest.TestCase):
self.assertEqual(model.num_parameters(), 14410) self.assertEqual(model.num_parameters(), 14410)
self.assertEqual(model.num_parameters(only_trainable=True), 14410) self.assertEqual(model.num_parameters(only_trainable=True), 14410)
def test_from_pretrained_with_tuple_values(self):
    # For the auto model mapping, FunnelConfig has two models: FunnelModel and FunnelBaseModel
    funnel = AutoModel.from_pretrained("sgugger/funnel-random-tiny")
    self.assertIsInstance(funnel, FunnelModel)

    # Work on a copy of the config and point `architectures` at the base model instead.
    base_config = copy.deepcopy(funnel.config)
    base_config.architectures = ["FunnelBaseModel"]
    base_model = AutoModel.from_config(base_config)
    self.assertIsInstance(base_model, FunnelBaseModel)

    # Save the base model and load it back through AutoModel; it should still be a FunnelBaseModel.
    with tempfile.TemporaryDirectory() as save_dir:
        base_model.save_pretrained(save_dir)
        reloaded = AutoModel.from_pretrained(save_dir)
        self.assertIsInstance(reloaded, FunnelBaseModel)
def test_parents_and_children_in_mappings(self): def test_parents_and_children_in_mappings(self):
# Test that the children are placed before the parents in the mappings, as the `instanceof` will be triggered # Test that the children are placed before the parents in the mappings, as the `instanceof` will be triggered
# by the parents and will return the wrong configuration type when using auto models # by the parents and will return the wrong configuration type when using auto models
@@ -242,6 +260,12 @@ class AutoModelTest(unittest.TestCase):
assert not issubclass( assert not issubclass(
child_config, parent_config child_config, parent_config
), f"{child_config.__name__} is child of {parent_config.__name__}" ), f"{child_config.__name__} is child of {parent_config.__name__}"
assert not issubclass(
child_model, parent_model # Tuplify child_model and parent_model since some of them could be tuples.
), f"{child_config.__name__} is child of {parent_config.__name__}" if not isinstance(child_model, (list, tuple)):
child_model = (child_model,)
if not isinstance(parent_model, (list, tuple)):
parent_model = (parent_model,)
for child, parent in [(a, b) for a in child_model for b in parent_model]:
assert not issubclass(child, parent), f"{child.__name__} is child of {parent.__name__}"

View File

@@ -17,6 +17,7 @@
import unittest import unittest
from transformers import is_torch_available from transformers import is_torch_available
from transformers.models.auto import get_values
from transformers.testing_utils import require_torch, slow, torch_device from transformers.testing_utils import require_torch, slow, torch_device
from .test_configuration_common import ConfigTester from .test_configuration_common import ConfigTester
@@ -444,7 +445,7 @@ class BertModelTest(ModelTesterMixin, GenerationTesterMixin, unittest.TestCase):
inputs_dict = super()._prepare_for_class(inputs_dict, model_class, return_labels=return_labels) inputs_dict = super()._prepare_for_class(inputs_dict, model_class, return_labels=return_labels)
if return_labels: if return_labels:
if model_class in MODEL_FOR_PRETRAINING_MAPPING.values(): if model_class in get_values(MODEL_FOR_PRETRAINING_MAPPING):
inputs_dict["labels"] = torch.zeros( inputs_dict["labels"] = torch.zeros(
(self.model_tester.batch_size, self.model_tester.seq_length), dtype=torch.long, device=torch_device (self.model_tester.batch_size, self.model_tester.seq_length), dtype=torch.long, device=torch_device
) )

View File

@@ -19,6 +19,7 @@ import unittest
from tests.test_modeling_common import floats_tensor from tests.test_modeling_common import floats_tensor
from transformers import is_torch_available from transformers import is_torch_available
from transformers.models.auto import get_values
from transformers.models.big_bird.tokenization_big_bird import BigBirdTokenizer from transformers.models.big_bird.tokenization_big_bird import BigBirdTokenizer
from transformers.testing_utils import require_torch, slow, torch_device from transformers.testing_utils import require_torch, slow, torch_device
@@ -458,7 +459,7 @@ class BigBirdModelTest(ModelTesterMixin, unittest.TestCase):
inputs_dict = super()._prepare_for_class(inputs_dict, model_class, return_labels=return_labels) inputs_dict = super()._prepare_for_class(inputs_dict, model_class, return_labels=return_labels)
if return_labels: if return_labels:
if model_class in MODEL_FOR_PRETRAINING_MAPPING.values(): if model_class in get_values(MODEL_FOR_PRETRAINING_MAPPING):
inputs_dict["labels"] = torch.zeros( inputs_dict["labels"] = torch.zeros(
(self.model_tester.batch_size, self.model_tester.seq_length), dtype=torch.long, device=torch_device (self.model_tester.batch_size, self.model_tester.seq_length), dtype=torch.long, device=torch_device
) )

View File

@@ -24,6 +24,7 @@ from typing import List, Tuple
from transformers import is_torch_available from transformers import is_torch_available
from transformers.file_utils import WEIGHTS_NAME from transformers.file_utils import WEIGHTS_NAME
from transformers.models.auto import get_values
from transformers.testing_utils import require_torch, require_torch_multi_gpu, slow, torch_device from transformers.testing_utils import require_torch, require_torch_multi_gpu, slow, torch_device
@@ -79,7 +80,7 @@ class ModelTesterMixin:
def _prepare_for_class(self, inputs_dict, model_class, return_labels=False): def _prepare_for_class(self, inputs_dict, model_class, return_labels=False):
inputs_dict = copy.deepcopy(inputs_dict) inputs_dict = copy.deepcopy(inputs_dict)
if model_class in MODEL_FOR_MULTIPLE_CHOICE_MAPPING.values(): if model_class in get_values(MODEL_FOR_MULTIPLE_CHOICE_MAPPING):
inputs_dict = { inputs_dict = {
k: v.unsqueeze(1).expand(-1, self.model_tester.num_choices, -1).contiguous() k: v.unsqueeze(1).expand(-1, self.model_tester.num_choices, -1).contiguous()
if isinstance(v, torch.Tensor) and v.ndim > 1 if isinstance(v, torch.Tensor) and v.ndim > 1
@@ -88,9 +89,9 @@ class ModelTesterMixin:
} }
if return_labels: if return_labels:
if model_class in MODEL_FOR_MULTIPLE_CHOICE_MAPPING.values(): if model_class in get_values(MODEL_FOR_MULTIPLE_CHOICE_MAPPING):
inputs_dict["labels"] = torch.ones(self.model_tester.batch_size, dtype=torch.long, device=torch_device) inputs_dict["labels"] = torch.ones(self.model_tester.batch_size, dtype=torch.long, device=torch_device)
elif model_class in MODEL_FOR_QUESTION_ANSWERING_MAPPING.values(): elif model_class in get_values(MODEL_FOR_QUESTION_ANSWERING_MAPPING):
inputs_dict["start_positions"] = torch.zeros( inputs_dict["start_positions"] = torch.zeros(
self.model_tester.batch_size, dtype=torch.long, device=torch_device self.model_tester.batch_size, dtype=torch.long, device=torch_device
) )
@@ -98,18 +99,18 @@ class ModelTesterMixin:
self.model_tester.batch_size, dtype=torch.long, device=torch_device self.model_tester.batch_size, dtype=torch.long, device=torch_device
) )
elif model_class in [ elif model_class in [
*MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING.values(), *get_values(MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING),
*MODEL_FOR_NEXT_SENTENCE_PREDICTION_MAPPING.values(), *get_values(MODEL_FOR_NEXT_SENTENCE_PREDICTION_MAPPING),
*MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING.values(), *get_values(MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING),
]: ]:
inputs_dict["labels"] = torch.zeros( inputs_dict["labels"] = torch.zeros(
self.model_tester.batch_size, dtype=torch.long, device=torch_device self.model_tester.batch_size, dtype=torch.long, device=torch_device
) )
elif model_class in [ elif model_class in [
*MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING.values(), *get_values(MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING),
*MODEL_FOR_CAUSAL_LM_MAPPING.values(), *get_values(MODEL_FOR_CAUSAL_LM_MAPPING),
*MODEL_FOR_MASKED_LM_MAPPING.values(), *get_values(MODEL_FOR_MASKED_LM_MAPPING),
*MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING.values(), *get_values(MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING),
]: ]:
inputs_dict["labels"] = torch.zeros( inputs_dict["labels"] = torch.zeros(
(self.model_tester.batch_size, self.model_tester.seq_length), dtype=torch.long, device=torch_device (self.model_tester.batch_size, self.model_tester.seq_length), dtype=torch.long, device=torch_device
@@ -229,7 +230,7 @@ class ModelTesterMixin:
config.return_dict = True config.return_dict = True
for model_class in self.all_model_classes: for model_class in self.all_model_classes:
if model_class in MODEL_MAPPING.values(): if model_class in get_values(MODEL_MAPPING):
continue continue
model = model_class(config) model = model_class(config)
model.to(torch_device) model.to(torch_device)
@@ -248,7 +249,7 @@ class ModelTesterMixin:
config.return_dict = True config.return_dict = True
for model_class in self.all_model_classes: for model_class in self.all_model_classes:
if model_class in MODEL_MAPPING.values(): if model_class in get_values(MODEL_MAPPING):
continue continue
model = model_class(config) model = model_class(config)
model.to(torch_device) model.to(torch_device)
@@ -312,7 +313,7 @@ class ModelTesterMixin:
if "labels" in inputs_dict: if "labels" in inputs_dict:
correct_outlen += 1 # loss is added to beginning correct_outlen += 1 # loss is added to beginning
# Question Answering model returns start_logits and end_logits # Question Answering model returns start_logits and end_logits
if model_class in MODEL_FOR_QUESTION_ANSWERING_MAPPING.values(): if model_class in get_values(MODEL_FOR_QUESTION_ANSWERING_MAPPING):
correct_outlen += 1 # start_logits and end_logits instead of only 1 output correct_outlen += 1 # start_logits and end_logits instead of only 1 output
if "past_key_values" in outputs: if "past_key_values" in outputs:
correct_outlen += 1 # past_key_values have been returned correct_outlen += 1 # past_key_values have been returned

View File

@@ -19,6 +19,7 @@ import unittest
from tests.test_modeling_common import floats_tensor from tests.test_modeling_common import floats_tensor
from transformers import is_torch_available from transformers import is_torch_available
from transformers.models.auto import get_values
from transformers.testing_utils import require_torch, slow, torch_device from transformers.testing_utils import require_torch, slow, torch_device
from .test_configuration_common import ConfigTester from .test_configuration_common import ConfigTester
@@ -352,7 +353,7 @@ class ConvBertModelTest(ModelTesterMixin, unittest.TestCase):
if "labels" in inputs_dict: if "labels" in inputs_dict:
correct_outlen += 1 # loss is added to beginning correct_outlen += 1 # loss is added to beginning
# Question Answering model returns start_logits and end_logits # Question Answering model returns start_logits and end_logits
if model_class in MODEL_FOR_QUESTION_ANSWERING_MAPPING.values(): if model_class in get_values(MODEL_FOR_QUESTION_ANSWERING_MAPPING):
correct_outlen += 1 # start_logits and end_logits instead of only 1 output correct_outlen += 1 # start_logits and end_logits instead of only 1 output
if "past_key_values" in outputs: if "past_key_values" in outputs:
correct_outlen += 1 # past_key_values have been returned correct_outlen += 1 # past_key_values have been returned

View File

@@ -17,6 +17,7 @@
import unittest import unittest
from transformers import is_torch_available from transformers import is_torch_available
from transformers.models.auto import get_values
from transformers.testing_utils import require_torch, slow, torch_device from transformers.testing_utils import require_torch, slow, torch_device
from .test_configuration_common import ConfigTester from .test_configuration_common import ConfigTester
@@ -292,7 +293,7 @@ class ElectraModelTest(ModelTesterMixin, unittest.TestCase):
inputs_dict = super()._prepare_for_class(inputs_dict, model_class, return_labels=return_labels) inputs_dict = super()._prepare_for_class(inputs_dict, model_class, return_labels=return_labels)
if return_labels: if return_labels:
if model_class in MODEL_FOR_PRETRAINING_MAPPING.values(): if model_class in get_values(MODEL_FOR_PRETRAINING_MAPPING):
inputs_dict["labels"] = torch.zeros( inputs_dict["labels"] = torch.zeros(
(self.model_tester.batch_size, self.model_tester.seq_length), dtype=torch.long, device=torch_device (self.model_tester.batch_size, self.model_tester.seq_length), dtype=torch.long, device=torch_device
) )

View File

@@ -29,6 +29,7 @@ if is_flax_available():
FlaxBertForNextSentencePrediction, FlaxBertForNextSentencePrediction,
FlaxBertForPreTraining, FlaxBertForPreTraining,
FlaxBertForQuestionAnswering, FlaxBertForQuestionAnswering,
FlaxBertForSequenceClassification,
FlaxBertForTokenClassification, FlaxBertForTokenClassification,
FlaxBertModel, FlaxBertModel,
) )
@@ -125,6 +126,7 @@ class FlaxBertModelTest(FlaxModelTesterMixin, unittest.TestCase):
FlaxBertForMultipleChoice, FlaxBertForMultipleChoice,
FlaxBertForQuestionAnswering, FlaxBertForQuestionAnswering,
FlaxBertForNextSentencePrediction, FlaxBertForNextSentencePrediction,
FlaxBertForSequenceClassification,
FlaxBertForTokenClassification, FlaxBertForTokenClassification,
FlaxBertForQuestionAnswering, FlaxBertForQuestionAnswering,
) )

View File

@@ -17,6 +17,7 @@
import unittest import unittest
from transformers import FunnelTokenizer, is_torch_available from transformers import FunnelTokenizer, is_torch_available
from transformers.models.auto import get_values
from transformers.testing_utils import require_sentencepiece, require_tokenizers, require_torch, slow, torch_device from transformers.testing_utils import require_sentencepiece, require_tokenizers, require_torch, slow, torch_device
from .test_configuration_common import ConfigTester from .test_configuration_common import ConfigTester
@@ -365,7 +366,7 @@ class FunnelModelTest(ModelTesterMixin, unittest.TestCase):
inputs_dict = super()._prepare_for_class(inputs_dict, model_class, return_labels=return_labels) inputs_dict = super()._prepare_for_class(inputs_dict, model_class, return_labels=return_labels)
if return_labels: if return_labels:
if model_class in MODEL_FOR_PRETRAINING_MAPPING.values(): if model_class in get_values(MODEL_FOR_PRETRAINING_MAPPING):
inputs_dict["labels"] = torch.zeros( inputs_dict["labels"] = torch.zeros(
(self.model_tester.batch_size, self.model_tester.seq_length), dtype=torch.long, device=torch_device (self.model_tester.batch_size, self.model_tester.seq_length), dtype=torch.long, device=torch_device
) )

View File

@@ -21,6 +21,7 @@ import unittest
from transformers import is_torch_available from transformers import is_torch_available
from transformers.file_utils import cached_property from transformers.file_utils import cached_property
from transformers.models.auto import get_values
from transformers.testing_utils import require_sentencepiece, require_tokenizers, require_torch, slow, torch_device from transformers.testing_utils import require_sentencepiece, require_tokenizers, require_torch, slow, torch_device
from .test_configuration_common import ConfigTester from .test_configuration_common import ConfigTester
@@ -412,7 +413,7 @@ class LEDModelTest(ModelTesterMixin, GenerationTesterMixin, unittest.TestCase):
if "labels" in inputs_dict: if "labels" in inputs_dict:
correct_outlen += 1 # loss is added to beginning correct_outlen += 1 # loss is added to beginning
# Question Answering model returns start_logits and end_logits # Question Answering model returns start_logits and end_logits
if model_class in MODEL_FOR_QUESTION_ANSWERING_MAPPING.values(): if model_class in get_values(MODEL_FOR_QUESTION_ANSWERING_MAPPING):
correct_outlen += 1 # start_logits and end_logits instead of only 1 output correct_outlen += 1 # start_logits and end_logits instead of only 1 output
if "past_key_values" in outputs: if "past_key_values" in outputs:
correct_outlen += 1 # past_key_values have been returned correct_outlen += 1 # past_key_values have been returned

View File

@@ -18,6 +18,7 @@ import copy
import unittest import unittest
from transformers import is_torch_available from transformers import is_torch_available
from transformers.models.auto import get_values
from transformers.testing_utils import require_torch, slow, torch_device from transformers.testing_utils import require_torch, slow, torch_device
from .test_configuration_common import ConfigTester from .test_configuration_common import ConfigTester
@@ -532,11 +533,11 @@ class LxmertModelTest(ModelTesterMixin, unittest.TestCase):
inputs_dict = copy.deepcopy(inputs_dict) inputs_dict = copy.deepcopy(inputs_dict)
if return_labels: if return_labels:
if model_class in MODEL_FOR_QUESTION_ANSWERING_MAPPING.values(): if model_class in get_values(MODEL_FOR_QUESTION_ANSWERING_MAPPING):
inputs_dict["labels"] = torch.zeros( inputs_dict["labels"] = torch.zeros(
self.model_tester.batch_size, dtype=torch.long, device=torch_device self.model_tester.batch_size, dtype=torch.long, device=torch_device
) )
elif model_class in MODEL_FOR_PRETRAINING_MAPPING.values(): elif model_class in get_values(MODEL_FOR_PRETRAINING_MAPPING):
# special case for models like BERT that use multi-loss training for PreTraining # special case for models like BERT that use multi-loss training for PreTraining
inputs_dict["labels"] = torch.zeros( inputs_dict["labels"] = torch.zeros(
(self.model_tester.batch_size, self.model_tester.seq_length), dtype=torch.long, device=torch_device (self.model_tester.batch_size, self.model_tester.seq_length), dtype=torch.long, device=torch_device

View File

@@ -21,6 +21,7 @@ import os
import unittest import unittest
from transformers import is_torch_available from transformers import is_torch_available
from transformers.models.auto import get_values
from transformers.testing_utils import require_sentencepiece, require_tokenizers, require_torch, slow, torch_device from transformers.testing_utils import require_sentencepiece, require_tokenizers, require_torch, slow, torch_device
from .test_configuration_common import ConfigTester from .test_configuration_common import ConfigTester
@@ -291,7 +292,7 @@ class MegatronBertModelTest(ModelTesterMixin, unittest.TestCase):
inputs_dict = super()._prepare_for_class(inputs_dict, model_class, return_labels=return_labels) inputs_dict = super()._prepare_for_class(inputs_dict, model_class, return_labels=return_labels)
if return_labels: if return_labels:
if model_class in MODEL_FOR_PRETRAINING_MAPPING.values(): if model_class in get_values(MODEL_FOR_PRETRAINING_MAPPING):
inputs_dict["labels"] = torch.zeros( inputs_dict["labels"] = torch.zeros(
(self.model_tester.batch_size, self.model_tester.seq_length), dtype=torch.long, device=torch_device (self.model_tester.batch_size, self.model_tester.seq_length), dtype=torch.long, device=torch_device
) )

View File

@@ -17,6 +17,7 @@
import unittest import unittest
from transformers import is_torch_available from transformers import is_torch_available
from transformers.models.auto import get_values
from transformers.testing_utils import require_sentencepiece, require_tokenizers, require_torch, slow, torch_device from transformers.testing_utils import require_sentencepiece, require_tokenizers, require_torch, slow, torch_device
from .test_configuration_common import ConfigTester from .test_configuration_common import ConfigTester
@@ -272,7 +273,7 @@ class MobileBertModelTest(ModelTesterMixin, unittest.TestCase):
inputs_dict = super()._prepare_for_class(inputs_dict, model_class, return_labels=return_labels) inputs_dict = super()._prepare_for_class(inputs_dict, model_class, return_labels=return_labels)
if return_labels: if return_labels:
if model_class in MODEL_FOR_PRETRAINING_MAPPING.values(): if model_class in get_values(MODEL_FOR_PRETRAINING_MAPPING):
inputs_dict["labels"] = torch.zeros( inputs_dict["labels"] = torch.zeros(
(self.model_tester.batch_size, self.model_tester.seq_length), dtype=torch.long, device=torch_device (self.model_tester.batch_size, self.model_tester.seq_length), dtype=torch.long, device=torch_device
) )

View File

@@ -32,6 +32,7 @@ from transformers import (
is_torch_available, is_torch_available,
) )
from transformers.file_utils import cached_property from transformers.file_utils import cached_property
from transformers.models.auto import get_values
from transformers.testing_utils import require_scatter, require_torch, slow, torch_device from transformers.testing_utils import require_scatter, require_torch, slow, torch_device
from .test_configuration_common import ConfigTester from .test_configuration_common import ConfigTester
@@ -425,7 +426,7 @@ class TapasModelTest(ModelTesterMixin, unittest.TestCase):
def _prepare_for_class(self, inputs_dict, model_class, return_labels=False): def _prepare_for_class(self, inputs_dict, model_class, return_labels=False):
inputs_dict = copy.deepcopy(inputs_dict) inputs_dict = copy.deepcopy(inputs_dict)
if model_class in MODEL_FOR_MULTIPLE_CHOICE_MAPPING.values(): if model_class in get_values(MODEL_FOR_MULTIPLE_CHOICE_MAPPING):
inputs_dict = { inputs_dict = {
k: v.unsqueeze(1).expand(-1, self.model_tester.num_choices, -1).contiguous() k: v.unsqueeze(1).expand(-1, self.model_tester.num_choices, -1).contiguous()
if isinstance(v, torch.Tensor) and v.ndim > 1 if isinstance(v, torch.Tensor) and v.ndim > 1
@@ -434,9 +435,9 @@ class TapasModelTest(ModelTesterMixin, unittest.TestCase):
} }
if return_labels: if return_labels:
if model_class in MODEL_FOR_MULTIPLE_CHOICE_MAPPING.values(): if model_class in get_values(MODEL_FOR_MULTIPLE_CHOICE_MAPPING):
inputs_dict["labels"] = torch.ones(self.model_tester.batch_size, dtype=torch.long, device=torch_device) inputs_dict["labels"] = torch.ones(self.model_tester.batch_size, dtype=torch.long, device=torch_device)
elif model_class in MODEL_FOR_TABLE_QUESTION_ANSWERING_MAPPING.values(): elif model_class in get_values(MODEL_FOR_TABLE_QUESTION_ANSWERING_MAPPING):
inputs_dict["labels"] = torch.zeros( inputs_dict["labels"] = torch.zeros(
(self.model_tester.batch_size, self.model_tester.seq_length), dtype=torch.long, device=torch_device (self.model_tester.batch_size, self.model_tester.seq_length), dtype=torch.long, device=torch_device
) )
@@ -457,17 +458,17 @@ class TapasModelTest(ModelTesterMixin, unittest.TestCase):
self.model_tester.batch_size, dtype=torch.float, device=torch_device self.model_tester.batch_size, dtype=torch.float, device=torch_device
) )
elif model_class in [ elif model_class in [
*MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING.values(), *get_values(MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING),
*MODEL_FOR_NEXT_SENTENCE_PREDICTION_MAPPING.values(), *get_values(MODEL_FOR_NEXT_SENTENCE_PREDICTION_MAPPING),
]: ]:
inputs_dict["labels"] = torch.zeros( inputs_dict["labels"] = torch.zeros(
self.model_tester.batch_size, dtype=torch.long, device=torch_device self.model_tester.batch_size, dtype=torch.long, device=torch_device
) )
elif model_class in [ elif model_class in [
*MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING.values(), *get_values(MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING),
*MODEL_FOR_CAUSAL_LM_MAPPING.values(), *get_values(MODEL_FOR_CAUSAL_LM_MAPPING),
*MODEL_FOR_MASKED_LM_MAPPING.values(), *get_values(MODEL_FOR_MASKED_LM_MAPPING),
*MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING.values(), *get_values(MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING),
]: ]:
inputs_dict["labels"] = torch.zeros( inputs_dict["labels"] = torch.zeros(
(self.model_tester.batch_size, self.model_tester.seq_length), dtype=torch.long, device=torch_device (self.model_tester.batch_size, self.model_tester.seq_length), dtype=torch.long, device=torch_device

View File

@@ -17,6 +17,7 @@
import unittest import unittest
from transformers import AlbertConfig, is_tf_available from transformers import AlbertConfig, is_tf_available
from transformers.models.auto import get_values
from transformers.testing_utils import require_tf, slow from transformers.testing_utils import require_tf, slow
from .test_configuration_common import ConfigTester from .test_configuration_common import ConfigTester
@@ -249,7 +250,7 @@ class TFAlbertModelTest(TFModelTesterMixin, unittest.TestCase):
inputs_dict = super()._prepare_for_class(inputs_dict, model_class, return_labels=return_labels) inputs_dict = super()._prepare_for_class(inputs_dict, model_class, return_labels=return_labels)
if return_labels: if return_labels:
if model_class in TF_MODEL_FOR_PRETRAINING_MAPPING.values(): if model_class in get_values(TF_MODEL_FOR_PRETRAINING_MAPPING):
inputs_dict["sentence_order_label"] = tf.zeros(self.model_tester.batch_size, dtype=tf.int32) inputs_dict["sentence_order_label"] = tf.zeros(self.model_tester.batch_size, dtype=tf.int32)
return inputs_dict return inputs_dict

View File

@@ -13,7 +13,8 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
import copy
import tempfile
import unittest import unittest
from transformers import is_tf_available from transformers import is_tf_available
@@ -39,6 +40,8 @@ if is_tf_available():
TFBertForQuestionAnswering, TFBertForQuestionAnswering,
TFBertForSequenceClassification, TFBertForSequenceClassification,
TFBertModel, TFBertModel,
TFFunnelBaseModel,
TFFunnelModel,
TFGPT2LMHeadModel, TFGPT2LMHeadModel,
TFRobertaForMaskedLM, TFRobertaForMaskedLM,
TFT5ForConditionalGeneration, TFT5ForConditionalGeneration,
@@ -176,6 +179,21 @@ class TFAutoModelTest(unittest.TestCase):
self.assertEqual(model.num_parameters(), 14410) self.assertEqual(model.num_parameters(), 14410)
self.assertEqual(model.num_parameters(only_trainable=True), 14410) self.assertEqual(model.num_parameters(only_trainable=True), 14410)
def test_from_pretrained_with_tuple_values(self):
    """Check that TFAutoModel copes with a config mapped to several model classes.

    For the auto model mapping, FunnelConfig has two models: FunnelModel and
    FunnelBaseModel. The test expects the class that gets built to follow the
    ``architectures`` entry of the config.
    """
    # The reference checkpoint is expected to come back as the full Funnel model.
    full_model = TFAutoModel.from_pretrained("sgugger/funnel-random-tiny")
    self.assertIsInstance(full_model, TFFunnelModel)

    # Edit a deep copy so the config attached to the loaded model is not mutated.
    base_config = copy.deepcopy(full_model.config)
    base_config.architectures = ["FunnelBaseModel"]
    base_model = TFAutoModel.from_config(base_config)
    self.assertIsInstance(base_model, TFFunnelBaseModel)

    # The selected class must also survive a save / reload round trip.
    with tempfile.TemporaryDirectory() as tmp_dir:
        base_model.save_pretrained(tmp_dir)
        reloaded_model = TFAutoModel.from_pretrained(tmp_dir)
        self.assertIsInstance(reloaded_model, TFFunnelBaseModel)
def test_parents_and_children_in_mappings(self): def test_parents_and_children_in_mappings(self):
# Test that the children are placed before the parents in the mappings, as the `instanceof` will be triggered # Test that the children are placed before the parents in the mappings, as the `instanceof` will be triggered
# by the parents and will return the wrong configuration type when using auto models # by the parents and will return the wrong configuration type when using auto models
@@ -197,4 +215,12 @@ class TFAutoModelTest(unittest.TestCase):
for parent_config, parent_model in mapping[: index + 1]: for parent_config, parent_model in mapping[: index + 1]:
with self.subTest(msg=f"Testing if {child_config.__name__} is child of {parent_config.__name__}"): with self.subTest(msg=f"Testing if {child_config.__name__} is child of {parent_config.__name__}"):
self.assertFalse(issubclass(child_config, parent_config)) self.assertFalse(issubclass(child_config, parent_config))
self.assertFalse(issubclass(child_model, parent_model))
# Tuplify child_model and parent_model since some of them could be tuples.
if not isinstance(child_model, (list, tuple)):
child_model = (child_model,)
if not isinstance(parent_model, (list, tuple)):
parent_model = (parent_model,)
for child, parent in [(a, b) for a in child_model for b in parent_model]:
assert not issubclass(child, parent), f"{child.__name__} is child of {parent.__name__}"

View File

@@ -17,6 +17,7 @@
import unittest import unittest
from transformers import BertConfig, is_tf_available from transformers import BertConfig, is_tf_available
from transformers.models.auto import get_values
from transformers.testing_utils import require_tf, slow from transformers.testing_utils import require_tf, slow
from .test_configuration_common import ConfigTester from .test_configuration_common import ConfigTester
@@ -282,7 +283,7 @@ class TFBertModelTest(TFModelTesterMixin, unittest.TestCase):
inputs_dict = super()._prepare_for_class(inputs_dict, model_class, return_labels=return_labels) inputs_dict = super()._prepare_for_class(inputs_dict, model_class, return_labels=return_labels)
if return_labels: if return_labels:
if model_class in TF_MODEL_FOR_PRETRAINING_MAPPING.values(): if model_class in get_values(TF_MODEL_FOR_PRETRAINING_MAPPING):
inputs_dict["next_sentence_label"] = tf.zeros(self.model_tester.batch_size, dtype=tf.int32) inputs_dict["next_sentence_label"] = tf.zeros(self.model_tester.batch_size, dtype=tf.int32)
return inputs_dict return inputs_dict

View File

@@ -25,6 +25,7 @@ from importlib import import_module
from typing import List, Tuple from typing import List, Tuple
from transformers import is_tf_available from transformers import is_tf_available
from transformers.models.auto import get_values
from transformers.testing_utils import ( from transformers.testing_utils import (
_tf_gpu_memory_limit, _tf_gpu_memory_limit,
is_pt_tf_cross_test, is_pt_tf_cross_test,
@@ -89,7 +90,7 @@ class TFModelTesterMixin:
def _prepare_for_class(self, inputs_dict, model_class, return_labels=False) -> dict: def _prepare_for_class(self, inputs_dict, model_class, return_labels=False) -> dict:
inputs_dict = copy.deepcopy(inputs_dict) inputs_dict = copy.deepcopy(inputs_dict)
if model_class in TF_MODEL_FOR_MULTIPLE_CHOICE_MAPPING.values(): if model_class in get_values(TF_MODEL_FOR_MULTIPLE_CHOICE_MAPPING):
inputs_dict = { inputs_dict = {
k: tf.tile(tf.expand_dims(v, 1), (1, self.model_tester.num_choices) + (1,) * (v.ndim - 1)) k: tf.tile(tf.expand_dims(v, 1), (1, self.model_tester.num_choices) + (1,) * (v.ndim - 1))
if isinstance(v, tf.Tensor) and v.ndim > 0 if isinstance(v, tf.Tensor) and v.ndim > 0
@@ -98,21 +99,21 @@ class TFModelTesterMixin:
} }
if return_labels: if return_labels:
if model_class in TF_MODEL_FOR_MULTIPLE_CHOICE_MAPPING.values(): if model_class in get_values(TF_MODEL_FOR_MULTIPLE_CHOICE_MAPPING):
inputs_dict["labels"] = tf.ones(self.model_tester.batch_size, dtype=tf.int32) inputs_dict["labels"] = tf.ones(self.model_tester.batch_size, dtype=tf.int32)
elif model_class in TF_MODEL_FOR_QUESTION_ANSWERING_MAPPING.values(): elif model_class in get_values(TF_MODEL_FOR_QUESTION_ANSWERING_MAPPING):
inputs_dict["start_positions"] = tf.zeros(self.model_tester.batch_size, dtype=tf.int32) inputs_dict["start_positions"] = tf.zeros(self.model_tester.batch_size, dtype=tf.int32)
inputs_dict["end_positions"] = tf.zeros(self.model_tester.batch_size, dtype=tf.int32) inputs_dict["end_positions"] = tf.zeros(self.model_tester.batch_size, dtype=tf.int32)
elif model_class in TF_MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING.values(): elif model_class in get_values(TF_MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING):
inputs_dict["labels"] = tf.zeros(self.model_tester.batch_size, dtype=tf.int32) inputs_dict["labels"] = tf.zeros(self.model_tester.batch_size, dtype=tf.int32)
elif model_class in TF_MODEL_FOR_NEXT_SENTENCE_PREDICTION_MAPPING.values(): elif model_class in get_values(TF_MODEL_FOR_NEXT_SENTENCE_PREDICTION_MAPPING):
inputs_dict["next_sentence_label"] = tf.zeros(self.model_tester.batch_size, dtype=tf.int32) inputs_dict["next_sentence_label"] = tf.zeros(self.model_tester.batch_size, dtype=tf.int32)
elif model_class in [ elif model_class in [
*TF_MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING.values(), *get_values(TF_MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING),
*TF_MODEL_FOR_CAUSAL_LM_MAPPING.values(), *get_values(TF_MODEL_FOR_CAUSAL_LM_MAPPING),
*TF_MODEL_FOR_MASKED_LM_MAPPING.values(), *get_values(TF_MODEL_FOR_MASKED_LM_MAPPING),
*TF_MODEL_FOR_PRETRAINING_MAPPING.values(), *get_values(TF_MODEL_FOR_PRETRAINING_MAPPING),
*TF_MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING.values(), *get_values(TF_MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING),
]: ]:
inputs_dict["labels"] = tf.zeros( inputs_dict["labels"] = tf.zeros(
(self.model_tester.batch_size, self.model_tester.seq_length), dtype=tf.int32 (self.model_tester.batch_size, self.model_tester.seq_length), dtype=tf.int32
@@ -580,7 +581,7 @@ class TFModelTesterMixin:
), ),
"input_ids": tf.keras.Input(batch_shape=(2, max_input), name="input_ids", dtype="int32"), "input_ids": tf.keras.Input(batch_shape=(2, max_input), name="input_ids", dtype="int32"),
} }
elif model_class in TF_MODEL_FOR_MULTIPLE_CHOICE_MAPPING.values(): elif model_class in get_values(TF_MODEL_FOR_MULTIPLE_CHOICE_MAPPING):
input_ids = tf.keras.Input(batch_shape=(4, 2, max_input), name="input_ids", dtype="int32") input_ids = tf.keras.Input(batch_shape=(4, 2, max_input), name="input_ids", dtype="int32")
else: else:
input_ids = tf.keras.Input(batch_shape=(2, max_input), name="input_ids", dtype="int32") input_ids = tf.keras.Input(batch_shape=(2, max_input), name="input_ids", dtype="int32")
@@ -796,9 +797,9 @@ class TFModelTesterMixin:
def test_model_common_attributes(self): def test_model_common_attributes(self):
config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common() config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
list_lm_models = ( list_lm_models = (
list(TF_MODEL_FOR_CAUSAL_LM_MAPPING.values()) get_values(TF_MODEL_FOR_CAUSAL_LM_MAPPING)
+ list(TF_MODEL_FOR_MASKED_LM_MAPPING.values()) + get_values(TF_MODEL_FOR_MASKED_LM_MAPPING)
+ list(TF_MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING.values()) + get_values(TF_MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING)
) )
for model_class in self.all_model_classes: for model_class in self.all_model_classes:
@@ -1128,7 +1129,7 @@ class TFModelTesterMixin:
] ]
loss_size = tf.size(added_label) loss_size = tf.size(added_label)
if model.__class__ in TF_MODEL_FOR_CAUSAL_LM_MAPPING.values(): if model.__class__ in get_values(TF_MODEL_FOR_CAUSAL_LM_MAPPING):
# if loss is causal lm loss, labels are shift, so that one label per batch # if loss is causal lm loss, labels are shift, so that one label per batch
# is cut # is cut
loss_size = loss_size - self.model_tester.batch_size loss_size = loss_size - self.model_tester.batch_size

View File

@@ -19,6 +19,8 @@ import os
import re import re
from pathlib import Path from pathlib import Path
from transformers.models.auto import get_values
# All paths are set with the intent you should run this script from the root of the repo with the command # All paths are set with the intent you should run this script from the root of the repo with the command
# python utils/check_repo.py # python utils/check_repo.py
@@ -86,7 +88,6 @@ IGNORE_NON_AUTO_CONFIGURED = [
"DPRReader", "DPRReader",
"DPRSpanPredictor", "DPRSpanPredictor",
"FlaubertForQuestionAnswering", "FlaubertForQuestionAnswering",
"FunnelBaseModel",
"GPT2DoubleHeadsModel", "GPT2DoubleHeadsModel",
"OpenAIGPTDoubleHeadsModel", "OpenAIGPTDoubleHeadsModel",
"RagModel", "RagModel",
@@ -95,7 +96,6 @@ IGNORE_NON_AUTO_CONFIGURED = [
"T5Stack", "T5Stack",
"TFDPRReader", "TFDPRReader",
"TFDPRSpanPredictor", "TFDPRSpanPredictor",
"TFFunnelBaseModel",
"TFGPT2DoubleHeadsModel", "TFGPT2DoubleHeadsModel",
"TFOpenAIGPTDoubleHeadsModel", "TFOpenAIGPTDoubleHeadsModel",
"TFRagModel", "TFRagModel",
@@ -153,7 +153,7 @@ def get_model_modules():
def get_models(module): def get_models(module):
""" Get the objects in module that are models.""" """ Get the objects in module that are models."""
models = [] models = []
model_classes = (transformers.PreTrainedModel, transformers.TFPreTrainedModel) model_classes = (transformers.PreTrainedModel, transformers.TFPreTrainedModel, transformers.FlaxPreTrainedModel)
for attr_name in dir(module): for attr_name in dir(module):
if "Pretrained" in attr_name or "PreTrained" in attr_name: if "Pretrained" in attr_name or "PreTrained" in attr_name:
continue continue
@@ -249,10 +249,13 @@ def get_all_auto_configured_models():
result = set() # To avoid duplicates we concatenate all model classes in a set. result = set() # To avoid duplicates we concatenate all model classes in a set.
for attr_name in dir(transformers.models.auto.modeling_auto): for attr_name in dir(transformers.models.auto.modeling_auto):
if attr_name.startswith("MODEL_") and attr_name.endswith("MAPPING"): if attr_name.startswith("MODEL_") and attr_name.endswith("MAPPING"):
result = result | set(getattr(transformers.models.auto.modeling_auto, attr_name).values()) result = result | set(get_values(getattr(transformers.models.auto.modeling_auto, attr_name)))
for attr_name in dir(transformers.models.auto.modeling_tf_auto): for attr_name in dir(transformers.models.auto.modeling_tf_auto):
if attr_name.startswith("TF_MODEL_") and attr_name.endswith("MAPPING"): if attr_name.startswith("TF_MODEL_") and attr_name.endswith("MAPPING"):
result = result | set(getattr(transformers.models.auto.modeling_tf_auto, attr_name).values()) result = result | set(get_values(getattr(transformers.models.auto.modeling_tf_auto, attr_name)))
for attr_name in dir(transformers.models.auto.modeling_flax_auto):
if attr_name.startswith("FLAX_MODEL_") and attr_name.endswith("MAPPING"):
result = result | set(get_values(getattr(transformers.models.auto.modeling_flax_auto, attr_name)))
return [cls.__name__ for cls in result] return [cls.__name__ for cls in result]