From ba2cf5f90d2e81c97a528b73479c9cfac730f6f7 Mon Sep 17 00:00:00 2001 From: Lysandre Debut Date: Thu, 8 Apr 2021 14:36:45 -0400 Subject: [PATCH 1/8] Add fairscale and deepspeed back to the CI (#11147) * Add fairscale and deepspeed back to the CI * Add deepspeed to single GPU tests --- .github/workflows/self-scheduled.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/workflows/self-scheduled.yml b/.github/workflows/self-scheduled.yml index 3b72baea0d..c49a967d2a 100644 --- a/.github/workflows/self-scheduled.yml +++ b/.github/workflows/self-scheduled.yml @@ -34,6 +34,7 @@ jobs: apt -y update && apt install -y libsndfile1-dev pip install --upgrade pip pip install .[sklearn,testing,onnxruntime,sentencepiece,speech] + pip install deepspeed - name: Are GPUs recognized by our DL frameworks run: | @@ -156,6 +157,8 @@ jobs: apt -y update && apt install -y libsndfile1-dev pip install --upgrade pip pip install .[sklearn,testing,onnxruntime,sentencepiece,speech] + pip install fairscale + pip install deepspeed - name: Are GPUs recognized by our DL frameworks run: | From 9c9b8e707b9803a1425ed8dd2f51069b22d9230f Mon Sep 17 00:00:00 2001 From: Philipp Schmid <32632186+philschmid@users.noreply.github.com> Date: Thu, 8 Apr 2021 22:05:53 +0200 Subject: [PATCH 2/8] Updates SageMaker docs for updating DLCs (#11140) --- tests/sagemaker/README.md | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/tests/sagemaker/README.md b/tests/sagemaker/README.md index b3c9906cc5..3d8ab7c2bf 100644 --- a/tests/sagemaker/README.md +++ b/tests/sagemaker/README.md @@ -136,10 +136,7 @@ images: docker_file: !join [ docker/, *SHORT_VERSION, /, *DOCKER_PYTHON_VERSION, /, *CUDA_VERSION, /Dockerfile., *DEVICE_TYPE ] ``` -2. In the PR comment describe what test we ran and with which framework versions. Here you can copy the table from [Current Tests](#current-tests). - -TODO: Add a screenshot of PR + Text template to make it easy to open. - +2. 
In the PR comment describe what test we ran and with which framework versions. Here you can copy the table from [Current Tests](#current-tests). You can take a look at this [PR](https://github.com/aws/deep-learning-containers/pull/1016) to see which information is needed. ## Current Tests @@ -150,4 +147,4 @@ TODO: Add a screenshot of PR + Text template to make it easy to open. | pytorch-transfromers-test-2-smd | test bert finetuning using BERT from transformer lib+ PT SM DDP | SageMaker createTrainingJob | 16 | train_runtime, eval_accuracy & eval_loss | | pytorch-transfromers-test-1-smp | test roberta finetuning using BERT from transformer lib+ PT SM MP | SageMaker createTrainingJob | 8 | train_runtime, eval_accuracy & eval_loss | | tensorflow-transfromers-test-single | Test bert finetuning using BERT from transformer lib+TF | SageMaker createTrainingJob | 1 | train_runtime, eval_accuracy & eval_loss | -| tensorflow-transfromers-test-2-smd | test bert finetuning using BERT from transformer lib+ TF SM DDP | SageMaker createTrainingJob | 16 | train_runtime, eval_accuracy & eval_loss | \ No newline at end of file +| tensorflow-transfromers-test-2-smd | test bert finetuning using BERT from transformer lib+ TF SM DDP | SageMaker createTrainingJob | 16 | train_runtime, eval_accuracy & eval_loss | From dfed4ec263db3a2eb5d018b10abfaf80e339babe Mon Sep 17 00:00:00 2001 From: Sylvain Gugger <35901082+sgugger@users.noreply.github.com> Date: Thu, 8 Apr 2021 16:12:36 -0400 Subject: [PATCH 3/8] Don't duplicate logs in TensorBoard and handle --use_env (#11141) --- src/transformers/integrations.py | 8 +++++--- src/transformers/training_args.py | 6 ++++++ 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/src/transformers/integrations.py b/src/transformers/integrations.py index ffa78bf3f0..65824c25ca 100644 --- a/src/transformers/integrations.py +++ b/src/transformers/integrations.py @@ -604,9 +604,11 @@ class TensorBoardCallback(TrainerCallback): 
self.tb_writer.add_hparams(args.to_sanitized_dict(), metric_dict={}) def on_log(self, args, state, control, logs=None, **kwargs): - if state.is_world_process_zero: - if self.tb_writer is None: - self._init_summary_writer(args) + if not state.is_world_process_zero: + return + + if self.tb_writer is None: + self._init_summary_writer(args) if self.tb_writer is not None: logs = rewrite_logs(logs) diff --git a/src/transformers/training_args.py b/src/transformers/training_args.py index 9e55355290..188bf92b63 100644 --- a/src/transformers/training_args.py +++ b/src/transformers/training_args.py @@ -531,6 +531,12 @@ class TrainingArguments: ) def __post_init__(self): + # Handle --use_env option in torch.distributed.launch (local_rank not passed as an arg then). + # This needs to happen before any call to self.device or self.n_gpu. + env_local_rank = int(os.environ.get("LOCAL_RANK", -1)) + if env_local_rank != -1 and env_local_rank != self.local_rank: + self.local_rank = env_local_rank + # expand paths, if not os.makedirs("~/bar") will make directory # in the current directory instead of the actual home #  see https://github.com/huggingface/transformers/issues/10628 From 6c40e49712972141c3d7aeba4ed90bb79f2bb078 Mon Sep 17 00:00:00 2001 From: Andrea Cappelli Date: Thu, 8 Apr 2021 22:12:49 +0200 Subject: [PATCH 4/8] Run mlm pad to multiple for fp16 (#11128) * Add mlm collator pad to multiple option (#10627) * Use padding to 8x in run mlm (#10627) --- examples/language-modeling/run_mlm.py | 7 +++- src/transformers/data/data_collator.py | 13 ++++-- tests/test_data_collator.py | 56 ++++++++++++++++++++++++-- 3 files changed, 67 insertions(+), 9 deletions(-) diff --git a/examples/language-modeling/run_mlm.py b/examples/language-modeling/run_mlm.py index 4fd3c4f217..2934fb0c23 100755 --- a/examples/language-modeling/run_mlm.py +++ b/examples/language-modeling/run_mlm.py @@ -422,7 +422,12 @@ def main(): # Data collator # This one will take care of randomly masking the tokens. 
- data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm_probability=data_args.mlm_probability) + pad_to_multiple_of_8 = data_args.line_by_line and training_args.fp16 and not data_args.pad_to_max_length + data_collator = DataCollatorForLanguageModeling( + tokenizer=tokenizer, + mlm_probability=data_args.mlm_probability, + pad_to_multiple_of=8 if pad_to_multiple_of_8 else None, + ) # Initialize our Trainer trainer = Trainer( diff --git a/src/transformers/data/data_collator.py b/src/transformers/data/data_collator.py index 94eaade7b1..9915eb5a5f 100644 --- a/src/transformers/data/data_collator.py +++ b/src/transformers/data/data_collator.py @@ -192,7 +192,7 @@ class DataCollatorForTokenClassification: return batch -def _collate_batch(examples, tokenizer): +def _collate_batch(examples, tokenizer, pad_to_multiple_of: Optional[int] = None): """Collate `examples` into a batch, using the information in `tokenizer` for padding if necessary.""" # Tensorize if necessary. if isinstance(examples[0], (list, tuple)): @@ -201,7 +201,7 @@ def _collate_batch(examples, tokenizer): # Check if padding is necessary. length_of_first = examples[0].size(0) are_tensors_same_length = all(x.size(0) == length_of_first for x in examples) - if are_tensors_same_length: + if are_tensors_same_length and (pad_to_multiple_of is None or length_of_first % pad_to_multiple_of == 0): return torch.stack(examples, dim=0) # If yes, check if we have a `pad_token`. @@ -213,6 +213,8 @@ def _collate_batch(examples, tokenizer): # Creating the full tensor and filling it with our data. 
max_length = max(x.size(0) for x in examples) + if pad_to_multiple_of is not None and (max_length % pad_to_multiple_of != 0): + max_length = ((max_length // pad_to_multiple_of) + 1) * pad_to_multiple_of result = examples[0].new_full([len(examples), max_length], tokenizer.pad_token_id) for i, example in enumerate(examples): if tokenizer.padding_side == "right": @@ -311,6 +313,8 @@ class DataCollatorForLanguageModeling: non-masked tokens and the value to predict for the masked token. mlm_probability (:obj:`float`, `optional`, defaults to 0.15): The probability with which to (randomly) mask tokens in the input, when :obj:`mlm` is set to :obj:`True`. + pad_to_multiple_of (:obj:`int`, `optional`): + If set will pad the sequence to a multiple of the provided value. .. note:: @@ -323,6 +327,7 @@ class DataCollatorForLanguageModeling: tokenizer: PreTrainedTokenizerBase mlm: bool = True mlm_probability: float = 0.15 + pad_to_multiple_of: Optional[int] = None def __post_init__(self): if self.mlm and self.tokenizer.mask_token is None: @@ -336,9 +341,9 @@ class DataCollatorForLanguageModeling: ) -> Dict[str, torch.Tensor]: # Handle dict or lists with proper padding and conversion to tensor. if isinstance(examples[0], (dict, BatchEncoding)): - batch = self.tokenizer.pad(examples, return_tensors="pt") + batch = self.tokenizer.pad(examples, return_tensors="pt", pad_to_multiple_of=self.pad_to_multiple_of) else: - batch = {"input_ids": _collate_batch(examples, self.tokenizer)} + batch = {"input_ids": _collate_batch(examples, self.tokenizer, pad_to_multiple_of=self.pad_to_multiple_of)} # If special token mask has been preprocessed, pop it from the dict. 
special_tokens_mask = batch.pop("special_tokens_mask", None) diff --git a/tests/test_data_collator.py b/tests/test_data_collator.py index be138314d3..e9d363229f 100644 --- a/tests/test_data_collator.py +++ b/tests/test_data_collator.py @@ -146,11 +146,8 @@ class DataCollatorIntegrationTest(unittest.TestCase): self.assertEqual(batch["labels"].shape, torch.Size([2, 6])) self.assertEqual(batch["labels"][0].tolist(), [0, 1, 2] + [-1] * 3) - def test_data_collator_for_language_modeling(self): + def _test_no_pad_and_pad(self, no_pad_features, pad_features): tokenizer = BertTokenizer(self.vocab_file) - no_pad_features = [{"input_ids": list(range(10))}, {"input_ids": list(range(10))}] - pad_features = [{"input_ids": list(range(5))}, {"input_ids": list(range(10))}] - data_collator = DataCollatorForLanguageModeling(tokenizer, mlm=False) batch = data_collator(no_pad_features) self.assertEqual(batch["input_ids"].shape, torch.Size((2, 10))) @@ -160,6 +157,15 @@ class DataCollatorIntegrationTest(unittest.TestCase): self.assertEqual(batch["input_ids"].shape, torch.Size((2, 10))) self.assertEqual(batch["labels"].shape, torch.Size((2, 10))) + data_collator = DataCollatorForLanguageModeling(tokenizer, mlm=False, pad_to_multiple_of=8) + batch = data_collator(no_pad_features) + self.assertEqual(batch["input_ids"].shape, torch.Size((2, 16))) + self.assertEqual(batch["labels"].shape, torch.Size((2, 16))) + + batch = data_collator(pad_features) + self.assertEqual(batch["input_ids"].shape, torch.Size((2, 16))) + self.assertEqual(batch["labels"].shape, torch.Size((2, 16))) + tokenizer._pad_token = None data_collator = DataCollatorForLanguageModeling(tokenizer, mlm=False) with self.assertRaises(ValueError): @@ -185,6 +191,32 @@ class DataCollatorIntegrationTest(unittest.TestCase): self.assertTrue(torch.any(masked_tokens)) self.assertTrue(all(x == -100 for x in batch["labels"][~masked_tokens].tolist())) + data_collator = DataCollatorForLanguageModeling(tokenizer, pad_to_multiple_of=8) + 
batch = data_collator(no_pad_features) + self.assertEqual(batch["input_ids"].shape, torch.Size((2, 16))) + self.assertEqual(batch["labels"].shape, torch.Size((2, 16))) + + masked_tokens = batch["input_ids"] == tokenizer.mask_token_id + self.assertTrue(torch.any(masked_tokens)) + self.assertTrue(all(x == -100 for x in batch["labels"][~masked_tokens].tolist())) + + batch = data_collator(pad_features) + self.assertEqual(batch["input_ids"].shape, torch.Size((2, 16))) + self.assertEqual(batch["labels"].shape, torch.Size((2, 16))) + + masked_tokens = batch["input_ids"] == tokenizer.mask_token_id + self.assertTrue(torch.any(masked_tokens)) + self.assertTrue(all(x == -100 for x in batch["labels"][~masked_tokens].tolist())) + + def test_data_collator_for_language_modeling(self): + no_pad_features = [{"input_ids": list(range(10))}, {"input_ids": list(range(10))}] + pad_features = [{"input_ids": list(range(5))}, {"input_ids": list(range(10))}] + self._test_no_pad_and_pad(no_pad_features, pad_features) + + no_pad_features = [list(range(10)), list(range(10))] + pad_features = [list(range(5)), list(range(10))] + self._test_no_pad_and_pad(no_pad_features, pad_features) + def test_plm(self): tokenizer = BertTokenizer(self.vocab_file) no_pad_features = [{"input_ids": list(range(10))}, {"input_ids": list(range(10))}] @@ -225,6 +257,14 @@ class DataCollatorIntegrationTest(unittest.TestCase): self.assertEqual(batch["labels"].shape, torch.Size((2, 5))) self.assertEqual(batch["next_sentence_label"].shape, torch.Size((2,))) + data_collator = DataCollatorForLanguageModeling(tokenizer, pad_to_multiple_of=8) + batch = data_collator(features) + + self.assertEqual(batch["input_ids"].shape, torch.Size((2, 8))) + self.assertEqual(batch["token_type_ids"].shape, torch.Size((2, 8))) + self.assertEqual(batch["labels"].shape, torch.Size((2, 8))) + self.assertEqual(batch["next_sentence_label"].shape, torch.Size((2,))) + def test_sop(self): tokenizer = BertTokenizer(self.vocab_file) features = [ @@ 
-242,3 +282,11 @@ class DataCollatorIntegrationTest(unittest.TestCase): self.assertEqual(batch["token_type_ids"].shape, torch.Size((2, 5))) self.assertEqual(batch["labels"].shape, torch.Size((2, 5))) self.assertEqual(batch["sentence_order_label"].shape, torch.Size((2,))) + + data_collator = DataCollatorForLanguageModeling(tokenizer, pad_to_multiple_of=8) + batch = data_collator(features) + + self.assertEqual(batch["input_ids"].shape, torch.Size((2, 8))) + self.assertEqual(batch["token_type_ids"].shape, torch.Size((2, 8))) + self.assertEqual(batch["labels"].shape, torch.Size((2, 8))) + self.assertEqual(batch["sentence_order_label"].shape, torch.Size((2,))) From 66446909b236c17498276857fa88e23d2c91d004 Mon Sep 17 00:00:00 2001 From: Stas Bekman Date: Thu, 8 Apr 2021 13:13:17 -0700 Subject: [PATCH 5/8] [tests] relocate core integration tests (#11146) * relocate core integration tests * add sys.path context manager * cleanup * try * try2 * fix path * doc * style * add dep * add 2 more deps --- docs/source/main_classes/trainer.rst | 4 ++-- docs/source/testing.rst | 23 ++++++++++++++++--- setup.py | 13 +++++++---- src/transformers/dependency_versions_table.py | 9 +++++--- src/transformers/testing_utils.py | 22 ++++++++++++++++++ .../deepspeed/ds_config_zero2.json | 0 .../deepspeed/ds_config_zero3.json | 0 .../deepspeed/test_deepspeed.py | 11 +++++---- .../extended}/test_trainer_ext.py | 5 ++-- 9 files changed, 68 insertions(+), 19 deletions(-) rename {examples/tests => tests}/deepspeed/ds_config_zero2.json (100%) rename {examples/tests => tests}/deepspeed/ds_config_zero3.json (100%) rename {examples/tests => tests}/deepspeed/test_deepspeed.py (98%) rename {examples/tests/trainer => tests/extended}/test_trainer_ext.py (98%) diff --git a/docs/source/main_classes/trainer.rst b/docs/source/main_classes/trainer.rst index 2e323aaa28..bc9f248827 100644 --- a/docs/source/main_classes/trainer.rst +++ b/docs/source/main_classes/trainer.rst @@ -525,7 +525,7 @@ Here is an example of 
running ``run_translation.py`` under DeepSpeed deploying a .. code-block:: bash deepspeed examples/seq2seq/run_translation.py \ - --deepspeed examples/tests/deepspeed/ds_config.json \ + --deepspeed tests/deepspeed/ds_config.json \ --model_name_or_path t5-small --per_device_train_batch_size 1 \ --output_dir output_dir --overwrite_output_dir --fp16 \ --do_train --max_train_samples 500 --num_train_epochs 1 \ @@ -550,7 +550,7 @@ To deploy DeepSpeed with one GPU adjust the :class:`~transformers.Trainer` comma .. code-block:: bash deepspeed --num_gpus=1 examples/seq2seq/run_translation.py \ - --deepspeed examples/tests/deepspeed/ds_config.json \ + --deepspeed tests/deepspeed/ds_config.json \ --model_name_or_path t5-small --per_device_train_batch_size 1 \ --output_dir output_dir --overwrite_output_dir --fp16 \ --do_train --max_train_samples 500 --num_train_epochs 1 \ diff --git a/docs/source/testing.rst b/docs/source/testing.rst index 10ad3e2311..9a4efb06fc 100644 --- a/docs/source/testing.rst +++ b/docs/source/testing.rst @@ -1,4 +1,4 @@ -.. +.. Copyright 2020 The HuggingFace Team. All rights reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with @@ -388,7 +388,7 @@ For a single or a group of tests via ``pytest`` (after ``pip install pytest-pspe .. code-block:: bash - pytest --pspec tests/test_optimization.py + pytest --pspec tests/test_optimization.py @@ -672,7 +672,7 @@ and it will list: test_this2.py::test_floor[integer-1-1.0] test_this2.py::test_floor[negative--1.5--2.0] - test_this2.py::test_floor[large fraction-1.6-1] + test_this2.py::test_floor[large fraction-1.6-1] So now you can run just the specific test: @@ -795,6 +795,23 @@ leave any data in there. otherwise. 
+Temporary sys.path override +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +If you need to temporarily override ``sys.path`` to import from another test for example, you can use the +``ExtendSysPath`` context manager. Example: + + +.. code-block:: python + + import os + from transformers.testing_utils import ExtendSysPath + bindir = os.path.abspath(os.path.dirname(__file__)) + with ExtendSysPath(f"{bindir}/.."): + from test_trainer import TrainerIntegrationCommon # noqa + + + Skipping tests ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/setup.py b/setup.py index 45df48f68b..c3583a3070 100644 --- a/setup.py +++ b/setup.py @@ -19,7 +19,7 @@ To create the package for pypi. 1. Run `make pre-release` (or `make pre-patch` for a patch release) then run `make fix-copies` to fix the index of the documentation. - + 2. Run Tests for Amazon Sagemaker. The documentation is located in `./tests/sagemaker/README.md`, otherwise @philschmid. 3. Unpin specific versions from setup.py that use a git install. @@ -85,6 +85,7 @@ if stale_egg_info.exists(): # 1. all dependencies should be listed here with their version requirements if any # 2. 
once modified, run: `make deps_table_update` to update src/transformers/dependency_versions_table.py _deps = [ + "Pillow", "black>=20.8b1", "cookiecutter==1.7.2", "dataclasses", @@ -102,13 +103,13 @@ _deps = [ "jax>=0.2.8", "jaxlib>=0.1.59", "keras2onnx", + "nltk", "numpy>=1.17", "onnxconverter-common", "onnxruntime-tools>=1.4.2", "onnxruntime>=1.4.0", "packaging", "parameterized", - "Pillow", "protobuf", "psutil", "pydantic", @@ -119,15 +120,18 @@ _deps = [ "recommonmark", "regex!=2019.12.17", "requests", + "rouge-score", + "sacrebleu>=1.4.12", "sacremoses", + "sagemaker>=2.31.0", "scikit-learn", "sentencepiece==0.1.91", "soundfile", "sphinx-copybutton", "sphinx-markdown-tables", "sphinx-rtd-theme==0.4.3", # sphinx-rtd-theme==0.5.0 introduced big changes in the style. - "sphinxext-opengraph==0.4.1", "sphinx==3.2.1", + "sphinxext-opengraph==0.4.1", "starlette", "tensorflow-cpu>=2.3", "tensorflow>=2.3", @@ -139,7 +143,6 @@ _deps = [ "unidic>=1.0.2", "unidic_lite>=1.0.7", "uvicorn", - "sagemaker>=2.31.0", ] @@ -238,7 +241,7 @@ extras["vision"] = deps_list("Pillow") extras["sentencepiece"] = deps_list("sentencepiece", "protobuf") extras["testing"] = ( deps_list( - "pytest", "pytest-xdist", "timeout-decorator", "parameterized", "psutil", "datasets", "pytest-sugar", "black" + "pytest", "pytest-xdist", "timeout-decorator", "parameterized", "psutil", "datasets", "pytest-sugar", "black", "sacrebleu", "rouge-score", "nltk" ) + extras["retrieval"] + extras["modelcreation"] diff --git a/src/transformers/dependency_versions_table.py b/src/transformers/dependency_versions_table.py index b53407ad3e..43f4c028fe 100644 --- a/src/transformers/dependency_versions_table.py +++ b/src/transformers/dependency_versions_table.py @@ -2,6 +2,7 @@ # 1. modify the `_deps` dict in setup.py # 2. 
run `make deps_table_update`` deps = { + "Pillow": "Pillow", "black": "black>=20.8b1", "cookiecutter": "cookiecutter==1.7.2", "dataclasses": "dataclasses", @@ -19,13 +20,13 @@ deps = { "jax": "jax>=0.2.8", "jaxlib": "jaxlib>=0.1.59", "keras2onnx": "keras2onnx", + "nltk": "nltk", "numpy": "numpy>=1.17", "onnxconverter-common": "onnxconverter-common", "onnxruntime-tools": "onnxruntime-tools>=1.4.2", "onnxruntime": "onnxruntime>=1.4.0", "packaging": "packaging", "parameterized": "parameterized", - "Pillow": "Pillow", "protobuf": "protobuf", "psutil": "psutil", "pydantic": "pydantic", @@ -36,15 +37,18 @@ deps = { "recommonmark": "recommonmark", "regex": "regex!=2019.12.17", "requests": "requests", + "rouge-score": "rouge-score", + "sacrebleu": "sacrebleu>=1.4.12", "sacremoses": "sacremoses", + "sagemaker": "sagemaker>=2.31.0", "scikit-learn": "scikit-learn", "sentencepiece": "sentencepiece==0.1.91", "soundfile": "soundfile", "sphinx-copybutton": "sphinx-copybutton", "sphinx-markdown-tables": "sphinx-markdown-tables", "sphinx-rtd-theme": "sphinx-rtd-theme==0.4.3", - "sphinxext-opengraph": "sphinxext-opengraph==0.4.1", "sphinx": "sphinx==3.2.1", + "sphinxext-opengraph": "sphinxext-opengraph==0.4.1", "starlette": "starlette", "tensorflow-cpu": "tensorflow-cpu>=2.3", "tensorflow": "tensorflow>=2.3", @@ -56,5 +60,4 @@ deps = { "unidic": "unidic>=1.0.2", "unidic_lite": "unidic_lite>=1.0.7", "uvicorn": "uvicorn", - "sagemaker": "sagemaker>=2.31.0", } diff --git a/src/transformers/testing_utils.py b/src/transformers/testing_utils.py index 3f1273a7c9..a5c4e7d2b8 100644 --- a/src/transformers/testing_utils.py +++ b/src/transformers/testing_utils.py @@ -24,6 +24,7 @@ import unittest from distutils.util import strtobool from io import StringIO from pathlib import Path +from typing import Iterator, Union from .file_utils import ( is_datasets_available, @@ -621,6 +622,27 @@ class CaptureLogger: return f"captured: {self.out}\n" +@contextlib.contextmanager +# adapted from 
https://stackoverflow.com/a/64789046/9201239 +def ExtendSysPath(path: Union[str, os.PathLike]) -> Iterator[None]: + """ + Temporarily add the given path to `sys.path`. + + Usage :: + + with ExtendSysPath('/path/to/dir'): + mymodule = importlib.import_module('mymodule') + + """ + + path = os.fspath(path) + try: + sys.path.insert(0, path) + yield + finally: + sys.path.remove(path) + + class TestCasePlus(unittest.TestCase): """ This class extends `unittest.TestCase` with additional features. diff --git a/examples/tests/deepspeed/ds_config_zero2.json b/tests/deepspeed/ds_config_zero2.json similarity index 100% rename from examples/tests/deepspeed/ds_config_zero2.json rename to tests/deepspeed/ds_config_zero2.json diff --git a/examples/tests/deepspeed/ds_config_zero3.json b/tests/deepspeed/ds_config_zero3.json similarity index 100% rename from examples/tests/deepspeed/ds_config_zero3.json rename to tests/deepspeed/ds_config_zero3.json diff --git a/examples/tests/deepspeed/test_deepspeed.py b/tests/deepspeed/test_deepspeed.py similarity index 98% rename from examples/tests/deepspeed/test_deepspeed.py rename to tests/deepspeed/test_deepspeed.py index b9c9b46167..9baaf3085b 100644 --- a/examples/tests/deepspeed/test_deepspeed.py +++ b/tests/deepspeed/test_deepspeed.py @@ -16,16 +16,16 @@ import dataclasses import io import json import os -import sys import unittest from copy import deepcopy from parameterized import parameterized -from transformers import TrainingArguments +from transformers import TrainingArguments, is_torch_available from transformers.file_utils import WEIGHTS_NAME from transformers.integrations import is_deepspeed_available from transformers.testing_utils import ( CaptureLogger, + ExtendSysPath, TestCasePlus, execute_subprocess_async, get_gpu_count, @@ -38,8 +38,11 @@ from transformers.trainer_utils import set_seed bindir = os.path.abspath(os.path.dirname(__file__)) -sys.path.append(f"{bindir}/../../../tests") -from test_trainer import 
TrainerIntegrationCommon, get_regression_trainer # noqa +with ExtendSysPath(f"{bindir}/.."): + from test_trainer import TrainerIntegrationCommon # noqa + + if is_torch_available(): + from test_trainer import get_regression_trainer # noqa set_seed(42) diff --git a/examples/tests/trainer/test_trainer_ext.py b/tests/extended/test_trainer_ext.py similarity index 98% rename from examples/tests/trainer/test_trainer_ext.py rename to tests/extended/test_trainer_ext.py index 82ec2f625c..6d13f9a4cc 100644 --- a/examples/tests/trainer/test_trainer_ext.py +++ b/tests/extended/test_trainer_ext.py @@ -21,6 +21,7 @@ from unittest.mock import patch from transformers.file_utils import is_apex_available from transformers.integrations import is_fairscale_available from transformers.testing_utils import ( + ExtendSysPath, TestCasePlus, execute_subprocess_async, get_gpu_count, @@ -34,8 +35,8 @@ from transformers.trainer_utils import set_seed bindir = os.path.abspath(os.path.dirname(__file__)) -sys.path.append(f"{bindir}/../../seq2seq") -from run_translation import main # noqa +with ExtendSysPath(f"{bindir}/../../examples/seq2seq"): + from run_translation import main # noqa set_seed(42) From 97ccf67bb3c8bc309e8d2302874f56c734628a1c Mon Sep 17 00:00:00 2001 From: Stas Bekman Date: Thu, 8 Apr 2021 15:10:44 -0700 Subject: [PATCH 6/8] [setup] extras[docs] must include 'all' (#11148) * extras[doc] must include 'all' * fix * better * regroup --- .circleci/config.yml | 4 ++-- setup.py | 24 ++++++++++++++---------- 2 files changed, 16 insertions(+), 12 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 999af392fb..ec9c5741fb 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -348,7 +348,7 @@ jobs: - v0.4-{{ checksum "setup.py" }} - run: sudo apt-get -y update && sudo apt-get install -y libsndfile1-dev - run: pip install --upgrade pip - - run: pip install ."[all, docs]" + - run: pip install ."[docs]" - save_cache: key: v0.4-build_doc-{{ checksum "setup.py" 
}} paths: @@ -370,7 +370,7 @@ jobs: keys: - v0.4-deploy_doc-{{ checksum "setup.py" }} - v0.4-{{ checksum "setup.py" }} - - run: pip install ."[all,docs]" + - run: pip install ."[docs]" - save_cache: key: v0.4-deploy_doc-{{ checksum "setup.py" }} paths: diff --git a/setup.py b/setup.py index c3583a3070..e942e65a7c 100644 --- a/setup.py +++ b/setup.py @@ -246,15 +246,7 @@ extras["testing"] = ( + extras["retrieval"] + extras["modelcreation"] ) -extras["docs"] = deps_list( - "docutils", - "recommonmark", - "sphinx", - "sphinx-markdown-tables", - "sphinx-rtd-theme", - "sphinx-copybutton", - "sphinxext-opengraph", -) + extras["quality"] = deps_list("black", "isort", "flake8") extras["all"] = ( @@ -267,12 +259,24 @@ extras["all"] = ( + extras["vision"] ) +extras["docs_specific"] = deps_list( + "docutils", + "recommonmark", + "sphinx", + "sphinx-markdown-tables", + "sphinx-rtd-theme", + "sphinx-copybutton", + "sphinxext-opengraph", +) +# "docs" needs "all" to resolve all the references +extras["docs"] = extras["all"] + extras["docs_specific"] + extras["dev"] = ( extras["all"] + extras["testing"] + extras["quality"] + extras["ja"] - + extras["docs"] + + extras["docs_specific"] + extras["sklearn"] + extras["modelcreation"] ) From ba8b1f4754257e140ddabbe04a7f3e493e33802d Mon Sep 17 00:00:00 2001 From: Sylvain Gugger <35901082+sgugger@users.noreply.github.com> Date: Thu, 8 Apr 2021 18:41:36 -0400 Subject: [PATCH 7/8] Add support for multiple models for one config in auto classes (#11150) * Add support for multiple models for one config in auto classes * Use get_values everywhere * Prettier doc --- src/transformers/modeling_flax_utils.py | 1 + src/transformers/modeling_tf_utils.py | 1 + src/transformers/models/auto/__init__.py | 2 + src/transformers/models/auto/auto_factory.py | 39 +++++++++++++++++-- .../models/auto/configuration_auto.py | 19 ++++++--- src/transformers/models/auto/modeling_auto.py | 3 +- .../models/auto/modeling_tf_auto.py | 3 +- tests/test_modeling_albert.py 
| 3 +- tests/test_modeling_auto.py | 32 +++++++++++++-- tests/test_modeling_bert.py | 3 +- tests/test_modeling_big_bird.py | 3 +- tests/test_modeling_common.py | 27 ++++++------- tests/test_modeling_convbert.py | 3 +- tests/test_modeling_electra.py | 3 +- tests/test_modeling_flax_bert.py | 2 + tests/test_modeling_funnel.py | 3 +- tests/test_modeling_led.py | 3 +- tests/test_modeling_lxmert.py | 5 ++- tests/test_modeling_megatron_bert.py | 3 +- tests/test_modeling_mobilebert.py | 3 +- tests/test_modeling_tapas.py | 19 ++++----- tests/test_modeling_tf_albert.py | 3 +- tests/test_modeling_tf_auto.py | 30 +++++++++++++- tests/test_modeling_tf_bert.py | 3 +- tests/test_modeling_tf_common.py | 31 ++++++++------- utils/check_repo.py | 13 ++++--- 26 files changed, 188 insertions(+), 72 deletions(-) diff --git a/src/transformers/modeling_flax_utils.py b/src/transformers/modeling_flax_utils.py index c425f1a000..b9464ad3e5 100644 --- a/src/transformers/modeling_flax_utils.py +++ b/src/transformers/modeling_flax_utils.py @@ -387,6 +387,7 @@ class FlaxPreTrainedModel(ABC): # get abs dir save_directory = os.path.abspath(save_directory) # save config as well + self.config.architectures = [self.__class__.__name__[4:]] self.config.save_pretrained(save_directory) # save model diff --git a/src/transformers/modeling_tf_utils.py b/src/transformers/modeling_tf_utils.py index 3eec82e0db..002a7667f2 100644 --- a/src/transformers/modeling_tf_utils.py +++ b/src/transformers/modeling_tf_utils.py @@ -1037,6 +1037,7 @@ class TFPreTrainedModel(tf.keras.Model, TFModelUtilsMixin, TFGenerationMixin): logger.info(f"Saved model created in {saved_model_dir}") # Save configuration file + self.config.architectures = [self.__class__.__name__[2:]] self.config.save_pretrained(save_directory) # If we save using the predefined names, we can load using `from_pretrained` diff --git a/src/transformers/models/auto/__init__.py b/src/transformers/models/auto/__init__.py index ef255d8b26..4abf6da50d 100644 --- 
a/src/transformers/models/auto/__init__.py +++ b/src/transformers/models/auto/__init__.py @@ -22,6 +22,7 @@ from ...file_utils import _BaseLazyModule, is_flax_available, is_tf_available, i _import_structure = { + "auto_factory": ["get_values"], "configuration_auto": ["ALL_PRETRAINED_CONFIG_ARCHIVE_MAP", "CONFIG_MAPPING", "MODEL_NAMES_MAPPING", "AutoConfig"], "feature_extraction_auto": ["FEATURE_EXTRACTOR_MAPPING", "AutoFeatureExtractor"], "tokenization_auto": ["TOKENIZER_MAPPING", "AutoTokenizer"], @@ -104,6 +105,7 @@ if is_flax_available(): if TYPE_CHECKING: + from .auto_factory import get_values from .configuration_auto import ALL_PRETRAINED_CONFIG_ARCHIVE_MAP, CONFIG_MAPPING, MODEL_NAMES_MAPPING, AutoConfig from .feature_extraction_auto import FEATURE_EXTRACTOR_MAPPING, AutoFeatureExtractor from .tokenization_auto import TOKENIZER_MAPPING, AutoTokenizer diff --git a/src/transformers/models/auto/auto_factory.py b/src/transformers/models/auto/auto_factory.py index 1c96f13199..4ec9b6c31c 100644 --- a/src/transformers/models/auto/auto_factory.py +++ b/src/transformers/models/auto/auto_factory.py @@ -328,6 +328,26 @@ FROM_PRETRAINED_FLAX_DOCSTRING = """ """ +def _get_model_class(config, model_mapping): + supported_models = model_mapping[type(config)] + if not isinstance(supported_models, (list, tuple)): + return supported_models + + name_to_model = {model.__name__: model for model in supported_models} + architectures = getattr(config, "architectures", []) + for arch in architectures: + if arch in name_to_model: + return name_to_model[arch] + elif f"TF{arch}" in name_to_model: + return name_to_model[f"TF{arch}"] + elif f"Flax{arch}" in name_to_model: + return name_to_model[f"Flax{arch}"] + + # If no architecture is set in the config or none matches the supported models, the first element of the tuple is + # the default. + return supported_models[0] + + class _BaseAutoModelClass: # Base class for auto models. 
_model_mapping = None @@ -341,7 +361,8 @@ class _BaseAutoModelClass: def from_config(cls, config, **kwargs): if type(config) in cls._model_mapping.keys(): - return cls._model_mapping[type(config)](config, **kwargs) + model_class = _get_model_class(config, cls._model_mapping) + return model_class(config, **kwargs) raise ValueError( f"Unrecognized configuration class {config.__class__} for this kind of AutoModel: {cls.__name__}.\n" f"Model type should be one of {', '.join(c.__name__ for c in cls._model_mapping.keys())}." @@ -356,9 +377,8 @@ class _BaseAutoModelClass: ) if type(config) in cls._model_mapping.keys(): - return cls._model_mapping[type(config)].from_pretrained( - pretrained_model_name_or_path, *model_args, config=config, **kwargs - ) + model_class = _get_model_class(config, cls._model_mapping) + return model_class.from_pretrained(pretrained_model_name_or_path, *model_args, config=config, **kwargs) raise ValueError( f"Unrecognized configuration class {config.__class__} for this kind of AutoModel: {cls.__name__}.\n" f"Model type should be one of {', '.join(c.__name__ for c in cls._model_mapping.keys())}." 
@@ -418,3 +438,14 @@ def auto_class_factory(name, model_mapping, checkpoint_for_example="bert-base-ca from_pretrained = replace_list_option_in_docstrings(model_mapping)(from_pretrained) new_class.from_pretrained = classmethod(from_pretrained) return new_class + + +def get_values(model_mapping): + result = [] + for model in model_mapping.values(): + if isinstance(model, (list, tuple)): + result += list(model) + else: + result.append(model) + + return result diff --git a/src/transformers/models/auto/configuration_auto.py b/src/transformers/models/auto/configuration_auto.py index 2bb4586349..aa095c4e6a 100644 --- a/src/transformers/models/auto/configuration_auto.py +++ b/src/transformers/models/auto/configuration_auto.py @@ -247,29 +247,38 @@ MODEL_NAMES_MAPPING = OrderedDict( ) +def _get_class_name(model_class): + if isinstance(model_class, (list, tuple)): + return " or ".join([f":class:`~transformers.{c.__name__}`" for c in model_class]) + return f":class:`~transformers.{model_class.__name__}`" + + def _list_model_options(indent, config_to_class=None, use_model_types=True): if config_to_class is None and not use_model_types: raise ValueError("Using `use_model_types=False` requires a `config_to_class` dictionary.") if use_model_types: if config_to_class is None: - model_type_to_name = {model_type: config.__name__ for model_type, config in CONFIG_MAPPING.items()} + model_type_to_name = { + model_type: f":class:`~transformers.{config.__name__}`" + for model_type, config in CONFIG_MAPPING.items() + } else: model_type_to_name = { - model_type: config_to_class[config].__name__ + model_type: _get_class_name(config_to_class[config]) for model_type, config in CONFIG_MAPPING.items() if config in config_to_class } lines = [ - f"{indent}- **{model_type}** -- :class:`~transformers.{model_type_to_name[model_type]}` ({MODEL_NAMES_MAPPING[model_type]} model)" + f"{indent}- **{model_type}** -- {model_type_to_name[model_type]} ({MODEL_NAMES_MAPPING[model_type]} model)" for model_type 
in sorted(model_type_to_name.keys()) ] else: - config_to_name = {config.__name__: clas.__name__ for config, clas in config_to_class.items()} + config_to_name = {config.__name__: _get_class_name(clas) for config, clas in config_to_class.items()} config_to_model_name = { config.__name__: MODEL_NAMES_MAPPING[model_type] for model_type, config in CONFIG_MAPPING.items() } lines = [ - f"{indent}- :class:`~transformers.{config_name}` configuration class: :class:`~transformers.{config_to_name[config_name]}` ({config_to_model_name[config_name]} model)" + f"{indent}- :class:`~transformers.{config_name}` configuration class: {config_to_name[config_name]} ({config_to_model_name[config_name]} model)" for config_name in sorted(config_to_name.keys()) ] return "\n".join(lines) diff --git a/src/transformers/models/auto/modeling_auto.py b/src/transformers/models/auto/modeling_auto.py index 64ff826a8e..cf01739296 100644 --- a/src/transformers/models/auto/modeling_auto.py +++ b/src/transformers/models/auto/modeling_auto.py @@ -124,6 +124,7 @@ from ..flaubert.modeling_flaubert import ( ) from ..fsmt.modeling_fsmt import FSMTForConditionalGeneration, FSMTModel from ..funnel.modeling_funnel import ( + FunnelBaseModel, FunnelForMaskedLM, FunnelForMultipleChoice, FunnelForPreTraining, @@ -377,7 +378,7 @@ MODEL_MAPPING = OrderedDict( (CTRLConfig, CTRLModel), (ElectraConfig, ElectraModel), (ReformerConfig, ReformerModel), - (FunnelConfig, FunnelModel), + (FunnelConfig, (FunnelModel, FunnelBaseModel)), (LxmertConfig, LxmertModel), (BertGenerationConfig, BertGenerationEncoder), (DebertaConfig, DebertaModel), diff --git a/src/transformers/models/auto/modeling_tf_auto.py b/src/transformers/models/auto/modeling_tf_auto.py index 0abb08c890..2104bb6442 100644 --- a/src/transformers/models/auto/modeling_tf_auto.py +++ b/src/transformers/models/auto/modeling_tf_auto.py @@ -91,6 +91,7 @@ from ..flaubert.modeling_tf_flaubert import ( TFFlaubertWithLMHeadModel, ) from ..funnel.modeling_tf_funnel import 
( + TFFunnelBaseModel, TFFunnelForMaskedLM, TFFunnelForMultipleChoice, TFFunnelForPreTraining, @@ -242,7 +243,7 @@ TF_MODEL_MAPPING = OrderedDict( (XLMConfig, TFXLMModel), (CTRLConfig, TFCTRLModel), (ElectraConfig, TFElectraModel), - (FunnelConfig, TFFunnelModel), + (FunnelConfig, (TFFunnelModel, TFFunnelBaseModel)), (DPRConfig, TFDPRQuestionEncoder), (MPNetConfig, TFMPNetModel), (BartConfig, TFBartModel), diff --git a/tests/test_modeling_albert.py b/tests/test_modeling_albert.py index 1859f51aa5..7f82c67ba0 100644 --- a/tests/test_modeling_albert.py +++ b/tests/test_modeling_albert.py @@ -17,6 +17,7 @@ import unittest from transformers import is_torch_available +from transformers.models.auto import get_values from transformers.testing_utils import require_torch, slow, torch_device from .test_configuration_common import ConfigTester @@ -234,7 +235,7 @@ class AlbertModelTest(ModelTesterMixin, unittest.TestCase): inputs_dict = super()._prepare_for_class(inputs_dict, model_class, return_labels=return_labels) if return_labels: - if model_class in MODEL_FOR_PRETRAINING_MAPPING.values(): + if model_class in get_values(MODEL_FOR_PRETRAINING_MAPPING): inputs_dict["labels"] = torch.zeros( (self.model_tester.batch_size, self.model_tester.seq_length), dtype=torch.long, device=torch_device ) diff --git a/tests/test_modeling_auto.py b/tests/test_modeling_auto.py index d395d9640d..0ba839c42a 100644 --- a/tests/test_modeling_auto.py +++ b/tests/test_modeling_auto.py @@ -13,7 +13,8 @@ # See the License for the specific language governing permissions and # limitations under the License. 
- +import copy +import tempfile import unittest from transformers import is_torch_available @@ -46,6 +47,8 @@ if is_torch_available(): BertForSequenceClassification, BertForTokenClassification, BertModel, + FunnelBaseModel, + FunnelModel, GPT2Config, GPT2LMHeadModel, RobertaForMaskedLM, @@ -218,6 +221,21 @@ class AutoModelTest(unittest.TestCase): self.assertEqual(model.num_parameters(), 14410) self.assertEqual(model.num_parameters(only_trainable=True), 14410) + def test_from_pretrained_with_tuple_values(self): + # For the auto model mapping, FunnelConfig has two models: FunnelModel and FunnelBaseModel + model = AutoModel.from_pretrained("sgugger/funnel-random-tiny") + self.assertIsInstance(model, FunnelModel) + + config = copy.deepcopy(model.config) + config.architectures = ["FunnelBaseModel"] + model = AutoModel.from_config(config) + self.assertIsInstance(model, FunnelBaseModel) + + with tempfile.TemporaryDirectory() as tmp_dir: + model.save_pretrained(tmp_dir) + model = AutoModel.from_pretrained(tmp_dir) + self.assertIsInstance(model, FunnelBaseModel) + def test_parents_and_children_in_mappings(self): # Test that the children are placed before the parents in the mappings, as the `instanceof` will be triggered # by the parents and will return the wrong configuration type when using auto models @@ -242,6 +260,12 @@ class AutoModelTest(unittest.TestCase): assert not issubclass( child_config, parent_config ), f"{child_config.__name__} is child of {parent_config.__name__}" - assert not issubclass( - child_model, parent_model - ), f"{child_config.__name__} is child of {parent_config.__name__}" + + # Tuplify child_model and parent_model since some of them could be tuples. 
+ if not isinstance(child_model, (list, tuple)): + child_model = (child_model,) + if not isinstance(parent_model, (list, tuple)): + parent_model = (parent_model,) + + for child, parent in [(a, b) for a in child_model for b in parent_model]: + assert not issubclass(child, parent), f"{child.__name__} is child of {parent.__name__}" diff --git a/tests/test_modeling_bert.py b/tests/test_modeling_bert.py index 03f76c264b..97da4350ab 100755 --- a/tests/test_modeling_bert.py +++ b/tests/test_modeling_bert.py @@ -17,6 +17,7 @@ import unittest from transformers import is_torch_available +from transformers.models.auto import get_values from transformers.testing_utils import require_torch, slow, torch_device from .test_configuration_common import ConfigTester @@ -444,7 +445,7 @@ class BertModelTest(ModelTesterMixin, GenerationTesterMixin, unittest.TestCase): inputs_dict = super()._prepare_for_class(inputs_dict, model_class, return_labels=return_labels) if return_labels: - if model_class in MODEL_FOR_PRETRAINING_MAPPING.values(): + if model_class in get_values(MODEL_FOR_PRETRAINING_MAPPING): inputs_dict["labels"] = torch.zeros( (self.model_tester.batch_size, self.model_tester.seq_length), dtype=torch.long, device=torch_device ) diff --git a/tests/test_modeling_big_bird.py b/tests/test_modeling_big_bird.py index 9a6a55108e..edef01f207 100644 --- a/tests/test_modeling_big_bird.py +++ b/tests/test_modeling_big_bird.py @@ -19,6 +19,7 @@ import unittest from tests.test_modeling_common import floats_tensor from transformers import is_torch_available +from transformers.models.auto import get_values from transformers.models.big_bird.tokenization_big_bird import BigBirdTokenizer from transformers.testing_utils import require_torch, slow, torch_device @@ -458,7 +459,7 @@ class BigBirdModelTest(ModelTesterMixin, unittest.TestCase): inputs_dict = super()._prepare_for_class(inputs_dict, model_class, return_labels=return_labels) if return_labels: - if model_class in 
MODEL_FOR_PRETRAINING_MAPPING.values(): + if model_class in get_values(MODEL_FOR_PRETRAINING_MAPPING): inputs_dict["labels"] = torch.zeros( (self.model_tester.batch_size, self.model_tester.seq_length), dtype=torch.long, device=torch_device ) diff --git a/tests/test_modeling_common.py b/tests/test_modeling_common.py index 9ce171e649..d5d76162bc 100755 --- a/tests/test_modeling_common.py +++ b/tests/test_modeling_common.py @@ -24,6 +24,7 @@ from typing import List, Tuple from transformers import is_torch_available from transformers.file_utils import WEIGHTS_NAME +from transformers.models.auto import get_values from transformers.testing_utils import require_torch, require_torch_multi_gpu, slow, torch_device @@ -79,7 +80,7 @@ class ModelTesterMixin: def _prepare_for_class(self, inputs_dict, model_class, return_labels=False): inputs_dict = copy.deepcopy(inputs_dict) - if model_class in MODEL_FOR_MULTIPLE_CHOICE_MAPPING.values(): + if model_class in get_values(MODEL_FOR_MULTIPLE_CHOICE_MAPPING): inputs_dict = { k: v.unsqueeze(1).expand(-1, self.model_tester.num_choices, -1).contiguous() if isinstance(v, torch.Tensor) and v.ndim > 1 @@ -88,9 +89,9 @@ class ModelTesterMixin: } if return_labels: - if model_class in MODEL_FOR_MULTIPLE_CHOICE_MAPPING.values(): + if model_class in get_values(MODEL_FOR_MULTIPLE_CHOICE_MAPPING): inputs_dict["labels"] = torch.ones(self.model_tester.batch_size, dtype=torch.long, device=torch_device) - elif model_class in MODEL_FOR_QUESTION_ANSWERING_MAPPING.values(): + elif model_class in get_values(MODEL_FOR_QUESTION_ANSWERING_MAPPING): inputs_dict["start_positions"] = torch.zeros( self.model_tester.batch_size, dtype=torch.long, device=torch_device ) @@ -98,18 +99,18 @@ class ModelTesterMixin: self.model_tester.batch_size, dtype=torch.long, device=torch_device ) elif model_class in [ - *MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING.values(), - *MODEL_FOR_NEXT_SENTENCE_PREDICTION_MAPPING.values(), - *MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING.values(), + 
*get_values(MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING), + *get_values(MODEL_FOR_NEXT_SENTENCE_PREDICTION_MAPPING), + *get_values(MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING), ]: inputs_dict["labels"] = torch.zeros( self.model_tester.batch_size, dtype=torch.long, device=torch_device ) elif model_class in [ - *MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING.values(), - *MODEL_FOR_CAUSAL_LM_MAPPING.values(), - *MODEL_FOR_MASKED_LM_MAPPING.values(), - *MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING.values(), + *get_values(MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING), + *get_values(MODEL_FOR_CAUSAL_LM_MAPPING), + *get_values(MODEL_FOR_MASKED_LM_MAPPING), + *get_values(MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING), ]: inputs_dict["labels"] = torch.zeros( (self.model_tester.batch_size, self.model_tester.seq_length), dtype=torch.long, device=torch_device @@ -229,7 +230,7 @@ class ModelTesterMixin: config.return_dict = True for model_class in self.all_model_classes: - if model_class in MODEL_MAPPING.values(): + if model_class in get_values(MODEL_MAPPING): continue model = model_class(config) model.to(torch_device) @@ -248,7 +249,7 @@ class ModelTesterMixin: config.return_dict = True for model_class in self.all_model_classes: - if model_class in MODEL_MAPPING.values(): + if model_class in get_values(MODEL_MAPPING): continue model = model_class(config) model.to(torch_device) @@ -312,7 +313,7 @@ class ModelTesterMixin: if "labels" in inputs_dict: correct_outlen += 1 # loss is added to beginning # Question Answering model returns start_logits and end_logits - if model_class in MODEL_FOR_QUESTION_ANSWERING_MAPPING.values(): + if model_class in get_values(MODEL_FOR_QUESTION_ANSWERING_MAPPING): correct_outlen += 1 # start_logits and end_logits instead of only 1 output if "past_key_values" in outputs: correct_outlen += 1 # past_key_values have been returned diff --git a/tests/test_modeling_convbert.py b/tests/test_modeling_convbert.py index 610affc451..062a7f506a 100644 --- a/tests/test_modeling_convbert.py +++ 
b/tests/test_modeling_convbert.py @@ -19,6 +19,7 @@ import unittest from tests.test_modeling_common import floats_tensor from transformers import is_torch_available +from transformers.models.auto import get_values from transformers.testing_utils import require_torch, slow, torch_device from .test_configuration_common import ConfigTester @@ -352,7 +353,7 @@ class ConvBertModelTest(ModelTesterMixin, unittest.TestCase): if "labels" in inputs_dict: correct_outlen += 1 # loss is added to beginning # Question Answering model returns start_logits and end_logits - if model_class in MODEL_FOR_QUESTION_ANSWERING_MAPPING.values(): + if model_class in get_values(MODEL_FOR_QUESTION_ANSWERING_MAPPING): correct_outlen += 1 # start_logits and end_logits instead of only 1 output if "past_key_values" in outputs: correct_outlen += 1 # past_key_values have been returned diff --git a/tests/test_modeling_electra.py b/tests/test_modeling_electra.py index 88138a587c..5935eafee6 100644 --- a/tests/test_modeling_electra.py +++ b/tests/test_modeling_electra.py @@ -17,6 +17,7 @@ import unittest from transformers import is_torch_available +from transformers.models.auto import get_values from transformers.testing_utils import require_torch, slow, torch_device from .test_configuration_common import ConfigTester @@ -292,7 +293,7 @@ class ElectraModelTest(ModelTesterMixin, unittest.TestCase): inputs_dict = super()._prepare_for_class(inputs_dict, model_class, return_labels=return_labels) if return_labels: - if model_class in MODEL_FOR_PRETRAINING_MAPPING.values(): + if model_class in get_values(MODEL_FOR_PRETRAINING_MAPPING): inputs_dict["labels"] = torch.zeros( (self.model_tester.batch_size, self.model_tester.seq_length), dtype=torch.long, device=torch_device ) diff --git a/tests/test_modeling_flax_bert.py b/tests/test_modeling_flax_bert.py index fc339f7501..273f55d157 100644 --- a/tests/test_modeling_flax_bert.py +++ b/tests/test_modeling_flax_bert.py @@ -29,6 +29,7 @@ if is_flax_available(): 
FlaxBertForNextSentencePrediction, FlaxBertForPreTraining, FlaxBertForQuestionAnswering, + FlaxBertForSequenceClassification, FlaxBertForTokenClassification, FlaxBertModel, ) @@ -125,6 +126,7 @@ class FlaxBertModelTest(FlaxModelTesterMixin, unittest.TestCase): FlaxBertForMultipleChoice, FlaxBertForQuestionAnswering, FlaxBertForNextSentencePrediction, + FlaxBertForSequenceClassification, FlaxBertForTokenClassification, FlaxBertForQuestionAnswering, ) diff --git a/tests/test_modeling_funnel.py b/tests/test_modeling_funnel.py index 0e3846cef1..4435359eb6 100644 --- a/tests/test_modeling_funnel.py +++ b/tests/test_modeling_funnel.py @@ -17,6 +17,7 @@ import unittest from transformers import FunnelTokenizer, is_torch_available +from transformers.models.auto import get_values from transformers.testing_utils import require_sentencepiece, require_tokenizers, require_torch, slow, torch_device from .test_configuration_common import ConfigTester @@ -365,7 +366,7 @@ class FunnelModelTest(ModelTesterMixin, unittest.TestCase): inputs_dict = super()._prepare_for_class(inputs_dict, model_class, return_labels=return_labels) if return_labels: - if model_class in MODEL_FOR_PRETRAINING_MAPPING.values(): + if model_class in get_values(MODEL_FOR_PRETRAINING_MAPPING): inputs_dict["labels"] = torch.zeros( (self.model_tester.batch_size, self.model_tester.seq_length), dtype=torch.long, device=torch_device ) diff --git a/tests/test_modeling_led.py b/tests/test_modeling_led.py index 4166060145..caffe199bb 100644 --- a/tests/test_modeling_led.py +++ b/tests/test_modeling_led.py @@ -21,6 +21,7 @@ import unittest from transformers import is_torch_available from transformers.file_utils import cached_property +from transformers.models.auto import get_values from transformers.testing_utils import require_sentencepiece, require_tokenizers, require_torch, slow, torch_device from .test_configuration_common import ConfigTester @@ -412,7 +413,7 @@ class LEDModelTest(ModelTesterMixin, 
GenerationTesterMixin, unittest.TestCase): if "labels" in inputs_dict: correct_outlen += 1 # loss is added to beginning # Question Answering model returns start_logits and end_logits - if model_class in MODEL_FOR_QUESTION_ANSWERING_MAPPING.values(): + if model_class in get_values(MODEL_FOR_QUESTION_ANSWERING_MAPPING): correct_outlen += 1 # start_logits and end_logits instead of only 1 output if "past_key_values" in outputs: correct_outlen += 1 # past_key_values have been returned diff --git a/tests/test_modeling_lxmert.py b/tests/test_modeling_lxmert.py index f05b3c3ee8..b03cc31335 100644 --- a/tests/test_modeling_lxmert.py +++ b/tests/test_modeling_lxmert.py @@ -18,6 +18,7 @@ import copy import unittest from transformers import is_torch_available +from transformers.models.auto import get_values from transformers.testing_utils import require_torch, slow, torch_device from .test_configuration_common import ConfigTester @@ -532,11 +533,11 @@ class LxmertModelTest(ModelTesterMixin, unittest.TestCase): inputs_dict = copy.deepcopy(inputs_dict) if return_labels: - if model_class in MODEL_FOR_QUESTION_ANSWERING_MAPPING.values(): + if model_class in get_values(MODEL_FOR_QUESTION_ANSWERING_MAPPING): inputs_dict["labels"] = torch.zeros( self.model_tester.batch_size, dtype=torch.long, device=torch_device ) - elif model_class in MODEL_FOR_PRETRAINING_MAPPING.values(): + elif model_class in get_values(MODEL_FOR_PRETRAINING_MAPPING): # special case for models like BERT that use multi-loss training for PreTraining inputs_dict["labels"] = torch.zeros( (self.model_tester.batch_size, self.model_tester.seq_length), dtype=torch.long, device=torch_device diff --git a/tests/test_modeling_megatron_bert.py b/tests/test_modeling_megatron_bert.py index 3423f2d6f1..5be4716d33 100644 --- a/tests/test_modeling_megatron_bert.py +++ b/tests/test_modeling_megatron_bert.py @@ -21,6 +21,7 @@ import os import unittest from transformers import is_torch_available +from transformers.models.auto import 
get_values from transformers.testing_utils import require_sentencepiece, require_tokenizers, require_torch, slow, torch_device from .test_configuration_common import ConfigTester @@ -290,7 +291,7 @@ class MegatronBertModelTest(ModelTesterMixin, unittest.TestCase): inputs_dict = super()._prepare_for_class(inputs_dict, model_class, return_labels=return_labels) if return_labels: - if model_class in MODEL_FOR_PRETRAINING_MAPPING.values(): + if model_class in get_values(MODEL_FOR_PRETRAINING_MAPPING): inputs_dict["labels"] = torch.zeros( (self.model_tester.batch_size, self.model_tester.seq_length), dtype=torch.long, device=torch_device ) diff --git a/tests/test_modeling_mobilebert.py b/tests/test_modeling_mobilebert.py index 9a0fc9ae96..96c974e2ed 100644 --- a/tests/test_modeling_mobilebert.py +++ b/tests/test_modeling_mobilebert.py @@ -17,6 +17,7 @@ import unittest from transformers import is_torch_available +from transformers.models.auto import get_values from transformers.testing_utils import require_sentencepiece, require_tokenizers, require_torch, slow, torch_device from .test_configuration_common import ConfigTester @@ -272,7 +273,7 @@ class MobileBertModelTest(ModelTesterMixin, unittest.TestCase): inputs_dict = super()._prepare_for_class(inputs_dict, model_class, return_labels=return_labels) if return_labels: - if model_class in MODEL_FOR_PRETRAINING_MAPPING.values(): + if model_class in get_values(MODEL_FOR_PRETRAINING_MAPPING): inputs_dict["labels"] = torch.zeros( (self.model_tester.batch_size, self.model_tester.seq_length), dtype=torch.long, device=torch_device ) diff --git a/tests/test_modeling_tapas.py b/tests/test_modeling_tapas.py index b4f8f13231..b36147d558 100644 --- a/tests/test_modeling_tapas.py +++ b/tests/test_modeling_tapas.py @@ -32,6 +32,7 @@ from transformers import ( is_torch_available, ) from transformers.file_utils import cached_property +from transformers.models.auto import get_values from transformers.testing_utils import require_scatter, 
require_torch, slow, torch_device from .test_configuration_common import ConfigTester @@ -425,7 +426,7 @@ class TapasModelTest(ModelTesterMixin, unittest.TestCase): def _prepare_for_class(self, inputs_dict, model_class, return_labels=False): inputs_dict = copy.deepcopy(inputs_dict) - if model_class in MODEL_FOR_MULTIPLE_CHOICE_MAPPING.values(): + if model_class in get_values(MODEL_FOR_MULTIPLE_CHOICE_MAPPING): inputs_dict = { k: v.unsqueeze(1).expand(-1, self.model_tester.num_choices, -1).contiguous() if isinstance(v, torch.Tensor) and v.ndim > 1 @@ -434,9 +435,9 @@ class TapasModelTest(ModelTesterMixin, unittest.TestCase): } if return_labels: - if model_class in MODEL_FOR_MULTIPLE_CHOICE_MAPPING.values(): + if model_class in get_values(MODEL_FOR_MULTIPLE_CHOICE_MAPPING): inputs_dict["labels"] = torch.ones(self.model_tester.batch_size, dtype=torch.long, device=torch_device) - elif model_class in MODEL_FOR_TABLE_QUESTION_ANSWERING_MAPPING.values(): + elif model_class in get_values(MODEL_FOR_TABLE_QUESTION_ANSWERING_MAPPING): inputs_dict["labels"] = torch.zeros( (self.model_tester.batch_size, self.model_tester.seq_length), dtype=torch.long, device=torch_device ) @@ -457,17 +458,17 @@ class TapasModelTest(ModelTesterMixin, unittest.TestCase): self.model_tester.batch_size, dtype=torch.float, device=torch_device ) elif model_class in [ - *MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING.values(), - *MODEL_FOR_NEXT_SENTENCE_PREDICTION_MAPPING.values(), + *get_values(MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING), + *get_values(MODEL_FOR_NEXT_SENTENCE_PREDICTION_MAPPING), ]: inputs_dict["labels"] = torch.zeros( self.model_tester.batch_size, dtype=torch.long, device=torch_device ) elif model_class in [ - *MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING.values(), - *MODEL_FOR_CAUSAL_LM_MAPPING.values(), - *MODEL_FOR_MASKED_LM_MAPPING.values(), - *MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING.values(), + *get_values(MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING), + *get_values(MODEL_FOR_CAUSAL_LM_MAPPING), + 
*get_values(MODEL_FOR_MASKED_LM_MAPPING), + *get_values(MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING), ]: inputs_dict["labels"] = torch.zeros( (self.model_tester.batch_size, self.model_tester.seq_length), dtype=torch.long, device=torch_device diff --git a/tests/test_modeling_tf_albert.py b/tests/test_modeling_tf_albert.py index aabd185f78..ab6b32ab84 100644 --- a/tests/test_modeling_tf_albert.py +++ b/tests/test_modeling_tf_albert.py @@ -17,6 +17,7 @@ import unittest from transformers import AlbertConfig, is_tf_available +from transformers.models.auto import get_values from transformers.testing_utils import require_tf, slow from .test_configuration_common import ConfigTester @@ -249,7 +250,7 @@ class TFAlbertModelTest(TFModelTesterMixin, unittest.TestCase): inputs_dict = super()._prepare_for_class(inputs_dict, model_class, return_labels=return_labels) if return_labels: - if model_class in TF_MODEL_FOR_PRETRAINING_MAPPING.values(): + if model_class in get_values(TF_MODEL_FOR_PRETRAINING_MAPPING): inputs_dict["sentence_order_label"] = tf.zeros(self.model_tester.batch_size, dtype=tf.int32) return inputs_dict diff --git a/tests/test_modeling_tf_auto.py b/tests/test_modeling_tf_auto.py index ff80adc369..eb0b05f2c7 100644 --- a/tests/test_modeling_tf_auto.py +++ b/tests/test_modeling_tf_auto.py @@ -13,7 +13,8 @@ # See the License for the specific language governing permissions and # limitations under the License. 
- +import copy +import tempfile import unittest from transformers import is_tf_available @@ -39,6 +40,8 @@ if is_tf_available(): TFBertForQuestionAnswering, TFBertForSequenceClassification, TFBertModel, + TFFunnelBaseModel, + TFFunnelModel, TFGPT2LMHeadModel, TFRobertaForMaskedLM, TFT5ForConditionalGeneration, @@ -176,6 +179,21 @@ class TFAutoModelTest(unittest.TestCase): self.assertEqual(model.num_parameters(), 14410) self.assertEqual(model.num_parameters(only_trainable=True), 14410) + def test_from_pretrained_with_tuple_values(self): + # For the auto model mapping, FunnelConfig has two models: FunnelModel and FunnelBaseModel + model = TFAutoModel.from_pretrained("sgugger/funnel-random-tiny") + self.assertIsInstance(model, TFFunnelModel) + + config = copy.deepcopy(model.config) + config.architectures = ["FunnelBaseModel"] + model = TFAutoModel.from_config(config) + self.assertIsInstance(model, TFFunnelBaseModel) + + with tempfile.TemporaryDirectory() as tmp_dir: + model.save_pretrained(tmp_dir) + model = TFAutoModel.from_pretrained(tmp_dir) + self.assertIsInstance(model, TFFunnelBaseModel) + def test_parents_and_children_in_mappings(self): # Test that the children are placed before the parents in the mappings, as the `instanceof` will be triggered # by the parents and will return the wrong configuration type when using auto models @@ -197,4 +215,12 @@ class TFAutoModelTest(unittest.TestCase): for parent_config, parent_model in mapping[: index + 1]: with self.subTest(msg=f"Testing if {child_config.__name__} is child of {parent_config.__name__}"): self.assertFalse(issubclass(child_config, parent_config)) - self.assertFalse(issubclass(child_model, parent_model)) + + # Tuplify child_model and parent_model since some of them could be tuples. 
+ if not isinstance(child_model, (list, tuple)): + child_model = (child_model,) + if not isinstance(parent_model, (list, tuple)): + parent_model = (parent_model,) + + for child, parent in [(a, b) for a in child_model for b in parent_model]: + assert not issubclass(child, parent), f"{child.__name__} is child of {parent.__name__}" diff --git a/tests/test_modeling_tf_bert.py b/tests/test_modeling_tf_bert.py index 8817ae2bc1..639ba0be9d 100644 --- a/tests/test_modeling_tf_bert.py +++ b/tests/test_modeling_tf_bert.py @@ -17,6 +17,7 @@ import unittest from transformers import BertConfig, is_tf_available +from transformers.models.auto import get_values from transformers.testing_utils import require_tf, slow from .test_configuration_common import ConfigTester @@ -282,7 +283,7 @@ class TFBertModelTest(TFModelTesterMixin, unittest.TestCase): inputs_dict = super()._prepare_for_class(inputs_dict, model_class, return_labels=return_labels) if return_labels: - if model_class in TF_MODEL_FOR_PRETRAINING_MAPPING.values(): + if model_class in get_values(TF_MODEL_FOR_PRETRAINING_MAPPING): inputs_dict["next_sentence_label"] = tf.zeros(self.model_tester.batch_size, dtype=tf.int32) return inputs_dict diff --git a/tests/test_modeling_tf_common.py b/tests/test_modeling_tf_common.py index a2f7085660..51daf3779d 100644 --- a/tests/test_modeling_tf_common.py +++ b/tests/test_modeling_tf_common.py @@ -25,6 +25,7 @@ from importlib import import_module from typing import List, Tuple from transformers import is_tf_available +from transformers.models.auto import get_values from transformers.testing_utils import ( _tf_gpu_memory_limit, is_pt_tf_cross_test, @@ -89,7 +90,7 @@ class TFModelTesterMixin: def _prepare_for_class(self, inputs_dict, model_class, return_labels=False) -> dict: inputs_dict = copy.deepcopy(inputs_dict) - if model_class in TF_MODEL_FOR_MULTIPLE_CHOICE_MAPPING.values(): + if model_class in get_values(TF_MODEL_FOR_MULTIPLE_CHOICE_MAPPING): inputs_dict = { k: 
tf.tile(tf.expand_dims(v, 1), (1, self.model_tester.num_choices) + (1,) * (v.ndim - 1)) if isinstance(v, tf.Tensor) and v.ndim > 0 @@ -98,21 +99,21 @@ class TFModelTesterMixin: } if return_labels: - if model_class in TF_MODEL_FOR_MULTIPLE_CHOICE_MAPPING.values(): + if model_class in get_values(TF_MODEL_FOR_MULTIPLE_CHOICE_MAPPING): inputs_dict["labels"] = tf.ones(self.model_tester.batch_size, dtype=tf.int32) - elif model_class in TF_MODEL_FOR_QUESTION_ANSWERING_MAPPING.values(): + elif model_class in get_values(TF_MODEL_FOR_QUESTION_ANSWERING_MAPPING): inputs_dict["start_positions"] = tf.zeros(self.model_tester.batch_size, dtype=tf.int32) inputs_dict["end_positions"] = tf.zeros(self.model_tester.batch_size, dtype=tf.int32) - elif model_class in TF_MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING.values(): + elif model_class in get_values(TF_MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING): inputs_dict["labels"] = tf.zeros(self.model_tester.batch_size, dtype=tf.int32) - elif model_class in TF_MODEL_FOR_NEXT_SENTENCE_PREDICTION_MAPPING.values(): + elif model_class in get_values(TF_MODEL_FOR_NEXT_SENTENCE_PREDICTION_MAPPING): inputs_dict["next_sentence_label"] = tf.zeros(self.model_tester.batch_size, dtype=tf.int32) elif model_class in [ - *TF_MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING.values(), - *TF_MODEL_FOR_CAUSAL_LM_MAPPING.values(), - *TF_MODEL_FOR_MASKED_LM_MAPPING.values(), - *TF_MODEL_FOR_PRETRAINING_MAPPING.values(), - *TF_MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING.values(), + *get_values(TF_MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING), + *get_values(TF_MODEL_FOR_CAUSAL_LM_MAPPING), + *get_values(TF_MODEL_FOR_MASKED_LM_MAPPING), + *get_values(TF_MODEL_FOR_PRETRAINING_MAPPING), + *get_values(TF_MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING), ]: inputs_dict["labels"] = tf.zeros( (self.model_tester.batch_size, self.model_tester.seq_length), dtype=tf.int32 @@ -580,7 +581,7 @@ class TFModelTesterMixin: ), "input_ids": tf.keras.Input(batch_shape=(2, max_input), name="input_ids", dtype="int32"), } 
- elif model_class in TF_MODEL_FOR_MULTIPLE_CHOICE_MAPPING.values(): + elif model_class in get_values(TF_MODEL_FOR_MULTIPLE_CHOICE_MAPPING): input_ids = tf.keras.Input(batch_shape=(4, 2, max_input), name="input_ids", dtype="int32") else: input_ids = tf.keras.Input(batch_shape=(2, max_input), name="input_ids", dtype="int32") @@ -796,9 +797,9 @@ class TFModelTesterMixin: def test_model_common_attributes(self): config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common() list_lm_models = ( - list(TF_MODEL_FOR_CAUSAL_LM_MAPPING.values()) - + list(TF_MODEL_FOR_MASKED_LM_MAPPING.values()) - + list(TF_MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING.values()) + get_values(TF_MODEL_FOR_CAUSAL_LM_MAPPING) + + get_values(TF_MODEL_FOR_MASKED_LM_MAPPING) + + get_values(TF_MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING) ) for model_class in self.all_model_classes: @@ -1128,7 +1129,7 @@ class TFModelTesterMixin: ] loss_size = tf.size(added_label) - if model.__class__ in TF_MODEL_FOR_CAUSAL_LM_MAPPING.values(): + if model.__class__ in get_values(TF_MODEL_FOR_CAUSAL_LM_MAPPING): # if loss is causal lm loss, labels are shift, so that one label per batch # is cut loss_size = loss_size - self.model_tester.batch_size diff --git a/utils/check_repo.py b/utils/check_repo.py index 9869133ce0..4fa45d7c66 100644 --- a/utils/check_repo.py +++ b/utils/check_repo.py @@ -19,6 +19,8 @@ import os import re from pathlib import Path +from transformers.models.auto import get_values + # All paths are set with the intent you should run this script from the root of the repo with the command # python utils/check_repo.py @@ -86,7 +88,6 @@ IGNORE_NON_AUTO_CONFIGURED = [ "DPRReader", "DPRSpanPredictor", "FlaubertForQuestionAnswering", - "FunnelBaseModel", "GPT2DoubleHeadsModel", "OpenAIGPTDoubleHeadsModel", "RagModel", @@ -95,7 +96,6 @@ IGNORE_NON_AUTO_CONFIGURED = [ "T5Stack", "TFDPRReader", "TFDPRSpanPredictor", - "TFFunnelBaseModel", "TFGPT2DoubleHeadsModel", "TFOpenAIGPTDoubleHeadsModel", "TFRagModel", 
@@ -153,7 +153,7 @@ def get_model_modules(): def get_models(module): """ Get the objects in module that are models.""" models = [] - model_classes = (transformers.PreTrainedModel, transformers.TFPreTrainedModel) + model_classes = (transformers.PreTrainedModel, transformers.TFPreTrainedModel, transformers.FlaxPreTrainedModel) for attr_name in dir(module): if "Pretrained" in attr_name or "PreTrained" in attr_name: continue @@ -249,10 +249,13 @@ def get_all_auto_configured_models(): result = set() # To avoid duplicates we concatenate all model classes in a set. for attr_name in dir(transformers.models.auto.modeling_auto): if attr_name.startswith("MODEL_") and attr_name.endswith("MAPPING"): - result = result | set(getattr(transformers.models.auto.modeling_auto, attr_name).values()) + result = result | set(get_values(getattr(transformers.models.auto.modeling_auto, attr_name))) for attr_name in dir(transformers.models.auto.modeling_tf_auto): if attr_name.startswith("TF_MODEL_") and attr_name.endswith("MAPPING"): - result = result | set(getattr(transformers.models.auto.modeling_tf_auto, attr_name).values()) + result = result | set(get_values(getattr(transformers.models.auto.modeling_tf_auto, attr_name))) + for attr_name in dir(transformers.models.auto.modeling_flax_auto): + if attr_name.startswith("FLAX_MODEL_") and attr_name.endswith("MAPPING"): + result = result | set(get_values(getattr(transformers.models.auto.modeling_flax_auto, attr_name))) return [cls.__name__ for cls in result] From c2e0fd5283fa29bf0d0bed1fdbc9b4206e47c1d1 Mon Sep 17 00:00:00 2001 From: Stas Bekman Date: Thu, 8 Apr 2021 15:46:54 -0700 Subject: [PATCH 8/8] [setup] make fairscale and deepspeed setup extras (#11151) * make fairscale and deepspeed setup extras * fix default * Apply suggestions from code review Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com> * no reason not to ask for the good version * update the CIs Co-authored-by: Sylvain Gugger 
<35901082+sgugger@users.noreply.github.com> --- .github/workflows/self-scheduled.yml | 7 ++----- docs/source/main_classes/trainer.rst | 16 ++++++++++++++++ setup.py | 4 ++++ src/transformers/dependency_versions_check.py | 6 +++++- src/transformers/dependency_versions_table.py | 2 ++ src/transformers/integrations.py | 4 ++-- src/transformers/trainer.py | 10 ++++------ src/transformers/utils/versions.py | 6 ++++++ 8 files changed, 41 insertions(+), 14 deletions(-) diff --git a/.github/workflows/self-scheduled.yml b/.github/workflows/self-scheduled.yml index c49a967d2a..978d9e02a6 100644 --- a/.github/workflows/self-scheduled.yml +++ b/.github/workflows/self-scheduled.yml @@ -33,8 +33,7 @@ jobs: run: | apt -y update && apt install -y libsndfile1-dev pip install --upgrade pip - pip install .[sklearn,testing,onnxruntime,sentencepiece,speech] - pip install deepspeed + pip install .[sklearn,testing,onnxruntime,sentencepiece,speech,deepspeed] - name: Are GPUs recognized by our DL frameworks run: | @@ -156,9 +155,7 @@ jobs: run: | apt -y update && apt install -y libsndfile1-dev pip install --upgrade pip - pip install .[sklearn,testing,onnxruntime,sentencepiece,speech] - pip install fairscale - pip install deepspeed + pip install .[sklearn,testing,onnxruntime,sentencepiece,speech,deepspeed,fairscale] - name: Are GPUs recognized by our DL frameworks run: | diff --git a/docs/source/main_classes/trainer.rst b/docs/source/main_classes/trainer.rst index bc9f248827..10a7a9d54a 100644 --- a/docs/source/main_classes/trainer.rst +++ b/docs/source/main_classes/trainer.rst @@ -274,6 +274,14 @@ Install the library via pypi: pip install fairscale +or via ``transformers``' ``extras``: + +.. code-block:: bash + + pip install transformers[fairscale] + +(will become available starting from ``transformers==4.6.0``) + or find more details on `the FairScale's GitHub page `__. If you're still struggling with the build, first make sure to read :ref:`zero-install-notes`. 
@@ -419,6 +427,14 @@ Install the library via pypi: pip install deepspeed +or via ``transformers``' ``extras``: + +.. code-block:: bash + + pip install transformers[deepspeed] + +(will become available starting from ``transformers==4.6.0``) + or find more details on `the DeepSpeed's GitHub page `__ and `advanced install `__. diff --git a/setup.py b/setup.py index e942e65a7c..1b2ab5bf31 100644 --- a/setup.py +++ b/setup.py @@ -90,7 +90,9 @@ _deps = [ "cookiecutter==1.7.2", "dataclasses", "datasets", + "deepspeed>0.3.13", "docutils==0.16.0", + "fairscale>0.3", "faiss-cpu", "fastapi", "filelock", @@ -233,6 +235,8 @@ extras["onnx"] = deps_list("onnxconverter-common", "keras2onnx") + extras["onnxr extras["modelcreation"] = deps_list("cookiecutter") extras["sagemaker"] = deps_list("sagemaker") +extras["deepspeed"] = deps_list("deepspeed") +extras["fairscale"] = deps_list("fairscale") extras["serving"] = deps_list("pydantic", "uvicorn", "fastapi", "starlette") extras["speech"] = deps_list("soundfile", "torchaudio") diff --git a/src/transformers/dependency_versions_check.py b/src/transformers/dependency_versions_check.py index 7e36aaef30..e6e676481d 100644 --- a/src/transformers/dependency_versions_check.py +++ b/src/transformers/dependency_versions_check.py @@ -14,7 +14,7 @@ import sys from .dependency_versions_table import deps -from .utils.versions import require_version_core +from .utils.versions import require_version, require_version_core # define which module versions we always want to check at run time @@ -41,3 +41,7 @@ for pkg in pkgs_to_check_at_runtime: require_version_core(deps[pkg]) else: raise ValueError(f"can't find {pkg} in {deps.keys()}, check dependency_versions_table.py") + + +def dep_version_check(pkg, hint=None): + require_version(deps[pkg], hint) diff --git a/src/transformers/dependency_versions_table.py b/src/transformers/dependency_versions_table.py index 43f4c028fe..bd070d7bdf 100644 --- a/src/transformers/dependency_versions_table.py +++ 
b/src/transformers/dependency_versions_table.py @@ -7,7 +7,9 @@ deps = { "cookiecutter": "cookiecutter==1.7.2", "dataclasses": "dataclasses", "datasets": "datasets", + "deepspeed": "deepspeed>0.3.13", "docutils": "docutils==0.16.0", + "fairscale": "fairscale>0.3", "faiss-cpu": "faiss-cpu", "fastapi": "fastapi", "filelock": "filelock", diff --git a/src/transformers/integrations.py b/src/transformers/integrations.py index 65824c25ca..7e4ab0f5c7 100644 --- a/src/transformers/integrations.py +++ b/src/transformers/integrations.py @@ -24,8 +24,8 @@ import tempfile from copy import deepcopy from pathlib import Path +from .dependency_versions_check import dep_version_check from .utils import logging -from .utils.versions import require_version logger = logging.get_logger(__name__) @@ -324,7 +324,7 @@ def deepspeed_parse_config(ds_config): If it's already a dict, return a copy of it, so that we can freely modify it. """ - require_version("deepspeed>0.3.13") + dep_version_check("deepspeed") if isinstance(ds_config, dict): # Don't modify user's data should they want to reuse it (e.g. 
in tests), because once we diff --git a/src/transformers/trainer.py b/src/transformers/trainer.py index dc31164331..41800b7fd3 100755 --- a/src/transformers/trainer.py +++ b/src/transformers/trainer.py @@ -54,6 +54,7 @@ from torch.utils.data.distributed import DistributedSampler from torch.utils.data.sampler import RandomSampler, SequentialSampler from .data.data_collator import DataCollator, DataCollatorWithPadding, default_data_collator +from .dependency_versions_check import dep_version_check from .file_utils import ( WEIGHTS_NAME, is_apex_available, @@ -139,17 +140,14 @@ if is_torch_tpu_available(): import torch_xla.distributed.parallel_loader as pl if is_fairscale_available(): + dep_version_check("fairscale") import fairscale + from fairscale.nn.data_parallel import FullyShardedDataParallel as FullyShardedDDP from fairscale.nn.data_parallel import ShardedDataParallel as ShardedDDP + from fairscale.nn.wrap import auto_wrap from fairscale.optim import OSS from fairscale.optim.grad_scaler import ShardedGradScaler - if version.parse(fairscale.__version__) >= version.parse("0.3"): - from fairscale.nn.data_parallel import FullyShardedDataParallel as FullyShardedDDP - from fairscale.nn.wrap import auto_wrap - else: - FullyShardedDDP = None - if is_sagemaker_dp_enabled(): import smdistributed.dataparallel.torch.distributed as dist from smdistributed.dataparallel.torch.parallel.distributed import DistributedDataParallel as DDP diff --git a/src/transformers/utils/versions.py b/src/transformers/utils/versions.py index b573a361b9..73151487bc 100644 --- a/src/transformers/utils/versions.py +++ b/src/transformers/utils/versions.py @@ -60,6 +60,12 @@ def require_version(requirement: str, hint: Optional[str] = None) -> None: Args: requirement (:obj:`str`): pip style definition, e.g., "tokenizers==0.9.4", "tqdm>=4.27", "numpy" hint (:obj:`str`, `optional`): what suggestion to print in case of requirements not being met + + Example:: + + require_version("pandas>1.1.2") + 
require_version("numpy>1.18.5", "this is important to have for whatever reason") + """ hint = f"\n{hint}" if hint is not None else ""