From 0f96c26de6171a996094a00d2126575c0a457fba Mon Sep 17 00:00:00 2001 From: Connor Henderson Date: Thu, 16 Feb 2023 11:32:32 -0500 Subject: [PATCH] refactor: Make direct_transformers_import util (#21652) * refactor: Make direct_import util * edit direct import fn * add docstring * make import function specific to transformers only * edit doc string --- src/transformers/processing_utils.py | 11 ++--------- src/transformers/utils/__init__.py | 1 + src/transformers/utils/import_utils.py | 19 +++++++++++++++++++ tests/pipelines/test_pipelines_common.py | 12 ++---------- utils/check_config_attributes.py | 13 +++---------- utils/check_config_docstrings.py | 14 +++----------- utils/check_copies.py | 13 +++---------- utils/check_inits.py | 13 +++---------- utils/check_repo.py | 13 ++----------- utils/check_table.py | 13 +++---------- utils/check_task_guides.py | 13 +++---------- utils/update_metadata.py | 13 +++---------- 12 files changed, 47 insertions(+), 101 deletions(-) diff --git a/src/transformers/processing_utils.py b/src/transformers/processing_utils.py index 2cb4837e04..09484cd132 100644 --- a/src/transformers/processing_utils.py +++ b/src/transformers/processing_utils.py @@ -16,25 +16,18 @@ Processing saving/loading class for common processors. """ -import importlib.util import os -import sys from pathlib import Path from .dynamic_module_utils import custom_object_save from .tokenization_utils_base import PreTrainedTokenizerBase -from .utils import PushToHubMixin, copy_func, logging +from .utils import PushToHubMixin, copy_func, direct_transformers_import, logging logger = logging.get_logger(__name__) # Dynamically import the Transformers module to grab the attribute classes of the processor form their names. -spec = importlib.util.spec_from_file_location( - "transformers", Path(__file__).parent / "__init__.py", submodule_search_locations=[Path(__file__).parent] -) -transformers_module = importlib.util.module_from_spec(spec) -spec.loader.exec_module(transformers_module) -transformers_module = sys.modules["transformers"] +transformers_module = direct_transformers_import(Path(__file__).parent) AUTO_TO_BASE_CLASS_MAPPING = { diff --git a/src/transformers/utils/__init__.py b/src/transformers/utils/__init__.py index 3bab7e274a..26371782f8 100644 --- a/src/transformers/utils/__init__.py +++ b/src/transformers/utils/__init__.py @@ -94,6 +94,7 @@ from .import_utils import ( OptionalDependencyNotAvailable, _LazyModule, ccl_version, + direct_transformers_import, is_accelerate_available, is_apex_available, is_bitsandbytes_available, diff --git a/src/transformers/utils/import_utils.py b/src/transformers/utils/import_utils.py index dd40002bbb..628605aaf9 100644 --- a/src/transformers/utils/import_utils.py +++ b/src/transformers/utils/import_utils.py @@ -1129,3 +1129,22 @@ class _LazyModule(ModuleType): class OptionalDependencyNotAvailable(BaseException): """Internally used error class for signalling an optional dependency was not found.""" + + +def direct_transformers_import(path: str, file="__init__.py") -> ModuleType: + """Imports transformers directly + + Args: + path (`str`): The path to the source file + file (`str`, optional): The file to join with the path. Defaults to "__init__.py". + + Returns: + `ModuleType`: The resulting imported module + """ + name = "transformers" + location = os.path.join(path, file) + spec = importlib.util.spec_from_file_location(name, location, submodule_search_locations=[path]) + module = importlib.util.module_from_spec(spec) + spec.loader.exec_module(module) + module = sys.modules[name] + return module diff --git a/tests/pipelines/test_pipelines_common.py b/tests/pipelines/test_pipelines_common.py index 5f63caf6be..6c61909527 100644 --- a/tests/pipelines/test_pipelines_common.py +++ b/tests/pipelines/test_pipelines_common.py @@ -13,7 +13,6 @@ # limitations under the License. import copy -import importlib import logging import os import random @@ -53,7 +52,7 @@ from transformers.testing_utils import ( require_torch_or_tf, slow, ) -from transformers.utils import is_tf_available, is_torch_available +from transformers.utils import direct_transformers_import, is_tf_available, is_torch_available from transformers.utils import logging as transformers_logging @@ -69,14 +68,7 @@ PATH_TO_TRANSFORMERS = os.path.join(Path(__file__).parent.parent.parent, "src/tr # Dynamically import the Transformers module to grab the attribute classes of the processor form their names. -spec = importlib.util.spec_from_file_location( - "transformers", - os.path.join(PATH_TO_TRANSFORMERS, "__init__.py"), - submodule_search_locations=[PATH_TO_TRANSFORMERS], -) -transformers_module = importlib.util.module_from_spec(spec) -spec.loader.exec_module(transformers_module) -transformers_module = sys.modules["transformers"] +transformers_module = direct_transformers_import(PATH_TO_TRANSFORMERS) class ANY: diff --git a/utils/check_config_attributes.py b/utils/check_config_attributes.py index d60e393dfb..93948cc2b9 100644 --- a/utils/check_config_attributes.py +++ b/utils/check_config_attributes.py @@ -13,11 +13,11 @@ # See the License for the specific language governing permissions and # limitations under the License. -import importlib import inspect import os import re -import sys + +from transformers.utils import direct_transformers_import # All paths are set with the intent you should run this script from the root of the repo with the command @@ -26,14 +26,7 @@ PATH_TO_TRANSFORMERS = "src/transformers" # This is to make sure the transformers module imported is the one in the repo. -spec = importlib.util.spec_from_file_location( - "transformers", - os.path.join(PATH_TO_TRANSFORMERS, "__init__.py"), - submodule_search_locations=[PATH_TO_TRANSFORMERS], -) -transformers = importlib.util.module_from_spec(spec) -spec.loader.exec_module(transformers) -transformers = sys.modules["transformers"] +transformers = direct_transformers_import(PATH_TO_TRANSFORMERS) CONFIG_MAPPING = transformers.models.auto.configuration_auto.CONFIG_MAPPING diff --git a/utils/check_config_docstrings.py b/utils/check_config_docstrings.py index de1c7cc036..8c00574806 100644 --- a/utils/check_config_docstrings.py +++ b/utils/check_config_docstrings.py @@ -13,11 +13,10 @@ # See the License for the specific language governing permissions and # limitations under the License. -import importlib import inspect -import os import re -import sys + +from transformers.utils import direct_transformers_import # All paths are set with the intent you should run this script from the root of the repo with the command @@ -26,14 +25,7 @@ PATH_TO_TRANSFORMERS = "src/transformers" # This is to make sure the transformers module imported is the one in the repo. -spec = importlib.util.spec_from_file_location( - "transformers", - os.path.join(PATH_TO_TRANSFORMERS, "__init__.py"), - submodule_search_locations=[PATH_TO_TRANSFORMERS], -) -transformers = importlib.util.module_from_spec(spec) -spec.loader.exec_module(transformers) -transformers = sys.modules["transformers"] +transformers = direct_transformers_import(PATH_TO_TRANSFORMERS) CONFIG_MAPPING = transformers.models.auto.configuration_auto.CONFIG_MAPPING diff --git a/utils/check_copies.py b/utils/check_copies.py index 54ca2d67b5..d32df3b870 100644 --- a/utils/check_copies.py +++ b/utils/check_copies.py @@ -15,14 +15,14 @@ import argparse import glob -import importlib.util import os import re -import sys import black from doc_builder.style_doc import style_docstrings_in_code +from transformers.utils import direct_transformers_import + # All paths are set with the intent you should run this script from the root of the repo with the command # python utils/check_copies.py @@ -99,14 +99,7 @@ LOCALIZED_READMES = { # This is to make sure the transformers module imported is the one in the repo. -spec = importlib.util.spec_from_file_location( - "transformers", - os.path.join(TRANSFORMERS_PATH, "__init__.py"), - submodule_search_locations=[TRANSFORMERS_PATH], -) -transformers_module = importlib.util.module_from_spec(spec) -spec.loader.exec_module(transformers_module) -transformers_module = sys.modules["transformers"] +transformers_module = direct_transformers_import(TRANSFORMERS_PATH) def _should_continue(line, indent): diff --git a/utils/check_inits.py b/utils/check_inits.py index e5044487f8..d90db7733d 100644 --- a/utils/check_inits.py +++ b/utils/check_inits.py @@ -14,10 +14,8 @@ # limitations under the License. import collections -import importlib.util import os import re -import sys from pathlib import Path @@ -275,14 +273,9 @@ IGNORE_SUBMODULES = [ def check_submodules(): # This is to make sure the transformers module imported is the one in the repo. - spec = importlib.util.spec_from_file_location( - "transformers", - os.path.join(PATH_TO_TRANSFORMERS, "__init__.py"), - submodule_search_locations=[PATH_TO_TRANSFORMERS], - ) - transformers = importlib.util.module_from_spec(spec) - spec.loader.exec_module(transformers) - transformers = sys.modules["transformers"] + from transformers.utils import direct_transformers_import + + transformers = direct_transformers_import(PATH_TO_TRANSFORMERS) module_not_registered = [ module diff --git a/utils/check_repo.py b/utils/check_repo.py index 641baee93e..2590cfab7f 100644 --- a/utils/check_repo.py +++ b/utils/check_repo.py @@ -13,11 +13,9 @@ # See the License for the specific language governing permissions and # limitations under the License. -import importlib import inspect import os import re -import sys import warnings from collections import OrderedDict from difflib import get_close_matches @@ -25,7 +23,7 @@ from pathlib import Path from transformers import is_flax_available, is_tf_available, is_torch_available from transformers.models.auto import get_values -from transformers.utils import ENV_VARS_TRUE_VALUES +from transformers.utils import ENV_VARS_TRUE_VALUES, direct_transformers_import # All paths are set with the intent you should run this script from the root of the repo with the command @@ -307,14 +305,7 @@ MODEL_TYPE_TO_DOC_MAPPING = OrderedDict( # This is to make sure the transformers module imported is the one in the repo. -spec = importlib.util.spec_from_file_location( - "transformers", - os.path.join(PATH_TO_TRANSFORMERS, "__init__.py"), - submodule_search_locations=[PATH_TO_TRANSFORMERS], -) -transformers = importlib.util.module_from_spec(spec) -spec.loader.exec_module(transformers) -transformers = sys.modules["transformers"] +transformers = direct_transformers_import(PATH_TO_TRANSFORMERS) def check_model_list(): diff --git a/utils/check_table.py b/utils/check_table.py index 3307209b66..e7e31cfee3 100644 --- a/utils/check_table.py +++ b/utils/check_table.py @@ -15,10 +15,10 @@ import argparse import collections -import importlib.util import os import re -import sys + +from transformers.utils import direct_transformers_import # All paths are set with the intent you should run this script from the root of the repo with the command @@ -64,14 +64,7 @@ _re_pt_models = re.compile(r"(.*)(?:Model|Encoder|Decoder|ForConditionalGenerati # This is to make sure the transformers module imported is the one in the repo. -spec = importlib.util.spec_from_file_location( - "transformers", - os.path.join(TRANSFORMERS_PATH, "__init__.py"), - submodule_search_locations=[TRANSFORMERS_PATH], -) -transformers_module = importlib.util.module_from_spec(spec) -spec.loader.exec_module(transformers_module) -transformers_module = sys.modules["transformers"] +transformers_module = direct_transformers_import(TRANSFORMERS_PATH) # Thanks to https://stackoverflow.com/questions/29916065/how-to-do-camelcase-split-in-python diff --git a/utils/check_task_guides.py b/utils/check_task_guides.py index b7975cc4a2..c48b7ca16c 100644 --- a/utils/check_task_guides.py +++ b/utils/check_task_guides.py @@ -14,9 +14,9 @@ # limitations under the License. import argparse -import importlib.util import os -import sys + +from transformers.utils import direct_transformers_import # All paths are set with the intent you should run this script from the root of the repo with the command @@ -52,14 +52,7 @@ def _find_text_in_file(filename, start_prompt, end_prompt): # This is to make sure the transformers module imported is the one in the repo. -spec = importlib.util.spec_from_file_location( - "transformers", - os.path.join(TRANSFORMERS_PATH, "__init__.py"), - submodule_search_locations=[TRANSFORMERS_PATH], -) -transformers_module = importlib.util.module_from_spec(spec) -spec.loader.exec_module(transformers_module) -transformers_module = sys.modules["transformers"] +transformers_module = direct_transformers_import(TRANSFORMERS_PATH) TASK_GUIDE_TO_MODELS = { "asr.mdx": transformers_module.models.auto.modeling_auto.MODEL_FOR_CTC_MAPPING_NAMES, diff --git a/utils/update_metadata.py b/utils/update_metadata.py index e6bc4d8593..6aeb767375 100644 --- a/utils/update_metadata.py +++ b/utils/update_metadata.py @@ -15,16 +15,16 @@ import argparse import collections -import importlib.util import os import re -import sys import tempfile import pandas as pd from datasets import Dataset from huggingface_hub import Repository +from transformers.utils import direct_transformers_import + # All paths are set with the intent you should run this script from the root of the repo with the command # python utils/update_metadata.py @@ -32,14 +32,7 @@ TRANSFORMERS_PATH = "src/transformers" # This is to make sure the transformers module imported is the one in the repo. -spec = importlib.util.spec_from_file_location( - "transformers", - os.path.join(TRANSFORMERS_PATH, "__init__.py"), - submodule_search_locations=[TRANSFORMERS_PATH], -) -transformers_module = importlib.util.module_from_spec(spec) -spec.loader.exec_module(transformers_module) -transformers_module = sys.modules["transformers"] +transformers_module = direct_transformers_import(TRANSFORMERS_PATH) # Regexes that match TF/Flax/PT model names.