Simplify soft dependencies and update the dummy-creation process (#36827)

* Reverse dependency map shouldn't be created when test_all is set

* [test_all] Remove dummies

* Modular fixes

* Update utils/check_repo.py

Co-authored-by: Pablo Montalvo <39954772+molbap@users.noreply.github.com>

* [test_all] Better docs

* [test_all] Update src/transformers/commands/chat.py

Co-authored-by: Joao Gante <joaofranciscocardosogante@gmail.com>

* [test_all] Remove deprecated AdaptiveEmbeddings from the tests

* [test_all] Doc builder

* [test_all] is_dummy

* [test_all] Import utils

* [test_all] Doc building should not require all deps

---------

Co-authored-by: Pablo Montalvo <39954772+molbap@users.noreply.github.com>
Co-authored-by: Joao Gante <joaofranciscocardosogante@gmail.com>
This commit is contained in:
Lysandre Debut
2025-04-11 11:08:36 +02:00
committed by GitHub
parent 931126b929
commit 54a123f068
295 changed files with 2167 additions and 27702 deletions

View File

@@ -279,25 +279,6 @@ def analyze_results(import_dict_objects: Dict[str, List[str]], type_hint_objects
return errors
def check_all_inits():
"""
Check all inits in the transformers repo and raise an error if at least one does not define the same objects in
both halves.

Raises:
`ValueError`: If at least one init reports errors. The message joins the per-file reports, separated by blank
lines.
"""
failures = []
# Walk every directory under PATH_TO_TRANSFORMERS; only directories that contain an `__init__.py` are inspected.
for root, _, files in os.walk(PATH_TO_TRANSFORMERS):
if "__init__.py" in files:
fname = os.path.join(root, "__init__.py")
objects = parse_init(fname)
# A `None` result from `parse_init` means there is nothing to compare for this init, so it is skipped.
if objects is not None:
errors = analyze_results(*objects)
if len(errors) > 0:
# Prefix the first error with the file name so the report identifies which init is at fault.
errors[0] = f"Problem in {fname}, both halves do not define the same objects.\n{errors[0]}"
failures.append("\n".join(errors))
# Failures are accumulated and raised together so that a single run reports every faulty init.
if len(failures) > 0:
raise ValueError("\n\n".join(failures))
def get_transformers_submodules() -> List[str]:
"""
Returns the list of Transformers submodules.
@@ -370,5 +351,5 @@ def check_submodules():
if __name__ == "__main__":
check_all_inits()
check_submodules()
# This entire file needs an overhaul
pass

View File

@@ -38,11 +38,12 @@ import types
import warnings
from collections import OrderedDict
from difflib import get_close_matches
from importlib.machinery import ModuleSpec
from pathlib import Path
from typing import List, Tuple
from transformers import is_flax_available, is_tf_available, is_torch_available
from transformers.models.auto import get_values
from transformers.models.auto.auto_factory import get_values
from transformers.models.auto.configuration_auto import CONFIG_MAPPING_NAMES
from transformers.models.auto.feature_extraction_auto import FEATURE_EXTRACTOR_MAPPING_NAMES
from transformers.models.auto.image_processing_auto import IMAGE_PROCESSOR_MAPPING_NAMES
@@ -412,6 +413,8 @@ def check_model_list():
Checks the model listed as subfolders of `models` match the models available in `transformers.models`.
"""
# Get the models from the directory structure of `src/transformers/models/`
import transformers as tfrs
models_dir = os.path.join(PATH_TO_TRANSFORMERS, "models")
_models = []
for model in os.listdir(models_dir):
@@ -419,10 +422,15 @@ def check_model_list():
continue
model_dir = os.path.join(models_dir, model)
if os.path.isdir(model_dir) and "__init__.py" in os.listdir(model_dir):
# If the init is empty, and there are only two files, it's likely that there's just a conversion
# script. Those should not be in the init.
if (Path(model_dir) / "__init__.py").read_text().strip() == "":
continue
_models.append(model)
# Get the models in the submodule `transformers.models`
models = [model for model in dir(transformers.models) if not model.startswith("__")]
models = [model for model in dir(tfrs.models) if not model.startswith("__")]
missing_models = sorted(set(_models).difference(models))
if missing_models:
@@ -454,7 +462,7 @@ def get_model_modules() -> List[str]:
modules = []
for model in dir(transformers.models):
# There are some magic dunder attributes in the dir, we ignore them
if model == "deprecated" or model.startswith("__"):
if "deprecated" in model or model.startswith("__"):
continue
model_module = getattr(transformers.models, model)
@@ -836,6 +844,8 @@ def check_objects_being_equally_in_main_init():
failures = []
for attr in attrs:
obj = getattr(transformers, attr)
if hasattr(obj, "__module__") and isinstance(obj.__module__, ModuleSpec):
continue
if not hasattr(obj, "__module__") or "models.deprecated" in obj.__module__:
continue
@@ -1010,6 +1020,7 @@ UNDOCUMENTED_OBJECTS = [
"AltRobertaModel", # Internal module
"VitPoseBackbone", # Internal module
"VitPoseBackboneConfig", # Internal module
"get_values", # Internal object
]
# This list should be empty. Objects in it should get their own doc page.
@@ -1053,6 +1064,7 @@ def ignore_undocumented(name: str) -> bool:
or name.endswith("Layer")
or name.endswith("Embeddings")
or name.endswith("Attention")
or name.endswith("OnnxConfig")
):
return True
# Submodules are not documented.

View File

@@ -953,7 +953,6 @@ src/transformers/utils/doc.py
src/transformers/utils/dummy_detectron2_objects.py
src/transformers/utils/dummy_essentia_and_librosa_and_pretty_midi_and_scipy_and_torch_objects.py
src/transformers/utils/dummy_flax_objects.py
src/transformers/utils/dummy_keras_nlp_objects.py
src/transformers/utils/dummy_music_objects.py
src/transformers/utils/dummy_pt_objects.py
src/transformers/utils/dummy_sentencepiece_and_tokenizers_objects.py

View File

@@ -741,10 +741,13 @@ def get_module_dependencies(module_fname: str, cache: Dict[str, List[str]] = Non
# Add imports via `define_import_structure`: since #35167, explicit imports have been removed from `__init__.py`
from transformers.utils.import_utils import define_import_structure
new_imported_modules_2 = define_import_structure(PATH_TO_REPO / module)
new_imported_modules_from_import_structure = define_import_structure(PATH_TO_REPO / module)
for mapping in new_imported_modules_2.values():
for mapping in new_imported_modules_from_import_structure.values():
for _module, _imports in mapping.items():
# Import Structure returns _module keys as import paths rather than local paths
# We replace with os.path.sep so that it's Windows-compatible
_module = _module.replace(".", os.path.sep)
_module = module.replace("__init__.py", f"{_module}.py")
new_imported_modules.append((_module, list(_imports)))
@@ -1038,18 +1041,18 @@ def infer_tests_to_run(
"""
if not test_all:
modified_files = get_modified_python_files(diff_with_last_commit=diff_with_last_commit)
reverse_map = create_reverse_dependency_map()
impacted_files = modified_files.copy()
for f in modified_files:
if f in reverse_map:
impacted_files.extend(reverse_map[f])
else:
modified_files = [str(k) for k in PATH_TO_TESTS.glob("*/*") if str(k).endswith(".py") and "test_" in str(k)]
impacted_files = modified_files = [
str(k) for k in PATH_TO_TESTS.glob("*/*") if str(k).endswith(".py") and "test_" in str(k)
]
print("\n### test_all is TRUE, FETCHING ALL FILES###\n")
print(f"\n### MODIFIED FILES ###\n{_print_list(modified_files)}")
# Create the map that will give us all impacted modules.
reverse_map = create_reverse_dependency_map()
impacted_files = modified_files.copy()
for f in modified_files:
if f in reverse_map:
impacted_files.extend(reverse_map[f])
# Remove duplicates
impacted_files = sorted(set(impacted_files))
print(f"\n### IMPACTED FILES ###\n{_print_list(impacted_files)}")