[modular] speed up check_modular_conversion with multiprocessing (#37456)

* Change topological sort to return level-based output (lists of lists)

* Update main for modular converter

* Update test

* update check_modular_conversion

* Update gitignore

* Fix missing conversion for glm4

* Update

* Fix error msg

* Fixup

* fix docstring

* update docs

* Add comment

* delete qwen3_moe
This commit is contained in:
Pavel Iakubovskii
2025-07-10 19:07:59 +01:00
committed by GitHub
parent 571a8c2131
commit fe1a5b73e6
5 changed files with 125 additions and 45 deletions

View File

@@ -3,7 +3,19 @@ from collections import defaultdict
# Function to perform topological sorting
def topological_sort(dependencies: dict):
def topological_sort(dependencies: dict) -> list[list[str]]:
"""Given the dependencies graph construct sorted list of list of modular files
For example, returned list of lists might be:
[
["../modular_llama.py", "../modular_gemma.py"], # level 0
["../modular_llama4.py", "../modular_gemma2.py"], # level 1
["../modular_glm4.py"], # level 2
]
which means llama and gemma do not depend on any other modular models, while llama4 and gemma2
depend on the models in the first list, and glm4 depends on the models in the second and (optionally) in the first list.
"""
# Nodes are the name of the models to convert (we only add those to the graph)
nodes = {node.rsplit("modular_", 1)[1].replace(".py", "") for node in dependencies.keys()}
# This will be a graph mapping each model to convert to the models that must be converted before it (i.e. its dependencies)
@@ -20,12 +32,12 @@ def topological_sort(dependencies: dict):
while len(graph) > 0:
# Find the nodes with 0 out-degree
leaf_nodes = {node for node in graph if len(graph[node]) == 0}
# Add them to the list
sorting_list += list(leaf_nodes)
# Add them to the list as the next level
sorting_list.append([name_mapping[node] for node in leaf_nodes])
# Remove the leaves from the graph (and from the deps of other nodes)
graph = {node: deps - leaf_nodes for node, deps in graph.items() if node not in leaf_nodes}
return [name_mapping[x] for x in sorting_list]
return sorting_list
# Function to extract class and import info from a file
@@ -63,7 +75,16 @@ def find_priority_list(py_files):
py_files: List of paths to the modular files
Returns:
A tuple with the ordered files (list) and their dependencies (dict)
Ordered list of lists of files and their dependencies (dict)
For example, ordered_files might be:
[
["../modular_llama.py", "../modular_gemma.py"], # level 0
["../modular_llama4.py", "../modular_gemma2.py"], # level 1
["../modular_glm4.py"], # level 2
]
which means llama and gemma do not depend on any other modular models, while llama4 and gemma2
depend on the models in the first list, and glm4 depends on the models in the second and (optionally) in the first list.
"""
dependencies = map_dependencies(py_files)
ordered_files = topological_sort(dependencies)