modular_model_converter bugfix on assignments (#35642)
* added bugfix in modular converter to keep modular assignments for docstrings, expected outputs etc. * revert stracoder2 docstring copying, add forward in EMU3 to enable docstring assingment, remove verbatim assignments in modular converter * added _FOR_DOC in assignments to keep, corrected wrong checkpoint name in ijepa's configuration
This commit is contained in:
@@ -268,7 +268,7 @@ def merge_docstrings(original_docstring, updated_docstring):
|
||||
updated_docstring = "".join(
|
||||
[
|
||||
parts[0].rstrip(" \n") + new_parts[0],
|
||||
f"\n{original_level*' '}```",
|
||||
f"\n{original_level * ' '}```",
|
||||
parts[1],
|
||||
"```",
|
||||
parts[2],
|
||||
@@ -515,10 +515,8 @@ def find_all_dependencies(
|
||||
return all_dependencies_with_parent
|
||||
|
||||
|
||||
# These top-level variables will always use the value in the `modular_xxx.py` file
|
||||
ASSIGNMENTS_TO_KEEP = {
|
||||
"_CHECKPOINT_FOR_DOC",
|
||||
}
|
||||
# Top-level variables that match the following patterns will always use the value in the `modular_xxx.py` file
|
||||
ASSIGNMENTS_REGEX_TO_KEEP = [r"_CHECKPOINT", r"_EXPECTED", r"_FOR_DOC"]
|
||||
|
||||
|
||||
class ClassDependencyMapper(CSTVisitor):
|
||||
@@ -828,12 +826,14 @@ class ModelFileMapper(ModuleMapper):
|
||||
def _merge_assignments(self, assignments: dict[str, cst.CSTNode], object_mapping: dict[str, set]):
|
||||
"""Update the global nodes with the assignment from the modular file.
|
||||
|
||||
Merging rule: if any assignment with the same name was redefined in the modular, we use it and its dependencies ONLY if it is
|
||||
in `ASSIGNMENTS_TO_KEEP`. Otherwise, we use the original value and dependencies. This rule was chosen to avoid having to rewrite the
|
||||
Merging rule: if any assignment with the same name was redefined in the modular, we use it and its dependencies ONLY if it matches
|
||||
a pattern in `ASSIGNMENTS_REGEX_TO_KEEP`. Otherwise, we use the original value and dependencies. This rule was chosen to avoid having to rewrite the
|
||||
big docstrings.
|
||||
"""
|
||||
for assignment, node in assignments.items():
|
||||
if assignment in ASSIGNMENTS_TO_KEEP or assignment not in self.assignments:
|
||||
should_keep = any(re.search(pattern, assignment) for pattern in ASSIGNMENTS_REGEX_TO_KEEP)
|
||||
|
||||
if should_keep or assignment not in self.assignments:
|
||||
self.assignments[assignment] = node
|
||||
if assignment in object_mapping:
|
||||
self.object_dependency_mapping[assignment] = object_mapping[assignment]
|
||||
@@ -1404,7 +1404,7 @@ class ModularFileMapper(ModuleMapper):
|
||||
]
|
||||
if len(modeling_bases) > 1:
|
||||
raise ValueError(
|
||||
f"{class_name} was defined with more than 1 model-specific super class. This is unsupported. We found {*modeling_bases,}."
|
||||
f"{class_name} was defined with more than 1 model-specific super class. This is unsupported. We found {(*modeling_bases,)}."
|
||||
)
|
||||
if len(modeling_bases) == 1:
|
||||
filename = self.model_specific_imported_objects[modeling_bases[0]]
|
||||
@@ -1432,7 +1432,7 @@ class ModularFileMapper(ModuleMapper):
|
||||
if final_name != cased_default_name and has_prefix_collision:
|
||||
if len(prefixes_counter) > 1:
|
||||
logger.warning(
|
||||
f"We detected multiple prefix names when inheriting from {file}: {*set(prefixes_counter),}. However, the "
|
||||
f"We detected multiple prefix names when inheriting from {file}: {(*set(prefixes_counter),)}. However, the "
|
||||
f"most used one, '{final_name}', is already present in the source file and will likely cause consistency "
|
||||
f"issues. For this reason we fallback to the default prefix '{cased_default_name}' when grabbing args "
|
||||
"and dependencies. Make sure to subclass the intermediate classes with the prefix you want (if different "
|
||||
@@ -1448,7 +1448,7 @@ class ModularFileMapper(ModuleMapper):
|
||||
final_name = cased_default_name
|
||||
elif len(prefixes_counter) > 1:
|
||||
logger.warning(
|
||||
f"We detected multiple prefix names when inheriting from {file}: {*set(prefixes_counter),}. We will only "
|
||||
f"We detected multiple prefix names when inheriting from {file}: {(*set(prefixes_counter),)}. We will only "
|
||||
f"use the most used '{final_name}' prefix when grabbing args and dependencies. Make sure to subclass the "
|
||||
f"intermediate classes with the prefix you want (if different from '{final_name}') or use a single prefix "
|
||||
"in all the modular (best)."
|
||||
|
||||
Reference in New Issue
Block a user