From cf243a1bf85e2197dac2cfc1f9b23c0e99493fa2 Mon Sep 17 00:00:00 2001 From: Manuel de Prada Corral <6536835+manueldeprada@users.noreply.github.com> Date: Wed, 6 Aug 2025 19:37:25 +0200 Subject: [PATCH] Fix `fix_and_overwrite` mode of `utils/check_docstrings.py` (#39369) * bug in fix mode of check_docstring --- utils/check_docstrings.py | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/utils/check_docstrings.py b/utils/check_docstrings.py index 23ba44958d..8878491e4e 100644 --- a/utils/check_docstrings.py +++ b/utils/check_docstrings.py @@ -823,6 +823,7 @@ def match_docstring_with_signature(obj: Any) -> Optional[tuple[str, str]]: except OSError: source = [] + # Find the line where the docstring starts idx = 0 while idx < len(source) and '"""' not in source[idx]: idx += 1 @@ -830,9 +831,11 @@ def match_docstring_with_signature(obj: Any) -> Optional[tuple[str, str]]: ignore_order = False if idx < len(source): line_before_docstring = source[idx - 1] + # Match '# no-format' (allowing surrounding whitespaces) if re.search(r"^\s*#\s*no-format\s*$", line_before_docstring): - # This object is ignored + # This object is ignored by the auto-docstring tool return + # Match '# ignore-order' (allowing surrounding whitespaces) elif re.search(r"^\s*#\s*ignore-order\s*$", line_before_docstring): ignore_order = True @@ -959,14 +962,15 @@ def fix_docstring(obj: Any, old_doc_args: str, new_doc_args: str): idx -= 1 idx += 1 - if "".join(source[start_idx:idx])[:-1] != old_doc_args: + # `old_doc_args` is built from `obj.__doc__`, which may have + # different indentation than the raw source from `inspect.getsourcelines`. + # We use `inspect.cleandoc` to remove indentation uniformly from both + # strings before comparing them. 
+ source_args_as_str = "".join(source[start_idx:idx]) + if inspect.cleandoc(source_args_as_str) != inspect.cleandoc(old_doc_args): # Args are not fully defined in the docstring of this object - # This can happen due to a mismatch in indentation calculation where the docstring parsing - # in match_docstring_with_signature uses obj.__doc__.split("\n") while here we use - # inspect.getsourcelines(obj) which can have different line endings or indentation. - # See https://github.com/huggingface/transformers/pull/38915/files#r2200675302 for more details. obj_file = find_source_file(obj) - actual_args_section = "".join(source[start_idx:idx])[:-1] + actual_args_section = source_args_as_str.rstrip() raise ValueError( f"Cannot fix docstring of {obj.__name__} in {obj_file} because the argument section in the source code " f"does not match the expected format. This usually happens when:\n" @@ -983,6 +987,10 @@ def fix_docstring(obj: Any, old_doc_args: str, new_doc_args: str): # Replace content lines = content.split("\n") + prev_line_indentation = find_indent(lines[line_number + start_idx - 2]) + # Now increase the indentation of every line in new_doc_args by prev_line_indentation + new_doc_args = "\n".join([f"{' ' * prev_line_indentation}{line}" for line in new_doc_args.split("\n")]) + lines = lines[: line_number + start_idx - 1] + [new_doc_args] + lines[line_number + idx - 1 :] print(f"Fixing the docstring of {obj.__name__} in {obj_file}.")