Fix fix_and_overwrite mode of utils/check_docstring.py (#39369)

* Fix a bug in the fix mode of check_docstring
This commit is contained in:
Manuel de Prada Corral
2025-08-06 19:37:25 +02:00
committed by GitHub
parent 6902ffa505
commit cf243a1bf8

View File

@@ -823,6 +823,7 @@ def match_docstring_with_signature(obj: Any) -> Optional[tuple[str, str]]:
except OSError: except OSError:
source = [] source = []
# Find the line where the docstring starts
idx = 0 idx = 0
while idx < len(source) and '"""' not in source[idx]: while idx < len(source) and '"""' not in source[idx]:
idx += 1 idx += 1
@@ -830,9 +831,11 @@ def match_docstring_with_signature(obj: Any) -> Optional[tuple[str, str]]:
ignore_order = False ignore_order = False
if idx < len(source): if idx < len(source):
line_before_docstring = source[idx - 1] line_before_docstring = source[idx - 1]
# Match '# no-format' (allowing surrounding whitespace)
if re.search(r"^\s*#\s*no-format\s*$", line_before_docstring): if re.search(r"^\s*#\s*no-format\s*$", line_before_docstring):
# This object is ignored # This object is ignored by the auto-docstring tool
return return
# Match '# ignore-order' (allowing surrounding whitespace)
elif re.search(r"^\s*#\s*ignore-order\s*$", line_before_docstring): elif re.search(r"^\s*#\s*ignore-order\s*$", line_before_docstring):
ignore_order = True ignore_order = True
@@ -959,14 +962,15 @@ def fix_docstring(obj: Any, old_doc_args: str, new_doc_args: str):
idx -= 1 idx -= 1
idx += 1 idx += 1
if "".join(source[start_idx:idx])[:-1] != old_doc_args: # `old_doc_args` is built from `obj.__doc__`, which may have
# different indentation than the raw source from `inspect.getsourcelines`.
# We use `inspect.cleandoc` to remove indentation uniformly from both
# strings before comparing them.
source_args_as_str = "".join(source[start_idx:idx])
if inspect.cleandoc(source_args_as_str) != inspect.cleandoc(old_doc_args):
# Args are not fully defined in the docstring of this object # Args are not fully defined in the docstring of this object
# This can happen due to a mismatch in indentation calculation where the docstring parsing
# in match_docstring_with_signature uses obj.__doc__.split("\n") while here we use
# inspect.getsourcelines(obj) which can have different line endings or indentation.
# See https://github.com/huggingface/transformers/pull/38915/files#r2200675302 for more details.
obj_file = find_source_file(obj) obj_file = find_source_file(obj)
actual_args_section = "".join(source[start_idx:idx])[:-1] actual_args_section = source_args_as_str.rstrip()
raise ValueError( raise ValueError(
f"Cannot fix docstring of {obj.__name__} in {obj_file} because the argument section in the source code " f"Cannot fix docstring of {obj.__name__} in {obj_file} because the argument section in the source code "
f"does not match the expected format. This usually happens when:\n" f"does not match the expected format. This usually happens when:\n"
@@ -983,6 +987,10 @@ def fix_docstring(obj: Any, old_doc_args: str, new_doc_args: str):
# Replace content # Replace content
lines = content.split("\n") lines = content.split("\n")
prev_line_indentation = find_indent(lines[line_number + start_idx - 2])
# Now increase the indentation of every line in new_doc_args by prev_line_indentation
new_doc_args = "\n".join([f"{' ' * prev_line_indentation}{line}" for line in new_doc_args.split("\n")])
lines = lines[: line_number + start_idx - 1] + [new_doc_args] + lines[line_number + idx - 1 :] lines = lines[: line_number + start_idx - 1] + [new_doc_args] + lines[line_number + idx - 1 :]
print(f"Fixing the docstring of {obj.__name__} in {obj_file}.") print(f"Fixing the docstring of {obj.__name__} in {obj_file}.")