[AutoDocstring] Based on inspect parsing of the signature (#33771)
* delete common docstring * nit * updates * push * fixup * move stuff around fixup * no need for dataclas * damn nice modular * add auto class docstring * style * modular update * import autodocstring * fixup * maybe add original doc! * more cleanup * remove class do cas well * update * nits * more celanup * fix * wups * small check * updatez * some fixes * fix doc * update * nits * try? * nit * some updates * a little bit better * where ever we did not have help we are not really adding it! * revert llama config * small fixes and small tests * test * fixup * more fix-copies * updates * updates * fix doc building * style * small fixes * nits * fix-copies * fix merge issues faster * fix merge conf * nits jamba * ? * working autodoc for model class and forward except returns and example * support return section and unpack kwargs description * nits and cleanup * fix-copies * fix-copies * nits * Add support for llava-like models * fixup * add class args subset support * add examples inferred from automodel/pipelines * update ruff * autodocstring for Aria, Albert + fixups * Fix empty return blocks * fix copies * fix copies * add autodoc for all fast image processors + align, altclip * fix copies * add auto_doc for audio_spectrogram, auto_former, bark, bamba * Drastically improve speed + add bart beit bert * add autodoc to all bert-like models * Fix broken doc * fix copies * fix auto_docstring after merge * add autodoc to models * add models * add models * add models and improve support for optional, and custom shape in args docstring * update fast image processors * refactor auto_method_docstring in args_doc * add models and fix docstring parsing * add models * add models * remove debugging * add models * add fix_auto_docstrings and improve args_docs * add support for additional_info in args docstring * refactor (almost) all models * fix check docstring * fix -copies * fill in all missing docstrings * fix copies * fix qwen3 moe docstring * add documentation * add back 
labels * update docs and fix can_return_tuple in modular files * fix LongformerForMaskedLM docstring * add auto_docstring to _toctree * remove auto_docstring tests temporarily * fix copyrights new files * fix can_return_tuple granite hybrid * fix fast beit * Fix empty config doc * add support for COMMON_CUSTOM_ARGS in check_docstrings and add missing models * fix code block not closed flava * fix can_return_tuple sam hq * Fix Flaubert dataclass --------- Co-authored-by: yonigozlan <yoni.gozlan@huggingface.co> Co-authored-by: Yoni Gozlan <74535834+yonigozlan@users.noreply.github.com>
This commit is contained in:
@@ -258,7 +258,7 @@ def get_docstring_indent(docstring):
|
||||
return 0
|
||||
|
||||
|
||||
def is_full_docstring(new_docstring: str) -> bool:
|
||||
def is_full_docstring(original_docstring: str, new_docstring: str, original_level: int) -> bool:
|
||||
"""Check if `new_docstring` is a full docstring, or if it is only part of a docstring that should then
|
||||
be merged with the existing old one.
|
||||
"""
|
||||
@@ -267,6 +267,17 @@ def is_full_docstring(new_docstring: str) -> bool:
|
||||
# The docstring contains Args definition, so it is self-contained
|
||||
if re.search(r"\n\s*Args:\n", new_docstring):
|
||||
return True
|
||||
elif re.search(r"\n\s*Args:\n", original_docstring):
|
||||
return False
|
||||
# Check whether the docstring contains argument descriptions (meaning it is self-contained):
|
||||
param_pattern = re.compile(
|
||||
# |--- Group 1 ---|| Group 2 ||- Group 3 -||---------- Group 4 ----------|
|
||||
rf"^\s{{0,{original_level}}}(\w+)\s*\(\s*([^, \)]*)(\s*.*?)\s*\)\s*:\s*((?:(?!\n^\s{{0,{original_level}}}\w+\s*\().)*)",
|
||||
re.DOTALL | re.MULTILINE,
|
||||
)
|
||||
match_object = param_pattern.search(new_docstring)
|
||||
if match_object is not None:
|
||||
return True
|
||||
# If it contains Returns, but starts with text indented by an additional 4 spaces, it is self-contained
|
||||
# (this is the scenario when using `@add_start_docstrings_to_model_forward`, but adding more args to docstring)
|
||||
match_object = re.search(r"\n([^\S\n]*)Returns:\n", new_docstring)
|
||||
@@ -280,7 +291,7 @@ def is_full_docstring(new_docstring: str) -> bool:
|
||||
|
||||
def merge_docstrings(original_docstring, updated_docstring):
|
||||
original_level = get_docstring_indent(original_docstring)
|
||||
if not is_full_docstring(updated_docstring):
|
||||
if not is_full_docstring(original_docstring, updated_docstring, original_level):
|
||||
# Split the docstring at the example section, assuming `"""` is used to define the docstring
|
||||
parts = original_docstring.split("```")
|
||||
if "```" in updated_docstring and len(parts) > 1:
|
||||
@@ -291,13 +302,22 @@ def merge_docstrings(original_docstring, updated_docstring):
|
||||
parts[1] = new_parts[1]
|
||||
updated_docstring = "".join(
|
||||
[
|
||||
parts[0].rstrip(" \n") + new_parts[0],
|
||||
f"\n{original_level * ' '}```",
|
||||
parts[1],
|
||||
"```",
|
||||
parts[2],
|
||||
]
|
||||
)
|
||||
docstring_opening, original_start_docstring = parts[0].rstrip(" \n").split('"""')[:2]
|
||||
new_start_docstring = new_parts[0].rstrip(" \n")
|
||||
docstring_opening += '"""'
|
||||
if new_start_docstring.startswith(original_start_docstring):
|
||||
updated_docstring = new_start_docstring + "\n" + updated_docstring
|
||||
elif original_start_docstring.endswith(new_start_docstring):
|
||||
updated_docstring = original_start_docstring + "\n" + updated_docstring
|
||||
else:
|
||||
updated_docstring = original_start_docstring + "\n" + new_start_docstring + "\n" + updated_docstring
|
||||
updated_docstring = docstring_opening + updated_docstring
|
||||
elif updated_docstring not in original_docstring:
|
||||
# add tabulation if we are at the lowest level.
|
||||
if re.search(r"\n\s*.*\(.*\)\:\n\s*\w", updated_docstring):
|
||||
|
||||
Reference in New Issue
Block a user