Doc styler examples (#14953)
* Fix bad examples * Add black formatting to style_doc * Use first nonempty line * Put it at the right place * Don't add spaces to empty lines * Better templates * Deal with triple quotes in docstrings * Result of style_doc * Enable mdx treatment and fix code examples in MDXs * Result of doc styler on doc source files * Last fixes * Break copy from
This commit is contained in:
@@ -19,6 +19,16 @@ import os
|
||||
import re
|
||||
import warnings
|
||||
|
||||
import black
|
||||
|
||||
|
||||
BLACK_AVOID_PATTERNS = {
|
||||
"===PT-TF-SPLIT===": "### PT-TF-SPLIT",
|
||||
"{processor_class}": "FakeProcessorClass",
|
||||
"{model_class}": "FakeModelClass",
|
||||
"{object_class}": "FakeObjectClass",
|
||||
}
|
||||
|
||||
|
||||
# Regexes
|
||||
# Re pattern that catches list introduction (with potential indent)
|
||||
@@ -50,6 +60,136 @@ def find_indent(line):
|
||||
return len(search.groups()[0])
|
||||
|
||||
|
||||
def parse_code_example(code_lines):
|
||||
"""
|
||||
Parses a code example
|
||||
|
||||
Args:
|
||||
code_lines (`List[str]`): The code lines to parse.
|
||||
max_len (`int`): The maximum lengh per line.
|
||||
|
||||
Returns:
|
||||
(List[`str`], List[`str`]): The list of code samples and the list of outputs.
|
||||
"""
|
||||
has_doctest = code_lines[0][:3] in DOCTEST_PROMPTS
|
||||
|
||||
code_samples = []
|
||||
outputs = []
|
||||
in_code = True
|
||||
current_bit = []
|
||||
|
||||
for line in code_lines:
|
||||
if in_code and has_doctest and not is_empty_line(line) and line[:3] not in DOCTEST_PROMPTS:
|
||||
code_sample = "\n".join(current_bit)
|
||||
code_samples.append(code_sample.strip())
|
||||
in_code = False
|
||||
current_bit = []
|
||||
elif not in_code and line[:3] in DOCTEST_PROMPTS:
|
||||
output = "\n".join(current_bit)
|
||||
outputs.append(output.strip())
|
||||
in_code = True
|
||||
current_bit = []
|
||||
|
||||
# Add the line without doctest prompt
|
||||
if line[:3] in DOCTEST_PROMPTS:
|
||||
line = line[4:]
|
||||
current_bit.append(line)
|
||||
|
||||
# Add last sample
|
||||
if in_code:
|
||||
code_sample = "\n".join(current_bit)
|
||||
code_samples.append(code_sample.strip())
|
||||
else:
|
||||
output = "\n".join(current_bit)
|
||||
outputs.append(output.strip())
|
||||
|
||||
return code_samples, outputs
|
||||
|
||||
|
||||
def format_code_example(code: str, max_len: int, in_docstring: bool = False):
|
||||
"""
|
||||
Format a code example using black. Will take into account the doctest syntax as well as any initial indentation in
|
||||
the code provided.
|
||||
|
||||
Args:
|
||||
code (`str`): The code example to format.
|
||||
max_len (`int`): The maximum lengh per line.
|
||||
in_docstring (`bool`, *optional*, defaults to `False`): Whether or not the code example is inside a docstring.
|
||||
|
||||
Returns:
|
||||
`str`: The formatted code.
|
||||
"""
|
||||
code_lines = code.split("\n")
|
||||
|
||||
# Find initial indent
|
||||
idx = 0
|
||||
while idx < len(code_lines) and is_empty_line(code_lines[idx]):
|
||||
idx += 1
|
||||
if idx >= len(code_lines):
|
||||
return "", ""
|
||||
indent = find_indent(code_lines[idx])
|
||||
|
||||
# Remove the initial indent for now, we will had it back after styling.
|
||||
# Note that l[indent:] works for empty lines
|
||||
code_lines = [l[indent:] for l in code_lines[idx:]]
|
||||
has_doctest = code_lines[0][:3] in DOCTEST_PROMPTS
|
||||
|
||||
code_samples, outputs = parse_code_example(code_lines)
|
||||
|
||||
# Let's blackify the code! We put everything in one big text to go faster.
|
||||
delimiter = "\n\n### New code sample ###\n"
|
||||
full_code = delimiter.join(code_samples)
|
||||
line_length = max_len - indent
|
||||
if has_doctest:
|
||||
line_length -= 4
|
||||
|
||||
for k, v in BLACK_AVOID_PATTERNS.items():
|
||||
full_code = full_code.replace(k, v)
|
||||
try:
|
||||
formatted_code = black.format_str(
|
||||
full_code, mode=black.FileMode([black.TargetVersion.PY37], line_length=line_length)
|
||||
)
|
||||
error = ""
|
||||
except Exception as e:
|
||||
formatted_code = full_code
|
||||
error = f"Code sample:\n{full_code}\n\nError message:\n{e}"
|
||||
|
||||
# Let's get back the formatted code samples
|
||||
for k, v in BLACK_AVOID_PATTERNS.items():
|
||||
formatted_code = formatted_code.replace(v, k)
|
||||
# Triple quotes will mess docstrings.
|
||||
if in_docstring:
|
||||
formatted_code = formatted_code.replace('"""', "'''")
|
||||
|
||||
code_samples = formatted_code.split(delimiter)
|
||||
# We can have one output less than code samples
|
||||
if len(outputs) == len(code_samples) - 1:
|
||||
outputs.append("")
|
||||
|
||||
formatted_lines = []
|
||||
for code_sample, output in zip(code_samples, outputs):
|
||||
# black may have added some new lines, we remove them
|
||||
code_sample = code_sample.strip()
|
||||
in_triple_quotes = False
|
||||
for line in code_sample.strip().split("\n"):
|
||||
if has_doctest and not is_empty_line(line):
|
||||
prefix = "... " if line.startswith(" ") or line in [")", "]", "}"] or in_triple_quotes else ">>> "
|
||||
else:
|
||||
prefix = ""
|
||||
indent_str = "" if is_empty_line(line) else (" " * indent)
|
||||
formatted_lines.append(indent_str + prefix + line)
|
||||
|
||||
if '"""' in line:
|
||||
in_triple_quotes = not in_triple_quotes
|
||||
|
||||
formatted_lines.extend([" " * indent + line for line in output.split("\n")])
|
||||
if not output.endswith("===PT-TF-SPLIT==="):
|
||||
formatted_lines.append("")
|
||||
|
||||
result = "\n".join(formatted_lines)
|
||||
return result.rstrip(), error
|
||||
|
||||
|
||||
def format_text(text, max_len, prefix="", min_indent=None):
|
||||
"""
|
||||
Format a text in the biggest lines possible with the constraint of a maximum length and an indentation.
|
||||
@@ -110,6 +250,7 @@ def style_docstring(docstring, max_len):
|
||||
in_code = False
|
||||
param_indent = -1
|
||||
prefix = ""
|
||||
black_errors = []
|
||||
|
||||
# Special case for docstrings that begin with continuation of Args with no Args block.
|
||||
idx = 0
|
||||
@@ -153,8 +294,10 @@ def style_docstring(docstring, max_len):
|
||||
current_indent = -1
|
||||
code = "\n".join(current_paragraph)
|
||||
if current_code in ["py", "python"]:
|
||||
new_lines.append(code)
|
||||
# new_lines.append(format_code_example(code, max_len))
|
||||
formatted_code, error = format_code_example(code, max_len, in_docstring=True)
|
||||
new_lines.append(formatted_code)
|
||||
if len(error) > 0:
|
||||
black_errors.append(error)
|
||||
else:
|
||||
new_lines.append(code)
|
||||
current_paragraph = None
|
||||
@@ -210,7 +353,7 @@ def style_docstring(docstring, max_len):
|
||||
paragraph = " ".join(current_paragraph)
|
||||
new_lines.append(format_text(paragraph, max_len, prefix=prefix, min_indent=current_indent))
|
||||
|
||||
return "\n".join(new_lines)
|
||||
return "\n".join(new_lines), "\n\n".join(black_errors)
|
||||
|
||||
|
||||
def style_file_docstrings(code_file, max_len=119, check_only=False):
|
||||
@@ -234,6 +377,8 @@ def style_file_docstrings(code_file, max_len=119, check_only=False):
|
||||
(s if i % 2 == 0 or _re_doc_ignore.search(splits[i - 1]) is not None else style_docstring(s, max_len=max_len))
|
||||
for i, s in enumerate(splits)
|
||||
]
|
||||
black_errors = "\n\n".join([s[1] for s in splits if isinstance(s, tuple) and len(s[1]) > 0])
|
||||
splits = [s[0] if isinstance(s, tuple) else s for s in splits]
|
||||
clean_code = '\"\"\"'.join(splits)
|
||||
# fmt: on
|
||||
|
||||
@@ -243,7 +388,7 @@ def style_file_docstrings(code_file, max_len=119, check_only=False):
|
||||
with open(code_file, "w", encoding="utf-8", newline="\n") as f:
|
||||
f.write(clean_code)
|
||||
|
||||
return diff
|
||||
return diff, black_errors
|
||||
|
||||
|
||||
def style_mdx_file(mdx_file, max_len=119, check_only=False):
|
||||
@@ -267,6 +412,8 @@ def style_mdx_file(mdx_file, max_len=119, check_only=False):
|
||||
current_language = ""
|
||||
in_code = False
|
||||
new_lines = []
|
||||
black_errors = []
|
||||
|
||||
for line in lines:
|
||||
if _re_code.search(line) is not None:
|
||||
in_code = not in_code
|
||||
@@ -276,8 +423,9 @@ def style_mdx_file(mdx_file, max_len=119, check_only=False):
|
||||
else:
|
||||
code = "\n".join(current_code)
|
||||
if current_language in ["py", "python"]:
|
||||
pass
|
||||
# code = format_code_example(code, max_len)
|
||||
code, error = format_code_example(code, max_len)
|
||||
if len(error) > 0:
|
||||
black_errors.append(error)
|
||||
new_lines.append(code)
|
||||
|
||||
new_lines.append(line)
|
||||
@@ -293,7 +441,7 @@ def style_mdx_file(mdx_file, max_len=119, check_only=False):
|
||||
with open(mdx_file, "w", encoding="utf-8", newline="\n") as f:
|
||||
f.write(clean_content)
|
||||
|
||||
return diff
|
||||
return diff, "\n\n".join(black_errors)
|
||||
|
||||
|
||||
def style_doc_files(*files, max_len=119, check_only=False):
|
||||
@@ -310,26 +458,49 @@ def style_doc_files(*files, max_len=119, check_only=False):
|
||||
List[`str`]: The list of files changed or that should be restyled.
|
||||
"""
|
||||
changed = []
|
||||
black_errors = []
|
||||
for file in files:
|
||||
# Treat folders
|
||||
if os.path.isdir(file):
|
||||
files = [os.path.join(file, f) for f in os.listdir(file)]
|
||||
files = [f for f in files if os.path.isdir(f) or f.endswith(".rst") or f.endswith(".py")]
|
||||
files = [f for f in files if os.path.isdir(f) or f.endswith(".mdx") or f.endswith(".py")]
|
||||
changed += style_doc_files(*files, max_len=max_len, check_only=check_only)
|
||||
# Treat mdx
|
||||
elif file.endswith(".mdx"):
|
||||
if style_mdx_file(file, max_len=max_len, check_only=check_only):
|
||||
changed.append(file)
|
||||
try:
|
||||
diff, black_error = style_mdx_file(file, max_len=max_len, check_only=check_only)
|
||||
if diff:
|
||||
changed.append(file)
|
||||
if len(black_error) > 0:
|
||||
black_errors.append(
|
||||
f"There was a problem while formatting an example in {file} with black:\m{black_error}"
|
||||
)
|
||||
except Exception:
|
||||
print(f"There is a problem in {file}.")
|
||||
raise
|
||||
# Treat python files
|
||||
elif file.endswith(".py"):
|
||||
try:
|
||||
if style_file_docstrings(file, max_len=max_len, check_only=check_only):
|
||||
diff, black_error = style_file_docstrings(file, max_len=max_len, check_only=check_only)
|
||||
if diff:
|
||||
changed.append(file)
|
||||
if len(black_error) > 0:
|
||||
black_errors.append(
|
||||
f"There was a problem while formatting an example in {file} with black:\m{black_error}"
|
||||
)
|
||||
except Exception:
|
||||
print(f"There is a problem in {file}.")
|
||||
raise
|
||||
else:
|
||||
warnings.warn(f"Ignoring {file} because it's not a py or an mdx file or a folder.")
|
||||
if len(black_errors) > 0:
|
||||
black_message = "\n\n".join(black_errors)
|
||||
raise ValueError(
|
||||
"Some code examples can't be interpreted by black, which means they aren't regular python:\n\n"
|
||||
+ black_message
|
||||
+ "\n\nMake sure to fix the corresponding docstring or doc file, or remove the py/python after ``` if it "
|
||||
+ "was not supposed to be a Python code sample."
|
||||
)
|
||||
return changed
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user