Run doctest (in PRs) only when some doc example(s) are modified (#23387)
* fix * fix * update --------- Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
This commit is contained in:
@@ -43,6 +43,12 @@ jobs:
|
|||||||
else
|
else
|
||||||
touch test_preparation/test_list.txt
|
touch test_preparation/test_list.txt
|
||||||
fi
|
fi
|
||||||
|
- run: |
|
||||||
|
if [ -f doctest_list.txt ]; then
|
||||||
|
cp doctest_list.txt test_preparation/doctest_list.txt
|
||||||
|
else
|
||||||
|
touch test_preparation/doctest_list.txt
|
||||||
|
fi
|
||||||
- run: |
|
- run: |
|
||||||
if [ -f test_repo_utils.txt ]; then
|
if [ -f test_repo_utils.txt ]; then
|
||||||
mv test_repo_utils.txt test_preparation/test_repo_utils.txt
|
mv test_repo_utils.txt test_preparation/test_repo_utils.txt
|
||||||
@@ -71,6 +77,8 @@ jobs:
|
|||||||
fi
|
fi
|
||||||
- store_artifacts:
|
- store_artifacts:
|
||||||
path: test_preparation/test_list.txt
|
path: test_preparation/test_list.txt
|
||||||
|
- store_artifacts:
|
||||||
|
path: test_preparation/doctest_list.txt
|
||||||
- store_artifacts:
|
- store_artifacts:
|
||||||
path: ~/transformers/test_preparation/filtered_test_list.txt
|
path: ~/transformers/test_preparation/filtered_test_list.txt
|
||||||
- store_artifacts:
|
- store_artifacts:
|
||||||
|
|||||||
@@ -483,7 +483,6 @@ REGULAR_TESTS = [
|
|||||||
hub_job,
|
hub_job,
|
||||||
onnx_job,
|
onnx_job,
|
||||||
exotic_models_job,
|
exotic_models_job,
|
||||||
doc_test_job
|
|
||||||
]
|
]
|
||||||
EXAMPLES_TESTS = [
|
EXAMPLES_TESTS = [
|
||||||
examples_torch_job,
|
examples_torch_job,
|
||||||
@@ -495,6 +494,8 @@ PIPELINE_TESTS = [
|
|||||||
pipelines_tf_job,
|
pipelines_tf_job,
|
||||||
]
|
]
|
||||||
REPO_UTIL_TESTS = [repo_utils_job]
|
REPO_UTIL_TESTS = [repo_utils_job]
|
||||||
|
DOC_TESTS = [doc_test_job]
|
||||||
|
|
||||||
|
|
||||||
def create_circleci_config(folder=None):
|
def create_circleci_config(folder=None):
|
||||||
if folder is None:
|
if folder is None:
|
||||||
@@ -552,6 +553,15 @@ def create_circleci_config(folder=None):
|
|||||||
if os.path.exists(example_file) and os.path.getsize(example_file) > 0:
|
if os.path.exists(example_file) and os.path.getsize(example_file) > 0:
|
||||||
jobs.extend(EXAMPLES_TESTS)
|
jobs.extend(EXAMPLES_TESTS)
|
||||||
|
|
||||||
|
doctest_file = os.path.join(folder, "doctest_list.txt")
|
||||||
|
if os.path.exists(doctest_file):
|
||||||
|
with open(doctest_file) as f:
|
||||||
|
doctest_list = f.read()
|
||||||
|
else:
|
||||||
|
doctest_list = []
|
||||||
|
if len(doctest_list) > 0:
|
||||||
|
jobs.extend(DOC_TESTS)
|
||||||
|
|
||||||
repo_util_file = os.path.join(folder, "test_repo_utils.txt")
|
repo_util_file = os.path.join(folder, "test_repo_utils.txt")
|
||||||
if os.path.exists(repo_util_file) and os.path.getsize(repo_util_file) > 0:
|
if os.path.exists(repo_util_file) and os.path.getsize(repo_util_file) > 0:
|
||||||
jobs.extend(REPO_UTIL_TESTS)
|
jobs.extend(REPO_UTIL_TESTS)
|
||||||
|
|||||||
@@ -116,6 +116,26 @@ def clean_code(content):
|
|||||||
return "\n".join(lines_to_keep)
|
return "\n".join(lines_to_keep)
|
||||||
|
|
||||||
|
|
||||||
|
def keep_doc_examples_only(content):
    """
    Keep only the doc examples (the text found between pairs of triple backticks) of `content`.

    Everything outside the fenced blocks is dropped. Inside the kept text, anything after a `#` sign, trailing
    whitespace and empty lines are removed, so two inputs whose examples only differ by comments or blank lines give
    the same result.

    Args:
        content (`str`): The full text of a file.

    Returns:
        `str`: The fenced blocks of `content`, joined by triple backticks and wrapped in a leading and a trailing
        triple backtick, one kept line per line.
    """
    # Splitting on triple "`" puts the text inside the fences at the odd indices.
    splits = content.split("```")
    # Add leading and trailing "```" so the navigation is easier when compared to the original input `content`
    content = "```" + "```".join(splits[1::2]) + "```"

    # Remove empty lines and comments
    lines_to_keep = []
    for line in content.split("\n"):
        # Remove anything that is after a # sign, together with the whitespace that preceded it: otherwise adding a
        # trailing comment to an example line would still change the cleaned text.
        line = line.partition("#")[0].rstrip()
        if not line:
            continue
        lines_to_keep.append(line)
    return "\n".join(lines_to_keep)
|
||||||
|
|
||||||
|
|
||||||
def get_all_tests():
|
def get_all_tests():
|
||||||
"""
|
"""
|
||||||
Return a list of paths to all test folders and files under `tests`. All paths are rooted at `tests`.
|
Return a list of paths to all test folders and files under `tests`. All paths are rooted at `tests`.
|
||||||
@@ -162,6 +182,24 @@ def diff_is_docstring_only(repo, branching_point, filename):
|
|||||||
return old_content_clean == new_content_clean
|
return old_content_clean == new_content_clean
|
||||||
|
|
||||||
|
|
||||||
|
def diff_contains_doc_examples(repo, branching_point, filename):
    """
    Check if the doc examples of `filename` differ between `branching_point` and the current working tree.

    Args:
        repo: The repository object; its `working_dir` is used as the root folder for `filename`.
        branching_point: The commit checked out (through `checkout_commit`) to read the old version of the file.
        filename: Path of the file to compare, relative to the repository working directory.

    Returns:
        `bool`: `True` if the text kept by `keep_doc_examples_only` is different in the two versions of the file,
        `False` otherwise.
    """
    folder = Path(repo.working_dir)
    # Old version: read the file while `branching_point` is checked out.
    with checkout_commit(repo, branching_point):
        with open(folder / filename, "r", encoding="utf-8") as f:
            old_content = f.read()

    # New version: read the file once the context manager above has exited.
    with open(folder / filename, "r", encoding="utf-8") as f:
        new_content = f.read()

    old_content_clean = keep_doc_examples_only(old_content)
    new_content_clean = keep_doc_examples_only(new_content)

    return old_content_clean != new_content_clean
|
||||||
|
|
||||||
|
|
||||||
def get_diff(repo, base_commit, commits):
|
def get_diff(repo, base_commit, commits):
|
||||||
"""
|
"""
|
||||||
Get's the diff between one or several commits and the head of the repository.
|
Get's the diff between one or several commits and the head of the repository.
|
||||||
@@ -216,32 +254,46 @@ def get_modified_python_files(diff_with_last_commit=False):
|
|||||||
return get_diff(repo, repo.head.commit, parent_commits)
|
return get_diff(repo, repo.head.commit, parent_commits)
|
||||||
|
|
||||||
|
|
||||||
def get_diff_for_py_and_mdx_files(repo, base_commit, commits):
|
def get_diff_for_doctesting(repo, base_commit, commits):
    """
    Get the diff between one or several commits and the head of the repository where some doc example(s) are changed.

    Only python (`.py`) and `.mdx` files are considered. Added files are always kept, renamed files are kept under
    both their old and new names, and modified files are kept only when `diff_contains_doc_examples` reports a change
    in their doc examples. Every other change type (for instance a deletion) is ignored.

    Args:
        repo: The repository object, only forwarded to `diff_contains_doc_examples`.
        base_commit: The commit each entry of `commits` is diffed against.
        commits: An iterable of commits; each must provide a `diff(base_commit)` method yielding objects with
            `change_type`, `a_path` and `b_path` attributes.

    Returns:
        `List[str]`: The paths of the files to run doctests on (a path may appear more than once).
    """
    print("\n### DIFF ###\n")
    code_diff = []
    for commit in commits:
        for diff_obj in commit.diff(base_commit):
            # Only python/mdx files can hold doc examples. Checking the extension first avoids the precedence trap of
            # `A and B or C`, which used to let any `.mdx` diff through whatever its change type was.
            # NOTE(review): assumes `b_path` is always a string for the diff objects received here.
            if not diff_obj.b_path.endswith((".py", ".mdx")):
                continue

            # We always add new python/mdx files
            if diff_obj.change_type == "A":
                code_diff.append(diff_obj.b_path)
            # Now for modified files
            elif diff_obj.change_type in ["M", "R"]:
                # In case of renames, we'll look at the tests using both the old and new name.
                if diff_obj.a_path != diff_obj.b_path:
                    code_diff.extend([diff_obj.a_path, diff_obj.b_path])
                # Otherwise, we check modifications contain some doc example(s).
                elif diff_contains_doc_examples(repo, commit, diff_obj.b_path):
                    code_diff.append(diff_obj.a_path)
                else:
                    print(f"Ignoring diff in {diff_obj.b_path} as it doesn't contain any doc example.")

    return code_diff
|
||||||
|
|
||||||
|
|
||||||
def get_modified_python_and_mdx_files(diff_with_last_commit=False):
|
def get_doctest_files(diff_with_last_commit=False):
|
||||||
"""
|
"""
|
||||||
Return a list of python and mdx files that have been modified between:
|
Return a list of python and mdx files where some doc example(s) in them have been modified between:
|
||||||
|
|
||||||
- the current head and the main branch if `diff_with_last_commit=False` (default)
|
- the current head and the main branch if `diff_with_last_commit=False` (default)
|
||||||
- the current head and its parent commit otherwise.
|
- the current head and its parent commit otherwise.
|
||||||
"""
|
"""
|
||||||
repo = Repo(PATH_TO_REPO)
|
repo = Repo(PATH_TO_REPO)
|
||||||
|
|
||||||
|
test_files_to_run = [] # noqa
|
||||||
if not diff_with_last_commit:
|
if not diff_with_last_commit:
|
||||||
print(f"main is at {repo.refs.main.commit}")
|
print(f"main is at {repo.refs.main.commit}")
|
||||||
print(f"Current head is at {repo.head.commit}")
|
print(f"Current head is at {repo.head.commit}")
|
||||||
@@ -249,23 +301,14 @@ def get_modified_python_and_mdx_files(diff_with_last_commit=False):
|
|||||||
branching_commits = repo.merge_base(repo.refs.main, repo.head)
|
branching_commits = repo.merge_base(repo.refs.main, repo.head)
|
||||||
for commit in branching_commits:
|
for commit in branching_commits:
|
||||||
print(f"Branching commit: {commit}")
|
print(f"Branching commit: {commit}")
|
||||||
return get_diff_for_py_and_mdx_files(repo, repo.head.commit, branching_commits)
|
test_files_to_run = get_diff_for_doctesting(repo, repo.head.commit, branching_commits)
|
||||||
else:
|
else:
|
||||||
print(f"main is at {repo.head.commit}")
|
print(f"main is at {repo.head.commit}")
|
||||||
parent_commits = repo.head.commit.parents
|
parent_commits = repo.head.commit.parents
|
||||||
for commit in parent_commits:
|
for commit in parent_commits:
|
||||||
print(f"Parent commit: {commit}")
|
print(f"Parent commit: {commit}")
|
||||||
return get_diff_for_py_and_mdx_files(repo, repo.head.commit, parent_commits)
|
test_files_to_run = get_diff_for_doctesting(repo, repo.head.commit, parent_commits)
|
||||||
|
|
||||||
|
|
||||||
def get_doctest_files(diff_with_last_commit=False):
|
|
||||||
"""
|
|
||||||
Return a list of python and mdx files that have been modified between:
|
|
||||||
|
|
||||||
- the current head and the main branch if `diff_with_last_commit=False` (default)
|
|
||||||
- the current head and its parent commit otherwise.
|
|
||||||
"""
|
|
||||||
test_files_to_run = get_modified_python_and_mdx_files(diff_with_last_commit)
|
|
||||||
with open("utils/documentation_tests.txt") as fp:
|
with open("utils/documentation_tests.txt") as fp:
|
||||||
documentation_tests = set(fp.read().strip().split("\n"))
|
documentation_tests = set(fp.read().strip().split("\n"))
|
||||||
# So far we don't have 100% coverage for doctest. This line will be removed once we achieve 100%.
|
# So far we don't have 100% coverage for doctest. This line will be removed once we achieve 100%.
|
||||||
@@ -647,6 +690,14 @@ def infer_tests_to_run(
|
|||||||
|
|
||||||
create_json_map(test_files_to_run, json_output_file)
|
create_json_map(test_files_to_run, json_output_file)
|
||||||
|
|
||||||
|
doctest_list = get_doctest_files()
|
||||||
|
|
||||||
|
print(f"\n### DOCTEST TO RUN ###\n{_print_list(doctest_list)}")
|
||||||
|
if len(doctest_list) > 0:
|
||||||
|
doctest_file = Path(output_file).parent / "doctest_list.txt"
|
||||||
|
with open(doctest_file, "w", encoding="utf-8") as f:
|
||||||
|
f.write(" ".join(doctest_list))
|
||||||
|
|
||||||
|
|
||||||
def filter_tests(output_file, filters):
|
def filter_tests(output_file, filters):
|
||||||
"""
|
"""
|
||||||
|
|||||||
Reference in New Issue
Block a user