Roll out the test fetcher on push tests (#13055)

* Use test fetcher for push tests as well * Force diff with last commit for circleCI on master * Fix syntax error * Style * Schedule nightly tests
2021-08-10 14:54:52 +02:00
parent 2e0d767ab2
commit 9e9b8f1d99
3 changed files with 421 additions and 30 deletions
--- a/utils/tests_fetcher.py
+++ b/utils/tests_fetcher.py
@@ -82,23 +82,39 @@ def diff_is_docstring_only(repo, branching_point, filename):
    return old_content_clean == new_content_clean


-def get_modified_python_files():
+def get_modified_python_files(diff_with_last_commit=False):
    """
-    Return a list of python files that have been modified between the current head and the master branch.
+    Return a list of python files that have been modified between:
+
+    - the current head and the master branch if `diff_with_last_commit=False` (default)
+    - the current head and its parent commit otherwise.
    """
    repo = Repo(PATH_TO_TRANFORMERS)

-    print(f"Master is at {repo.refs.master.commit}")
-    print(f"Current head is at {repo.head.commit}")
+    if not diff_with_last_commit:
+        print(f"Master is at {repo.refs.master.commit}")
+        print(f"Current head is at {repo.head.commit}")

-    branching_commits = repo.merge_base(repo.refs.master, repo.head)
-    for commit in branching_commits:
-        print(f"Branching commit: {commit}")
+        branching_commits = repo.merge_base(repo.refs.master, repo.head)
+        for commit in branching_commits:
+            print(f"Branching commit: {commit}")
+        return get_diff(repo, repo.head.commit, branching_commits)
+    else:
+        print(f"Master is at {repo.head.commit}")
+        parent_commits = repo.head.commit.parents
+        for commit in parent_commits:
+            print(f"Parent commit: {commit}")
+        return get_diff(repo, repo.head.commit, parent_commits)

+
+def get_diff(repo, base_commit, commits):
+    """
+    Get's the diff between one or several commits and the head of the repository.
+    """
    print("\n### DIFF ###\n")
    code_diff = []
-    for commit in branching_commits:
-        for diff_obj in commit.diff(repo.head.commit):
+    for commit in commits:
+        for diff_obj in commit.diff(base_commit):
            # We always add new python files
            if diff_obj.change_type == "A" and diff_obj.b_path.endswith(".py"):
                code_diff.append(diff_obj.b_path)
@@ -365,8 +381,8 @@ def sanity_check():
        )


-def infer_tests_to_run(output_file):
-    modified_files = get_modified_python_files()
+def infer_tests_to_run(output_file, diff_with_last_commit=False, filters=None):
+    modified_files = get_modified_python_files(diff_with_last_commit=diff_with_last_commit)
    print(f"\n### MODIFIED FILES ###\n{_print_list(modified_files)}")

    # Create the map that will give us all impacted modules.
@@ -396,6 +412,10 @@ def infer_tests_to_run(output_file):

    # Remove duplicates
    test_files_to_run = sorted(list(set(test_files_to_run)))
+    if filters is not None:
+        for filter in filters:
+            test_files_to_run = [f for f in test_files_to_run if f.startswith(filter)]
+
    print(f"\n### TEST TO RUN ###\n{_print_list(test_files_to_run)}")
    if len(test_files_to_run) > 0:
        with open(output_file, "w", encoding="utf-8") as f:
@@ -410,22 +430,28 @@ if __name__ == "__main__":
    parser.add_argument(
        "--output_file", type=str, default="test_list.txt", help="Where to store the list of tests to run"
    )
+    parser.add_argument(
+        "--diff_with_last_commit",
+        action="store_true",
+        help="To fetch the tests between the current commit and the last commit",
+    )
+    parser.add_argument(
+        "--filters", type=str, nargs="*", help="Only keep the test files matching one of those filters."
+    )
    args = parser.parse_args()
    if args.sanity_check:
        sanity_check()
    else:
        repo = Repo(PATH_TO_TRANFORMERS)
-        # For now we run all tests on the master branch. After testing this more and making sure it works most of the
-        # time, we will apply the same logic to the tests on the master branch and only run the whole suite once per
-        # day.
+
+        diff_with_last_commit = args.diff_with_last_commit
        if not repo.head.is_detached and repo.head.ref == repo.refs.master:
-            print("Master branch detected, running all tests.")
+            print("Master branch detected, fetching tests against last commit.")
+            diff_with_last_commit = True
+
+        try:
+            infer_tests_to_run(args.output_file, diff_with_last_commit=diff_with_last_commit, filters=args.filters)
+        except Exception as e:
+            print(f"\nError when trying to grab the relevant tests: {e}\n\nRunning all tests.")
            with open(args.output_file, "w", encoding="utf-8") as f:
                f.write("./tests/")
-        else:
-            try:
-                infer_tests_to_run(args.output_file)
-            except Exception as e:
-                print(f"\nError when trying to grab the relevant tests: {e}\n\nRunning all tests.")
-                with open(args.output_file, "w", encoding="utf-8") as f:
-                    f.write("./tests/")