From a929f81e92416bbba6a75f461c6ace2da2ff44b4 Mon Sep 17 00:00:00 2001
From: Sylvain Gugger <35901082+sgugger@users.noreply.github.com>
Date: Tue, 18 Oct 2022 13:47:36 -0400
Subject: [PATCH] Repo utils test (#19696)

* Create repo utils test job

* Last occurrence

* Add tests for tests_fetcher

* Better filtering

* Let's learn more

* Should fix

* Should fix

* Remove debug

* Style

* WiP

WiP

WiP

WiP

WiP

WiP

WiP

WiP

WiP

* Quality

* address review comments

* Fix link
---
 .circleci/config.yml                   | 11 ++++-
 .circleci/create_circleci_config.py    | 18 +++++++-
 tests/repo_utils/test_tests_fetcher.py | 64 ++++++++++++++++++++++++++
 utils/tests_fetcher.py                 | 33 +++++++++----
 4 files changed, 115 insertions(+), 11 deletions(-)
 create mode 100644 tests/repo_utils/test_tests_fetcher.py

diff --git a/.circleci/config.yml b/.circleci/config.yml
index d49a563e58..af8d2917a0 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -30,7 +30,13 @@ jobs:
                 else
                     touch test_preparation/test_list.txt
                 fi
-            - run: python utils/tests_fetcher.py --filter_pipeline_tests
+            - run: |
+                if [ -f test_repo_utils.txt ]; then
+                    mv test_repo_utils.txt test_preparation/test_repo_utils.txt
+                else
+                    touch test_preparation/test_repo_utils.txt
+                fi
+            - run: python utils/tests_fetcher.py --filter_tests
             - run: |
                 if [ -f test_list.txt ]; then
                     mv test_list.txt test_preparation/filtered_test_list.txt
@@ -75,8 +81,9 @@ jobs:
                   mkdir test_preparation
                   echo "tests" > test_preparation/test_list.txt
                   echo "tests" > test_preparation/examples_test_list.txt
-            - run: python utils/tests_fetcher.py --filter_pipeline_tests
+            - run: python utils/tests_fetcher.py --filter_tests
             - run: mv test_list.txt test_preparation/filtered_test_list.txt
+            - run: mv test_repo_utils.txt test_preparation/test_repo_utils.txt
             - run: python .circleci/create_circleci_config.py --fetcher_folder test_preparation
             - run: cp test_preparation/generated_config.yml test_preparation/generated_config.txt
             - store_artifacts:
diff --git a/.circleci/create_circleci_config.py b/.circleci/create_circleci_config.py
index 128283a178..097bd71d01 100644
--- a/.circleci/create_circleci_config.py
+++ b/.circleci/create_circleci_config.py
@@ -324,6 +324,18 @@ layoutlm_job = CircleCIJob(
 )
 
 
+repo_utils_job = CircleCIJob(
+    "repo_utils",
+    install_steps=[
+        "pip install --upgrade pip",
+        "pip install .[all,quality,testing]",
+    ],
+    parallelism=None,
+    pytest_num_workers=1,
+    resource_class=None,
+    tests_to_run="tests/repo_utils",
+)
+
 REGULAR_TESTS = [
     torch_and_tf_job,
     torch_and_flax_job,
@@ -344,7 +356,7 @@ PIPELINE_TESTS = [
     pipelines_torch_job,
     pipelines_tf_job,
 ]
-
+REPO_UTIL_TESTS = [repo_utils_job]
 
 def create_circleci_config(folder=None):
     if folder is None:
@@ -371,6 +383,10 @@ def create_circleci_config(folder=None):
     example_file = os.path.join(folder, "examples_test_list.txt")
     if os.path.exists(example_file) and os.path.getsize(example_file) > 0:
         jobs.extend(EXAMPLES_TESTS)
+    
+    repo_util_file = os.path.join(folder, "test_repo_utils.txt")
+    if os.path.exists(repo_util_file) and os.path.getsize(repo_util_file) > 0:
+        jobs.extend(REPO_UTIL_TESTS)
 
     if len(jobs) > 0:
         config = {"version": "2.1"}
diff --git a/tests/repo_utils/test_tests_fetcher.py b/tests/repo_utils/test_tests_fetcher.py
new file mode 100644
index 0000000000..0541b72d95
--- /dev/null
+++ b/tests/repo_utils/test_tests_fetcher.py
@@ -0,0 +1,64 @@
+# Copyright 2022 The HuggingFace Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+import sys
+import unittest
+
+from git import Repo
+
+
+git_repo_path = os.path.abspath(os.path.dirname(os.path.dirname(os.path.dirname(__file__))))
+sys.path.append(os.path.join(git_repo_path, "utils"))
+
+transformers_path = os.path.join(git_repo_path, "src", "transformers")
+# Tests are run against this specific commit for reproducibility
+# https://github.com/huggingface/transformers/tree/07f6690206e39ed7a4d9dbc58824314f7089bb38
+GIT_TEST_SHA = "07f6690206e39ed7a4d9dbc58824314f7089bb38"
+
+from tests_fetcher import checkout_commit, clean_code, get_module_dependencies  # noqa: E402
+
+
+class CheckDummiesTester(unittest.TestCase):
+    def test_clean_code(self):
+        # Clean code removes all strings in triple quotes
+        self.assertEqual(clean_code('"""\nDocstring\n"""\ncode\n"""Long string"""\ncode\n'), "code\ncode")
+        self.assertEqual(clean_code("'''\nDocstring\n'''\ncode\n'''Long string'''\ncode\n'''"), "code\ncode")
+
+        # Clean code removes all comments
+        self.assertEqual(clean_code("code\n# Comment\ncode"), "code\ncode")
+        self.assertEqual(clean_code("code  # inline comment\ncode"), "code  \ncode")
+
+    def test_checkout_commit(self):
+        repo = Repo(git_repo_path)
+        self.assertNotEqual(repo.head.commit.hexsha, GIT_TEST_SHA)
+        with checkout_commit(repo, GIT_TEST_SHA):
+            self.assertEqual(repo.head.commit.hexsha, GIT_TEST_SHA)
+        self.assertNotEqual(repo.head.commit.hexsha, GIT_TEST_SHA)
+
+    def test_get_module_dependencies(self):
+        bert_module = os.path.join(transformers_path, "models", "bert", "modeling_bert.py")
+        expected_deps = [
+            "activations.py",
+            "modeling_outputs.py",
+            "modeling_utils.py",
+            "pytorch_utils.py",
+            "models/bert/configuration_bert.py",
+        ]
+        expected_deps = set(os.path.join(transformers_path, f) for f in expected_deps)
+        repo = Repo(git_repo_path)
+        with checkout_commit(repo, GIT_TEST_SHA):
+            deps = get_module_dependencies(bert_module)
+        deps = set(os.path.expanduser(f) for f in deps)
+        self.assertEqual(deps, expected_deps)
diff --git a/utils/tests_fetcher.py b/utils/tests_fetcher.py
index 06a2026309..080c8216f5 100644
--- a/utils/tests_fetcher.py
+++ b/utils/tests_fetcher.py
@@ -547,6 +547,7 @@ def infer_tests_to_run(output_file, diff_with_last_commit=False, filters=None, j
     # Grab the corresponding test files:
     if "setup.py" in impacted_files:
         test_files_to_run = ["tests"]
+        repo_utils_launch = True
     else:
         # Grab the corresponding test files:
         test_files_to_run = []
@@ -577,6 +578,12 @@ def infer_tests_to_run(output_file, diff_with_last_commit=False, filters=None, j
             for filter in filters:
                 filtered_files.extend([f for f in test_files_to_run if f.startswith(filter)])
             test_files_to_run = filtered_files
+        repo_utils_launch = any(f.split(os.path.sep)[1] == "repo_utils" for f in test_files_to_run)
+
+    if repo_utils_launch:
+        repo_util_file = Path(output_file).parent / "test_repo_utils.txt"
+        with open(repo_util_file, "w", encoding="utf-8") as f:
+            f.write("tests/repo_utils")
 
     print(f"\n### TEST TO RUN ###\n{_print_list(test_files_to_run)}")
     if len(test_files_to_run) > 0:
@@ -620,20 +627,29 @@ def infer_tests_to_run(output_file, diff_with_last_commit=False, filters=None, j
                 json.dump(test_map, fp, ensure_ascii=False)
 
 
-def filter_pipeline_tests(output_file):
+def filter_tests(output_file, filters):
+    """
+    Reads the content of the output file and filters out all the tests in a list of given folders.
+
+    Args:
+        output_file (`str` or `os.PathLike`): The path to the output file of the tests fetcher.
+        filters (`List[str]`): A list of folders to filter.
+    """
     if not os.path.isfile(output_file):
         print("No test file found.")
         return
     with open(output_file, "r", encoding="utf-8") as f:
         test_files = f.read().split(" ")
 
-    if len(test_files) == 0:
+    if len(test_files) == 0 or test_files == [""]:
         print("No tests to filter.")
         return
+
+    print(test_files)
     if test_files == ["tests"]:
-        test_files = [os.path.join("tests", f) for f in os.listdir("tests") if f not in ["__init__.py", "pipelines"]]
+        test_files = [os.path.join("tests", f) for f in os.listdir("tests") if f not in ["__init__.py"] + filters]
     else:
-        test_files = [f for f in test_files if not f.startswith(os.path.join("tests", "pipelines"))]
+        test_files = [f for f in test_files if f.split(os.path.sep)[1] not in filters]
 
     with open(output_file, "w", encoding="utf-8") as f:
         f.write(" ".join(test_files))
@@ -666,9 +682,9 @@ if __name__ == "__main__":
         help="Only keep the test files matching one of those filters.",
     )
     parser.add_argument(
-        "--filter_pipeline_tests",
+        "--filter_tests",
         action="store_true",
-        help="Will filter the pipeline tests outside of the generated list of tests.",
+        help="Will filter the pipeline/repo utils tests outside of the generated list of tests.",
     )
     parser.add_argument(
         "--print_dependencies_of",
@@ -681,8 +697,8 @@ if __name__ == "__main__":
         print_tree_deps_of(args.print_dependencies_of)
     elif args.sanity_check:
         sanity_check()
-    elif args.filter_pipeline_tests:
-        filter_pipeline_tests(args.output_file)
+    elif args.filter_tests:
+        filter_tests(args.output_file, ["pipelines", "repo_utils"])
     else:
         repo = Repo(PATH_TO_TRANFORMERS)
 
@@ -698,6 +714,7 @@ if __name__ == "__main__":
                 filters=args.filters,
                 json_output_file=args.json_output_file,
             )
+            filter_tests(args.output_file, ["repo_utils"])
         except Exception as e:
             print(f"\nError when trying to grab the relevant tests: {e}\n\nRunning all tests.")
             with open(args.output_file, "w", encoding="utf-8") as f: