From a929f81e92416bbba6a75f461c6ace2da2ff44b4 Mon Sep 17 00:00:00 2001 From: Sylvain Gugger <35901082+sgugger@users.noreply.github.com> Date: Tue, 18 Oct 2022 13:47:36 -0400 Subject: [PATCH] Repo utils test (#19696) * Create repo utils test job * Last occurrence * Add tests for tests_fetcher * Better filtering * Let's learn more * Should fix * Should fix * Remove debug * Style * WiP WiP WiP WiP WiP WiP WiP WiP WiP * Quality * address review comments * Fix link --- .circleci/config.yml | 11 ++++- .circleci/create_circleci_config.py | 18 +++++++- tests/repo_utils/test_tests_fetcher.py | 64 ++++++++++++++++++++++++++ utils/tests_fetcher.py | 33 +++++++++---- 4 files changed, 115 insertions(+), 11 deletions(-) create mode 100644 tests/repo_utils/test_tests_fetcher.py diff --git a/.circleci/config.yml b/.circleci/config.yml index d49a563e58..af8d2917a0 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -30,7 +30,13 @@ jobs: else touch test_preparation/test_list.txt fi - - run: python utils/tests_fetcher.py --filter_pipeline_tests + - run: | + if [ -f test_repo_utils.txt ]; then + mv test_repo_utils.txt test_preparation/test_repo_utils.txt + else + touch test_preparation/test_repo_utils.txt + fi + - run: python utils/tests_fetcher.py --filter_tests - run: | if [ -f test_list.txt ]; then mv test_list.txt test_preparation/filtered_test_list.txt @@ -75,8 +81,9 @@ jobs: mkdir test_preparation echo "tests" > test_preparation/test_list.txt echo "tests" > test_preparation/examples_test_list.txt - - run: python utils/tests_fetcher.py --filter_pipeline_tests + - run: python utils/tests_fetcher.py --filter_tests - run: mv test_list.txt test_preparation/filtered_test_list.txt + - run: mv test_repo_utils.txt test_preparation/test_repo_utils.txt - run: python .circleci/create_circleci_config.py --fetcher_folder test_preparation - run: cp test_preparation/generated_config.yml test_preparation/generated_config.txt - store_artifacts: diff --git 
a/.circleci/create_circleci_config.py b/.circleci/create_circleci_config.py index 128283a178..097bd71d01 100644 --- a/.circleci/create_circleci_config.py +++ b/.circleci/create_circleci_config.py @@ -324,6 +324,18 @@ layoutlm_job = CircleCIJob( ) +repo_utils_job = CircleCIJob( + "repo_utils", + install_steps=[ + "pip install --upgrade pip", + "pip install .[all,quality,testing]", + ], + parallelism=None, + pytest_num_workers=1, + resource_class=None, + tests_to_run="tests/repo_utils", +) + REGULAR_TESTS = [ torch_and_tf_job, torch_and_flax_job, @@ -344,7 +356,7 @@ PIPELINE_TESTS = [ pipelines_torch_job, pipelines_tf_job, ] - +REPO_UTIL_TESTS = [repo_utils_job] def create_circleci_config(folder=None): if folder is None: @@ -371,6 +383,10 @@ def create_circleci_config(folder=None): example_file = os.path.join(folder, "examples_test_list.txt") if os.path.exists(example_file) and os.path.getsize(example_file) > 0: jobs.extend(EXAMPLES_TESTS) + + repo_util_file = os.path.join(folder, "test_repo_utils.txt") + if os.path.exists(repo_util_file) and os.path.getsize(repo_util_file) > 0: + jobs.extend(REPO_UTIL_TESTS) if len(jobs) > 0: config = {"version": "2.1"} diff --git a/tests/repo_utils/test_tests_fetcher.py b/tests/repo_utils/test_tests_fetcher.py new file mode 100644 index 0000000000..0541b72d95 --- /dev/null +++ b/tests/repo_utils/test_tests_fetcher.py @@ -0,0 +1,64 @@ +# Copyright 2022 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import sys +import unittest + +from git import Repo + + +git_repo_path = os.path.abspath(os.path.dirname(os.path.dirname(os.path.dirname(__file__)))) +sys.path.append(os.path.join(git_repo_path, "utils")) + +transformers_path = os.path.join(git_repo_path, "src", "transformers") +# Tests are run against this specific commit for reproducibility +# https://github.com/huggingface/transformers/tree/07f6690206e39ed7a4d9dbc58824314f7089bb38 +GIT_TEST_SHA = "07f6690206e39ed7a4d9dbc58824314f7089bb38" + +from tests_fetcher import checkout_commit, clean_code, get_module_dependencies # noqa: E402 + + +class CheckDummiesTester(unittest.TestCase): + def test_clean_code(self): + # Clean code removes all strings in triple quotes + self.assertEqual(clean_code('"""\nDocstring\n"""\ncode\n"""Long string"""\ncode\n'), "code\ncode") + self.assertEqual(clean_code("'''\nDocstring\n'''\ncode\n'''Long string'''\ncode\n'''"), "code\ncode") + + # Clean code removes all comments + self.assertEqual(clean_code("code\n# Comment\ncode"), "code\ncode") + self.assertEqual(clean_code("code # inline comment\ncode"), "code \ncode") + + def test_checkout_commit(self): + repo = Repo(git_repo_path) + self.assertNotEqual(repo.head.commit.hexsha, GIT_TEST_SHA) + with checkout_commit(repo, GIT_TEST_SHA): + self.assertEqual(repo.head.commit.hexsha, GIT_TEST_SHA) + self.assertNotEqual(repo.head.commit.hexsha, GIT_TEST_SHA) + + def test_get_module_dependencies(self): + bert_module = os.path.join(transformers_path, "models", "bert", "modeling_bert.py") + expected_deps = [ + "activations.py", + "modeling_outputs.py", + "modeling_utils.py", + "pytorch_utils.py", + "models/bert/configuration_bert.py", + ] + expected_deps = set(os.path.join(transformers_path, f) for f in expected_deps) + repo = Repo(git_repo_path) + with checkout_commit(repo, GIT_TEST_SHA): + deps = 
get_module_dependencies(bert_module) + deps = set(os.path.expanduser(f) for f in deps) + self.assertEqual(deps, expected_deps) diff --git a/utils/tests_fetcher.py b/utils/tests_fetcher.py index 06a2026309..080c8216f5 100644 --- a/utils/tests_fetcher.py +++ b/utils/tests_fetcher.py @@ -547,6 +547,7 @@ def infer_tests_to_run(output_file, diff_with_last_commit=False, filters=None, j # Grab the corresponding test files: if "setup.py" in impacted_files: test_files_to_run = ["tests"] + repo_utils_launch = True else: # Grab the corresponding test files: test_files_to_run = [] @@ -577,6 +578,12 @@ def infer_tests_to_run(output_file, diff_with_last_commit=False, filters=None, j for filter in filters: filtered_files.extend([f for f in test_files_to_run if f.startswith(filter)]) test_files_to_run = filtered_files + repo_utils_launch = any(f.split(os.path.sep)[1] == "repo_utils" for f in test_files_to_run) + + if repo_utils_launch: + repo_util_file = Path(output_file).parent / "test_repo_utils.txt" + with open(repo_util_file, "w", encoding="utf-8") as f: + f.write("tests/repo_utils") print(f"\n### TEST TO RUN ###\n{_print_list(test_files_to_run)}") if len(test_files_to_run) > 0: @@ -620,20 +627,29 @@ def infer_tests_to_run(output_file, diff_with_last_commit=False, filters=None, j json.dump(test_map, fp, ensure_ascii=False) -def filter_pipeline_tests(output_file): +def filter_tests(output_file, filters): + """ + Reads the content of the output file and filters out all the tests in a list of given folders. + + Args: + output_file (`str` or `os.PathLike`): The path to the output file of the tests fetcher. + filters (`List[str]`): A list of folders to filter. 
+ """ if not os.path.isfile(output_file): print("No test file found.") return with open(output_file, "r", encoding="utf-8") as f: test_files = f.read().split(" ") - if len(test_files) == 0: + if len(test_files) == 0 or test_files == [""]: print("No tests to filter.") return + + print(test_files) if test_files == ["tests"]: - test_files = [os.path.join("tests", f) for f in os.listdir("tests") if f not in ["__init__.py", "pipelines"]] + test_files = [os.path.join("tests", f) for f in os.listdir("tests") if f not in ["__init__.py"] + filters] else: - test_files = [f for f in test_files if not f.startswith(os.path.join("tests", "pipelines"))] + test_files = [f for f in test_files if f.split(os.path.sep)[1] not in filters] with open(output_file, "w", encoding="utf-8") as f: f.write(" ".join(test_files)) @@ -666,9 +682,9 @@ if __name__ == "__main__": help="Only keep the test files matching one of those filters.", ) parser.add_argument( - "--filter_pipeline_tests", + "--filter_tests", action="store_true", - help="Will filter the pipeline tests outside of the generated list of tests.", + help="Will filter the pipeline/repo utils tests outside of the generated list of tests.", ) parser.add_argument( "--print_dependencies_of", @@ -681,8 +697,8 @@ if __name__ == "__main__": print_tree_deps_of(args.print_dependencies_of) elif args.sanity_check: sanity_check() - elif args.filter_pipeline_tests: - filter_pipeline_tests(args.output_file) + elif args.filter_tests: + filter_tests(args.output_file, ["pipelines", "repo_utils"]) else: repo = Repo(PATH_TO_TRANFORMERS) @@ -698,6 +714,7 @@ if __name__ == "__main__": filters=args.filters, json_output_file=args.json_output_file, ) + filter_tests(args.output_file, ["repo_utils"]) except Exception as e: print(f"\nError when trying to grab the relevant tests: {e}\n\nRunning all tests.") with open(args.output_file, "w", encoding="utf-8") as f: