From 30e92ea3238fde9af15d060686ae2d1f8cf2524b Mon Sep 17 00:00:00 2001 From: Yih-Dar <2521628+ydshieh@users.noreply.github.com> Date: Tue, 28 Nov 2023 17:21:21 +0100 Subject: [PATCH] Trigger corresponding pipeline tests if `tests/utils/tiny_model_summary.json` is modified (#27693) * fix --------- Co-authored-by: ydshieh --- tests/models/phi/test_modeling_phi.py | 6 ++ utils/tests_fetcher.py | 126 +++++++++++++++++++++++++- 2 files changed, 127 insertions(+), 5 deletions(-) diff --git a/tests/models/phi/test_modeling_phi.py b/tests/models/phi/test_modeling_phi.py index 93c5ca85e9..94dcde81f7 100644 --- a/tests/models/phi/test_modeling_phi.py +++ b/tests/models/phi/test_modeling_phi.py @@ -288,6 +288,12 @@ class PhiModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMixin, test_headmasking = False test_pruning = False + # TODO (ydshieh): Check this. See https://app.circleci.com/pipelines/github/huggingface/transformers/79292/workflows/fa2ba644-8953-44a6-8f67-ccd69ca6a476/jobs/1012905 + def is_pipeline_test_to_skip( + self, pipeline_test_casse_name, config_class, model_architecture, tokenizer_name, processor_name + ): + return True + # Copied from tests.models.llama.test_modeling_llama.LlamaModelTest.setUp with Llama->Phi def setUp(self): self.model_tester = PhiModelTester(self) diff --git a/utils/tests_fetcher.py b/utils/tests_fetcher.py index c7638a129a..1aae59e4b3 100644 --- a/utils/tests_fetcher.py +++ b/utils/tests_fetcher.py @@ -51,9 +51,11 @@ python utils/tests_fetcher.py --diff_with_last_commit import argparse import collections +import importlib.util import json import os import re +import tempfile from contextlib import contextmanager from pathlib import Path from typing import Dict, List, Optional, Tuple, Union @@ -254,6 +256,122 @@ def diff_contains_doc_examples(repo: Repo, branching_point: str, filename: str) return old_content_clean != new_content_clean +def get_impacted_files_from_tiny_model_summary(diff_with_last_commit: bool = False) -> List[str]: + """ + Return a list of python modeling files that are impacted by the changes of `tiny_model_summary.json` in between: + + - the current head and the main branch if `diff_with_last_commit=False` (default) + - the current head and its parent commit otherwise. + + Returns: + `List[str]`: The list of Python modeling files that are impacted by the changes of `tiny_model_summary.json`. + """ + repo = Repo(PATH_TO_REPO) + + folder = Path(repo.working_dir) + + if not diff_with_last_commit: + print(f"main is at {repo.refs.main.commit}") + print(f"Current head is at {repo.head.commit}") + + commits = repo.merge_base(repo.refs.main, repo.head) + for commit in commits: + print(f"Branching commit: {commit}") + else: + print(f"main is at {repo.head.commit}") + commits = repo.head.commit.parents + for commit in commits: + print(f"Parent commit: {commit}") + + if not os.path.isfile(folder / "tests/utils/tiny_model_summary.json"): + return [] + + files = set() + for commit in commits: + with checkout_commit(repo, commit): + with open(folder / "tests/utils/tiny_model_summary.json", "r", encoding="utf-8") as f: + old_content = f.read() + + with open(folder / "tests/utils/tiny_model_summary.json", "r", encoding="utf-8") as f: + new_content = f.read() + + # get the content as json object + old_content = json.loads(old_content) + new_content = json.loads(new_content) + + old_keys = set(old_content.keys()) + new_keys = set(new_content.keys()) + + # get the difference + keys_with_diff = old_keys.symmetric_difference(new_keys) + common_keys = old_keys.intersection(new_keys) + # if both have the same key, check its content + for key in common_keys: + if old_content[key] != new_content[key]: + keys_with_diff.add(key) + + # get the model classes + impacted_model_classes = [] + for key in keys_with_diff: + if key in new_keys: + impacted_model_classes.extend(new_content[key]["model_classes"]) + + # get the module where the model classes are defined. We want to use the main `__init__` file, but it requires + # all the framework being installed, which is not ideal for a simple script like test fetcher. + # So we create a temporary and modified main `__init__` and access its `_import_structure`. + with open(folder / "src/transformers/__init__.py") as fp: + lines = fp.readlines() + new_lines = [] + # Get all the code related to `_import_structure` + for line in lines: + if line == "_import_structure = {\n": + new_lines.append(line) + elif line == "# Direct imports for type-checking\n": + break + elif len(new_lines) > 0: + # bypass the framework check so we can get all the information even if frameworks are not available + line = re.sub(r"is_.+_available\(\)", "True", line) + line = line.replace("OptionalDependencyNotAvailable", "Exception") + line = line.replace("Exception()", "Exception") + new_lines.append(line) + + # create and load the temporary module + with tempfile.TemporaryDirectory() as tmpdirname: + with open(os.path.join(tmpdirname, "temp_init.py"), "w") as fp: + fp.write("".join(new_lines)) + + spec = importlib.util.spec_from_file_location("temp_init", os.path.join(tmpdirname, "temp_init.py")) + module = importlib.util.module_from_spec(spec) + spec.loader.exec_module(module) + # Finally, get `_import_structure` that we need + import_structure = module._import_structure + + # map model classes to their defined module + reversed_structure = {} + for key, values in import_structure.items(): + for value in values: + reversed_structure[value] = key + + # Get the corresponding modeling file path + for model_class in impacted_model_classes: + module = reversed_structure[model_class] + framework = "" + if model_class.startswith("TF"): + framework = "tf" + elif model_class.startswith("Flax"): + framework = "flax" + fn = ( + f"modeling_{module.split('.')[-1]}.py" + if framework == "" + else f"modeling_{framework}_{module.split('.')[-1]}.py" + ) + files.add( + f"src.transformers.{module}.{fn}".replace(".", os.path.sep).replace(f"{os.path.sep}py", ".py") + ) + + return sorted(files) + + def get_diff(repo: Repo, base_commit: str, commits: List[str]) -> List[str]: """ Get the diff between a base commit and one or several commits. @@ -949,18 +1067,16 @@ def infer_tests_to_run( if any(x in modified_files for x in ["setup.py", ".circleci/create_circleci_config.py"]): test_files_to_run = ["tests", "examples"] repo_utils_launch = True - # in order to trigger pipeline tests even if no code change at all - elif "tests/utils/tiny_model_summary.json" in modified_files: - test_files_to_run = ["tests"] - repo_utils_launch = any(f.split(os.path.sep)[0] == "utils" for f in modified_files) else: # All modified tests need to be run. test_files_to_run = [ f for f in modified_files if f.startswith("tests") and f.split(os.path.sep)[-1].startswith("test") ] + impacted_files = get_impacted_files_from_tiny_model_summary(diff_with_last_commit=diff_with_last_commit) + # Then we grab the corresponding test files. test_map = create_module_to_test_map(reverse_map=reverse_map, filter_models=filter_models) - for f in modified_files: + for f in modified_files + impacted_files: if f in test_map: test_files_to_run.extend(test_map[f]) test_files_to_run = sorted(set(test_files_to_run))