Make parallelism for CircleCI jobs work - but keep it 1 for now (#21157)

* split tests

* test CI

* add if else

Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
This commit is contained in:
Yih-Dar
2023-01-20 16:41:33 +01:00
committed by GitHub
parent 2553363826
commit b0969cafd0

View File

@@ -15,7 +15,9 @@
import argparse import argparse
import copy import copy
import glob
import os import os
import random
from dataclasses import dataclass from dataclasses import dataclass
from typing import Any, Dict, List, Optional from typing import Any, Dict, List, Optional
@@ -58,6 +60,8 @@ class CircleCIJob:
self.pytest_options = {} self.pytest_options = {}
if isinstance(self.tests_to_run, str): if isinstance(self.tests_to_run, str):
self.tests_to_run = [self.tests_to_run] self.tests_to_run = [self.tests_to_run]
if self.parallelism is None:
self.parallelism = 1
def to_dict(self): def to_dict(self):
job = { job = {
@@ -99,10 +103,57 @@ class CircleCIJob:
f"--make-reports={self.name}" if "examples" in self.name else f"--make-reports=tests_{self.name}" f"--make-reports={self.name}" if "examples" in self.name else f"--make-reports=tests_{self.name}"
) )
test_command = f"python -m pytest -n {self.pytest_num_workers} " + " ".join(pytest_flags) test_command = f"python -m pytest -n {self.pytest_num_workers} " + " ".join(pytest_flags)
if self.tests_to_run is None: if self.parallelism == 1:
test_command += " << pipeline.parameters.tests_to_run >>" if self.tests_to_run is None:
test_command += " << pipeline.parameters.tests_to_run >>"
else:
test_command += " " + " ".join(self.tests_to_run)
else: else:
test_command += " " + " ".join(self.tests_to_run) # We need explicit list instead of `pipeline.parameters.tests_to_run` (only available at job runtime)
tests = self.tests_to_run
if tests is None:
folder = os.environ["test_preparation_dir"]
test_file = os.path.join(folder, "filtered_test_list.txt")
if os.path.exists(test_file):
with open(test_file) as f:
tests = f.read().split(" ")
# expand the test list
if tests == ["tests"]:
tests = [os.path.join("tests", x) for x in os.listdir("tests")]
expanded_tests = []
for test in tests:
if test.endswith(".py"):
expanded_tests.append(test)
elif test == "tests/models":
expanded_tests.extend([os.path.join(test, x) for x in os.listdir(test)])
elif test == "tests/pipelines":
expanded_tests.extend([os.path.join(test, x) for x in os.listdir(test)])
else:
expanded_tests.append(test)
# Avoid long tests always being collected together
random.shuffle(expanded_tests)
tests = " ".join(expanded_tests)
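# Each executor should run ~10 tests (NOTE: `tests` is already a space-joined string here, so `len(tests)` below counts characters, not tests)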
n_executors = max(len(tests) // 10, 1)
# Avoid empty test list on some executor(s) or launching too many executors
if n_executors > self.parallelism:
n_executors = self.parallelism
job["parallelism"] = n_executors
# The tests need to be newline-separated for the `circleci tests split` command below
command = f'echo {tests} | tr " " "\\n" >> tests.txt'
steps.append({"run": {"name": "Get tests", "command": command}})
command = 'TESTS=$(circleci tests split tests.txt) && echo $TESTS > splitted_tests.txt'
steps.append({"run": {"name": "Split tests", "command": command}})
steps.append({"store_artifacts": {"path": "~/transformers/tests.txt"}})
steps.append({"store_artifacts": {"path": "~/transformers/splitted_tests.txt"}})
test_command = f"python -m pytest -n {self.pytest_num_workers} " + " ".join(pytest_flags)
test_command += " $(cat splitted_tests.txt)"
if self.marker is not None: if self.marker is not None:
test_command += f" -m {self.marker}" test_command += f" -m {self.marker}"
test_command += " | tee tests_output.txt" test_command += " | tee tests_output.txt"
@@ -156,6 +207,7 @@ torch_job = CircleCIJob(
"pip install .[sklearn,torch,testing,sentencepiece,torch-speech,vision,timm]", "pip install .[sklearn,torch,testing,sentencepiece,torch-speech,vision,timm]",
"pip install git+https://github.com/huggingface/accelerate", "pip install git+https://github.com/huggingface/accelerate",
], ],
parallelism=1,
pytest_num_workers=3, pytest_num_workers=3,
) )
@@ -168,6 +220,7 @@ tf_job = CircleCIJob(
"pip install .[sklearn,tf-cpu,testing,sentencepiece,tf-speech,vision]", "pip install .[sklearn,tf-cpu,testing,sentencepiece,tf-speech,vision]",
"pip install tensorflow_probability", "pip install tensorflow_probability",
], ],
parallelism=1,
pytest_options={"rA": None}, pytest_options={"rA": None},
) )
@@ -179,6 +232,7 @@ flax_job = CircleCIJob(
"pip install --upgrade pip", "pip install --upgrade pip",
"pip install .[flax,testing,sentencepiece,flax-speech,vision]", "pip install .[flax,testing,sentencepiece,flax-speech,vision]",
], ],
parallelism=1,
pytest_options={"rA": None}, pytest_options={"rA": None},
) )
@@ -356,6 +410,8 @@ REPO_UTIL_TESTS = [repo_utils_job]
def create_circleci_config(folder=None): def create_circleci_config(folder=None):
if folder is None: if folder is None:
folder = os.getcwd() folder = os.getcwd()
# Used in CircleCIJob.to_dict() to expand the test list (for using parallelism)
os.environ["test_preparation_dir"] = folder
jobs = [] jobs = []
all_test_file = os.path.join(folder, "test_list.txt") all_test_file = os.path.join(folder, "test_list.txt")
if os.path.exists(all_test_file): if os.path.exists(all_test_file):