Update quality tooling for formatting (#21480)

* Result of black 23.1
* Update target to Python 3.7
* Switch flake8 to ruff
* Configure isort
* Configure isort
* Apply isort with line limit
* Put the right black version
* adapt black in check copies
* Fix copies
2023-02-06 18:10:56 -05:00
parent b7bb2b59f7
commit 6f79d26442
1211 changed files with 1532 additions and 2687 deletions
--- a/utils/check_copies.py
+++ b/utils/check_copies.py
@@ -177,7 +177,7 @@ def blackify(code):
    has_indent = len(get_indent(code)) > 0
    if has_indent:
        code = f"class Bla:\n{code}"
-    mode = black.Mode(target_versions={black.TargetVersion.PY35}, line_length=119, preview=True)
+    mode = black.Mode(target_versions={black.TargetVersion.PY37}, line_length=119)
    result = black.format_str(code, mode=mode)
    result, _ = style_docstrings_in_code(result)
    return result[len("class Bla:\n") :] if has_indent else result
--- a/utils/check_doctest_list.py
+++ b/utils/check_doctest_list.py
@@ -22,7 +22,6 @@ REPO_PATH = "."


 if __name__ == "__main__":
-
    doctest_file_path = os.path.join(REPO_PATH, "utils/documentation_tests.txt")
    non_existent_paths = []
    with open(doctest_file_path) as fp:
--- a/utils/check_repo.py
+++ b/utils/check_repo.py
@@ -445,7 +445,7 @@ def get_model_test_files():
            path = os.path.join(target_dir, file_or_dir)
            if os.path.isfile(path):
                filename = os.path.split(path)[-1]
-                if "test_modeling" in filename and not os.path.splitext(filename)[0] in _ignore_files:
+                if "test_modeling" in filename and os.path.splitext(filename)[0] not in _ignore_files:
                    file = os.path.join(*path.split(os.sep)[1:])
                    test_files.append(file)

--- a/utils/check_self_hosted_runner.py
+++ b/utils/check_self_hosted_runner.py
@@ -4,7 +4,6 @@ import subprocess


 def get_runner_status(target_runners, token):
-
    offline_runners = []

    cmd = (
--- a/utils/create_dummy_models.py
+++ b/utils/create_dummy_models.py
@@ -25,10 +25,10 @@ import sys
 import tempfile
 from pathlib import Path

-from datasets import load_dataset
-
 from check_config_docstrings import get_checkpoint_from_config_class
+from datasets import load_dataset
 from huggingface_hub import Repository, create_repo, upload_folder
+
 from transformers import (
    CONFIG_MAPPING,
    FEATURE_EXTRACTOR_MAPPING,
@@ -350,7 +350,6 @@ def get_tiny_config(config_class, **model_tester_kwargs):


 def convert_tokenizer(tokenizer_fast: PreTrainedTokenizerFast):
-
    new_tokenizer = tokenizer_fast.train_new_from_iterator(training_ds["text"], TARGET_VOCAB_SIZE, show_progress=False)

    # Make sure it at least runs
@@ -361,7 +360,6 @@ def convert_tokenizer(tokenizer_fast: PreTrainedTokenizerFast):


 def convert_feature_extractor(feature_extractor, tiny_config):
-
    to_convert = False
    kwargs = {}
    if hasattr(tiny_config, "image_size"):
@@ -574,7 +572,6 @@ def upload_model(model_dir, organization):
        raise ValueError(error)

    with tempfile.TemporaryDirectory() as tmpdir:
-
        repo = Repository(local_dir=tmpdir, clone_from=f"{organization}/{repo_name}")
        repo.git_pull()
        shutil.copytree(model_dir, tmpdir, dirs_exist_ok=True)
@@ -599,7 +596,6 @@ def upload_model(model_dir, organization):


 def build_composite_models(config_class, output_dir):
-
    import tempfile

    from transformers import (
@@ -668,7 +664,6 @@ def build_composite_models(config_class, output_dir):
        tf_model_class = None

    with tempfile.TemporaryDirectory() as tmpdir:
-
        try:
            # build encoder
            models_to_create = {"processor": encoder_processor, "pytorch": (encoder_class,), "tensorflow": []}
@@ -761,7 +756,6 @@ def get_token_id_from_tokenizer(token_id_name, tokenizer, original_token_id):


 def get_config_overrides(config_class, processors):
-
    config_overrides = {}

    # Check if there is any tokenizer (prefer fast version if any)
@@ -990,7 +984,6 @@ def build(config_class, models_to_create, output_dir):


 def build_failed_report(results, include_warning=True):
-
    failed_results = {}
    for config_name in results:
        if "error" in results[config_name]:
@@ -1021,7 +1014,6 @@ def build_failed_report(results, include_warning=True):


 def build_simple_report(results):
-
    text = ""
    failed_text = ""
    for config_name in results:
@@ -1040,7 +1032,6 @@ def build_simple_report(results):


 if __name__ == "__main__":
-
    clone_path = os.path.abspath(os.path.dirname(os.path.dirname(__file__)))
    if os.getcwd() != clone_path:
        raise ValueError(f"This script should be run from the root of the clone of `transformers` {clone_path}")
--- a/utils/custom_init_isort.py
+++ b/utils/custom_init_isort.py
@@ -96,6 +96,7 @@ def ignore_underscore(key):

 def sort_objects(objects, key=None):
    "Sort a list of `objects` following the rules of isort. `key` optionally maps an object to a str."
+
    # If no key is provided, we use a noop.
    def noop(x):
        return x
@@ -117,6 +118,7 @@ def sort_objects_in_import(import_statement):
    """
    Return the same `import_statement` but with objects properly sorted.
    """
+
    # This inner function sort imports between [ ].
    def _replace(match):
        imports = match.groups()[0]
--- a/utils/extract_warnings.py
+++ b/utils/extract_warnings.py
@@ -5,6 +5,7 @@ import time
 import zipfile

 from get_ci_error_statistics import download_artifact, get_artifacts_links
+
 from transformers import logging


--- a/utils/get_ci_error_statistics.py
+++ b/utils/get_ci_error_statistics.py
@@ -209,7 +209,6 @@ def make_github_table_per_model(reduced_by_model):


 if __name__ == "__main__":
-
    parser = argparse.ArgumentParser()
    # Required parameters
    parser.add_argument(
--- a/utils/notification_service.py
+++ b/utils/notification_service.py
@@ -413,7 +413,6 @@ class Message:

    @staticmethod
    def error_out(title, ci_title="", runner_not_available=False, runner_failed=False, setup_failed=False):
-
        blocks = []
        title_block = {"type": "header", "text": {"type": "plain_text", "text": title}}
        blocks.append(title_block)
@@ -691,7 +690,6 @@ def prepare_reports(title, header, reports, to_truncate=True):


 if __name__ == "__main__":
-
    runner_status = os.environ.get("RUNNER_STATUS")
    runner_env_status = os.environ.get("RUNNER_ENV_STATUS")
    setup_status = os.environ.get("SETUP_STATUS")
@@ -832,7 +830,6 @@ if __name__ == "__main__":

                for line in artifact["summary_short"].split("\n"):
                    if re.search("FAILED", line):
-
                        line = line.replace("FAILED ", "")
                        line = line.split()[0].replace("\n", "")

@@ -897,7 +894,6 @@ if __name__ == "__main__":
    }

    for key in additional_results.keys():
-
        # If a whole suite of test fails, the artifact isn't available.
        if additional_files[key] not in available_artifacts:
            additional_results[key]["error"] = True
--- a/utils/notification_service_doc_tests.py
+++ b/utils/notification_service_doc_tests.py
@@ -323,7 +323,6 @@ def retrieve_available_artifacts():


 if __name__ == "__main__":
-
    github_actions_job_links = get_job_links()
    available_artifacts = retrieve_available_artifacts()

@@ -359,7 +358,6 @@ if __name__ == "__main__":
        all_failures = extract_first_line_failure(artifact["failures_short"])
        for line in artifact["summary_short"].split("\n"):
            if re.search("FAILED", line):
-
                line = line.replace("FAILED ", "")
                line = line.split()[0].replace("\n", "")

--- a/utils/update_metadata.py
+++ b/utils/update_metadata.py
@@ -22,7 +22,6 @@ import tempfile

 import pandas as pd
 from datasets import Dataset
-
 from huggingface_hub import Repository