Black preview (#17217)

* Black preview
* Fixup too!
* Fix check copies
* Use the same version as the CI
* Bump black
2022-05-12 16:25:55 -04:00
parent 9bd67ac7bb
commit afe5d42d8d
578 changed files with 8274 additions and 3296 deletions
--- a/examples/legacy/run_language_modeling.py
+++ b/examples/legacy/run_language_modeling.py
@@ -68,7 +68,10 @@ class ModelArguments:
    model_name_or_path: Optional[str] = field(
        default=None,
        metadata={
-            "help": "The model checkpoint for weights initialization. Leave None if you want to train a model from scratch."
+            "help": (
+                "The model checkpoint for weights initialization. Leave None if you want to train a model from"
+                " scratch."
+            )
        },
    )
    model_type: Optional[str] = field(
@@ -99,8 +102,10 @@ class DataTrainingArguments:
    train_data_files: Optional[str] = field(
        default=None,
        metadata={
-            "help": "The input training data files (multiple files in glob format). "
-            "Very often splitting large files to smaller files can prevent tokenizer going out of memory"
+            "help": (
+                "The input training data files (multiple files in glob format). "
+                "Very often splitting large files to smaller files can prevent tokenizer going out of memory"
+            )
        },
    )
    eval_data_file: Optional[str] = field(
@@ -130,7 +135,10 @@ class DataTrainingArguments:
    plm_probability: float = field(
        default=1 / 6,
        metadata={
-            "help": "Ratio of length of a span of masked tokens to surrounding context length for permutation language modeling."
+            "help": (
+                "Ratio of length of a span of masked tokens to surrounding context length for permutation language"
+                " modeling."
+            )
        },
    )
    max_span_length: int = field(
@@ -140,9 +148,11 @@ class DataTrainingArguments:
    block_size: int = field(
        default=-1,
        metadata={
-            "help": "Optional input sequence length after tokenization."
-            "The training dataset will be truncated in block of this size for training."
-            "Default to the model max input length for single sentence inputs (take into account special tokens)."
+            "help": (
+                "Optional input sequence length after tokenization."
+                "The training dataset will be truncated in block of this size for training."
+                "Default to the model max input length for single sentence inputs (take into account special tokens)."
+            )
        },
    )
    overwrite_cache: bool = field(
@@ -206,7 +216,8 @@ def main():
        and not training_args.overwrite_output_dir
    ):
        raise ValueError(
-            f"Output directory ({training_args.output_dir}) already exists and is not empty. Use --overwrite_output_dir to overcome."
+            f"Output directory ({training_args.output_dir}) already exists and is not empty. Use"
+            " --overwrite_output_dir to overcome."
        )

    # Setup logging
@@ -253,8 +264,8 @@ def main():
        tokenizer = AutoTokenizer.from_pretrained(model_args.model_name_or_path, cache_dir=model_args.cache_dir)
    else:
        raise ValueError(
-            "You are instantiating a new tokenizer from scratch. This is not supported, but you can do it from another script, save it,"
-            "and load it from here, using --tokenizer_name"
+            "You are instantiating a new tokenizer from scratch. This is not supported, but you can do it from another"
+            " script, save it,and load it from here, using --tokenizer_name"
        )

    if model_args.model_name_or_path: