Black preview (#17217)

* Black preview
* Fixup too!
* Fix check copies
* Use the same version as the CI
* Bump black
2022-05-12 16:25:55 -04:00
parent 9bd67ac7bb
commit afe5d42d8d
578 changed files with 8274 additions and 3296 deletions
--- a/examples/research_projects/xtreme-s/run_xtreme_s.py
+++ b/examples/research_projects/xtreme-s/run_xtreme_s.py
@@ -89,7 +89,7 @@ class ModelArguments:
    cache_dir: Optional[str] = field(
        default=None,
        metadata={
-            "help": "Where do you want to store the pretrained models and datasets downloaded from " "huggingface.co"
+            "help": "Where do you want to store the pretrained models and datasets downloaded from huggingface.co"
        },
    )
    freeze_feature_encoder: bool = field(
@@ -115,9 +115,11 @@ class ModelArguments:
    mask_time_prob: float = field(
        default=0.05,
        metadata={
-            "help": "Probability of each feature vector along the time axis to be chosen as the start of the vector"
-            "span to be masked. Approximately ``mask_time_prob * sequence_length // mask_time_length`` feature"
-            "vectors will be masked along the time axis."
+            "help": (
+                "Probability of each feature vector along the time axis to be chosen as the start of the vector"
+                "span to be masked. Approximately ``mask_time_prob * sequence_length // mask_time_length`` feature"
+                "vectors will be masked along the time axis."
+            )
        },
    )
    mask_time_length: int = field(
@@ -127,8 +129,11 @@ class ModelArguments:
    mask_feature_prob: float = field(
        default=0.0,
        metadata={
-            "help": "Probability of each feature vector along the feature axis to be chosen as the start of the vector"
-            "span to be masked. Approximately ``mask_feature_prob * sequence_length // mask_feature_length`` feature bins will be masked along the time axis."
+            "help": (
+                "Probability of each feature vector along the feature axis to be chosen as the start of the vectorspan"
+                " to be masked. Approximately ``mask_feature_prob * sequence_length // mask_feature_length`` feature"
+                " bins will be masked along the time axis."
+            )
        },
    )
    mask_feature_length: int = field(
@@ -162,8 +167,10 @@ class DataTrainingArguments:
    task: str = field(
        default=None,
        metadata={
-            "help": "The task name of the benchmark to use (via the datasets library). Should be on of: "
-            "'fleurs-asr', 'mls', 'voxpopuli', 'covost2', 'minds14', 'fleurs-lang_id', 'babel'."
+            "help": (
+                "The task name of the benchmark to use (via the datasets library). Should be on of: "
+                "'fleurs-asr', 'mls', 'voxpopuli', 'covost2', 'minds14', 'fleurs-lang_id', 'babel'."
+            )
        },
    )
    language: str = field(
@@ -173,10 +180,12 @@ class DataTrainingArguments:
    language_group: str = field(
        default=None,
        metadata={
-            "help": "The language group to select a subset of languages to train on. "
-            "This option is only used the 'fleurs-asr' task. Should be one of: "
-            "'western_european_we', 'eastern_european_ee', 'central_asia_middle_north_african_cmn', "
-            "'sub_saharan_african_ssa', 'south_asian_sa', 'south_east_asian_sea', 'chinese_japanase_korean_cjk'."
+            "help": (
+                "The language group to select a subset of languages to train on. "
+                "This option is only used the 'fleurs-asr' task. Should be one of: "
+                "'western_european_we', 'eastern_european_ee', 'central_asia_middle_north_african_cmn', "
+                "'sub_saharan_african_ssa', 'south_asian_sa', 'south_east_asian_sea', 'chinese_japanase_korean_cjk'."
+            )
        },
    )
    train_split_name: str = field(
@@ -188,14 +197,15 @@ class DataTrainingArguments:
    eval_split_name: str = field(
        default="validation",
        metadata={
-            "help": "The name of the evaluation dataset split to use (via the datasets library). "
-            "Defaults to 'validation'"
+            "help": (
+                "The name of the evaluation dataset split to use (via the datasets library). Defaults to 'validation'"
+            )
        },
    )
    predict_split_name: str = field(
        default="test",
        metadata={
-            "help": "The name of the prediction dataset split to use (via the datasets library). " "Defaults to 'test'"
+            "help": "The name of the prediction dataset split to use (via the datasets library). Defaults to 'test'"
        },
    )
    audio_column_name: str = field(
@@ -205,8 +215,10 @@ class DataTrainingArguments:
    target_column_name: str = field(
        default=None,
        metadata={
-            "help": "The name of the dataset column containing the target data "
-            "(transcription/translation/label). If None, the name will be inferred from the task. Defaults to None."
+            "help": (
+                "The name of the dataset column containing the target data (transcription/translation/label). If None,"
+                " the name will be inferred from the task. Defaults to None."
+            )
        },
    )
    overwrite_cache: bool = field(
@@ -219,22 +231,28 @@ class DataTrainingArguments:
    max_train_samples: Optional[int] = field(
        default=None,
        metadata={
-            "help": "For debugging purposes or quicker training, truncate the number of training examples to this "
-            "value if set."
+            "help": (
+                "For debugging purposes or quicker training, truncate the number of training examples to this "
+                "value if set."
+            )
        },
    )
    max_eval_samples: Optional[int] = field(
        default=None,
        metadata={
-            "help": "For debugging purposes or quicker training, truncate the number of validation examples to this "
-            "value if set."
+            "help": (
+                "For debugging purposes or quicker training, truncate the number of validation examples to this "
+                "value if set."
+            )
        },
    )
    max_predict_samples: Optional[int] = field(
        default=None,
        metadata={
-            "help": "For debugging purposes or quicker training, truncate the number of prediction examples to this "
-            "value if set."
+            "help": (
+                "For debugging purposes or quicker training, truncate the number of prediction examples to this "
+                "value if set."
+            )
        },
    )
    chars_to_ignore: Optional[List[str]] = list_field(
@@ -244,7 +262,10 @@ class DataTrainingArguments:
    max_duration_in_seconds: float = field(
        default=30.0,
        metadata={
-            "help": "Filter audio files that are longer than `max_duration_in_seconds` seconds to 'max_duration_in_seconds`"
+            "help": (
+                "Filter audio files that are longer than `max_duration_in_seconds` seconds to"
+                " 'max_duration_in_seconds`"
+            )
        },
    )
    min_duration_in_seconds: float = field(
@@ -253,17 +274,21 @@ class DataTrainingArguments:
    preprocessing_only: bool = field(
        default=False,
        metadata={
-            "help": "Whether to only do data preprocessing and skip training. "
-            "This is especially useful when data preprocessing errors out in distributed training due to timeout. "
-            "In this case, one should run the preprocessing in a non-distributed setup with `preprocessing_only=True` "
-            "so that the cached datasets can consequently be loaded in distributed training"
+            "help": (
+                "Whether to only do data preprocessing and skip training. This is especially useful when data"
+                " preprocessing errors out in distributed training due to timeout. In this case, one should run the"
+                " preprocessing in a non-distributed setup with `preprocessing_only=True` so that the cached datasets"
+                " can consequently be loaded in distributed training"
+            )
        },
    )
    use_auth_token: bool = field(
        default=False,
        metadata={
-            "help": "If :obj:`True`, will use the token generated when running"
-            ":obj:`transformers-cli login` as HTTP bearer authorization for remote files."
+            "help": (
+                "If :obj:`True`, will use the token generated when running"
+                ":obj:`transformers-cli login` as HTTP bearer authorization for remote files."
+            )
        },
    )
    unk_token: str = field(
@@ -281,17 +306,21 @@ class DataTrainingArguments:
    phoneme_language: Optional[str] = field(
        default=None,
        metadata={
-            "help": "The target language that should be used be"
-            " passed to the tokenizer for tokenization. Note that"
-            " this is only relevant if the model classifies the"
-            " input audio to a sequence of phoneme sequences."
+            "help": (
+                "The target language that should be used be"
+                " passed to the tokenizer for tokenization. Note that"
+                " this is only relevant if the model classifies the"
+                " input audio to a sequence of phoneme sequences."
+            )
        },
    )
    per_lang_metrics: bool = field(
        default=True,
        metadata={
-            "help": "If `True`, compute the test metrics separately for each language, and average the results. "
-            "If `False` compute the average test metrics in a single pass for all languages at once."
+            "help": (
+                "If `True`, compute the test metrics separately for each language, and average the results. "
+                "If `False` compute the average test metrics in a single pass for all languages at once."
+            )
        },
    )

@@ -446,7 +475,7 @@ def main():

    if task_name is None:
        raise ValueError(
-            "Set --task should be set to '<xtreme_s_task>' " "(e.g. 'fleurs-asr', 'mls', 'covost2', 'minds14') "
+            "Set --task should be set to '<xtreme_s_task>' (e.g. 'fleurs-asr', 'mls', 'covost2', 'minds14') "
        )
    if lang_id is None:
        raise ValueError(
@@ -481,9 +510,9 @@ def main():

        if data_args.audio_column_name not in raw_datasets["train"].column_names:
            raise ValueError(
-                f"--audio_column_name '{data_args.audio_column_name}' not found in dataset '{data_args.dataset_name}'. "
-                "Make sure to set `--audio_column_name` to the correct audio column - one of "
-                f"{', '.join(raw_datasets['train'].column_names)}."
+                f"--audio_column_name '{data_args.audio_column_name}' not found in dataset '{data_args.dataset_name}'."
+                " Make sure to set `--audio_column_name` to the correct audio column - one of"
+                f" {', '.join(raw_datasets['train'].column_names)}."
            )

        if target_column_name not in raw_datasets["train"].column_names:
@@ -903,7 +932,10 @@ def main():
        "finetuned_from": model_args.model_name_or_path,
        "tasks": task_name,
        "tags": [task_name, data_args.dataset_name],
-        "dataset_args": f"Config: {config_name}, Training split: {data_args.train_split_name}, Eval split: {data_args.eval_split_name}, Predict split: {data_args.predict_split_name}",
+        "dataset_args": (
+            f"Config: {config_name}, Training split: {data_args.train_split_name}, Eval split:"
+            f" {data_args.eval_split_name}, Predict split: {data_args.predict_split_name}"
+        ),
        "dataset": f"{data_args.dataset_name.upper()} - {config_name.upper()}",
        "language": data_args.language,
    }