Black preview (#17217)

* Black preview

* Fixup too!

* Fix check copies

* Use the same version as the CI

* Bump black
This commit is contained in:
Sylvain Gugger
2022-05-12 16:25:55 -04:00
committed by GitHub
parent 9bd67ac7bb
commit afe5d42d8d
578 changed files with 8274 additions and 3296 deletions

View File

@@ -89,7 +89,7 @@ class ModelArguments:
cache_dir: Optional[str] = field(
default=None,
metadata={
"help": "Where do you want to store the pretrained models and datasets downloaded from " "huggingface.co"
"help": "Where do you want to store the pretrained models and datasets downloaded from huggingface.co"
},
)
freeze_feature_encoder: bool = field(
@@ -115,9 +115,11 @@ class ModelArguments:
mask_time_prob: float = field(
default=0.05,
metadata={
"help": "Probability of each feature vector along the time axis to be chosen as the start of the vector"
"span to be masked. Approximately ``mask_time_prob * sequence_length // mask_time_length`` feature"
"vectors will be masked along the time axis."
"help": (
"Probability of each feature vector along the time axis to be chosen as the start of the vector"
"span to be masked. Approximately ``mask_time_prob * sequence_length // mask_time_length`` feature"
"vectors will be masked along the time axis."
)
},
)
mask_time_length: int = field(
@@ -127,8 +129,11 @@ class ModelArguments:
mask_feature_prob: float = field(
default=0.0,
metadata={
"help": "Probability of each feature vector along the feature axis to be chosen as the start of the vector"
"span to be masked. Approximately ``mask_feature_prob * sequence_length // mask_feature_length`` feature bins will be masked along the time axis."
"help": (
"Probability of each feature vector along the feature axis to be chosen as the start of the vectorspan"
" to be masked. Approximately ``mask_feature_prob * sequence_length // mask_feature_length`` feature"
" bins will be masked along the time axis."
)
},
)
mask_feature_length: int = field(
@@ -162,8 +167,10 @@ class DataTrainingArguments:
task: str = field(
default=None,
metadata={
"help": "The task name of the benchmark to use (via the datasets library). Should be on of: "
"'fleurs-asr', 'mls', 'voxpopuli', 'covost2', 'minds14', 'fleurs-lang_id', 'babel'."
"help": (
"The task name of the benchmark to use (via the datasets library). Should be on of: "
"'fleurs-asr', 'mls', 'voxpopuli', 'covost2', 'minds14', 'fleurs-lang_id', 'babel'."
)
},
)
language: str = field(
@@ -173,10 +180,12 @@ class DataTrainingArguments:
language_group: str = field(
default=None,
metadata={
"help": "The language group to select a subset of languages to train on. "
"This option is only used the 'fleurs-asr' task. Should be one of: "
"'western_european_we', 'eastern_european_ee', 'central_asia_middle_north_african_cmn', "
"'sub_saharan_african_ssa', 'south_asian_sa', 'south_east_asian_sea', 'chinese_japanase_korean_cjk'."
"help": (
"The language group to select a subset of languages to train on. "
"This option is only used the 'fleurs-asr' task. Should be one of: "
"'western_european_we', 'eastern_european_ee', 'central_asia_middle_north_african_cmn', "
"'sub_saharan_african_ssa', 'south_asian_sa', 'south_east_asian_sea', 'chinese_japanase_korean_cjk'."
)
},
)
train_split_name: str = field(
@@ -188,14 +197,15 @@ class DataTrainingArguments:
eval_split_name: str = field(
default="validation",
metadata={
"help": "The name of the evaluation dataset split to use (via the datasets library). "
"Defaults to 'validation'"
"help": (
"The name of the evaluation dataset split to use (via the datasets library). Defaults to 'validation'"
)
},
)
predict_split_name: str = field(
default="test",
metadata={
"help": "The name of the prediction dataset split to use (via the datasets library). " "Defaults to 'test'"
"help": "The name of the prediction dataset split to use (via the datasets library). Defaults to 'test'"
},
)
audio_column_name: str = field(
@@ -205,8 +215,10 @@ class DataTrainingArguments:
target_column_name: str = field(
default=None,
metadata={
"help": "The name of the dataset column containing the target data "
"(transcription/translation/label). If None, the name will be inferred from the task. Defaults to None."
"help": (
"The name of the dataset column containing the target data (transcription/translation/label). If None,"
" the name will be inferred from the task. Defaults to None."
)
},
)
overwrite_cache: bool = field(
@@ -219,22 +231,28 @@ class DataTrainingArguments:
max_train_samples: Optional[int] = field(
default=None,
metadata={
"help": "For debugging purposes or quicker training, truncate the number of training examples to this "
"value if set."
"help": (
"For debugging purposes or quicker training, truncate the number of training examples to this "
"value if set."
)
},
)
max_eval_samples: Optional[int] = field(
default=None,
metadata={
"help": "For debugging purposes or quicker training, truncate the number of validation examples to this "
"value if set."
"help": (
"For debugging purposes or quicker training, truncate the number of validation examples to this "
"value if set."
)
},
)
max_predict_samples: Optional[int] = field(
default=None,
metadata={
"help": "For debugging purposes or quicker training, truncate the number of prediction examples to this "
"value if set."
"help": (
"For debugging purposes or quicker training, truncate the number of prediction examples to this "
"value if set."
)
},
)
chars_to_ignore: Optional[List[str]] = list_field(
@@ -244,7 +262,10 @@ class DataTrainingArguments:
max_duration_in_seconds: float = field(
default=30.0,
metadata={
"help": "Filter audio files that are longer than `max_duration_in_seconds` seconds to 'max_duration_in_seconds`"
"help": (
"Filter audio files that are longer than `max_duration_in_seconds` seconds to"
" 'max_duration_in_seconds`"
)
},
)
min_duration_in_seconds: float = field(
@@ -253,17 +274,21 @@ class DataTrainingArguments:
preprocessing_only: bool = field(
default=False,
metadata={
"help": "Whether to only do data preprocessing and skip training. "
"This is especially useful when data preprocessing errors out in distributed training due to timeout. "
"In this case, one should run the preprocessing in a non-distributed setup with `preprocessing_only=True` "
"so that the cached datasets can consequently be loaded in distributed training"
"help": (
"Whether to only do data preprocessing and skip training. This is especially useful when data"
" preprocessing errors out in distributed training due to timeout. In this case, one should run the"
" preprocessing in a non-distributed setup with `preprocessing_only=True` so that the cached datasets"
" can consequently be loaded in distributed training"
)
},
)
use_auth_token: bool = field(
default=False,
metadata={
"help": "If :obj:`True`, will use the token generated when running"
":obj:`transformers-cli login` as HTTP bearer authorization for remote files."
"help": (
"If :obj:`True`, will use the token generated when running"
":obj:`transformers-cli login` as HTTP bearer authorization for remote files."
)
},
)
unk_token: str = field(
@@ -281,17 +306,21 @@ class DataTrainingArguments:
phoneme_language: Optional[str] = field(
default=None,
metadata={
"help": "The target language that should be used be"
" passed to the tokenizer for tokenization. Note that"
" this is only relevant if the model classifies the"
" input audio to a sequence of phoneme sequences."
"help": (
"The target language that should be used be"
" passed to the tokenizer for tokenization. Note that"
" this is only relevant if the model classifies the"
" input audio to a sequence of phoneme sequences."
)
},
)
per_lang_metrics: bool = field(
default=True,
metadata={
"help": "If `True`, compute the test metrics separately for each language, and average the results. "
"If `False` compute the average test metrics in a single pass for all languages at once."
"help": (
"If `True`, compute the test metrics separately for each language, and average the results. "
"If `False` compute the average test metrics in a single pass for all languages at once."
)
},
)
@@ -446,7 +475,7 @@ def main():
if task_name is None:
raise ValueError(
"Set --task should be set to '<xtreme_s_task>' " "(e.g. 'fleurs-asr', 'mls', 'covost2', 'minds14') "
"Set --task should be set to '<xtreme_s_task>' (e.g. 'fleurs-asr', 'mls', 'covost2', 'minds14') "
)
if lang_id is None:
raise ValueError(
@@ -481,9 +510,9 @@ def main():
if data_args.audio_column_name not in raw_datasets["train"].column_names:
raise ValueError(
f"--audio_column_name '{data_args.audio_column_name}' not found in dataset '{data_args.dataset_name}'. "
"Make sure to set `--audio_column_name` to the correct audio column - one of "
f"{', '.join(raw_datasets['train'].column_names)}."
f"--audio_column_name '{data_args.audio_column_name}' not found in dataset '{data_args.dataset_name}'."
" Make sure to set `--audio_column_name` to the correct audio column - one of"
f" {', '.join(raw_datasets['train'].column_names)}."
)
if target_column_name not in raw_datasets["train"].column_names:
@@ -903,7 +932,10 @@ def main():
"finetuned_from": model_args.model_name_or_path,
"tasks": task_name,
"tags": [task_name, data_args.dataset_name],
"dataset_args": f"Config: {config_name}, Training split: {data_args.train_split_name}, Eval split: {data_args.eval_split_name}, Predict split: {data_args.predict_split_name}",
"dataset_args": (
f"Config: {config_name}, Training split: {data_args.train_split_name}, Eval split:"
f" {data_args.eval_split_name}, Predict split: {data_args.predict_split_name}"
),
"dataset": f"{data_args.dataset_name.upper()} - {config_name.upper()}",
"language": data_args.language,
}