Black preview (#17217)

* Black preview

* Fixup too!

* Fix check copies

* Use the same version as the CI

* Bump black
This commit is contained in:
Sylvain Gugger
2022-05-12 16:25:55 -04:00
committed by GitHub
parent 9bd67ac7bb
commit afe5d42d8d
578 changed files with 8274 additions and 3296 deletions

View File

@@ -99,7 +99,9 @@ class DataTrainingArguments:
validation_split_name: Optional[str] = field(
default="validation",
metadata={
"help": "The name of the validation data set split to use (via the datasets library). Defaults to 'validation'"
"help": (
"The name of the validation data set split to use (via the datasets library). Defaults to 'validation'"
)
},
)
target_text_column: Optional[str] = field(
@@ -121,7 +123,10 @@ class DataTrainingArguments:
orthography: Optional[str] = field(
default="librispeech",
metadata={
"help": "Orthography used for normalization and tokenization: 'librispeech' (default), 'timit', or 'buckwalter'."
"help": (
"Orthography used for normalization and tokenization: 'librispeech' (default), 'timit', or"
" 'buckwalter'."
)
},
)
overwrite_cache: bool = field(
@@ -392,11 +397,13 @@ def main():
val_dataset = val_dataset.filter(filter_by_max_duration, remove_columns=["duration_in_seconds"])
if len(train_dataset) > old_train_size:
logger.warning(
f"Filtered out {len(train_dataset) - old_train_size} train example(s) longer than {data_args.max_duration_in_seconds} second(s)."
f"Filtered out {len(train_dataset) - old_train_size} train example(s) longer than"
f" {data_args.max_duration_in_seconds} second(s)."
)
if len(val_dataset) > old_val_size:
logger.warning(
f"Filtered out {len(val_dataset) - old_val_size} validation example(s) longer than {data_args.max_duration_in_seconds} second(s)."
f"Filtered out {len(val_dataset) - old_val_size} validation example(s) longer than"
f" {data_args.max_duration_in_seconds} second(s)."
)
logger.info(f"Split sizes: {len(train_dataset)} train and {len(val_dataset)} validation.")

View File

@@ -79,9 +79,11 @@ class ModelArguments:
mask_time_prob: Optional[float] = field(
default=0.05,
metadata={
"help": "Propability of each feature vector along the time axis to be chosen as the start of the vector"
"span to be masked. Approximately ``mask_time_prob * sequence_length // mask_time_length`` feature"
"vectors will be masked along the time axis. This is only relevant if ``apply_spec_augment is True``."
"help": (
"Propability of each feature vector along the time axis to be chosen as the start of the vector"
"span to be masked. Approximately ``mask_time_prob * sequence_length // mask_time_length`` feature"
"vectors will be masked along the time axis. This is only relevant if ``apply_spec_augment is True``."
)
},
)
layerdrop: Optional[float] = field(default=0.0, metadata={"help": "The LayerDrop probability."})
@@ -116,15 +118,19 @@ class DataTrainingArguments:
max_train_samples: Optional[int] = field(
default=None,
metadata={
"help": "For debugging purposes or quicker training, truncate the number of training examples to this "
"value if set."
"help": (
"For debugging purposes or quicker training, truncate the number of training examples to this "
"value if set."
)
},
)
max_val_samples: Optional[int] = field(
default=None,
metadata={
"help": "For debugging purposes or quicker training, truncate the number of validation examples to this "
"value if set."
"help": (
"For debugging purposes or quicker training, truncate the number of validation examples to this "
"value if set."
)
},
)
chars_to_ignore: List[str] = list_field(

View File

@@ -104,7 +104,9 @@ class DataTrainingArguments:
validation_split_name: Optional[str] = field(
default="validation",
metadata={
"help": "The name of the validation data set split to use (via the datasets library). Defaults to 'validation'"
"help": (
"The name of the validation data set split to use (via the datasets library). Defaults to 'validation'"
)
},
)
speech_file_column: Optional[str] = field(
@@ -369,7 +371,8 @@ def main():
if not config.do_stable_layer_norm or config.feat_extract_norm != "layer":
raise ValueError(
"PreTraining is only supported for ``config.do_stable_layer_norm=True`` and ``config.feat_extract_norm='layer'"
"PreTraining is only supported for ``config.do_stable_layer_norm=True`` and"
" ``config.feat_extract_norm='layer'"
)
model = Wav2Vec2ForPreTraining(config)