Black preview (#17217)
* Black preview
* Fixup too!
* Fix check copies
* Use the same version as the CI
* Bump black
This commit is contained in:
@@ -99,7 +99,9 @@ class DataTrainingArguments:
|
||||
validation_split_name: Optional[str] = field(
|
||||
default="validation",
|
||||
metadata={
|
||||
"help": "The name of the validation data set split to use (via the datasets library). Defaults to 'validation'"
|
||||
"help": (
|
||||
"The name of the validation data set split to use (via the datasets library). Defaults to 'validation'"
|
||||
)
|
||||
},
|
||||
)
|
||||
target_text_column: Optional[str] = field(
|
||||
@@ -121,7 +123,10 @@ class DataTrainingArguments:
|
||||
orthography: Optional[str] = field(
|
||||
default="librispeech",
|
||||
metadata={
|
||||
"help": "Orthography used for normalization and tokenization: 'librispeech' (default), 'timit', or 'buckwalter'."
|
||||
"help": (
|
||||
"Orthography used for normalization and tokenization: 'librispeech' (default), 'timit', or"
|
||||
" 'buckwalter'."
|
||||
)
|
||||
},
|
||||
)
|
||||
overwrite_cache: bool = field(
|
||||
@@ -392,11 +397,13 @@ def main():
|
||||
val_dataset = val_dataset.filter(filter_by_max_duration, remove_columns=["duration_in_seconds"])
|
||||
if len(train_dataset) > old_train_size:
|
||||
logger.warning(
|
||||
f"Filtered out {len(train_dataset) - old_train_size} train example(s) longer than {data_args.max_duration_in_seconds} second(s)."
|
||||
f"Filtered out {len(train_dataset) - old_train_size} train example(s) longer than"
|
||||
f" {data_args.max_duration_in_seconds} second(s)."
|
||||
)
|
||||
if len(val_dataset) > old_val_size:
|
||||
logger.warning(
|
||||
f"Filtered out {len(val_dataset) - old_val_size} validation example(s) longer than {data_args.max_duration_in_seconds} second(s)."
|
||||
f"Filtered out {len(val_dataset) - old_val_size} validation example(s) longer than"
|
||||
f" {data_args.max_duration_in_seconds} second(s)."
|
||||
)
|
||||
logger.info(f"Split sizes: {len(train_dataset)} train and {len(val_dataset)} validation.")
|
||||
|
||||
|
||||
@@ -79,9 +79,11 @@ class ModelArguments:
|
||||
mask_time_prob: Optional[float] = field(
|
||||
default=0.05,
|
||||
metadata={
|
||||
"help": "Propability of each feature vector along the time axis to be chosen as the start of the vector"
|
||||
"span to be masked. Approximately ``mask_time_prob * sequence_length // mask_time_length`` feature"
|
||||
"vectors will be masked along the time axis. This is only relevant if ``apply_spec_augment is True``."
|
||||
"help": (
|
||||
"Propability of each feature vector along the time axis to be chosen as the start of the vector"
|
||||
"span to be masked. Approximately ``mask_time_prob * sequence_length // mask_time_length`` feature"
|
||||
"vectors will be masked along the time axis. This is only relevant if ``apply_spec_augment is True``."
|
||||
)
|
||||
},
|
||||
)
|
||||
layerdrop: Optional[float] = field(default=0.0, metadata={"help": "The LayerDrop probability."})
|
||||
@@ -116,15 +118,19 @@ class DataTrainingArguments:
|
||||
max_train_samples: Optional[int] = field(
|
||||
default=None,
|
||||
metadata={
|
||||
"help": "For debugging purposes or quicker training, truncate the number of training examples to this "
|
||||
"value if set."
|
||||
"help": (
|
||||
"For debugging purposes or quicker training, truncate the number of training examples to this "
|
||||
"value if set."
|
||||
)
|
||||
},
|
||||
)
|
||||
max_val_samples: Optional[int] = field(
|
||||
default=None,
|
||||
metadata={
|
||||
"help": "For debugging purposes or quicker training, truncate the number of validation examples to this "
|
||||
"value if set."
|
||||
"help": (
|
||||
"For debugging purposes or quicker training, truncate the number of validation examples to this "
|
||||
"value if set."
|
||||
)
|
||||
},
|
||||
)
|
||||
chars_to_ignore: List[str] = list_field(
|
||||
|
||||
@@ -104,7 +104,9 @@ class DataTrainingArguments:
|
||||
validation_split_name: Optional[str] = field(
|
||||
default="validation",
|
||||
metadata={
|
||||
"help": "The name of the validation data set split to use (via the datasets library). Defaults to 'validation'"
|
||||
"help": (
|
||||
"The name of the validation data set split to use (via the datasets library). Defaults to 'validation'"
|
||||
)
|
||||
},
|
||||
)
|
||||
speech_file_column: Optional[str] = field(
|
||||
@@ -369,7 +371,8 @@ def main():
|
||||
|
||||
if not config.do_stable_layer_norm or config.feat_extract_norm != "layer":
|
||||
raise ValueError(
|
||||
"PreTraining is only supported for ``config.do_stable_layer_norm=True`` and ``config.feat_extract_norm='layer'"
|
||||
"PreTraining is only supported for ``config.do_stable_layer_norm=True`` and"
|
||||
" ``config.feat_extract_norm='layer'"
|
||||
)
|
||||
|
||||
model = Wav2Vec2ForPreTraining(config)
|
||||
|
||||
Reference in New Issue
Block a user