Black preview (#17217)

* Black preview

* Fixup too!

* Fix check copies

* Use the same version as the CI

* Bump black
This commit is contained in:
Sylvain Gugger
2022-05-12 16:25:55 -04:00
committed by GitHub
parent 9bd67ac7bb
commit afe5d42d8d
578 changed files with 8274 additions and 3296 deletions

View File

@@ -68,7 +68,10 @@ class ModelArguments:
model_name_or_path: Optional[str] = field(
default=None,
metadata={
"help": "The model checkpoint for weights initialization. Leave None if you want to train a model from scratch."
"help": (
"The model checkpoint for weights initialization. Leave None if you want to train a model from"
" scratch."
)
},
)
model_type: Optional[str] = field(
@@ -99,8 +102,10 @@ class DataTrainingArguments:
train_data_files: Optional[str] = field(
default=None,
metadata={
"help": "The input training data files (multiple files in glob format). "
"Very often splitting large files to smaller files can prevent tokenizer going out of memory"
"help": (
"The input training data files (multiple files in glob format). "
"Very often splitting large files to smaller files can prevent tokenizer going out of memory"
)
},
)
eval_data_file: Optional[str] = field(
@@ -130,7 +135,10 @@ class DataTrainingArguments:
plm_probability: float = field(
default=1 / 6,
metadata={
"help": "Ratio of length of a span of masked tokens to surrounding context length for permutation language modeling."
"help": (
"Ratio of length of a span of masked tokens to surrounding context length for permutation language"
" modeling."
)
},
)
max_span_length: int = field(
@@ -140,9 +148,11 @@ class DataTrainingArguments:
block_size: int = field(
default=-1,
metadata={
"help": "Optional input sequence length after tokenization."
"The training dataset will be truncated in block of this size for training."
"Default to the model max input length for single sentence inputs (take into account special tokens)."
"help": (
"Optional input sequence length after tokenization."
"The training dataset will be truncated in block of this size for training."
"Default to the model max input length for single sentence inputs (take into account special tokens)."
)
},
)
overwrite_cache: bool = field(
@@ -206,7 +216,8 @@ def main():
and not training_args.overwrite_output_dir
):
raise ValueError(
f"Output directory ({training_args.output_dir}) already exists and is not empty. Use --overwrite_output_dir to overcome."
f"Output directory ({training_args.output_dir}) already exists and is not empty. Use"
" --overwrite_output_dir to overcome."
)
# Setup logging
@@ -253,8 +264,8 @@ def main():
tokenizer = AutoTokenizer.from_pretrained(model_args.model_name_or_path, cache_dir=model_args.cache_dir)
else:
raise ValueError(
"You are instantiating a new tokenizer from scratch. This is not supported, but you can do it from another script, save it,"
"and load it from here, using --tokenizer_name"
"You are instantiating a new tokenizer from scratch. This is not supported, but you can do it from another"
" script, save it,and load it from here, using --tokenizer_name"
)
if model_args.model_name_or_path: