Add many missing spaces in adjacent strings (#26751)
Add missing spaces in adjacent strings
This commit is contained in:
@@ -37,7 +37,7 @@ class ModelArguments:
|
||||
encoder_model_name_or_path: str = field(
|
||||
metadata={
|
||||
"help": (
|
||||
"The encoder model checkpoint for weights initialization."
|
||||
"The encoder model checkpoint for weights initialization. "
|
||||
"Don't set if you want to train an encoder model from scratch."
|
||||
)
|
||||
},
|
||||
@@ -45,7 +45,7 @@ class ModelArguments:
|
||||
decoder_model_name_or_path: str = field(
|
||||
metadata={
|
||||
"help": (
|
||||
"The decoder model checkpoint for weights initialization."
|
||||
"The decoder model checkpoint for weights initialization. "
|
||||
"Don't set if you want to train a decoder model from scratch."
|
||||
)
|
||||
},
|
||||
|
||||
@@ -203,7 +203,7 @@ class ModelArguments:
|
||||
metadata={
|
||||
"help": (
|
||||
"Whether or not to allow for custom models defined on the Hub in their own modeling files. This option"
|
||||
"should only be set to `True` for repositories you trust and in which you have read the code, as it will"
|
||||
"should only be set to `True` for repositories you trust and in which you have read the code, as it will "
|
||||
"execute code present on the Hub on your local machine."
|
||||
)
|
||||
},
|
||||
@@ -256,7 +256,7 @@ class DataTrainingArguments:
|
||||
metadata={
|
||||
"help": (
|
||||
"The maximum total sequence length for validation target text after tokenization. Sequences longer "
|
||||
"than this will be truncated, sequences shorter will be padded. Will default to `max_target_length`."
|
||||
"than this will be truncated, sequences shorter will be padded. Will default to `max_target_length`. "
|
||||
"This argument is also used to override the `max_length` param of `model.generate`, which is used "
|
||||
"during evaluation."
|
||||
)
|
||||
@@ -423,7 +423,7 @@ def main():
|
||||
and not training_args.overwrite_output_dir
|
||||
):
|
||||
raise ValueError(
|
||||
f"Output directory ({training_args.output_dir}) already exists and is not empty."
|
||||
f"Output directory ({training_args.output_dir}) already exists and is not empty. "
|
||||
"Use --overwrite_output_dir to overcome."
|
||||
)
|
||||
|
||||
@@ -685,7 +685,7 @@ def main():
|
||||
eval_batch_size = int(training_args.per_device_eval_batch_size) * jax.device_count()
|
||||
if training_args.block_size % train_batch_size > 0 or training_args.block_size % eval_batch_size > 0:
|
||||
raise ValueError(
|
||||
"`training_args.block_size` needs to be a multiple of the global train/eval batch size."
|
||||
"`training_args.block_size` needs to be a multiple of the global train/eval batch size. "
|
||||
f"Got {training_args.block_size}, {train_batch_size} and {eval_batch_size} respectively instead."
|
||||
)
|
||||
|
||||
|
||||
@@ -487,7 +487,7 @@ def main():
|
||||
and not training_args.overwrite_output_dir
|
||||
):
|
||||
raise ValueError(
|
||||
f"Output directory ({training_args.output_dir}) already exists and is not empty."
|
||||
f"Output directory ({training_args.output_dir}) already exists and is not empty. "
|
||||
"Use --overwrite_output_dir to overcome."
|
||||
)
|
||||
|
||||
@@ -606,7 +606,7 @@ def main():
|
||||
)
|
||||
else:
|
||||
raise ValueError(
|
||||
"You are instantiating a new tokenizer from scratch. This is not supported by this script."
|
||||
"You are instantiating a new tokenizer from scratch. This is not supported by this script. "
|
||||
"You can do it from another script, save it, and load it from here, using --tokenizer_name."
|
||||
)
|
||||
|
||||
|
||||
@@ -190,7 +190,7 @@ class ModelArguments:
|
||||
metadata={
|
||||
"help": (
|
||||
"Whether or not to allow for custom models defined on the Hub in their own modeling files. This option"
|
||||
"should only be set to `True` for repositories you trust and in which you have read the code, as it will"
|
||||
"should only be set to `True` for repositories you trust and in which you have read the code, as it will "
|
||||
"execute code present on the Hub on your local machine."
|
||||
)
|
||||
},
|
||||
@@ -368,7 +368,7 @@ def main():
|
||||
and not training_args.overwrite_output_dir
|
||||
):
|
||||
raise ValueError(
|
||||
f"Output directory ({training_args.output_dir}) already exists and is not empty."
|
||||
f"Output directory ({training_args.output_dir}) already exists and is not empty. "
|
||||
"Use --overwrite_output_dir to overcome."
|
||||
)
|
||||
|
||||
@@ -524,7 +524,7 @@ def main():
|
||||
)
|
||||
else:
|
||||
raise ValueError(
|
||||
"You are instantiating a new tokenizer from scratch. This is not supported by this script."
|
||||
"You are instantiating a new tokenizer from scratch. This is not supported by this script. "
|
||||
"You can do it from another script, save it, and load it from here, using --tokenizer_name."
|
||||
)
|
||||
|
||||
@@ -586,7 +586,7 @@ def main():
|
||||
else:
|
||||
if data_args.block_size > tokenizer.model_max_length:
|
||||
logger.warning(
|
||||
f"The block_size passed ({data_args.block_size}) is larger than the maximum length for the model"
|
||||
f"The block_size passed ({data_args.block_size}) is larger than the maximum length for the model "
|
||||
f"({tokenizer.model_max_length}). Using block_size={tokenizer.model_max_length}."
|
||||
)
|
||||
block_size = min(data_args.block_size, tokenizer.model_max_length)
|
||||
|
||||
@@ -195,7 +195,7 @@ class ModelArguments:
|
||||
metadata={
|
||||
"help": (
|
||||
"Whether or not to allow for custom models defined on the Hub in their own modeling files. This option"
|
||||
"should only be set to `True` for repositories you trust and in which you have read the code, as it will"
|
||||
"should only be set to `True` for repositories you trust and in which you have read the code, as it will "
|
||||
"execute code present on the Hub on your local machine."
|
||||
)
|
||||
},
|
||||
@@ -411,7 +411,7 @@ def main():
|
||||
and not training_args.overwrite_output_dir
|
||||
):
|
||||
raise ValueError(
|
||||
f"Output directory ({training_args.output_dir}) already exists and is not empty."
|
||||
f"Output directory ({training_args.output_dir}) already exists and is not empty. "
|
||||
"Use --overwrite_output_dir to overcome."
|
||||
)
|
||||
|
||||
@@ -556,7 +556,7 @@ def main():
|
||||
)
|
||||
else:
|
||||
raise ValueError(
|
||||
"You are instantiating a new tokenizer from scratch. This is not supported by this script."
|
||||
"You are instantiating a new tokenizer from scratch. This is not supported by this script. "
|
||||
"You can do it from another script, save it, and load it from here, using --tokenizer_name."
|
||||
)
|
||||
|
||||
|
||||
@@ -528,7 +528,7 @@ def main():
|
||||
and not training_args.overwrite_output_dir
|
||||
):
|
||||
raise ValueError(
|
||||
f"Output directory ({training_args.output_dir}) already exists and is not empty."
|
||||
f"Output directory ({training_args.output_dir}) already exists and is not empty. "
|
||||
"Use --overwrite_output_dir to overcome."
|
||||
)
|
||||
|
||||
@@ -647,7 +647,7 @@ def main():
|
||||
)
|
||||
else:
|
||||
raise ValueError(
|
||||
"You are instantiating a new tokenizer from scratch. This is not supported by this script."
|
||||
"You are instantiating a new tokenizer from scratch. This is not supported by this script. "
|
||||
"You can do it from another script, save it, and load it from here, using --tokenizer_name."
|
||||
)
|
||||
|
||||
|
||||
@@ -176,7 +176,7 @@ class ModelArguments:
|
||||
metadata={
|
||||
"help": (
|
||||
"Whether or not to allow for custom models defined on the Hub in their own modeling files. This option"
|
||||
"should only be set to `True` for repositories you trust and in which you have read the code, as it will"
|
||||
"should only be set to `True` for repositories you trust and in which you have read the code, as it will "
|
||||
"execute code present on the Hub on your local machine."
|
||||
)
|
||||
},
|
||||
@@ -582,7 +582,7 @@ def main():
|
||||
|
||||
if data_args.max_seq_length > tokenizer.model_max_length:
|
||||
logger.warning(
|
||||
f"The max_seq_length passed ({data_args.max_seq_length}) is larger than the maximum length for the"
|
||||
f"The max_seq_length passed ({data_args.max_seq_length}) is larger than the maximum length for the "
|
||||
f"model ({tokenizer.model_max_length}). Using max_seq_length={tokenizer.model_max_length}."
|
||||
)
|
||||
max_seq_length = min(data_args.max_seq_length, tokenizer.model_max_length)
|
||||
|
||||
@@ -415,7 +415,7 @@ def main():
|
||||
and not training_args.overwrite_output_dir
|
||||
):
|
||||
raise ValueError(
|
||||
f"Output directory ({training_args.output_dir}) already exists and is not empty."
|
||||
f"Output directory ({training_args.output_dir}) already exists and is not empty. "
|
||||
"Use `--overwrite_output_dir` to overcome."
|
||||
)
|
||||
|
||||
|
||||
@@ -209,7 +209,7 @@ class ModelArguments:
|
||||
metadata={
|
||||
"help": (
|
||||
"Whether or not to allow for custom models defined on the Hub in their own modeling files. This option"
|
||||
"should only be set to `True` for repositories you trust and in which you have read the code, as it will"
|
||||
"should only be set to `True` for repositories you trust and in which you have read the code, as it will "
|
||||
"execute code present on the Hub on your local machine."
|
||||
)
|
||||
},
|
||||
@@ -268,7 +268,7 @@ class DataTrainingArguments:
|
||||
metadata={
|
||||
"help": (
|
||||
"The maximum total sequence length for validation target text after tokenization. Sequences longer "
|
||||
"than this will be truncated, sequences shorter will be padded. Will default to `max_target_length`."
|
||||
"than this will be truncated, sequences shorter will be padded. Will default to `max_target_length`. "
|
||||
"This argument is also used to override the `max_length` param of `model.generate`, which is used "
|
||||
"during evaluation."
|
||||
)
|
||||
@@ -451,7 +451,7 @@ def main():
|
||||
and not training_args.overwrite_output_dir
|
||||
):
|
||||
raise ValueError(
|
||||
f"Output directory ({training_args.output_dir}) already exists and is not empty."
|
||||
f"Output directory ({training_args.output_dir}) already exists and is not empty. "
|
||||
"Use --overwrite_output_dir to overcome."
|
||||
)
|
||||
|
||||
@@ -558,7 +558,7 @@ def main():
|
||||
)
|
||||
else:
|
||||
raise ValueError(
|
||||
"You are instantiating a new tokenizer from scratch. This is not supported by this script."
|
||||
"You are instantiating a new tokenizer from scratch. This is not supported by this script. "
|
||||
"You can do it from another script, save it, and load it from here, using --tokenizer_name."
|
||||
)
|
||||
|
||||
|
||||
@@ -122,7 +122,7 @@ class ModelArguments:
|
||||
metadata={
|
||||
"help": (
|
||||
"Whether or not to allow for custom models defined on the Hub in their own modeling files. This option"
|
||||
"should only be set to `True` for repositories you trust and in which you have read the code, as it will"
|
||||
"should only be set to `True` for repositories you trust and in which you have read the code, as it will "
|
||||
"execute code present on the Hub on your local machine."
|
||||
)
|
||||
},
|
||||
|
||||
@@ -170,7 +170,7 @@ class ModelArguments:
|
||||
metadata={
|
||||
"help": (
|
||||
"Whether or not to allow for custom models defined on the Hub in their own modeling files. This option"
|
||||
"should only be set to `True` for repositories you trust and in which you have read the code, as it will"
|
||||
"should only be set to `True` for repositories you trust and in which you have read the code, as it will "
|
||||
"execute code present on the Hub on your local machine."
|
||||
)
|
||||
},
|
||||
|
||||
@@ -180,7 +180,7 @@ class ModelArguments:
|
||||
metadata={
|
||||
"help": (
|
||||
"Whether or not to allow for custom models defined on the Hub in their own modeling files. This option"
|
||||
"should only be set to `True` for repositories you trust and in which you have read the code, as it will"
|
||||
"should only be set to `True` for repositories you trust and in which you have read the code, as it will "
|
||||
"execute code present on the Hub on your local machine."
|
||||
)
|
||||
},
|
||||
@@ -291,7 +291,7 @@ def main():
|
||||
and not training_args.overwrite_output_dir
|
||||
):
|
||||
raise ValueError(
|
||||
f"Output directory ({training_args.output_dir}) already exists and is not empty."
|
||||
f"Output directory ({training_args.output_dir}) already exists and is not empty. "
|
||||
"Use --overwrite_output_dir to overcome."
|
||||
)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user