Add many missing spaces in adjacent strings (#26751)
Add missing spaces in adjacent strings
This commit is contained in:
@@ -113,7 +113,7 @@ class ModelArguments:
|
||||
metadata={
|
||||
"help": (
|
||||
"Whether or not to allow for custom models defined on the Hub in their own modeling files. This option"
|
||||
"should only be set to `True` for repositories you trust and in which you have read the code, as it will"
|
||||
"should only be set to `True` for repositories you trust and in which you have read the code, as it will "
|
||||
"execute code present on the Hub on your local machine."
|
||||
)
|
||||
},
|
||||
@@ -387,7 +387,7 @@ def main():
|
||||
)
|
||||
else:
|
||||
raise ValueError(
|
||||
"You are instantiating a new tokenizer from scratch. This is not supported by this script."
|
||||
"You are instantiating a new tokenizer from scratch. This is not supported by this script. "
|
||||
"You can do it from another script, save it, and load it from here, using --tokenizer_name."
|
||||
)
|
||||
|
||||
|
||||
@@ -178,7 +178,7 @@ class ModelArguments:
|
||||
metadata={
|
||||
"help": (
|
||||
"Whether or not to allow for custom models defined on the Hub in their own modeling files. This option"
|
||||
"should only be set to `True` for repositories you trust and in which you have read the code, as it will"
|
||||
"should only be set to `True` for repositories you trust and in which you have read the code, as it will "
|
||||
"execute code present on the Hub on your local machine."
|
||||
)
|
||||
},
|
||||
|
||||
@@ -133,7 +133,7 @@ class ModelArguments:
|
||||
metadata={
|
||||
"help": (
|
||||
"Whether or not to allow for custom models defined on the Hub in their own modeling files. This option"
|
||||
"should only be set to `True` for repositories you trust and in which you have read the code, as it will"
|
||||
"should only be set to `True` for repositories you trust and in which you have read the code, as it will "
|
||||
"execute code present on the Hub on your local machine."
|
||||
)
|
||||
},
|
||||
@@ -399,7 +399,7 @@ def main():
|
||||
)
|
||||
else:
|
||||
raise ValueError(
|
||||
"You are instantiating a new tokenizer from scratch. This is not supported by this script."
|
||||
"You are instantiating a new tokenizer from scratch. This is not supported by this script. "
|
||||
"You can do it from another script, save it, and load it from here, using --tokenizer_name."
|
||||
)
|
||||
# endregion
|
||||
@@ -432,7 +432,7 @@ def main():
|
||||
else:
|
||||
if data_args.block_size > tokenizer.model_max_length:
|
||||
logger.warning(
|
||||
f"The block_size passed ({data_args.block_size}) is larger than the maximum length for the model"
|
||||
f"The block_size passed ({data_args.block_size}) is larger than the maximum length for the model "
|
||||
f"({tokenizer.model_max_length}). Using block_size={tokenizer.model_max_length}."
|
||||
)
|
||||
block_size = min(data_args.block_size, tokenizer.model_max_length)
|
||||
|
||||
@@ -131,7 +131,7 @@ class ModelArguments:
|
||||
metadata={
|
||||
"help": (
|
||||
"Whether or not to allow for custom models defined on the Hub in their own modeling files. This option"
|
||||
"should only be set to `True` for repositories you trust and in which you have read the code, as it will"
|
||||
"should only be set to `True` for repositories you trust and in which you have read the code, as it will "
|
||||
"execute code present on the Hub on your local machine."
|
||||
)
|
||||
},
|
||||
@@ -383,7 +383,7 @@ def main():
|
||||
)
|
||||
else:
|
||||
raise ValueError(
|
||||
"You are instantiating a new tokenizer from scratch. This is not supported by this script."
|
||||
"You are instantiating a new tokenizer from scratch. This is not supported by this script. "
|
||||
"You can do it from another script, save it, and load it from here, using --tokenizer_name."
|
||||
)
|
||||
# endregion
|
||||
@@ -404,7 +404,7 @@ def main():
|
||||
else:
|
||||
if data_args.max_seq_length > tokenizer.model_max_length:
|
||||
logger.warning(
|
||||
f"The max_seq_length passed ({data_args.max_seq_length}) is larger than the maximum length for the"
|
||||
f"The max_seq_length passed ({data_args.max_seq_length}) is larger than the maximum length for the "
|
||||
f"model ({tokenizer.model_max_length}). Using max_seq_length={tokenizer.model_max_length}."
|
||||
)
|
||||
max_seq_length = min(data_args.max_seq_length, tokenizer.model_max_length)
|
||||
|
||||
@@ -167,7 +167,7 @@ class ModelArguments:
|
||||
metadata={
|
||||
"help": (
|
||||
"Whether or not to allow for custom models defined on the Hub in their own modeling files. This option"
|
||||
"should only be set to `True` for repositories you trust and in which you have read the code, as it will"
|
||||
"should only be set to `True` for repositories you trust and in which you have read the code, as it will "
|
||||
"execute code present on the Hub on your local machine."
|
||||
)
|
||||
},
|
||||
@@ -383,7 +383,7 @@ def main():
|
||||
else:
|
||||
if data_args.max_seq_length > tokenizer.model_max_length:
|
||||
logger.warning(
|
||||
f"The max_seq_length passed ({data_args.max_seq_length}) is larger than the maximum length for the"
|
||||
f"The max_seq_length passed ({data_args.max_seq_length}) is larger than the maximum length for the "
|
||||
f"model ({tokenizer.model_max_length}). Using max_seq_length={tokenizer.model_max_length}."
|
||||
)
|
||||
max_seq_length = min(data_args.max_seq_length, tokenizer.model_max_length)
|
||||
|
||||
@@ -98,7 +98,7 @@ class ModelArguments:
|
||||
metadata={
|
||||
"help": (
|
||||
"Whether or not to allow for custom models defined on the Hub in their own modeling files. This option"
|
||||
"should only be set to `True` for repositories you trust and in which you have read the code, as it will"
|
||||
"should only be set to `True` for repositories you trust and in which you have read the code, as it will "
|
||||
"execute code present on the Hub on your local machine."
|
||||
)
|
||||
},
|
||||
@@ -400,7 +400,7 @@ def main():
|
||||
|
||||
if data_args.max_seq_length > tokenizer.model_max_length:
|
||||
logger.warning(
|
||||
f"The max_seq_length passed ({data_args.max_seq_length}) is larger than the maximum length for the"
|
||||
f"The max_seq_length passed ({data_args.max_seq_length}) is larger than the maximum length for the "
|
||||
f"model ({tokenizer.model_max_length}). Using max_seq_length={tokenizer.model_max_length}."
|
||||
)
|
||||
max_seq_length = min(data_args.max_seq_length, tokenizer.model_max_length)
|
||||
|
||||
@@ -120,7 +120,7 @@ class ModelArguments:
|
||||
metadata={
|
||||
"help": (
|
||||
"Whether or not to allow for custom models defined on the Hub in their own modeling files. This option"
|
||||
"should only be set to `True` for repositories you trust and in which you have read the code, as it will"
|
||||
"should only be set to `True` for repositories you trust and in which you have read the code, as it will "
|
||||
"execute code present on the Hub on your local machine."
|
||||
)
|
||||
},
|
||||
@@ -194,7 +194,7 @@ class DataTrainingArguments:
|
||||
metadata={
|
||||
"help": (
|
||||
"The maximum total sequence length for validation target text after tokenization. Sequences longer "
|
||||
"than this will be truncated, sequences shorter will be padded. Will default to `max_target_length`."
|
||||
"than this will be truncated, sequences shorter will be padded. Will default to `max_target_length`. "
|
||||
"This argument is also used to override the ``max_length`` param of ``model.generate``, which is used "
|
||||
"during ``evaluate`` and ``predict``."
|
||||
)
|
||||
|
||||
@@ -185,7 +185,7 @@ class ModelArguments:
|
||||
metadata={
|
||||
"help": (
|
||||
"Whether or not to allow for custom models defined on the Hub in their own modeling files. This option"
|
||||
"should only be set to `True` for repositories you trust and in which you have read the code, as it will"
|
||||
"should only be set to `True` for repositories you trust and in which you have read the code, as it will "
|
||||
"execute code present on the Hub on your local machine."
|
||||
)
|
||||
},
|
||||
@@ -353,7 +353,7 @@ def main():
|
||||
|
||||
if data_args.max_seq_length > tokenizer.model_max_length:
|
||||
logger.warning(
|
||||
f"The max_seq_length passed ({data_args.max_seq_length}) is larger than the maximum length for the"
|
||||
f"The max_seq_length passed ({data_args.max_seq_length}) is larger than the maximum length for the "
|
||||
f"model ({tokenizer.model_max_length}). Using max_seq_length={tokenizer.model_max_length}."
|
||||
)
|
||||
max_seq_length = min(data_args.max_seq_length, tokenizer.model_max_length)
|
||||
|
||||
@@ -101,7 +101,7 @@ class DataTrainingArguments:
|
||||
metadata={
|
||||
"help": (
|
||||
"Whether to pad all samples to `max_seq_length`. "
|
||||
"If False, will pad the samples dynamically when batching to the maximum length in the batch."
|
||||
"If False, will pad the samples dynamically when batching to the maximum length in the batch. "
|
||||
"Data will always be padded when using TPUs."
|
||||
)
|
||||
},
|
||||
@@ -191,7 +191,7 @@ class ModelArguments:
|
||||
metadata={
|
||||
"help": (
|
||||
"Whether or not to allow for custom models defined on the Hub in their own modeling files. This option"
|
||||
"should only be set to `True` for repositories you trust and in which you have read the code, as it will"
|
||||
"should only be set to `True` for repositories you trust and in which you have read the code, as it will "
|
||||
"execute code present on the Hub on your local machine."
|
||||
)
|
||||
},
|
||||
@@ -360,7 +360,7 @@ def main():
|
||||
|
||||
if data_args.max_seq_length > tokenizer.model_max_length:
|
||||
logger.warning(
|
||||
f"The max_seq_length passed ({data_args.max_seq_length}) is larger than the maximum length for the"
|
||||
f"The max_seq_length passed ({data_args.max_seq_length}) is larger than the maximum length for the "
|
||||
f"model ({tokenizer.model_max_length}). Using max_seq_length={tokenizer.model_max_length}."
|
||||
)
|
||||
max_seq_length = min(data_args.max_seq_length, tokenizer.model_max_length)
|
||||
|
||||
@@ -96,7 +96,7 @@ class ModelArguments:
|
||||
metadata={
|
||||
"help": (
|
||||
"Whether or not to allow for custom models defined on the Hub in their own modeling files. This option"
|
||||
"should only be set to `True` for repositories you trust and in which you have read the code, as it will"
|
||||
"should only be set to `True` for repositories you trust and in which you have read the code, as it will "
|
||||
"execute code present on the Hub on your local machine."
|
||||
)
|
||||
},
|
||||
@@ -334,7 +334,7 @@ def main():
|
||||
tokenizer_name_or_path = model_args.tokenizer_name if model_args.tokenizer_name else model_args.model_name_or_path
|
||||
if not tokenizer_name_or_path:
|
||||
raise ValueError(
|
||||
"You are instantiating a new tokenizer from scratch. This is not supported by this script."
|
||||
"You are instantiating a new tokenizer from scratch. This is not supported by this script. "
|
||||
"You can do it from another script, save it, and load it from here, using --tokenizer_name."
|
||||
)
|
||||
|
||||
|
||||
@@ -114,7 +114,7 @@ class ModelArguments:
|
||||
metadata={
|
||||
"help": (
|
||||
"Whether or not to allow for custom models defined on the Hub in their own modeling files. This option"
|
||||
"should only be set to `True` for repositories you trust and in which you have read the code, as it will"
|
||||
"should only be set to `True` for repositories you trust and in which you have read the code, as it will "
|
||||
"execute code present on the Hub on your local machine."
|
||||
)
|
||||
},
|
||||
@@ -182,7 +182,7 @@ class DataTrainingArguments:
|
||||
metadata={
|
||||
"help": (
|
||||
"The maximum total sequence length for validation target text after tokenization. Sequences longer "
|
||||
"than this will be truncated, sequences shorter will be padded. Will default to `max_target_length`."
|
||||
"than this will be truncated, sequences shorter will be padded. Will default to `max_target_length`. "
|
||||
"This argument is also used to override the ``max_length`` param of ``model.generate``, which is used "
|
||||
"during ``evaluate`` and ``predict``."
|
||||
)
|
||||
|
||||
Reference in New Issue
Block a user