Add many missing spaces in adjacent strings (#26751)
Add missing spaces in adjacent strings
This commit is contained in:
@@ -132,7 +132,7 @@ class ModelArguments:
|
||||
metadata={
|
||||
"help": (
|
||||
"Whether or not to allow for custom models defined on the Hub in their own modeling files. This option"
|
||||
"should only be set to `True` for repositories you trust and in which you have read the code, as it will"
|
||||
"should only be set to `True` for repositories you trust and in which you have read the code, as it will "
|
||||
"execute code present on the Hub on your local machine."
|
||||
)
|
||||
},
|
||||
@@ -151,7 +151,7 @@ class ModelArguments:
|
||||
default=False,
|
||||
metadata={
|
||||
"help": (
|
||||
"It is an option to create the model as an empty shell, then only materialize its parameters when the pretrained weights are loaded."
|
||||
"It is an option to create the model as an empty shell, then only materialize its parameters when the pretrained weights are loaded. "
|
||||
"set True will benefit LLM loading time and RAM consumption."
|
||||
)
|
||||
},
|
||||
@@ -424,7 +424,7 @@ def main():
|
||||
tokenizer = AutoTokenizer.from_pretrained(model_args.model_name_or_path, **tokenizer_kwargs)
|
||||
else:
|
||||
raise ValueError(
|
||||
"You are instantiating a new tokenizer from scratch. This is not supported by this script."
|
||||
"You are instantiating a new tokenizer from scratch. This is not supported by this script. "
|
||||
"You can do it from another script, save it, and load it from here, using --tokenizer_name."
|
||||
)
|
||||
|
||||
@@ -506,7 +506,7 @@ def main():
|
||||
else:
|
||||
if data_args.block_size > tokenizer.model_max_length:
|
||||
logger.warning(
|
||||
f"The block_size passed ({data_args.block_size}) is larger than the maximum length for the model"
|
||||
f"The block_size passed ({data_args.block_size}) is larger than the maximum length for the model "
|
||||
f"({tokenizer.model_max_length}). Using block_size={tokenizer.model_max_length}."
|
||||
)
|
||||
block_size = min(data_args.block_size, tokenizer.model_max_length)
|
||||
|
||||
@@ -199,7 +199,7 @@ def parse_args():
|
||||
default=False,
|
||||
help=(
|
||||
"Whether or not to allow for custom models defined on the Hub in their own modeling files. This option"
|
||||
"should only be set to `True` for repositories you trust and in which you have read the code, as it will"
|
||||
"should only be set to `True` for repositories you trust and in which you have read the code, as it will "
|
||||
"execute code present on the Hub on your local machine."
|
||||
),
|
||||
)
|
||||
@@ -226,7 +226,7 @@ def parse_args():
|
||||
default="all",
|
||||
help=(
|
||||
'The integration to report the results and logs to. Supported platforms are `"tensorboard"`,'
|
||||
' `"wandb"`, `"comet_ml"` and `"clearml"`. Use `"all"` (default) to report to all integrations.'
|
||||
' `"wandb"`, `"comet_ml"` and `"clearml"`. Use `"all"` (default) to report to all integrations. '
|
||||
"Only applicable when `--with_tracking` is passed."
|
||||
),
|
||||
)
|
||||
@@ -234,7 +234,7 @@ def parse_args():
|
||||
"--low_cpu_mem_usage",
|
||||
action="store_true",
|
||||
help=(
|
||||
"It is an option to create the model as an empty shell, then only materialize its parameters when the pretrained weights are loaded."
|
||||
"It is an option to create the model as an empty shell, then only materialize its parameters when the pretrained weights are loaded. "
|
||||
"If passed, LLM loading time and RAM consumption will be benefited."
|
||||
),
|
||||
)
|
||||
@@ -398,7 +398,7 @@ def main():
|
||||
)
|
||||
else:
|
||||
raise ValueError(
|
||||
"You are instantiating a new tokenizer from scratch. This is not supported by this script."
|
||||
"You are instantiating a new tokenizer from scratch. This is not supported by this script. "
|
||||
"You can do it from another script, save it, and load it from here, using --tokenizer_name."
|
||||
)
|
||||
|
||||
@@ -449,7 +449,7 @@ def main():
|
||||
else:
|
||||
if args.block_size > tokenizer.model_max_length:
|
||||
logger.warning(
|
||||
f"The block_size passed ({args.block_size}) is larger than the maximum length for the model"
|
||||
f"The block_size passed ({args.block_size}) is larger than the maximum length for the model "
|
||||
f"({tokenizer.model_max_length}). Using block_size={tokenizer.model_max_length}."
|
||||
)
|
||||
block_size = min(args.block_size, tokenizer.model_max_length)
|
||||
|
||||
@@ -128,7 +128,7 @@ class ModelArguments:
|
||||
metadata={
|
||||
"help": (
|
||||
"Whether or not to allow for custom models defined on the Hub in their own modeling files. This option"
|
||||
"should only be set to `True` for repositories you trust and in which you have read the code, as it will"
|
||||
"should only be set to `True` for repositories you trust and in which you have read the code, as it will "
|
||||
"execute code present on the Hub on your local machine."
|
||||
)
|
||||
},
|
||||
@@ -137,7 +137,7 @@ class ModelArguments:
|
||||
default=False,
|
||||
metadata={
|
||||
"help": (
|
||||
"It is an option to create the model as an empty shell, then only materialize its parameters when the pretrained weights are loaded."
|
||||
"It is an option to create the model as an empty shell, then only materialize its parameters when the pretrained weights are loaded. "
|
||||
"set True will benefit LLM loading time and RAM consumption."
|
||||
)
|
||||
},
|
||||
@@ -417,7 +417,7 @@ def main():
|
||||
tokenizer = AutoTokenizer.from_pretrained(model_args.model_name_or_path, **tokenizer_kwargs)
|
||||
else:
|
||||
raise ValueError(
|
||||
"You are instantiating a new tokenizer from scratch. This is not supported by this script."
|
||||
"You are instantiating a new tokenizer from scratch. This is not supported by this script. "
|
||||
"You can do it from another script, save it, and load it from here, using --tokenizer_name."
|
||||
)
|
||||
|
||||
@@ -462,7 +462,7 @@ def main():
|
||||
else:
|
||||
if data_args.max_seq_length > tokenizer.model_max_length:
|
||||
logger.warning(
|
||||
f"The max_seq_length passed ({data_args.max_seq_length}) is larger than the maximum length for the"
|
||||
f"The max_seq_length passed ({data_args.max_seq_length}) is larger than the maximum length for the "
|
||||
f"model ({tokenizer.model_max_length}). Using max_seq_length={tokenizer.model_max_length}."
|
||||
)
|
||||
max_seq_length = min(data_args.max_seq_length, tokenizer.model_max_length)
|
||||
|
||||
@@ -206,7 +206,7 @@ def parse_args():
|
||||
default=False,
|
||||
help=(
|
||||
"Whether or not to allow for custom models defined on the Hub in their own modeling files. This option"
|
||||
"should only be set to `True` for repositories you trust and in which you have read the code, as it will"
|
||||
"should only be set to `True` for repositories you trust and in which you have read the code, as it will "
|
||||
"execute code present on the Hub on your local machine."
|
||||
),
|
||||
)
|
||||
@@ -233,7 +233,7 @@ def parse_args():
|
||||
default="all",
|
||||
help=(
|
||||
'The integration to report the results and logs to. Supported platforms are `"tensorboard"`,'
|
||||
' `"wandb"`, `"comet_ml"` and `"clearml"`. Use `"all"` (default) to report to all integrations.'
|
||||
' `"wandb"`, `"comet_ml"` and `"clearml"`. Use `"all"` (default) to report to all integrations. '
|
||||
"Only applicable when `--with_tracking` is passed."
|
||||
),
|
||||
)
|
||||
@@ -241,7 +241,7 @@ def parse_args():
|
||||
"--low_cpu_mem_usage",
|
||||
action="store_true",
|
||||
help=(
|
||||
"It is an option to create the model as an empty shell, then only materialize its parameters when the pretrained weights are loaded."
|
||||
"It is an option to create the model as an empty shell, then only materialize its parameters when the pretrained weights are loaded. "
|
||||
"If passed, LLM loading time and RAM consumption will be benefited."
|
||||
),
|
||||
)
|
||||
@@ -395,7 +395,7 @@ def main():
|
||||
)
|
||||
else:
|
||||
raise ValueError(
|
||||
"You are instantiating a new tokenizer from scratch. This is not supported by this script."
|
||||
"You are instantiating a new tokenizer from scratch. This is not supported by this script. "
|
||||
"You can do it from another script, save it, and load it from here, using --tokenizer_name."
|
||||
)
|
||||
|
||||
@@ -434,7 +434,7 @@ def main():
|
||||
else:
|
||||
if args.max_seq_length > tokenizer.model_max_length:
|
||||
logger.warning(
|
||||
f"The max_seq_length passed ({args.max_seq_length}) is larger than the maximum length for the"
|
||||
f"The max_seq_length passed ({args.max_seq_length}) is larger than the maximum length for the "
|
||||
f"model ({tokenizer.model_max_length}). Using max_seq_length={tokenizer.model_max_length}."
|
||||
)
|
||||
max_seq_length = min(args.max_seq_length, tokenizer.model_max_length)
|
||||
|
||||
@@ -115,7 +115,7 @@ class ModelArguments:
|
||||
default=False,
|
||||
metadata={
|
||||
"help": (
|
||||
"It is an option to create the model as an empty shell, then only materialize its parameters when the pretrained weights are loaded."
|
||||
"It is an option to create the model as an empty shell, then only materialize its parameters when the pretrained weights are loaded. "
|
||||
"set True will benefit LLM loading time and RAM consumption."
|
||||
)
|
||||
},
|
||||
@@ -385,7 +385,7 @@ def main():
|
||||
tokenizer = AutoTokenizer.from_pretrained(model_args.model_name_or_path, **tokenizer_kwargs)
|
||||
else:
|
||||
raise ValueError(
|
||||
"You are instantiating a new tokenizer from scratch. This is not supported by this script."
|
||||
"You are instantiating a new tokenizer from scratch. This is not supported by this script. "
|
||||
"You can do it from another script, save it, and load it from here, using --tokenizer_name."
|
||||
)
|
||||
|
||||
@@ -419,7 +419,7 @@ def main():
|
||||
|
||||
if data_args.max_seq_length > tokenizer.model_max_length:
|
||||
logger.warning(
|
||||
f"The max_seq_length passed ({data_args.max_seq_length}) is larger than the maximum length for the"
|
||||
f"The max_seq_length passed ({data_args.max_seq_length}) is larger than the maximum length for the "
|
||||
f"model ({tokenizer.model_max_length}). Using max_seq_length={tokenizer.model_max_length}."
|
||||
)
|
||||
max_seq_length = min(data_args.max_seq_length, tokenizer.model_max_length)
|
||||
|
||||
Reference in New Issue
Block a user