Add many missing spaces in adjacent strings (#26751)
Add missing spaces in adjacent strings
This commit is contained in:
@@ -120,7 +120,7 @@ class ModelArguments:
|
||||
metadata={
|
||||
"help": (
|
||||
"Whether or not to allow for custom models defined on the Hub in their own modeling files. This option "
|
||||
"should only be set to `True` for repositories you trust and in which you have read the code, as it will"
|
||||
"should only be set to `True` for repositories you trust and in which you have read the code, as it will "
|
||||
"execute code present on the Hub on your local machine."
|
||||
)
|
||||
},
|
||||
@@ -205,7 +205,7 @@ class DataTrainingArguments:
|
||||
metadata={
|
||||
"help": (
|
||||
"The maximum total sequence length for validation target text after tokenization. Sequences longer "
|
||||
"than this will be truncated, sequences shorter will be padded. Will default to `max_target_length`."
|
||||
"than this will be truncated, sequences shorter will be padded. Will default to `max_target_length`. "
|
||||
"This argument is also used to override the ``max_length`` param of ``model.generate``, which is used "
|
||||
"during ``evaluate`` and ``predict``."
|
||||
)
|
||||
@@ -271,7 +271,7 @@ class DataTrainingArguments:
|
||||
default=None,
|
||||
metadata={
|
||||
"help": (
|
||||
"The token to force as the first generated token after the decoder_start_token_id."
|
||||
"The token to force as the first generated token after the decoder_start_token_id. "
|
||||
"Useful for multilingual models like mBART where the first generated token "
|
||||
"needs to be the target language token (Usually it is the target language token)"
|
||||
)
|
||||
@@ -556,7 +556,7 @@ def main():
|
||||
|
||||
if training_args.label_smoothing_factor > 0 and not hasattr(model, "prepare_decoder_input_ids_from_labels"):
|
||||
logger.warning(
|
||||
"label_smoothing is enabled but the `prepare_decoder_input_ids_from_labels` method is not defined for"
|
||||
"label_smoothing is enabled but the `prepare_decoder_input_ids_from_labels` method is not defined for "
|
||||
f"`{model.__class__.__name__}`. This will lead to loss being calculated twice and will take up more memory"
|
||||
)
|
||||
|
||||
|
||||
@@ -146,7 +146,7 @@ def parse_args():
|
||||
default=128,
|
||||
help=(
|
||||
"The maximum total sequence length for target text after "
|
||||
"tokenization. Sequences longer than this will be truncated, sequences shorter will be padded."
|
||||
"tokenization. Sequences longer than this will be truncated, sequences shorter will be padded. "
|
||||
"during ``evaluate`` and ``predict``."
|
||||
),
|
||||
)
|
||||
@@ -272,7 +272,7 @@ def parse_args():
|
||||
default=False,
|
||||
help=(
|
||||
"Whether or not to allow for custom models defined on the Hub in their own modeling files. This option "
|
||||
"should only be set to `True` for repositories you trust and in which you have read the code, as it will"
|
||||
"should only be set to `True` for repositories you trust and in which you have read the code, as it will "
|
||||
"execute code present on the Hub on your local machine."
|
||||
),
|
||||
)
|
||||
@@ -299,7 +299,7 @@ def parse_args():
|
||||
default="all",
|
||||
help=(
|
||||
'The integration to report the results and logs to. Supported platforms are `"tensorboard"`,'
|
||||
' `"wandb"`, `"comet_ml"` and `"clearml"`. Use `"all"` (default) to report to all integrations.'
|
||||
' `"wandb"`, `"comet_ml"` and `"clearml"`. Use `"all"` (default) to report to all integrations. '
|
||||
"Only applicable when `--with_tracking` is passed."
|
||||
),
|
||||
)
|
||||
@@ -433,7 +433,7 @@ def main():
|
||||
)
|
||||
else:
|
||||
raise ValueError(
|
||||
"You are instantiating a new tokenizer from scratch. This is not supported by this script."
|
||||
"You are instantiating a new tokenizer from scratch. This is not supported by this script. "
|
||||
"You can do it from another script, save it, and load it from here, using --tokenizer_name."
|
||||
)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user