Add many missing spaces in adjacent strings (#26751)

Add missing spaces in adjacent strings
This commit is contained in:
Tom Aarsen
2023-10-12 10:28:40 +02:00
committed by GitHub
parent 3bc65505fc
commit 40ea9ab2a1
154 changed files with 331 additions and 331 deletions

View File

@@ -153,7 +153,7 @@ if args.tokenizer_name:
tokenizer = AutoTokenizer.from_pretrained(args.tokenizer_name, use_fast=True)
else:
raise ValueError(
"You are instantiating a new tokenizer from scratch. This is not supported by this script."
"You are instantiating a new tokenizer from scratch. This is not supported by this script. "
"You can do it from another script, save it, and load it from here, using --tokenizer_name."
)
@@ -288,7 +288,7 @@ pad_on_right = tokenizer.padding_side == "right"
if args.max_seq_length > tokenizer.model_max_length:
logger.warning(
f"The max_seq_length passed ({args.max_seq_length}) is larger than the maximum length for the"
f"The max_seq_length passed ({args.max_seq_length}) is larger than the maximum length for the "
f"model ({tokenizer.model_max_length}). Using max_seq_length={tokenizer.model_max_length}."
)

View File

@@ -365,7 +365,7 @@ def main():
if data_args.max_seq_length > tokenizer.model_max_length:
logger.warning(
f"The max_seq_length passed ({data_args.max_seq_length}) is larger than the maximum length for the"
f"The max_seq_length passed ({data_args.max_seq_length}) is larger than the maximum length for the "
f"model ({tokenizer.model_max_length}). Using max_seq_length={tokenizer.model_max_length}."
)
max_seq_length = min(data_args.max_seq_length, tokenizer.model_max_length)