[Docs] Fix spelling and grammar mistakes (#28825)

* Fix typos and grammar mistakes in docs and examples

* Fix typos in docstrings and comments

* Fix spelling of `tokenizer` in model tests

* Remove erroneous spaces in decorators

* Remove extra spaces in Markdown link texts
This commit is contained in:
Klaus Hipp
2024-02-02 08:45:00 +01:00
committed by GitHub
parent 2418c64a1c
commit 721ee783ca
134 changed files with 185 additions and 186 deletions

View File

@@ -378,7 +378,7 @@ def main():
)
# Preprocessing the datasets.
- # Preprocessing is slighlty different for training and evaluation.
+ # Preprocessing is slightly different for training and evaluation.
if training_args.do_train:
column_names = raw_datasets["train"].column_names
elif training_args.do_eval:

View File

@@ -354,7 +354,7 @@ def main():
)
# Preprocessing the datasets.
- # Preprocessing is slighlty different for training and evaluation.
+ # Preprocessing is slightly different for training and evaluation.
if training_args.do_train:
column_names = raw_datasets["train"].column_names
elif training_args.do_eval:

View File

@@ -119,7 +119,7 @@ def parse_args():
default=384,
help=(
"The maximum total input sequence length after tokenization. Sequences longer than this will be truncated,"
- " sequences shorter will be padded if `--pad_to_max_lengh` is passed."
+ " sequences shorter will be padded if `--pad_to_max_length` is passed."
),
)
parser.add_argument(
@@ -385,7 +385,7 @@ def main():
)
# Preprocessing the datasets.
- # Preprocessing is slighlty different for training and evaluation.
+ # Preprocessing is slightly different for training and evaluation.
column_names = raw_datasets["train"].column_names
question_column_name = "question" if "question" in column_names else column_names[0]
@@ -508,7 +508,7 @@ def main():
raise ValueError("--do_train requires a train dataset")
train_dataset = raw_datasets["train"]
if args.max_train_samples is not None:
- # We will select sample from whole data if agument is specified
+ # We will select sample from whole data if argument is specified
train_dataset = train_dataset.select(range(args.max_train_samples))
# Create train feature from dataset
with accelerator.main_process_first():
@@ -877,7 +877,7 @@ def main():
commit_message=f"Training in progress epoch {epoch}", blocking=False, auto_lfs_prune=True
)
- # intialize all lists to collect the batches
+ # initialize all lists to collect the batches
all_start_top_log_probs = []
all_start_top_index = []
all_end_top_log_probs = []
@@ -936,7 +936,7 @@ def main():
logger.info(f"Evaluation metrics: {eval_metric}")
if args.do_predict:
- # intialize all lists to collect the batches
+ # initialize all lists to collect the batches
all_start_top_log_probs = []
all_start_top_index = []

View File

@@ -123,7 +123,7 @@ def parse_args():
default=384,
help=(
"The maximum total input sequence length after tokenization. Sequences longer than this will be truncated,"
- " sequences shorter will be padded if `--pad_to_max_lengh` is passed."
+ " sequences shorter will be padded if `--pad_to_max_length` is passed."
),
)
parser.add_argument(
@@ -460,7 +460,7 @@ def main():
model = AutoModelForQuestionAnswering.from_config(config, trust_remote_code=args.trust_remote_code)
# Preprocessing the datasets.
- # Preprocessing is slighlty different for training and evaluation.
+ # Preprocessing is slightly different for training and evaluation.
column_names = raw_datasets["train"].column_names
@@ -561,7 +561,7 @@ def main():
raise ValueError("--do_train requires a train dataset")
train_dataset = raw_datasets["train"]
if args.max_train_samples is not None:
- # We will select sample from whole data if agument is specified
+ # We will select sample from whole data if argument is specified
train_dataset = train_dataset.select(range(args.max_train_samples))
# Create train feature from dataset

View File

@@ -559,7 +559,7 @@ def main():
raise ValueError("--do_train requires a train dataset")
train_dataset = raw_datasets["train"]
if data_args.max_train_samples is not None:
- # We will select sample from whole data if agument is specified
+ # We will select sample from whole data if argument is specified
max_train_samples = min(len(train_dataset), data_args.max_train_samples)
train_dataset = train_dataset.select(range(max_train_samples))
# Create train feature from dataset