[Docs] Fix spelling and grammar mistakes (#28825)

* Fix typos and grammar mistakes in docs and examples

* Fix typos in docstrings and comments

* Fix spelling of `tokenizer` in model tests

* Remove erroneous spaces in decorators

* Remove extra spaces in Markdown link texts
This commit is contained in:
Klaus Hipp
2024-02-02 08:45:00 +01:00
committed by GitHub
parent 2418c64a1c
commit 721ee783ca
134 changed files with 185 additions and 186 deletions

View File

@@ -311,7 +311,7 @@ def main():
# Log on each process the small summary:
logger.info(f"Training/evaluation parameters {training_args}")
# 3. Detecting last checkpoint and eventualy continue from last checkpoint
# 3. Detecting last checkpoint and eventually continue from last checkpoint
last_checkpoint = None
if os.path.isdir(training_args.output_dir) and training_args.do_train and not training_args.overwrite_output_dir:
if last_checkpoint is None and len(os.listdir(training_args.output_dir)) > 0:

View File

@@ -107,10 +107,10 @@ from datasets import load_dataset
# example 1: local folder
dataset = load_dataset("imagefolder", data_dir="path_to_your_folder")
# example 2: local files (suppoted formats are tar, gzip, zip, xz, rar, zstd)
# example 2: local files (supported formats are tar, gzip, zip, xz, rar, zstd)
dataset = load_dataset("imagefolder", data_files="path_to_zip_file")
# example 3: remote files (suppoted formats are tar, gzip, zip, xz, rar, zstd)
# example 3: remote files (supported formats are tar, gzip, zip, xz, rar, zstd)
dataset = load_dataset("imagefolder", data_files="https://download.microsoft.com/download/3/E/1/3E1C3F21-ECDB-4869-8368-6DEBA77B919F/kagglecatsanddogs_3367a.zip")
# example 4: providing several splits

View File

@@ -109,7 +109,7 @@ def main(args):
tokenizer.decoder = decoders.Metaspace()
if args.export_to_hub:
logger.info("Exporting the trained tokenzier to Hub.")
logger.info("Exporting the trained tokenizer to Hub.")
new_tokenizer = AlbertTokenizerFast(tokenizer_object=tokenizer)
new_tokenizer.push_to_hub("unigram-tokenizer-dataset")

View File

@@ -512,7 +512,7 @@ def main():
raise ValueError("--do_train requires a train dataset")
train_dataset = datasets["train"]
if data_args.max_train_samples is not None:
# We will select sample from whole data if agument is specified
# We will select sample from whole data if argument is specified
max_train_samples = min(len(train_dataset), data_args.max_train_samples)
train_dataset = train_dataset.select(range(max_train_samples))
# Create train feature from dataset