[Docs] Fix spelling and grammar mistakes (#28825)
* Fix typos and grammar mistakes in docs and examples
* Fix typos in docstrings and comments
* Fix spelling of `tokenizer` in model tests
* Remove erroneous spaces in decorators
* Remove extra spaces in Markdown link texts
This commit is contained in:
@@ -311,7 +311,7 @@ def main():
|
||||
# Log on each process the small summary:
|
||||
logger.info(f"Training/evaluation parameters {training_args}")
|
||||
|
||||
# 3. Detecting last checkpoint and eventualy continue from last checkpoint
|
||||
# 3. Detecting last checkpoint and eventually continue from last checkpoint
|
||||
last_checkpoint = None
|
||||
if os.path.isdir(training_args.output_dir) and training_args.do_train and not training_args.overwrite_output_dir:
|
||||
if last_checkpoint is None and len(os.listdir(training_args.output_dir)) > 0:
|
||||
|
||||
@@ -107,10 +107,10 @@ from datasets import load_dataset
|
||||
# example 1: local folder
|
||||
dataset = load_dataset("imagefolder", data_dir="path_to_your_folder")
|
||||
|
||||
# example 2: local files (suppoted formats are tar, gzip, zip, xz, rar, zstd)
|
||||
# example 2: local files (supported formats are tar, gzip, zip, xz, rar, zstd)
|
||||
dataset = load_dataset("imagefolder", data_files="path_to_zip_file")
|
||||
|
||||
# example 3: remote files (suppoted formats are tar, gzip, zip, xz, rar, zstd)
|
||||
# example 3: remote files (supported formats are tar, gzip, zip, xz, rar, zstd)
|
||||
dataset = load_dataset("imagefolder", data_files="https://download.microsoft.com/download/3/E/1/3E1C3F21-ECDB-4869-8368-6DEBA77B919F/kagglecatsanddogs_3367a.zip")
|
||||
|
||||
# example 4: providing several splits
|
||||
|
||||
@@ -109,7 +109,7 @@ def main(args):
|
||||
tokenizer.decoder = decoders.Metaspace()
|
||||
|
||||
if args.export_to_hub:
|
||||
logger.info("Exporting the trained tokenzier to Hub.")
|
||||
logger.info("Exporting the trained tokenizer to Hub.")
|
||||
new_tokenizer = AlbertTokenizerFast(tokenizer_object=tokenizer)
|
||||
new_tokenizer.push_to_hub("unigram-tokenizer-dataset")
|
||||
|
||||
|
||||
@@ -512,7 +512,7 @@ def main():
|
||||
raise ValueError("--do_train requires a train dataset")
|
||||
train_dataset = datasets["train"]
|
||||
if data_args.max_train_samples is not None:
|
||||
# We will select sample from whole data if agument is specified
|
||||
# We will select sample from whole data if argument is specified
|
||||
max_train_samples = min(len(train_dataset), data_args.max_train_samples)
|
||||
train_dataset = train_dataset.select(range(max_train_samples))
|
||||
# Create train feature from dataset
|
||||
|
||||
Reference in New Issue
Block a user