[Docs] Fix spelling and grammar mistakes (#28825)

* Fix typos and grammar mistakes in docs and examples * Fix typos in docstrings and comments * Fix spelling of `tokenizer` in model tests * Remove erroneous spaces in decorators * Remove extra spaces in Markdown link texts
2024-02-02 08:45:00 +01:00
parent 2418c64a1c
commit 721ee783ca
134 changed files with 185 additions and 186 deletions
--- a/examples/pytorch/text-classification/README.md
+++ b/examples/pytorch/text-classification/README.md
@@ -129,7 +129,7 @@ python run_classification.py \
    --num_train_epochs 15 \
    --output_dir /tmp/${dataset}_${subset}/ 
 ```
- It results in a Micro F1 score of around 0.82 without any text and label filtering. Note that you have to explictly remove the "unused" split from the dataset, since it is not used for classification.
+ It results in a Micro F1 score of around 0.82 without any text and label filtering. Note that you have to explicitly remove the "unused" split from the dataset, since it is not used for classification.

 ### Mixed precision training

--- a/examples/pytorch/text-classification/run_classification.py
+++ b/examples/pytorch/text-classification/run_classification.py
@@ -83,7 +83,7 @@ class DataTrainingArguments:
        metadata={
            "help": (
                "The name of the text column in the input dataset or a CSV/JSON file. "
-                'If not specified, will use the "sentence" column for single/multi-label classifcation task.'
+                'If not specified, will use the "sentence" column for single/multi-label classification task.'
            )
        },
    )
@@ -121,7 +121,7 @@ class DataTrainingArguments:
        metadata={
            "help": (
                "The name of the label column in the input dataset or a CSV/JSON file. "
-                'If not specified, will use the "label" column for single/multi-label classifcation task'
+                'If not specified, will use the "label" column for single/multi-label classification task'
            )
        },
    )
@@ -260,7 +260,7 @@ class ModelArguments:


 def get_label_list(raw_dataset, split="train") -> List[str]:
-    """Get the list of labels from a mutli-label dataset"""
+    """Get the list of labels from a multi-label dataset"""

    if isinstance(raw_dataset[split]["label"][0], list):
        label_list = [label for sample in raw_dataset[split]["label"] for label in sample]
@@ -343,7 +343,7 @@ def main():

    # Get the datasets: you can either provide your own CSV/JSON training and evaluation files, or specify a dataset name
    # to load from huggingface/datasets. In ether case, you can specify a the key of the column(s) containing the text and
-    # the key of the column containing the label. If multiple columns are specified for the text, they will be joined togather
+    # the key of the column containing the label. If multiple columns are specified for the text, they will be joined together
    # for the actual text value.
    # In distributed training, the load_dataset function guarantee that only one local process can concurrently
    # download the dataset.