chore: Fix typos in docs and examples (#36524)

Fix typos in docs and examples Signed-off-by: co63oc <co63oc@users.noreply.github.com>
2025-03-04 21:47:41 +08:00
parent 84f0186e89
commit 37508816d6
38 changed files with 50 additions and 50 deletions
--- a/examples/pytorch/speech-pretraining/run_wav2vec2_pretraining_no_trainer.py
+++ b/examples/pytorch/speech-pretraining/run_wav2vec2_pretraining_no_trainer.py
@@ -296,7 +296,7 @@ class DataCollatorForWav2Vec2Pretraining:
            The Wav2Vec2 model used for pretraining. The data collator needs to have access
            to config and ``_get_feat_extract_output_lengths`` function for correct padding.
        feature_extractor (:class:`~transformers.Wav2Vec2FeatureExtractor`):
-            The processor used for proccessing the data.
+            The processor used for processing the data.
        padding (:obj:`bool`, :obj:`str` or :class:`~transformers.tokenization_utils_base.PaddingStrategy`, `optional`, defaults to :obj:`True`):
            Select a strategy to pad the returned sequences (according to the model's padding side and padding index)
            among:
@@ -445,7 +445,7 @@ def main():
    accelerator.wait_for_everyone()

    # 1. Download and create train, validation dataset
-    # We load all dataset configuration and datset split pairs passed in
+    # We load all dataset configuration and dataset split pairs passed in
    # ``args.dataset_config_names`` and ``args.dataset_split_names``
    datasets_splits = []
    for dataset_config_name, train_split_name in zip(args.dataset_config_names, args.dataset_split_names):
--- a/examples/pytorch/speech-recognition/run_speech_recognition_ctc.py
+++ b/examples/pytorch/speech-recognition/run_speech_recognition_ctc.py
@@ -292,7 +292,7 @@ class DataCollatorCTCWithPadding:
    Data collator that will dynamically pad the inputs received.
    Args:
        processor (:class:`~transformers.AutoProcessor`)
-            The processor used for proccessing the data.
+            The processor used for processing the data.
        padding (:obj:`bool`, :obj:`str` or :class:`~transformers.tokenization_utils_base.PaddingStrategy`, `optional`, defaults to :obj:`True`):
            Select a strategy to pad the returned sequences (according to the model's padding side and padding index)
            among:
--- a/examples/pytorch/speech-recognition/run_speech_recognition_ctc_adapter.py
+++ b/examples/pytorch/speech-recognition/run_speech_recognition_ctc_adapter.py
@@ -275,7 +275,7 @@ class DataCollatorCTCWithPadding:
    Data collator that will dynamically pad the inputs received.
    Args:
        processor (:class:`~transformers.AutoProcessor`)
-            The processor used for proccessing the data.
+            The processor used for processing the data.
        padding (:obj:`bool`, :obj:`str` or :class:`~transformers.tokenization_utils_base.PaddingStrategy`, `optional`, defaults to :obj:`True`):
            Select a strategy to pad the returned sequences (according to the model's padding side and padding index)
            among:
@@ -559,7 +559,7 @@ def main():
                )

                # if we doing adapter language training, save
-                # vocab with adpter language
+                # vocab with adapter language
                if data_args.target_language is not None:
                    vocab_dict[data_args.target_language] = lang_dict

--- a/examples/pytorch/text-classification/run_classification.py
+++ b/examples/pytorch/text-classification/run_classification.py
@@ -429,7 +429,7 @@ def main():
    if is_regression:
        label_list = None
        num_labels = 1
-        # regession requires float as label type, let's cast it if needed
+        # regression requires float as label type, let's cast it if needed
        for split in raw_datasets.keys():
            if raw_datasets[split].features["label"].dtype not in ["float32", "float64"]:
                logger.warning(
--- a/examples/pytorch/text-generation/README.md
+++ b/examples/pytorch/text-generation/README.md
@@ -19,7 +19,7 @@ limitations under the License.
 Based on the script [`run_generation.py`](https://github.com/huggingface/transformers/blob/main/examples/pytorch/text-generation/run_generation.py).

 Conditional text generation using the auto-regressive models of the library: GPT, GPT-2, GPT-J, Transformer-XL, XLNet, CTRL, BLOOM, LLAMA, OPT.
-A similar script is used for our official demo [Write With Transfomer](https://transformer.huggingface.co), where you
+A similar script is used for our official demo [Write With Transformer](https://transformer.huggingface.co), where you
 can try out the different models available in the library.

 Example usage:
--- a/examples/pytorch/token-classification/README.md
+++ b/examples/pytorch/token-classification/README.md
@@ -19,7 +19,7 @@ limitations under the License.
 ## PyTorch version

 Fine-tuning the library models for token classification task such as Named Entity Recognition (NER), Parts-of-speech
-tagging (POS) or phrase extraction (CHUNKS). The main scrip `run_ner.py` leverages the 🤗 Datasets library and the Trainer API. You can easily
+tagging (POS) or phrase extraction (CHUNKS). The main script `run_ner.py` leverages the 🤗 Datasets library and the Trainer API. You can easily
 customize it to your needs if you need extra processing on your datasets.

 It will either run on a datasets hosted on our [hub](https://huggingface.co/datasets) or with your own text files for