Fix typos in strings and comments (#37910)

2025-05-01 21:58:58 +08:00
parent c80f65265b
commit 5b573bebb9
17 changed files with 25 additions and 25 deletions
--- a/examples/legacy/multiple_choice/utils_multiple_choice.py
+++ b/examples/legacy/multiple_choice/utils_multiple_choice.py
@@ -539,7 +539,7 @@ def convert_examples_to_features(
            if "num_truncated_tokens" in inputs and inputs["num_truncated_tokens"] > 0:
                logger.info(
                    "Attention! you are cropping tokens (swag task is ok). "
-                    "If you are training ARC and RACE and you are poping question + options, "
+                    "If you are training ARC and RACE and you are popping question + options, "
                    "you need to try to use a bigger max seq length!"
                )

--- a/examples/legacy/question-answering/run_squad.py
+++ b/examples/legacy/question-answering/run_squad.py
@@ -745,7 +745,7 @@ def main():
        args.tokenizer_name if args.tokenizer_name else args.model_name_or_path,
        do_lower_case=args.do_lower_case,
        cache_dir=args.cache_dir if args.cache_dir else None,
-        use_fast=False,  # SquadDataset is not compatible with Fast tokenizers which have a smarter overflow handeling
+        use_fast=False,  # SquadDataset is not compatible with Fast tokenizers which have a smarter overflow handling
    )
    model = AutoModelForQuestionAnswering.from_pretrained(
        args.model_name_or_path,
@@ -795,7 +795,7 @@ def main():
        # Load a trained model and vocabulary that you have fine-tuned
        model = AutoModelForQuestionAnswering.from_pretrained(args.output_dir)  # , force_download=True)

-        # SquadDataset is not compatible with Fast tokenizers which have a smarter overflow handeling
+        # SquadDataset is not compatible with Fast tokenizers which have a smarter overflow handling
        # So we use use_fast=False here for now until Fast-tokenizer-compatible-examples are out
        tokenizer = AutoTokenizer.from_pretrained(args.output_dir, do_lower_case=args.do_lower_case, use_fast=False)
        model.to(args.device)
--- a/examples/legacy/question-answering/run_squad_trainer.py
+++ b/examples/legacy/question-answering/run_squad_trainer.py
@@ -122,7 +122,7 @@ def main():
    tokenizer = AutoTokenizer.from_pretrained(
        model_args.tokenizer_name if model_args.tokenizer_name else model_args.model_name_or_path,
        cache_dir=model_args.cache_dir,
-        use_fast=False,  # SquadDataset is not compatible with Fast tokenizers which have a smarter overflow handeling
+        use_fast=False,  # SquadDataset is not compatible with Fast tokenizers which have a smarter overflow handling
    )
    model = AutoModelForQuestionAnswering.from_pretrained(
        model_args.model_name_or_path,
--- a/examples/legacy/run_transfo_xl.py
+++ b/examples/legacy/run_transfo_xl.py
@@ -71,7 +71,7 @@ def main():
    # You can also build the corpus yourself using TransfoXLCorpus methods
    # The pre-processing involve computing word frequencies to prepare the Adaptive input and SoftMax
    # and tokenizing the dataset
-    # The pre-processed corpus is a convertion (using the conversion script )
+    # The pre-processed corpus is a conversion (using the conversion script )
    corpus = TransfoXLCorpus.from_pretrained(args.model_name)

    va_iter = corpus.get_iterator("valid", args.batch_size, args.tgt_len, device=device, ext_len=args.ext_len)
--- a/examples/legacy/seq2seq/pack_dataset.py
+++ b/examples/legacy/seq2seq/pack_dataset.py
@@ -40,7 +40,7 @@ def pack_examples(tok, src_examples, tgt_examples, max_tokens=1024):
    for src, tgt in tqdm(sorted_examples[1:]):
        cand_src = new_src + " " + src
        cand_tgt = new_tgt + " " + tgt
-        if is_too_big(cand_src) or is_too_big(cand_tgt):  # cant fit, finalize example
+        if is_too_big(cand_src) or is_too_big(cand_tgt):  # can't fit, finalize example
            finished_src.append(new_src)
            finished_tgt.append(new_tgt)
            new_src, new_tgt = src, tgt