Fix typos in strings and comments (#37910)

This commit is contained in:
co63oc
2025-05-01 21:58:58 +08:00
committed by GitHub
parent c80f65265b
commit 5b573bebb9
17 changed files with 25 additions and 25 deletions

View File

@@ -539,7 +539,7 @@ def convert_examples_to_features(
if "num_truncated_tokens" in inputs and inputs["num_truncated_tokens"] > 0:
logger.info(
"Attention! you are cropping tokens (swag task is ok). "
-"If you are training ARC and RACE and you are poping question + options, "
+"If you are training ARC and RACE and you are popping question + options, "
"you need to try to use a bigger max seq length!"
)

View File

@@ -745,7 +745,7 @@ def main():
args.tokenizer_name if args.tokenizer_name else args.model_name_or_path,
do_lower_case=args.do_lower_case,
cache_dir=args.cache_dir if args.cache_dir else None,
-use_fast=False, # SquadDataset is not compatible with Fast tokenizers which have a smarter overflow handeling
+use_fast=False, # SquadDataset is not compatible with Fast tokenizers which have a smarter overflow handling
)
model = AutoModelForQuestionAnswering.from_pretrained(
args.model_name_or_path,
@@ -795,7 +795,7 @@ def main():
# Load a trained model and vocabulary that you have fine-tuned
model = AutoModelForQuestionAnswering.from_pretrained(args.output_dir) # , force_download=True)
-# SquadDataset is not compatible with Fast tokenizers which have a smarter overflow handeling
+# SquadDataset is not compatible with Fast tokenizers which have a smarter overflow handling
# So we use use_fast=False here for now until Fast-tokenizer-compatible-examples are out
tokenizer = AutoTokenizer.from_pretrained(args.output_dir, do_lower_case=args.do_lower_case, use_fast=False)
model.to(args.device)

View File

@@ -122,7 +122,7 @@ def main():
tokenizer = AutoTokenizer.from_pretrained(
model_args.tokenizer_name if model_args.tokenizer_name else model_args.model_name_or_path,
cache_dir=model_args.cache_dir,
-use_fast=False, # SquadDataset is not compatible with Fast tokenizers which have a smarter overflow handeling
+use_fast=False, # SquadDataset is not compatible with Fast tokenizers which have a smarter overflow handling
)
model = AutoModelForQuestionAnswering.from_pretrained(
model_args.model_name_or_path,

View File

@@ -71,7 +71,7 @@ def main():
# You can also build the corpus yourself using TransfoXLCorpus methods
# The pre-processing involve computing word frequencies to prepare the Adaptive input and SoftMax
# and tokenizing the dataset
-# The pre-processed corpus is a convertion (using the conversion script )
+# The pre-processed corpus is a conversion (using the conversion script )
corpus = TransfoXLCorpus.from_pretrained(args.model_name)
va_iter = corpus.get_iterator("valid", args.batch_size, args.tgt_len, device=device, ext_len=args.ext_len)

View File

@@ -40,7 +40,7 @@ def pack_examples(tok, src_examples, tgt_examples, max_tokens=1024):
for src, tgt in tqdm(sorted_examples[1:]):
cand_src = new_src + " " + src
cand_tgt = new_tgt + " " + tgt
-if is_too_big(cand_src) or is_too_big(cand_tgt): # cant fit, finalize example
+if is_too_big(cand_src) or is_too_big(cand_tgt): # can't fit, finalize example
finished_src.append(new_src)
finished_tgt.append(new_tgt)
new_src, new_tgt = src, tgt

View File

@@ -804,7 +804,7 @@ def main():
if "common_voice" in data_args.dataset_name:
kwargs["language"] = config_name
-# make sure that adapter weights are saved seperately
+# make sure that adapter weights are saved separately
adapter_file = WAV2VEC2_ADAPTER_SAFE_FILE.format(data_args.target_language)
adapter_file = os.path.join(training_args.output_dir, adapter_file)
logger.info(f"Saving adapter weights under {adapter_file}...")