Fix typos in strings and comments (#37910)
This commit is contained in:
@@ -539,7 +539,7 @@ def convert_examples_to_features(
|
||||
if "num_truncated_tokens" in inputs and inputs["num_truncated_tokens"] > 0:
|
||||
logger.info(
|
||||
"Attention! you are cropping tokens (swag task is ok). "
|
||||
"If you are training ARC and RACE and you are poping question + options, "
|
||||
"If you are training ARC and RACE and you are popping question + options, "
|
||||
"you need to try to use a bigger max seq length!"
|
||||
)
|
||||
|
||||
|
||||
@@ -745,7 +745,7 @@ def main():
|
||||
args.tokenizer_name if args.tokenizer_name else args.model_name_or_path,
|
||||
do_lower_case=args.do_lower_case,
|
||||
cache_dir=args.cache_dir if args.cache_dir else None,
|
||||
use_fast=False, # SquadDataset is not compatible with Fast tokenizers which have a smarter overflow handeling
|
||||
use_fast=False, # SquadDataset is not compatible with Fast tokenizers which have a smarter overflow handling
|
||||
)
|
||||
model = AutoModelForQuestionAnswering.from_pretrained(
|
||||
args.model_name_or_path,
|
||||
@@ -795,7 +795,7 @@ def main():
|
||||
# Load a trained model and vocabulary that you have fine-tuned
|
||||
model = AutoModelForQuestionAnswering.from_pretrained(args.output_dir) # , force_download=True)
|
||||
|
||||
# SquadDataset is not compatible with Fast tokenizers which have a smarter overflow handeling
|
||||
# SquadDataset is not compatible with Fast tokenizers which have a smarter overflow handling
|
||||
# So we use use_fast=False here for now until Fast-tokenizer-compatible-examples are out
|
||||
tokenizer = AutoTokenizer.from_pretrained(args.output_dir, do_lower_case=args.do_lower_case, use_fast=False)
|
||||
model.to(args.device)
|
||||
|
||||
@@ -122,7 +122,7 @@ def main():
|
||||
tokenizer = AutoTokenizer.from_pretrained(
|
||||
model_args.tokenizer_name if model_args.tokenizer_name else model_args.model_name_or_path,
|
||||
cache_dir=model_args.cache_dir,
|
||||
use_fast=False, # SquadDataset is not compatible with Fast tokenizers which have a smarter overflow handeling
|
||||
use_fast=False, # SquadDataset is not compatible with Fast tokenizers which have a smarter overflow handling
|
||||
)
|
||||
model = AutoModelForQuestionAnswering.from_pretrained(
|
||||
model_args.model_name_or_path,
|
||||
|
||||
@@ -71,7 +71,7 @@ def main():
|
||||
# You can also build the corpus yourself using TransfoXLCorpus methods
|
||||
# The pre-processing involve computing word frequencies to prepare the Adaptive input and SoftMax
|
||||
# and tokenizing the dataset
|
||||
# The pre-processed corpus is a convertion (using the conversion script )
|
||||
# The pre-processed corpus is a conversion (using the conversion script )
|
||||
corpus = TransfoXLCorpus.from_pretrained(args.model_name)
|
||||
|
||||
va_iter = corpus.get_iterator("valid", args.batch_size, args.tgt_len, device=device, ext_len=args.ext_len)
|
||||
|
||||
@@ -40,7 +40,7 @@ def pack_examples(tok, src_examples, tgt_examples, max_tokens=1024):
|
||||
for src, tgt in tqdm(sorted_examples[1:]):
|
||||
cand_src = new_src + " " + src
|
||||
cand_tgt = new_tgt + " " + tgt
|
||||
if is_too_big(cand_src) or is_too_big(cand_tgt): # cant fit, finalize example
|
||||
if is_too_big(cand_src) or is_too_big(cand_tgt): # can't fit, finalize example
|
||||
finished_src.append(new_src)
|
||||
finished_tgt.append(new_tgt)
|
||||
new_src, new_tgt = src, tgt
|
||||
|
||||
Reference in New Issue
Block a user