From 121830ab47ef0342d014880ab45add634cf2068f Mon Sep 17 00:00:00 2001 From: Yih-Dar <2521628+ydshieh@users.noreply.github.com> Date: Tue, 25 Mar 2025 18:15:47 +0100 Subject: [PATCH] update examples after ruff being updated (#36972) * update * update --------- Co-authored-by: ydshieh --- .circleci/config.yml | 2 +- examples/flax/language-modeling/run_bert_flax.py | 2 +- examples/legacy/seq2seq/finetune_trainer.py | 6 +++--- examples/legacy/seq2seq/run_eval_search.py | 2 +- examples/legacy/seq2seq/utils.py | 8 ++++---- examples/legacy/token-classification/tasks.py | 2 +- .../modular-transformers/modeling_multimodal2.py | 2 +- examples/pytorch/language-modeling/run_clm.py | 2 +- examples/pytorch/language-modeling/run_fim.py | 5 ++--- .../pytorch/language-modeling/run_fim_no_trainer.py | 4 +--- .../speech-recognition/run_speech_recognition_ctc.py | 2 +- .../run_speech_recognition_ctc_adapter.py | 2 +- examples/pytorch/summarization/run_summarization.py | 6 +++--- .../text-classification/run_classification.py | 12 ++++++------ examples/pytorch/text-classification/run_glue.py | 12 ++++++------ examples/pytorch/text-generation/run_generation.py | 2 +- .../run_generation_contrastive_search.py | 2 +- .../translation/run_translation_no_trainer.py | 6 +++--- examples/run_on_remote.py | 2 +- examples/tensorflow/translation/run_translation.py | 6 +++--- 20 files changed, 42 insertions(+), 45 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 108e319d2c..19428b7bb9 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -154,7 +154,7 @@ jobs: path: ~/transformers/installed.txt - run: python -c "from transformers import *" || (echo '🚨 import failed, this means you introduced unprotected imports! 🚨'; exit 1) - run: ruff check examples tests src utils - - run: ruff format tests src utils --check + - run: ruff format examples tests src utils --check - run: python utils/custom_init_isort.py --check_only - run: python utils/sort_auto_mappings.py --check_only - run: python utils/check_doc_toc.py diff --git a/examples/flax/language-modeling/run_bert_flax.py b/examples/flax/language-modeling/run_bert_flax.py index 2e73af4592..2faea6b5c5 100644 --- a/examples/flax/language-modeling/run_bert_flax.py +++ b/examples/flax/language-modeling/run_bert_flax.py @@ -53,4 +53,4 @@ for _ in range(nbenchmark): func() end = time.time() print(end - start) -print(f"Throughput: {((nbenchmark * BS)/(end-start)):.3f} examples/sec") +print(f"Throughput: {((nbenchmark * BS) / (end - start)):.3f} examples/sec") diff --git a/examples/legacy/seq2seq/finetune_trainer.py b/examples/legacy/seq2seq/finetune_trainer.py index 5ede86ee08..e9daf9fc50 100755 --- a/examples/legacy/seq2seq/finetune_trainer.py +++ b/examples/legacy/seq2seq/finetune_trainer.py @@ -231,9 +231,9 @@ def main(): # set decoder_start_token_id for MBart if model.config.decoder_start_token_id is None and isinstance(tokenizer, (MBartTokenizer, MBartTokenizerFast)): - assert ( - data_args.tgt_lang is not None and data_args.src_lang is not None - ), "mBart requires --tgt_lang and --src_lang" + assert data_args.tgt_lang is not None and data_args.src_lang is not None, ( + "mBart requires --tgt_lang and --src_lang" + ) if isinstance(tokenizer, MBartTokenizer): model.config.decoder_start_token_id = tokenizer.lang_code_to_id[data_args.tgt_lang] else: diff --git a/examples/legacy/seq2seq/run_eval_search.py b/examples/legacy/seq2seq/run_eval_search.py index 9b5debfb27..e6048a4ec4 100755 --- a/examples/legacy/seq2seq/run_eval_search.py +++ b/examples/legacy/seq2seq/run_eval_search.py @@ -128,7 +128,7 @@ def run_search(): results_sorted = sorted(results, key=operator.itemgetter(*task_score_names[task]), reverse=True) print(" | ".join([f"{col:{col_widths[col]}}" for col in col_names])) - print(" | ".join([f"{'-'*col_widths[col]}" for col in col_names])) + print(" | ".join([f"{'-' * col_widths[col]}" for col in col_names])) for row in results_sorted: print(" | ".join([f"{row[col]:{col_widths[col]}}" for col in col_names])) diff --git a/examples/legacy/seq2seq/utils.py b/examples/legacy/seq2seq/utils.py index d7cd84dedb..955c9e9961 100644 --- a/examples/legacy/seq2seq/utils.py +++ b/examples/legacy/seq2seq/utils.py @@ -282,9 +282,9 @@ class Seq2SeqDataCollator: self.tokenizer = tokenizer self.pad_token_id = tokenizer.pad_token_id self.decoder_start_token_id = decoder_start_token_id - assert ( - self.pad_token_id is not None - ), f"pad_token_id is not defined for ({self.tokenizer.__class__.__name__}), it must be defined." + assert self.pad_token_id is not None, ( + f"pad_token_id is not defined for ({self.tokenizer.__class__.__name__}), it must be defined." + ) self.data_args = data_args self.tpu_num_cores = tpu_num_cores self.dataset_kwargs = {"add_prefix_space": True} if isinstance(tokenizer, BartTokenizer) else {} @@ -593,7 +593,7 @@ def assert_all_frozen(model): model_grads: List[bool] = list(grad_status(model)) n_require_grad = sum(lmap(int, model_grads)) npars = len(model_grads) - assert not any(model_grads), f"{n_require_grad/npars:.1%} of {npars} weights require grad" + assert not any(model_grads), f"{n_require_grad / npars:.1%} of {npars} weights require grad" def assert_not_all_frozen(model): diff --git a/examples/legacy/token-classification/tasks.py b/examples/legacy/token-classification/tasks.py index d893a2ab03..43de0a7f04 100644 --- a/examples/legacy/token-classification/tasks.py +++ b/examples/legacy/token-classification/tasks.py @@ -131,7 +131,7 @@ class POS(TokenClassificationTask): s_p = preds_list[example_id] out = "" for token in sentence: - out += f'{token["form"]} ({token["upos"]}|{s_p.pop(0)}) ' + out += f"{token['form']} ({token['upos']}|{s_p.pop(0)}) " out += "\n" writer.write(out) example_id += 1 diff --git a/examples/modular-transformers/modeling_multimodal2.py b/examples/modular-transformers/modeling_multimodal2.py index b10b11b671..ba2e9a4d6f 100644 --- a/examples/modular-transformers/modeling_multimodal2.py +++ b/examples/modular-transformers/modeling_multimodal2.py @@ -534,7 +534,7 @@ class Multimodal2VisionEmbeddings(nn.Module): batch_size, _, height, width = pixel_values.shape if not interpolate_pos_encoding and (height != self.image_size or width != self.image_size): raise ValueError( - f"Input image size ({height}*{width}) doesn't match model" f" ({self.image_size}*{self.image_size})." + f"Input image size ({height}*{width}) doesn't match model ({self.image_size}*{self.image_size})." ) target_dtype = self.patch_embedding.weight.dtype patch_embeds = self.patch_embedding(pixel_values.to(dtype=target_dtype)) # shape = [*, width, grid, grid] diff --git a/examples/pytorch/language-modeling/run_clm.py b/examples/pytorch/language-modeling/run_clm.py index f5cb6e884e..308e3a9962 100755 --- a/examples/pytorch/language-modeling/run_clm.py +++ b/examples/pytorch/language-modeling/run_clm.py @@ -438,7 +438,7 @@ def main(): else: model = AutoModelForCausalLM.from_config(config, trust_remote_code=model_args.trust_remote_code) n_params = sum({p.data_ptr(): p.numel() for p in model.parameters()}.values()) - logger.info(f"Training new model from scratch - Total size={n_params/2**20:.2f}M params") + logger.info(f"Training new model from scratch - Total size={n_params / 2**20:.2f}M params") # We resize the embeddings only when necessary to avoid index errors. If you are creating a model from scratch # on a small vocab and want a smaller embedding size, remove this test. diff --git a/examples/pytorch/language-modeling/run_fim.py b/examples/pytorch/language-modeling/run_fim.py index ce00938830..f981c97a8e 100644 --- a/examples/pytorch/language-modeling/run_fim.py +++ b/examples/pytorch/language-modeling/run_fim.py @@ -265,8 +265,7 @@ class DataTrainingArguments: default="", metadata={ "help": ( - "Fill-in-Middle Pad token. Used only when 'truncate_or_pad' is set to True. " - "Defaults to ''." + "Fill-in-Middle Pad token. Used only when 'truncate_or_pad' is set to True. Defaults to ''." ) }, ) @@ -514,7 +513,7 @@ def main(): attn_implementation=model_args.attn_implementation, ) n_params = sum({p.data_ptr(): p.numel() for p in model.parameters()}.values()) - logger.info(f"Training new model from scratch - Total size={n_params/2**20:.2f}M params") + logger.info(f"Training new model from scratch - Total size={n_params / 2**20:.2f}M params") # Add the new FIM tokens to the tokenizer and resize model's vocab embeddings special_tokens = [data_args.fim_prefix_token, data_args.fim_middle_token, data_args.fim_suffix_token] diff --git a/examples/pytorch/language-modeling/run_fim_no_trainer.py b/examples/pytorch/language-modeling/run_fim_no_trainer.py index 0dc8730489..b2cd5ddd12 100644 --- a/examples/pytorch/language-modeling/run_fim_no_trainer.py +++ b/examples/pytorch/language-modeling/run_fim_no_trainer.py @@ -234,9 +234,7 @@ def parse_args(): "--fim_pad_token", type=str, default="", - help=( - "Fill-in-Middle Pad token. Used only when 'truncate_or_pad' is set to True." " Defaults to ''." - ), + help=("Fill-in-Middle Pad token. Used only when 'truncate_or_pad' is set to True. Defaults to ''."), ) parser.add_argument( "--preprocessing_num_workers", diff --git a/examples/pytorch/speech-recognition/run_speech_recognition_ctc.py b/examples/pytorch/speech-recognition/run_speech_recognition_ctc.py index 828cc68000..7d943203b4 100755 --- a/examples/pytorch/speech-recognition/run_speech_recognition_ctc.py +++ b/examples/pytorch/speech-recognition/run_speech_recognition_ctc.py @@ -491,7 +491,7 @@ def main(): # E.g. characters, such as `,` and `.` do not really have an acoustic characteristic # that could be easily picked up by the model chars_to_ignore_regex = ( - f'[{"".join(data_args.chars_to_ignore)}]' if data_args.chars_to_ignore is not None else None + f"[{''.join(data_args.chars_to_ignore)}]" if data_args.chars_to_ignore is not None else None ) text_column_name = data_args.text_column_name diff --git a/examples/pytorch/speech-recognition/run_speech_recognition_ctc_adapter.py b/examples/pytorch/speech-recognition/run_speech_recognition_ctc_adapter.py index d41cf9da54..c2091c2af9 100755 --- a/examples/pytorch/speech-recognition/run_speech_recognition_ctc_adapter.py +++ b/examples/pytorch/speech-recognition/run_speech_recognition_ctc_adapter.py @@ -471,7 +471,7 @@ def main(): # E.g. characters, such as `,` and `.` do not really have an acoustic characteristic # that could be easily picked up by the model chars_to_ignore_regex = ( - f'[{"".join(data_args.chars_to_ignore)}]' if data_args.chars_to_ignore is not None else None + f"[{''.join(data_args.chars_to_ignore)}]" if data_args.chars_to_ignore is not None else None ) text_column_name = data_args.text_column_name diff --git a/examples/pytorch/summarization/run_summarization.py b/examples/pytorch/summarization/run_summarization.py index 77fe6c60a7..49ad668784 100755 --- a/examples/pytorch/summarization/run_summarization.py +++ b/examples/pytorch/summarization/run_summarization.py @@ -505,9 +505,9 @@ def main(): return if isinstance(tokenizer, tuple(MULTILINGUAL_TOKENIZERS)): - assert ( - data_args.lang is not None - ), f"{tokenizer.__class__.__name__} is a multilingual tokenizer which requires --lang argument" + assert data_args.lang is not None, ( + f"{tokenizer.__class__.__name__} is a multilingual tokenizer which requires --lang argument" + ) tokenizer.src_lang = data_args.lang tokenizer.tgt_lang = data_args.lang diff --git a/examples/pytorch/text-classification/run_classification.py b/examples/pytorch/text-classification/run_classification.py index 45cc295bb4..28bd9a7025 100755 --- a/examples/pytorch/text-classification/run_classification.py +++ b/examples/pytorch/text-classification/run_classification.py @@ -199,9 +199,9 @@ class DataTrainingArguments: train_extension = self.train_file.split(".")[-1] assert train_extension in ["csv", "json"], "`train_file` should be a csv or a json file." validation_extension = self.validation_file.split(".")[-1] - assert ( - validation_extension == train_extension - ), "`validation_file` should have the same extension (csv or json) as `train_file`." + assert validation_extension == train_extension, ( + "`validation_file` should have the same extension (csv or json) as `train_file`." + ) @dataclass @@ -357,9 +357,9 @@ def main(): if data_args.test_file is not None: train_extension = data_args.train_file.split(".")[-1] test_extension = data_args.test_file.split(".")[-1] - assert ( - test_extension == train_extension - ), "`test_file` should have the same extension (csv or json) as `train_file`." + assert test_extension == train_extension, ( + "`test_file` should have the same extension (csv or json) as `train_file`." + ) data_files["test"] = data_args.test_file else: raise ValueError("Need either a dataset name or a test file for `do_predict`.") diff --git a/examples/pytorch/text-classification/run_glue.py b/examples/pytorch/text-classification/run_glue.py index d7d67637be..db3101abda 100755 --- a/examples/pytorch/text-classification/run_glue.py +++ b/examples/pytorch/text-classification/run_glue.py @@ -156,9 +156,9 @@ class DataTrainingArguments: train_extension = self.train_file.split(".")[-1] assert train_extension in ["csv", "json"], "`train_file` should be a csv or a json file." validation_extension = self.validation_file.split(".")[-1] - assert ( - validation_extension == train_extension - ), "`validation_file` should have the same extension (csv or json) as `train_file`." + assert validation_extension == train_extension, ( + "`validation_file` should have the same extension (csv or json) as `train_file`." + ) @dataclass @@ -313,9 +313,9 @@ def main(): if data_args.test_file is not None: train_extension = data_args.train_file.split(".")[-1] test_extension = data_args.test_file.split(".")[-1] - assert ( - test_extension == train_extension - ), "`test_file` should have the same extension (csv or json) as `train_file`." + assert test_extension == train_extension, ( + "`test_file` should have the same extension (csv or json) as `train_file`." + ) data_files["test"] = data_args.test_file else: raise ValueError("Need either a GLUE task or a test file for `do_predict`.") diff --git a/examples/pytorch/text-generation/run_generation.py b/examples/pytorch/text-generation/run_generation.py index 0e21a24268..570eb92645 100755 --- a/examples/pytorch/text-generation/run_generation.py +++ b/examples/pytorch/text-generation/run_generation.py @@ -322,7 +322,7 @@ def main(): parser.add_argument( "--use_cpu", action="store_true", - help="Whether or not to use cpu. If set to False, " "we will use gpu/npu or mps device if available", + help="Whether or not to use cpu. If set to False, we will use gpu/npu or mps device if available", ) parser.add_argument("--num_return_sequences", type=int, default=1, help="The number of samples to generate.") parser.add_argument( diff --git a/examples/pytorch/text-generation/run_generation_contrastive_search.py b/examples/pytorch/text-generation/run_generation_contrastive_search.py index ba4c9a77e9..a36323e4ed 100755 --- a/examples/pytorch/text-generation/run_generation_contrastive_search.py +++ b/examples/pytorch/text-generation/run_generation_contrastive_search.py @@ -68,7 +68,7 @@ def main(): parser.add_argument( "--use_cpu", action="store_true", - help="Whether or not to use cpu. If set to False, " "we will use gpu/npu or mps device if available", + help="Whether or not to use cpu. If set to False, we will use gpu/npu or mps device if available", ) parser.add_argument( "--fp16", diff --git a/examples/pytorch/translation/run_translation_no_trainer.py b/examples/pytorch/translation/run_translation_no_trainer.py index 218f245ff3..973548c2ce 100644 --- a/examples/pytorch/translation/run_translation_no_trainer.py +++ b/examples/pytorch/translation/run_translation_no_trainer.py @@ -436,9 +436,9 @@ def main(): # Set decoder_start_token_id if model.config.decoder_start_token_id is None and isinstance(tokenizer, (MBartTokenizer, MBartTokenizerFast)): - assert ( - args.target_lang is not None and args.source_lang is not None - ), "mBart requires --target_lang and --source_lang" + assert args.target_lang is not None and args.source_lang is not None, ( + "mBart requires --target_lang and --source_lang" + ) if isinstance(tokenizer, MBartTokenizer): model.config.decoder_start_token_id = tokenizer.lang_code_to_id[args.target_lang] else: diff --git a/examples/run_on_remote.py b/examples/run_on_remote.py index 46f87065d7..dff9d26848 100644 --- a/examples/run_on_remote.py +++ b/examples/run_on_remote.py @@ -56,7 +56,7 @@ if __name__ == "__main__": cluster.run(["pip install torch --upgrade --extra-index-url https://download.pytorch.org/whl/cu117"]) # Run example. You can bypass the CLI wrapper and paste your own code here. - cluster.run([f'python transformers/examples/{args.example} {" ".join(shlex.quote(arg) for arg in unknown)}']) + cluster.run([f"python transformers/examples/{args.example} {' '.join(shlex.quote(arg) for arg in unknown)}"]) # Alternatively, we can just import and run a training function (especially if there's no wrapper CLI): # from my_script... import train diff --git a/examples/tensorflow/translation/run_translation.py b/examples/tensorflow/translation/run_translation.py index 47ee507253..3b9a9a9c1c 100644 --- a/examples/tensorflow/translation/run_translation.py +++ b/examples/tensorflow/translation/run_translation.py @@ -501,9 +501,9 @@ def main(): # region Set decoder_start_token_id if model.config.decoder_start_token_id is None and isinstance(tokenizer, (MBartTokenizer, MBartTokenizerFast)): - assert ( - data_args.target_lang is not None and data_args.source_lang is not None - ), "mBart requires --target_lang and --source_lang" + assert data_args.target_lang is not None and data_args.source_lang is not None, ( + "mBart requires --target_lang and --source_lang" + ) if isinstance(tokenizer, MBartTokenizer): model.config.decoder_start_token_id = tokenizer.lang_code_to_id[data_args.target_lang] else: