diff --git a/examples/flax/image-captioning/run_image_captioning_flax.py b/examples/flax/image-captioning/run_image_captioning_flax.py index cb7dad8d58..414e1c62e3 100644 --- a/examples/flax/image-captioning/run_image_captioning_flax.py +++ b/examples/flax/image-captioning/run_image_captioning_flax.py @@ -906,7 +906,7 @@ def main(): layer_norm_named_params = { layer[-2:] for layer_norm_name in layer_norm_candidates - for layer in flat_params.keys() + for layer in flat_params if layer_norm_name in "".join(layer).lower() } flat_mask = {path: (path[-1] != "bias" and path[-2:] not in layer_norm_named_params) for path in flat_params} diff --git a/examples/flax/language-modeling/run_bart_dlm_flax.py b/examples/flax/language-modeling/run_bart_dlm_flax.py index f3e27be1cc..2ddadcdf99 100644 --- a/examples/flax/language-modeling/run_bart_dlm_flax.py +++ b/examples/flax/language-modeling/run_bart_dlm_flax.py @@ -530,7 +530,7 @@ def main(): trust_remote_code=data_args.trust_remote_code, ) - if "validation" not in datasets.keys(): + if "validation" not in datasets: datasets["validation"] = load_dataset( data_args.dataset_name, data_args.dataset_config_name, @@ -567,7 +567,7 @@ def main(): num_proc=data_args.preprocessing_num_workers, ) - if "validation" not in datasets.keys(): + if "validation" not in datasets: datasets["validation"] = load_dataset( extension, data_files=data_files, @@ -671,7 +671,7 @@ def main(): # max_seq_length. def group_texts(examples): # Concatenate all texts. - concatenated_examples = {k: list(chain(*examples[k])) for k in examples.keys()} + concatenated_examples = {k: list(chain(*examples[k])) for k in examples} total_length = len(concatenated_examples[list(examples.keys())[0]]) # We drop the small remainder, we could add padding if the model supported it instead of this drop, you can # customize this part to your needs. @@ -777,7 +777,7 @@ def main(): layer_norm_named_params = { layer[-2:] for layer_norm_name in layer_norm_candidates - for layer in flat_params.keys() + for layer in flat_params if layer_norm_name in "".join(layer).lower() } flat_mask = {path: (path[-1] != "bias" and path[-2:] not in layer_norm_named_params) for path in flat_params} diff --git a/examples/flax/language-modeling/run_clm_flax.py b/examples/flax/language-modeling/run_clm_flax.py index 8d2daaf517..fc1367fb6c 100755 --- a/examples/flax/language-modeling/run_clm_flax.py +++ b/examples/flax/language-modeling/run_clm_flax.py @@ -407,7 +407,7 @@ def main(): trust_remote_code=model_args.trust_remote_code, ) - if "validation" not in dataset.keys(): + if "validation" not in dataset: dataset["validation"] = load_dataset( data_args.dataset_name, data_args.dataset_config_name, @@ -447,7 +447,7 @@ def main(): num_proc=data_args.preprocessing_num_workers, ) - if "validation" not in dataset.keys(): + if "validation" not in dataset: dataset["validation"] = load_dataset( extension, data_files=data_files, @@ -580,7 +580,7 @@ def main(): # Main data processing function that will concatenate all texts from our dataset and generate chunks of block_size. def group_texts(examples): # Concatenate all texts. - concatenated_examples = {k: list(chain(*examples[k])) for k in examples.keys()} + concatenated_examples = {k: list(chain(*examples[k])) for k in examples} total_length = len(concatenated_examples[list(examples.keys())[0]]) # We drop the small remainder, we could add padding if the model supported it instead of this drop, you can # customize this part to your needs. @@ -674,7 +674,7 @@ def main(): layer_norm_named_params = { layer[-2:] for layer_norm_name in layer_norm_candidates - for layer in flat_params.keys() + for layer in flat_params if layer_norm_name in "".join(layer).lower() } flat_mask = {path: (path[-1] != "bias" and path[-2:] not in layer_norm_named_params) for path in flat_params} diff --git a/examples/flax/language-modeling/run_mlm_flax.py b/examples/flax/language-modeling/run_mlm_flax.py index df548de619..0d80b7b0bf 100755 --- a/examples/flax/language-modeling/run_mlm_flax.py +++ b/examples/flax/language-modeling/run_mlm_flax.py @@ -448,7 +448,7 @@ def main(): trust_remote_code=model_args.trust_remote_code, ) - if "validation" not in datasets.keys(): + if "validation" not in datasets: datasets["validation"] = load_dataset( data_args.dataset_name, data_args.dataset_config_name, @@ -485,7 +485,7 @@ def main(): num_proc=data_args.preprocessing_num_workers, ) - if "validation" not in datasets.keys(): + if "validation" not in datasets: datasets["validation"] = load_dataset( extension, data_files=data_files, @@ -603,7 +603,7 @@ def main(): # max_seq_length. def group_texts(examples): # Concatenate all texts. - concatenated_examples = {k: list(chain(*examples[k])) for k in examples.keys()} + concatenated_examples = {k: list(chain(*examples[k])) for k in examples} total_length = len(concatenated_examples[list(examples.keys())[0]]) # We drop the small remainder, we could add padding if the model supported it instead of this drop, you can # customize this part to your needs. @@ -707,7 +707,7 @@ def main(): layer_norm_named_params = { layer[-2:] for layer_norm_name in layer_norm_candidates - for layer in flat_params.keys() + for layer in flat_params if layer_norm_name in "".join(layer).lower() } flat_mask = {path: (path[-1] != "bias" and path[-2:] not in layer_norm_named_params) for path in flat_params} diff --git a/examples/flax/language-modeling/run_t5_mlm_flax.py b/examples/flax/language-modeling/run_t5_mlm_flax.py index 9a64b6d716..af3394cccb 100755 --- a/examples/flax/language-modeling/run_t5_mlm_flax.py +++ b/examples/flax/language-modeling/run_t5_mlm_flax.py @@ -572,7 +572,7 @@ def main(): trust_remote_code=data_args.trust_remote_code, ) - if "validation" not in datasets.keys(): + if "validation" not in datasets: datasets["validation"] = load_dataset( data_args.dataset_name, data_args.dataset_config_name, @@ -609,7 +609,7 @@ def main(): num_proc=data_args.preprocessing_num_workers, ) - if "validation" not in datasets.keys(): + if "validation" not in datasets: datasets["validation"] = load_dataset( extension, data_files=data_files, @@ -703,7 +703,7 @@ def main(): # Main data processing function that will concatenate all texts from our dataset and generate chunks of expanded_inputs_length. def group_texts(examples): # Concatenate all texts. - concatenated_examples = {k: list(chain(*examples[k])) for k in examples.keys()} + concatenated_examples = {k: list(chain(*examples[k])) for k in examples} total_length = len(concatenated_examples[list(examples.keys())[0]]) # We drop the small remainder, we could add padding if the model supported it instead of this drop, you can # customize this part to your needs. @@ -814,7 +814,7 @@ def main(): layer_norm_named_params = { layer[-2:] for layer_norm_name in layer_norm_candidates - for layer in flat_params.keys() + for layer in flat_params if layer_norm_name in "".join(layer).lower() } flat_mask = {path: (path[-1] != "bias" and path[-2:] not in layer_norm_named_params) for path in flat_params} diff --git a/examples/flax/question-answering/run_qa.py b/examples/flax/question-answering/run_qa.py index 55fb4d9c62..d17304c702 100644 --- a/examples/flax/question-answering/run_qa.py +++ b/examples/flax/question-answering/run_qa.py @@ -345,7 +345,7 @@ def create_train_state( layer_norm_named_params = { layer[-2:] for layer_norm_name in layer_norm_candidates - for layer in flat_params.keys() + for layer in flat_params if layer_norm_name in "".join(layer).lower() } flat_mask = {path: (path[-1] != "bias" and path[-2:] not in layer_norm_named_params) for path in flat_params} diff --git a/examples/flax/speech-recognition/run_flax_speech_recognition_seq2seq.py b/examples/flax/speech-recognition/run_flax_speech_recognition_seq2seq.py index a1b7519b9e..40267a95c6 100644 --- a/examples/flax/speech-recognition/run_flax_speech_recognition_seq2seq.py +++ b/examples/flax/speech-recognition/run_flax_speech_recognition_seq2seq.py @@ -668,7 +668,7 @@ def main(): layer_norm_named_params = { layer[-2:] for layer_norm_name in layer_norm_candidates - for layer in flat_params.keys() + for layer in flat_params if layer_norm_name in "".join(layer).lower() } flat_mask = {path: (path[-1] != "bias" and path[-2:] not in layer_norm_named_params) for path in flat_params} diff --git a/examples/flax/summarization/run_summarization_flax.py b/examples/flax/summarization/run_summarization_flax.py index 6c9777cf32..5240db323f 100644 --- a/examples/flax/summarization/run_summarization_flax.py +++ b/examples/flax/summarization/run_summarization_flax.py @@ -768,7 +768,7 @@ def main(): layer_norm_named_params = { layer[-2:] for layer_norm_name in layer_norm_candidates - for layer in flat_params.keys() + for layer in flat_params if layer_norm_name in "".join(layer).lower() } flat_mask = {path: (path[-1] != "bias" and path[-2:] not in layer_norm_named_params) for path in flat_params} diff --git a/examples/flax/text-classification/run_flax_glue.py b/examples/flax/text-classification/run_flax_glue.py index 2f84235945..ade6bc0e49 100755 --- a/examples/flax/text-classification/run_flax_glue.py +++ b/examples/flax/text-classification/run_flax_glue.py @@ -249,7 +249,7 @@ def create_train_state( layer_norm_named_params = { layer[-2:] for layer_norm_name in layer_norm_candidates - for layer in flat_params.keys() + for layer in flat_params if layer_norm_name in "".join(layer).lower() } flat_mask = {path: (path[-1] != "bias" and path[-2:] not in layer_norm_named_params) for path in flat_params} diff --git a/examples/flax/token-classification/run_flax_ner.py b/examples/flax/token-classification/run_flax_ner.py index ccdabd1c71..93130cc52c 100644 --- a/examples/flax/token-classification/run_flax_ner.py +++ b/examples/flax/token-classification/run_flax_ner.py @@ -310,7 +310,7 @@ def create_train_state( layer_norm_named_params = { layer[-2:] for layer_norm_name in layer_norm_candidates - for layer in flat_params.keys() + for layer in flat_params if layer_norm_name in "".join(layer).lower() } flat_mask = {path: (path[-1] != "bias" and path[-2:] not in layer_norm_named_params) for path in flat_params} diff --git a/examples/legacy/pytorch-lightning/lightning_base.py b/examples/legacy/pytorch-lightning/lightning_base.py index 5c34fbe7e0..228fc87ca3 100644 --- a/examples/legacy/pytorch-lightning/lightning_base.py +++ b/examples/legacy/pytorch-lightning/lightning_base.py @@ -379,8 +379,8 @@ def generic_train( train_params["distributed_backend"] = "ddp" train_params["accumulate_grad_batches"] = args.accumulate_grad_batches - train_params["accelerator"] = extra_train_kwargs.get("accelerator", None) - train_params["profiler"] = extra_train_kwargs.get("profiler", None) + train_params["accelerator"] = extra_train_kwargs.get("accelerator") + train_params["profiler"] = extra_train_kwargs.get("profiler") trainer = pl.Trainer.from_argparse_args( args, diff --git a/examples/legacy/seq2seq/download_wmt.py b/examples/legacy/seq2seq/download_wmt.py index c52c0c7b4f..2cff6f5992 100755 --- a/examples/legacy/seq2seq/download_wmt.py +++ b/examples/legacy/seq2seq/download_wmt.py @@ -44,7 +44,7 @@ def download_wmt_dataset(src_lang="ro", tgt_lang="en", dataset="wmt16", save_dir save_dir = Path(save_dir) save_dir.mkdir(exist_ok=True) - for split in ds.keys(): + for split in ds: print(f"Splitting {split} with {ds[split].num_rows} records") # to save to val.source, val.target like summary datasets diff --git a/examples/pytorch/image-classification/run_image_classification.py b/examples/pytorch/image-classification/run_image_classification.py index 96f8a83bc3..10baf5f8a0 100755 --- a/examples/pytorch/image-classification/run_image_classification.py +++ b/examples/pytorch/image-classification/run_image_classification.py @@ -288,7 +288,7 @@ def main(): return {"pixel_values": pixel_values, "labels": labels} # If we don't have a validation split, split off a percentage of train as validation. - data_args.train_val_split = None if "validation" in dataset.keys() else data_args.train_val_split + data_args.train_val_split = None if "validation" in dataset else data_args.train_val_split if isinstance(data_args.train_val_split, float) and data_args.train_val_split > 0.0: split = dataset["train"].train_test_split(data_args.train_val_split) dataset["train"] = split["train"] diff --git a/examples/pytorch/image-classification/run_image_classification_no_trainer.py b/examples/pytorch/image-classification/run_image_classification_no_trainer.py index a44959812c..e27605b8ed 100644 --- a/examples/pytorch/image-classification/run_image_classification_no_trainer.py +++ b/examples/pytorch/image-classification/run_image_classification_no_trainer.py @@ -324,7 +324,7 @@ def main(): ) # If we don't have a validation split, split off a percentage of train as validation. - args.train_val_split = None if "validation" in dataset.keys() else args.train_val_split + args.train_val_split = None if "validation" in dataset else args.train_val_split if isinstance(args.train_val_split, float) and args.train_val_split > 0.0: split = dataset["train"].train_test_split(args.train_val_split) dataset["train"] = split["train"] diff --git a/examples/pytorch/image-pretraining/run_mae.py b/examples/pytorch/image-pretraining/run_mae.py index f9102b27cd..1ae581b223 100644 --- a/examples/pytorch/image-pretraining/run_mae.py +++ b/examples/pytorch/image-pretraining/run_mae.py @@ -247,7 +247,7 @@ def main(): ) # If we don't have a validation split, split off a percentage of train as validation. - data_args.train_val_split = None if "validation" in ds.keys() else data_args.train_val_split + data_args.train_val_split = None if "validation" in ds else data_args.train_val_split if isinstance(data_args.train_val_split, float) and data_args.train_val_split > 0.0: split = ds["train"].train_test_split(data_args.train_val_split) ds["train"] = split["train"] diff --git a/examples/pytorch/image-pretraining/run_mim.py b/examples/pytorch/image-pretraining/run_mim.py index 538db2038f..f3d47bfff3 100644 --- a/examples/pytorch/image-pretraining/run_mim.py +++ b/examples/pytorch/image-pretraining/run_mim.py @@ -311,7 +311,7 @@ def main(): ) # If we don't have a validation split, split off a percentage of train as validation. - data_args.train_val_split = None if "validation" in ds.keys() else data_args.train_val_split + data_args.train_val_split = None if "validation" in ds else data_args.train_val_split if isinstance(data_args.train_val_split, float) and data_args.train_val_split > 0.0: split = ds["train"].train_test_split(data_args.train_val_split) ds["train"] = split["train"] diff --git a/examples/pytorch/image-pretraining/run_mim_no_trainer.py b/examples/pytorch/image-pretraining/run_mim_no_trainer.py index 429ad7d1c6..581a610137 100644 --- a/examples/pytorch/image-pretraining/run_mim_no_trainer.py +++ b/examples/pytorch/image-pretraining/run_mim_no_trainer.py @@ -452,7 +452,7 @@ def main(): ) # If we don't have a validation split, split off a percentage of train as validation. - args.train_val_split = None if "validation" in ds.keys() else args.train_val_split + args.train_val_split = None if "validation" in ds else args.train_val_split if isinstance(args.train_val_split, float) and args.train_val_split > 0.0: split = ds["train"].train_test_split(args.train_val_split) ds["train"] = split["train"] diff --git a/examples/pytorch/language-modeling/run_clm.py b/examples/pytorch/language-modeling/run_clm.py index 28ebaf9a33..0c5829818d 100755 --- a/examples/pytorch/language-modeling/run_clm.py +++ b/examples/pytorch/language-modeling/run_clm.py @@ -358,7 +358,7 @@ def main(): streaming=data_args.streaming, trust_remote_code=model_args.trust_remote_code, ) - if "validation" not in raw_datasets.keys(): + if "validation" not in raw_datasets: if data_args.streaming: dataset_stream = load_dataset( data_args.dataset_name, @@ -412,7 +412,7 @@ def main(): **dataset_args, ) # If no validation data is there, validation_split_percentage will be used to divide the dataset. - if "validation" not in raw_datasets.keys(): + if "validation" not in raw_datasets: if data_args.streaming: dataset_stream = load_dataset( extension, @@ -579,7 +579,7 @@ def main(): # Main data processing function that will concatenate all texts from our dataset and generate chunks of block_size. def group_texts(examples): # Concatenate all texts. - concatenated_examples = {k: list(chain(*examples[k])) for k in examples.keys()} + concatenated_examples = {k: list(chain(*examples[k])) for k in examples} total_length = len(concatenated_examples[list(examples.keys())[0]]) # We drop the small remainder, and if the total_length < block_size we exclude this batch and return an empty dict. # We could add padding if the model supported it instead of this drop, you can customize this part to your needs. diff --git a/examples/pytorch/language-modeling/run_clm_no_trainer.py b/examples/pytorch/language-modeling/run_clm_no_trainer.py index f5ac527e84..0c397bc28c 100755 --- a/examples/pytorch/language-modeling/run_clm_no_trainer.py +++ b/examples/pytorch/language-modeling/run_clm_no_trainer.py @@ -335,7 +335,7 @@ def main(): raw_datasets = load_dataset( args.dataset_name, args.dataset_config_name, trust_remote_code=args.trust_remote_code ) - if "validation" not in raw_datasets.keys(): + if "validation" not in raw_datasets: raw_datasets["validation"] = load_dataset( args.dataset_name, args.dataset_config_name, @@ -362,7 +362,7 @@ def main(): dataset_args["keep_linebreaks"] = not args.no_keep_linebreaks raw_datasets = load_dataset(extension, data_files=data_files, **dataset_args) # If no validation data is there, validation_split_percentage will be used to divide the dataset. - if "validation" not in raw_datasets.keys(): + if "validation" not in raw_datasets: raw_datasets["validation"] = load_dataset( extension, data_files=data_files, @@ -465,7 +465,7 @@ def main(): # Main data processing function that will concatenate all texts from our dataset and generate chunks of block_size. def group_texts(examples): # Concatenate all texts. - concatenated_examples = {k: list(chain(*examples[k])) for k in examples.keys()} + concatenated_examples = {k: list(chain(*examples[k])) for k in examples} total_length = len(concatenated_examples[list(examples.keys())[0]]) # We drop the small remainder, and if the total_length < block_size we exclude this batch and return an empty dict. # We could add padding if the model supported it instead of this drop, you can customize this part to your needs. diff --git a/examples/pytorch/language-modeling/run_fim.py b/examples/pytorch/language-modeling/run_fim.py index 7e82238e6e..bce3adabfc 100644 --- a/examples/pytorch/language-modeling/run_fim.py +++ b/examples/pytorch/language-modeling/run_fim.py @@ -388,7 +388,7 @@ def main(): streaming=data_args.streaming, trust_remote_code=model_args.trust_remote_code, ) - if "validation" not in raw_datasets.keys(): + if "validation" not in raw_datasets: raw_datasets["validation"] = load_dataset( data_args.dataset_name, data_args.dataset_config_name, @@ -430,7 +430,7 @@ def main(): **dataset_args, ) # If no validation data is there, validation_split_percentage will be used to divide the dataset. - if "validation" not in raw_datasets.keys(): + if "validation" not in raw_datasets: raw_datasets["validation"] = load_dataset( extension, data_files=data_files, @@ -652,7 +652,7 @@ def main(): # Data processing function that will concatenate all texts from our dataset and generate chunks of block_size. def group_texts(examples): # Concatenate all texts. - concatenated_examples = {k: list(chain(*examples[k])) for k in examples.keys()} + concatenated_examples = {k: list(chain(*examples[k])) for k in examples} total_length = len(concatenated_examples[list(examples.keys())[0]]) # We drop the small remainder, and if the total_length < block_size we exclude this batch and return an empty dict. # We could add padding if the model supported it instead of this drop, you can customize this part to your needs. diff --git a/examples/pytorch/language-modeling/run_fim_no_trainer.py b/examples/pytorch/language-modeling/run_fim_no_trainer.py index 400dd643a5..6e5cc427f4 100644 --- a/examples/pytorch/language-modeling/run_fim_no_trainer.py +++ b/examples/pytorch/language-modeling/run_fim_no_trainer.py @@ -401,7 +401,7 @@ def main(): raw_datasets = load_dataset( args.dataset_name, args.dataset_config_name, trust_remote_code=args.trust_remote_code ) - if "validation" not in raw_datasets.keys(): + if "validation" not in raw_datasets: raw_datasets["validation"] = load_dataset( args.dataset_name, args.dataset_config_name, @@ -427,7 +427,7 @@ def main(): dataset_args["keep_linebreaks"] = not args.no_keep_linebreaks raw_datasets = load_dataset(extension, data_files=data_files, **dataset_args) # If no validation data is there, validation_split_percentage will be used to divide the dataset. - if "validation" not in raw_datasets.keys(): + if "validation" not in raw_datasets: raw_datasets["validation"] = load_dataset( extension, data_files=data_files, @@ -599,7 +599,7 @@ def main(): # Main data processing function that will concatenate all texts from our dataset and generate chunks of block_size. def group_texts(examples): # Concatenate all texts. - concatenated_examples = {k: list(chain(*examples[k])) for k in examples.keys()} + concatenated_examples = {k: list(chain(*examples[k])) for k in examples} total_length = len(concatenated_examples[list(examples.keys())[0]]) # We drop the small remainder, and if the total_length < block_size we exclude this batch and return an empty dict. # We could add padding if the model supported it instead of this drop, you can customize this part to your needs. diff --git a/examples/pytorch/language-modeling/run_mlm.py b/examples/pytorch/language-modeling/run_mlm.py index 57f8a34189..4573d343a7 100755 --- a/examples/pytorch/language-modeling/run_mlm.py +++ b/examples/pytorch/language-modeling/run_mlm.py @@ -331,7 +331,7 @@ def main(): streaming=data_args.streaming, trust_remote_code=model_args.trust_remote_code, ) - if "validation" not in raw_datasets.keys(): + if "validation" not in raw_datasets: raw_datasets["validation"] = load_dataset( data_args.dataset_name, data_args.dataset_config_name, @@ -368,7 +368,7 @@ def main(): ) # If no validation data is there, validation_split_percentage will be used to divide the dataset. - if "validation" not in raw_datasets.keys(): + if "validation" not in raw_datasets: raw_datasets["validation"] = load_dataset( extension, data_files=data_files, @@ -541,7 +541,7 @@ def main(): # max_seq_length. def group_texts(examples): # Concatenate all texts. - concatenated_examples = {k: list(chain(*examples[k])) for k in examples.keys()} + concatenated_examples = {k: list(chain(*examples[k])) for k in examples} total_length = len(concatenated_examples[list(examples.keys())[0]]) # We drop the small remainder, and if the total_length < max_seq_length we exclude this batch and return an empty dict. # We could add padding if the model supported it instead of this drop, you can customize this part to your needs. diff --git a/examples/pytorch/language-modeling/run_mlm_no_trainer.py b/examples/pytorch/language-modeling/run_mlm_no_trainer.py index 559194aaa9..d975e1acf6 100755 --- a/examples/pytorch/language-modeling/run_mlm_no_trainer.py +++ b/examples/pytorch/language-modeling/run_mlm_no_trainer.py @@ -342,7 +342,7 @@ def main(): raw_datasets = load_dataset( args.dataset_name, args.dataset_config_name, trust_remote_code=args.trust_remote_code ) - if "validation" not in raw_datasets.keys(): + if "validation" not in raw_datasets: raw_datasets["validation"] = load_dataset( args.dataset_name, args.dataset_config_name, @@ -367,7 +367,7 @@ def main(): extension = "text" raw_datasets = load_dataset(extension, data_files=data_files) # If no validation data is there, validation_split_percentage will be used to divide the dataset. - if "validation" not in raw_datasets.keys(): + if "validation" not in raw_datasets: raw_datasets["validation"] = load_dataset( extension, data_files=data_files, @@ -496,7 +496,7 @@ def main(): # max_seq_length. def group_texts(examples): # Concatenate all texts. - concatenated_examples = {k: list(chain(*examples[k])) for k in examples.keys()} + concatenated_examples = {k: list(chain(*examples[k])) for k in examples} total_length = len(concatenated_examples[list(examples.keys())[0]]) # We drop the small remainder, and if the total_length < max_seq_length we exclude this batch and return an empty dict. # We could add padding if the model supported it instead of this drop, you can customize this part to your needs. diff --git a/examples/pytorch/language-modeling/run_plm.py b/examples/pytorch/language-modeling/run_plm.py index 31c70d6a57..d7bb35d595 100755 --- a/examples/pytorch/language-modeling/run_plm.py +++ b/examples/pytorch/language-modeling/run_plm.py @@ -309,7 +309,7 @@ def main(): token=model_args.token, trust_remote_code=data_args.trust_remote_code, ) - if "validation" not in raw_datasets.keys(): + if "validation" not in raw_datasets: raw_datasets["validation"] = load_dataset( data_args.dataset_name, data_args.dataset_config_name, @@ -338,7 +338,7 @@ def main(): extension = "text" raw_datasets = load_dataset(extension, data_files=data_files, cache_dir=model_args.cache_dir) # If no validation data is there, validation_split_percentage will be used to divide the dataset. - if "validation" not in raw_datasets.keys(): + if "validation" not in raw_datasets: raw_datasets["validation"] = load_dataset( extension, data_files=data_files, @@ -466,7 +466,7 @@ def main(): # max_seq_length. def group_texts(examples): # Concatenate all texts. - concatenated_examples = {k: list(chain(*examples[k])) for k in examples.keys()} + concatenated_examples = {k: list(chain(*examples[k])) for k in examples} total_length = len(concatenated_examples[list(examples.keys())[0]]) # We drop the small remainder, and if the total_length < max_seq_length we exclude this batch and return an empty dict. # We could add padding if the model supported it instead of this drop, you can customize this part to your needs. diff --git a/examples/pytorch/multiple-choice/run_swag_no_trainer.py b/examples/pytorch/multiple-choice/run_swag_no_trainer.py index 60467170ca..d99cd869f0 100755 --- a/examples/pytorch/multiple-choice/run_swag_no_trainer.py +++ b/examples/pytorch/multiple-choice/run_swag_no_trainer.py @@ -316,7 +316,7 @@ def main(): raw_datasets = load_dataset(extension, data_files=data_files) # Trim a number of training examples if args.debug: - for split in raw_datasets.keys(): + for split in raw_datasets: raw_datasets[split] = raw_datasets[split].select(range(100)) # See more about loading any type of standard or custom dataset (from files, python dict, pandas DataFrame, etc) at # https://huggingface.co/docs/datasets/loading_datasets. diff --git a/examples/pytorch/object-detection/run_object_detection.py b/examples/pytorch/object-detection/run_object_detection.py index f97be95d0b..71fff54cca 100644 --- a/examples/pytorch/object-detection/run_object_detection.py +++ b/examples/pytorch/object-detection/run_object_detection.py @@ -403,7 +403,7 @@ def main(): ) # If we don't have a validation split, split off a percentage of train as validation - data_args.train_val_split = None if "validation" in dataset.keys() else data_args.train_val_split + data_args.train_val_split = None if "validation" in dataset else data_args.train_val_split if isinstance(data_args.train_val_split, float) and data_args.train_val_split > 0.0: split = dataset["train"].train_test_split(data_args.train_val_split, seed=training_args.seed) dataset["train"] = split["train"] diff --git a/examples/pytorch/object-detection/run_object_detection_no_trainer.py b/examples/pytorch/object-detection/run_object_detection_no_trainer.py index 805cc18d89..aaf54138f0 100644 --- a/examples/pytorch/object-detection/run_object_detection_no_trainer.py +++ b/examples/pytorch/object-detection/run_object_detection_no_trainer.py @@ -465,7 +465,7 @@ def main(): dataset = load_dataset(args.dataset_name, cache_dir=args.cache_dir, trust_remote_code=args.trust_remote_code) # If we don't have a validation split, split off a percentage of train as validation. - args.train_val_split = None if "validation" in dataset.keys() else args.train_val_split + args.train_val_split = None if "validation" in dataset else args.train_val_split if isinstance(args.train_val_split, float) and args.train_val_split > 0.0: split = dataset["train"].train_test_split(args.train_val_split, seed=args.seed) dataset["train"] = split["train"] diff --git a/examples/pytorch/semantic-segmentation/run_semantic_segmentation.py b/examples/pytorch/semantic-segmentation/run_semantic_segmentation.py index 9db580c376..3facad307e 100644 --- a/examples/pytorch/semantic-segmentation/run_semantic_segmentation.py +++ b/examples/pytorch/semantic-segmentation/run_semantic_segmentation.py @@ -255,7 +255,7 @@ def main(): dataset = dataset.rename_columns({"annotation": "label"}) # If we don't have a validation split, split off a percentage of train as validation. - data_args.train_val_split = None if "validation" in dataset.keys() else data_args.train_val_split + data_args.train_val_split = None if "validation" in dataset else data_args.train_val_split if isinstance(data_args.train_val_split, float) and data_args.train_val_split > 0.0: split = dataset["train"].train_test_split(data_args.train_val_split) dataset["train"] = split["train"] diff --git a/examples/pytorch/semantic-segmentation/run_semantic_segmentation_no_trainer.py b/examples/pytorch/semantic-segmentation/run_semantic_segmentation_no_trainer.py index d30e72cb64..3d2caf88bf 100644 --- a/examples/pytorch/semantic-segmentation/run_semantic_segmentation_no_trainer.py +++ b/examples/pytorch/semantic-segmentation/run_semantic_segmentation_no_trainer.py @@ -314,7 +314,7 @@ def main(): dataset = dataset.rename_columns({"annotation": "label"}) # If we don't have a validation split, split off a percentage of train as validation. - args.train_val_split = None if "validation" in dataset.keys() else args.train_val_split + args.train_val_split = None if "validation" in dataset else args.train_val_split if isinstance(args.train_val_split, float) and args.train_val_split > 0.0: split = dataset["train"].train_test_split(args.train_val_split) dataset["train"] = split["train"] diff --git a/examples/pytorch/text-classification/run_classification.py b/examples/pytorch/text-classification/run_classification.py index c146d4c800..80c616ee74 100755 --- a/examples/pytorch/text-classification/run_classification.py +++ b/examples/pytorch/text-classification/run_classification.py @@ -378,7 +378,7 @@ def main(): else: raise ValueError("Need either a dataset name or a test file for `do_predict`.") - for key in data_files.keys(): + for key in data_files: logger.info(f"load a local file for {key}: {data_files[key]}") if data_args.train_file.endswith(".csv"): @@ -422,13 +422,13 @@ def main(): raw_datasets.pop(data_args.test_split_name) if data_args.remove_columns is not None: - for split in raw_datasets.keys(): + for split in raw_datasets: for column in data_args.remove_columns.split(","): logger.info(f"removing column {column} from split {split}") raw_datasets[split] = raw_datasets[split].remove_columns(column) if data_args.label_column_name is not None and data_args.label_column_name != "label": - for key in raw_datasets.keys(): + for key in raw_datasets: raw_datasets[key] = raw_datasets[key].rename_column(data_args.label_column_name, "label") # Trying to have good defaults here, don't hesitate to tweak to your needs. @@ -444,7 +444,7 @@ def main(): label_list = None num_labels = 1 # regression requires float as label type, let's cast it if needed - for split in raw_datasets.keys(): + for split in raw_datasets: if raw_datasets[split].features["label"].dtype not in ["float32", "float64"]: logger.warning( f"Label type for {split} set to float32, was {raw_datasets[split].features['label'].dtype}" diff --git a/examples/pytorch/text-classification/run_glue.py b/examples/pytorch/text-classification/run_glue.py index 587f75084e..63b3b9ab8f 100755 --- a/examples/pytorch/text-classification/run_glue.py +++ b/examples/pytorch/text-classification/run_glue.py @@ -161,7 +161,7 @@ class DataTrainingArguments: def __post_init__(self): if self.task_name is not None: self.task_name = self.task_name.lower() - if self.task_name not in task_to_keys.keys(): + if self.task_name not in task_to_keys: raise ValueError("Unknown task, you should pick one in " + ",".join(task_to_keys.keys())) elif self.dataset_name is not None: pass @@ -335,7 +335,7 @@ def main(): else: raise ValueError("Need either a GLUE task or a test file for `do_predict`.") - for key in data_files.keys(): + for key in data_files: logger.info(f"load a local file for {key}: {data_files[key]}") if data_args.train_file.endswith(".csv"): diff --git a/examples/pytorch/text-generation/run_generation.py b/examples/pytorch/text-generation/run_generation.py index cea4c881ea..f89ca96eef 100755 --- a/examples/pytorch/text-generation/run_generation.py +++ b/examples/pytorch/text-generation/run_generation.py @@ -370,7 +370,7 @@ def main(): prompt_text = args.prompt if args.prompt else input("Model prompt >>> ") # Different models need different input formatting and/or extra arguments - requires_preprocessing = args.model_type in PREPROCESSING_FUNCTIONS.keys() + requires_preprocessing = args.model_type in PREPROCESSING_FUNCTIONS if requires_preprocessing: prepare_input = PREPROCESSING_FUNCTIONS.get(args.model_type) preprocessed_prompt_text = prepare_input(args, model, tokenizer, prompt_text) diff --git a/examples/pytorch/token-classification/run_ner_no_trainer.py b/examples/pytorch/token-classification/run_ner_no_trainer.py index c87a3f29fd..841337d676 100755 --- a/examples/pytorch/token-classification/run_ner_no_trainer.py +++ b/examples/pytorch/token-classification/run_ner_no_trainer.py @@ -357,7 +357,7 @@ def main(): raw_datasets = load_dataset(extension, data_files=data_files) # Trim a number of training examples if args.debug: - for split in raw_datasets.keys(): + for split in raw_datasets: raw_datasets[split] = raw_datasets[split].select(range(100)) # See more about loading any type of standard or custom dataset (from files, python dict, pandas DataFrame, etc) at # https://huggingface.co/docs/datasets/loading_datasets. diff --git a/examples/tensorflow/image-classification/run_image_classification.py b/examples/tensorflow/image-classification/run_image_classification.py index 5339e77c22..097ef4c67d 100644 --- a/examples/tensorflow/image-classification/run_image_classification.py +++ b/examples/tensorflow/image-classification/run_image_classification.py @@ -327,7 +327,7 @@ def main(): ) # If we don't have a validation split, split off a percentage of train as validation. - data_args.train_val_split = None if "validation" in dataset.keys() else data_args.train_val_split + data_args.train_val_split = None if "validation" in dataset else data_args.train_val_split if isinstance(data_args.train_val_split, float) and data_args.train_val_split > 0.0: split = dataset["train"].train_test_split(data_args.train_val_split) dataset["train"] = split["train"] diff --git a/examples/tensorflow/language-modeling-tpu/prepare_tfrecord_shards.py b/examples/tensorflow/language-modeling-tpu/prepare_tfrecord_shards.py index aa90a9db52..a839c79a5c 100644 --- a/examples/tensorflow/language-modeling-tpu/prepare_tfrecord_shards.py +++ b/examples/tensorflow/language-modeling-tpu/prepare_tfrecord_shards.py @@ -151,7 +151,7 @@ def main(args): def group_texts(examples): # Concatenate all texts. - concatenated_examples = {k: sum(examples[k], []) for k in examples.keys()} + concatenated_examples = {k: sum(examples[k], []) for k in examples} total_length = len(concatenated_examples[list(examples.keys())[0]]) # We drop the small remainder, though you could add padding instead if the model supports it # In this, as in all things, we advise you to follow your heart 🫀 diff --git a/examples/tensorflow/language-modeling/run_clm.py b/examples/tensorflow/language-modeling/run_clm.py index 0d776229d2..28a955734b 100755 --- a/examples/tensorflow/language-modeling/run_clm.py +++ b/examples/tensorflow/language-modeling/run_clm.py @@ -299,7 +299,7 @@ def main(): token=model_args.token, trust_remote_code=model_args.trust_remote_code, ) - if "validation" not in raw_datasets.keys(): + if "validation" not in raw_datasets: raw_datasets["validation"] = load_dataset( data_args.dataset_name, data_args.dataset_config_name, @@ -339,7 +339,7 @@ def main(): **dataset_args, ) # If no validation data is there, validation_split_percentage will be used to divide the dataset. - if "validation" not in raw_datasets.keys(): + if "validation" not in raw_datasets: raw_datasets["validation"] = load_dataset( extension, data_files=data_files, @@ -429,7 +429,7 @@ def main(): # Main data processing function that will concatenate all texts from our dataset and generate chunks of block_size. def group_texts(examples): # Concatenate all texts. - concatenated_examples = {k: list(chain(*examples[k])) for k in examples.keys()} + concatenated_examples = {k: list(chain(*examples[k])) for k in examples} total_length = len(concatenated_examples[list(examples.keys())[0]]) # We drop the small remainder, we could add padding if the model supported it instead of this drop, you can # customize this part to your needs. diff --git a/examples/tensorflow/language-modeling/run_mlm.py b/examples/tensorflow/language-modeling/run_mlm.py index 5dcbd35729..ef2c43c69e 100755 --- a/examples/tensorflow/language-modeling/run_mlm.py +++ b/examples/tensorflow/language-modeling/run_mlm.py @@ -308,7 +308,7 @@ def main(): token=model_args.token, trust_remote_code=model_args.trust_remote_code, ) - if "validation" not in raw_datasets.keys(): + if "validation" not in raw_datasets: raw_datasets["validation"] = load_dataset( data_args.dataset_name, data_args.dataset_config_name, @@ -446,7 +446,7 @@ def main(): # max_seq_length. def group_texts(examples): # Concatenate all texts. - concatenated_examples = {k: list(chain(*examples[k])) for k in examples.keys()} + concatenated_examples = {k: list(chain(*examples[k])) for k in examples} total_length = len(concatenated_examples[list(examples.keys())[0]]) # We drop the small remainder, we could add padding if the model supported it instead of this drop, you can # customize this part to your needs. diff --git a/examples/tensorflow/text-classification/run_glue.py b/examples/tensorflow/text-classification/run_glue.py index a42d5966b6..da09963028 100644 --- a/examples/tensorflow/text-classification/run_glue.py +++ b/examples/tensorflow/text-classification/run_glue.py @@ -132,7 +132,7 @@ class DataTrainingArguments: def __post_init__(self): self.task_name = self.task_name.lower() - if self.task_name not in task_to_keys.keys(): + if self.task_name not in task_to_keys: raise ValueError("Unknown task, you should pick one in " + ",".join(task_to_keys.keys())) @@ -268,7 +268,7 @@ def main(): data_files = {"data": data_args.predict_file} - for key in data_files.keys(): + for key in data_files: logger.info(f"Loading a local file for {key}: {data_files[key]}") if data_args.predict_file.endswith(".csv"): @@ -406,7 +406,7 @@ def main(): "test_mismatched": data_args.max_predict_samples, "user_data": None, } - for key in datasets.keys(): + for key in datasets: if key == "train" or key.startswith("validation"): assert "label" in datasets[key].features, f"Missing labels from {key} data!" if key == "train": diff --git a/examples/tensorflow/text-classification/run_text_classification.py b/examples/tensorflow/text-classification/run_text_classification.py index 7546c7bd32..c87c904008 100644 --- a/examples/tensorflow/text-classification/run_text_classification.py +++ b/examples/tensorflow/text-classification/run_text_classification.py @@ -272,7 +272,7 @@ def main(): data_files = {"train": data_args.train_file, "validation": data_args.validation_file, "test": data_args.test_file} data_files = {key: file for key, file in data_files.items() if file is not None} - for key in data_files.keys(): + for key in data_files: logger.info(f"Loading a local file for {key}: {data_files[key]}") if data_args.input_file_extension == "csv": diff --git a/src/transformers/configuration_utils.py b/src/transformers/configuration_utils.py index acfd450e87..70f9979a8f 100755 --- a/src/transformers/configuration_utils.py +++ b/src/transformers/configuration_utils.py @@ -678,7 +678,7 @@ class PretrainedConfig(PushToHubMixin): from_auto_class = kwargs.pop("_from_auto", False) commit_hash = kwargs.pop("_commit_hash", None) - gguf_file = kwargs.get("gguf_file", None) + gguf_file = kwargs.get("gguf_file") if trust_remote_code is True: logger.warning( @@ -1033,7 +1033,7 @@ class PretrainedConfig(PushToHubMixin): converts torch.dtype to a string of just the type. For example, `torch.float32` get converted into *"float32"* string, which can then be stored in the json format. """ - if d.get("torch_dtype", None) is not None: + if d.get("torch_dtype") is not None: if isinstance(d["torch_dtype"], dict): d["torch_dtype"] = {k: str(v).split(".")[-1] for k, v in d["torch_dtype"].items()} elif not isinstance(d["torch_dtype"], str): diff --git a/src/transformers/convert_pytorch_checkpoint_to_tf2.py b/src/transformers/convert_pytorch_checkpoint_to_tf2.py index fad6463e98..24e1f85064 100755 --- a/src/transformers/convert_pytorch_checkpoint_to_tf2.py +++ b/src/transformers/convert_pytorch_checkpoint_to_tf2.py @@ -267,7 +267,7 @@ def convert_pt_checkpoint_to_tf( tf_model = model_class(config) # Load weights from tf checkpoint - if pytorch_checkpoint_path in aws_config_map.keys(): + if pytorch_checkpoint_path in aws_config_map: pytorch_checkpoint_path = cached_file( pytorch_checkpoint_path, WEIGHTS_NAME, force_download=not use_cached_models ) diff --git a/src/transformers/data/data_collator.py b/src/transformers/data/data_collator.py index fcefc9270e..18f2980a38 100644 --- a/src/transformers/data/data_collator.py +++ b/src/transformers/data/data_collator.py @@ -326,8 +326,8 @@ class DataCollatorForTokenClassification(DataCollatorMixin): def torch_call(self, features): import torch - label_name = "label" if "label" in features[0].keys() else "labels" - labels = [feature[label_name] for feature in features] if label_name in features[0].keys() else None + label_name = "label" if "label" in features[0] else "labels" + labels = [feature[label_name] for feature in features] if label_name in features[0] else None no_labels_features = [{k: v for k, v in feature.items() if k != label_name} for feature in features] @@ -366,8 +366,8 @@ class DataCollatorForTokenClassification(DataCollatorMixin): def tf_call(self, features): import tensorflow as tf - label_name = "label" if "label" in features[0].keys() else "labels" - labels = [feature[label_name] for feature in features] if label_name in features[0].keys() else None + label_name = "label" if "label" in features[0] else "labels" + labels = [feature[label_name] for feature in features] if label_name in features[0] else None batch = pad_without_fast_tokenizer_warning( self.tokenizer, features, @@ -396,8 +396,8 @@ class DataCollatorForTokenClassification(DataCollatorMixin): return batch def numpy_call(self, features): - label_name = "label" if "label" in features[0].keys() else "labels" - labels = [feature[label_name] for feature in features] if label_name in features[0].keys() else None + label_name = "label" if "label" in features[0] else "labels" + labels = [feature[label_name] for feature in features] if label_name in features[0] else None batch = pad_without_fast_tokenizer_warning( self.tokenizer, features, @@ -573,7 +573,7 @@ class DataCollatorForMultipleChoice(DataCollatorMixin): import torch # Take labels out of the examples beforehand, because they aren't nested. - label_name = "label" if "label" in examples[0].keys() else "labels" + label_name = "label" if "label" in examples[0] else "labels" labels = [example.pop(label_name) for example in examples] batch_size = len(examples) @@ -602,7 +602,7 @@ class DataCollatorForMultipleChoice(DataCollatorMixin): def tf_call(self, features): # Implementation taken from the docs. import tensorflow as tf - label_name = "label" if "label" in features[0].keys() else "labels" + label_name = "label" if "label" in features[0] else "labels" labels = [feature.pop(label_name) for feature in features] batch_size = len(features) num_choices = len(features[0]["input_ids"]) @@ -671,8 +671,8 @@ class DataCollatorForSeq2Seq: if return_tensors is None: return_tensors = self.return_tensors - label_name = "label" if "label" in features[0].keys() else "labels" - labels = [feature[label_name] for feature in features] if label_name in features[0].keys() else None + label_name = "label" if "label" in features[0] else "labels" + labels = [feature[label_name] for feature in features] if label_name in features[0] else None # reconvert list[None] to None if necessary # this might occur when we pass {..., "labels": None} if labels is not None and all(label is None for label in labels): diff --git a/src/transformers/feature_extraction_sequence_utils.py b/src/transformers/feature_extraction_sequence_utils.py index 186f3e3111..e0be17bd7d 100644 --- a/src/transformers/feature_extraction_sequence_utils.py +++ b/src/transformers/feature_extraction_sequence_utils.py @@ -124,7 +124,7 @@ class SequenceFeatureExtractor(FeatureExtractionMixin): # We do this to allow using this method as a collate_fn function in PyTorch Dataloader if isinstance(processed_features, (list, tuple)) and isinstance(processed_features[0], (dict, BatchFeature)): processed_features = { - key: [example[key] for example in processed_features] for key in processed_features[0].keys() + key: [example[key] for example in processed_features] for key in processed_features[0] } # The model's main input name, usually `input_values`, has be passed for padding diff --git a/src/transformers/feature_extraction_utils.py b/src/transformers/feature_extraction_utils.py index d9d6ca0e21..59e1a40191 100644 --- a/src/transformers/feature_extraction_utils.py +++ b/src/transformers/feature_extraction_utils.py @@ -405,7 +405,7 @@ class FeatureExtractionMixin(PushToHubMixin): "The `use_auth_token` argument is deprecated and will be removed in v5 of Transformers. Please use `token` instead.", FutureWarning, ) - if kwargs.get("token", None) is not None: + if kwargs.get("token") is not None: raise ValueError( "`token` and `use_auth_token` are both specified. Please set only the argument `token`." ) diff --git a/src/transformers/generation/candidate_generator.py b/src/transformers/generation/candidate_generator.py index b62d6e7ed1..38a15f3dae 100644 --- a/src/transformers/generation/candidate_generator.py +++ b/src/transformers/generation/candidate_generator.py @@ -132,7 +132,7 @@ class AssistedCandidateGenerator(CandidateGenerator): ) # Remove potential default "logits_to_keep" key - if "logits_to_keep" in assistant_kwargs.keys() and not assistant_model._supports_logits_to_keep(): + if "logits_to_keep" in assistant_kwargs and not assistant_model._supports_logits_to_keep(): del assistant_kwargs["logits_to_keep"] # If the assistant is an encoder-decoder model, assume the encoder is different on the assistant. diff --git a/src/transformers/generation/configuration_utils.py b/src/transformers/generation/configuration_utils.py index 697046b3ed..77ce12d969 100644 --- a/src/transformers/generation/configuration_utils.py +++ b/src/transformers/generation/configuration_utils.py @@ -614,10 +614,7 @@ class GenerationConfig(PushToHubMixin): ) # 1.4. Watermarking attributes if self.watermarking_config is not None: - if not ( - isinstance(self.watermarking_config, WatermarkingConfig) - or isinstance(self.watermarking_config, SynthIDTextWatermarkingConfig) - ): + if not (isinstance(self.watermarking_config, (WatermarkingConfig, SynthIDTextWatermarkingConfig))): minor_issues["watermarking_config"] = ( "`watermarking_config` as a dict is deprecated and will be removed in v4.54.0. Please construct " "`watermarking_config` object with `WatermarkingConfig` or `SynthIDTextWatermarkingConfig` class." @@ -847,7 +844,7 @@ class GenerationConfig(PushToHubMixin): "Please use `token` instead.", FutureWarning, ) - if kwargs.get("token", None) is not None: + if kwargs.get("token") is not None: raise ValueError( "`token` and `use_auth_token` are both specified. Please set only the argument `token`." ) @@ -1110,7 +1107,7 @@ class GenerationConfig(PushToHubMixin): converts torch.dtype to a string of just the type. For example, `torch.float32` get converted into *"float32"* string, which can then be stored in the json format. """ - if d.get("torch_dtype", None) is not None and not isinstance(d["torch_dtype"], str): + if d.get("torch_dtype") is not None and not isinstance(d["torch_dtype"], str): d["torch_dtype"] = str(d["torch_dtype"]).split(".")[1] for value in d.values(): if isinstance(value, dict): @@ -1247,7 +1244,7 @@ class GenerationConfig(PushToHubMixin): if decoder_config is not model_config: default_generation_config = GenerationConfig() decoder_config_dict = decoder_config.to_dict() - for attr in generation_config.to_dict().keys(): + for attr in generation_config.to_dict(): is_unset = getattr(generation_config, attr) == getattr(default_generation_config, attr) if attr in decoder_config_dict and is_unset: setattr(generation_config, attr, decoder_config_dict[attr]) diff --git a/src/transformers/generation/logits_process.py b/src/transformers/generation/logits_process.py index 4948ad8452..c7b6c7a39d 100644 --- a/src/transformers/generation/logits_process.py +++ b/src/transformers/generation/logits_process.py @@ -1241,13 +1241,13 @@ class SequenceBiasLogitsProcessor(LogitsProcessor): f"`sequence_bias` has to be a non-empty dictionary, or non-empty list of lists but is {sequence_bias}." ) if isinstance(sequence_bias, dict) and any( - not isinstance(sequence_ids, tuple) for sequence_ids in sequence_bias.keys() + not isinstance(sequence_ids, tuple) for sequence_ids in sequence_bias ): raise ValueError(f"`sequence_bias` has to be a dict with tuples as keys, but is {sequence_bias}.") if isinstance(sequence_bias, dict) and any( any((not isinstance(token_id, (int, np.integer)) or token_id < 0) for token_id in sequence_ids) or len(sequence_ids) == 0 - for sequence_ids in sequence_bias.keys() + for sequence_ids in sequence_bias ): raise ValueError( f"Each key in `sequence_bias` has to be a non-empty tuple of positive integers, but is " diff --git a/src/transformers/generation/utils.py b/src/transformers/generation/utils.py index 509adaa8e2..acd53a20b7 100644 --- a/src/transformers/generation/utils.py +++ b/src/transformers/generation/utils.py @@ -1867,7 +1867,7 @@ class GenerationMixin(ContinuousMixin): ) decoder_mapped_modules = [ - module_name for module_name in execution_device_map.keys() if decoder_name in module_name + module_name for module_name in execution_device_map if decoder_name in module_name ] # The decoder name may be present in `execution_device_map` in two forms: # a) each layer has a device mapping @@ -5275,7 +5275,7 @@ def stack_model_outputs(model_outputs: list[ModelOutput], config: PretrainedConf # Use a dictionary comprehension to gather attributes from all objects and concatenate them concatenated_data = { k: _concat([getattr(model_output, k) for model_output in model_outputs]) - for k in model_output_cls.__dataclass_fields__.keys() + for k in model_output_cls.__dataclass_fields__ } # Return a new object of the inferred class with the concatenated attributes diff --git a/src/transformers/generation/watermarking.py b/src/transformers/generation/watermarking.py index 9ce2539851..0b0743f99e 100644 --- a/src/transformers/generation/watermarking.py +++ b/src/transformers/generation/watermarking.py @@ -161,7 +161,7 @@ class WatermarkDetector: for batch_idx in range(ngram_tensors.shape[0]): frequencies_table = collections.Counter(ngram_tensors[batch_idx]) ngram_to_watermark_lookup = {} - for ngram_example in frequencies_table.keys(): + for ngram_example in frequencies_table: prefix = ngram_example if selfhash else ngram_example[:-1] target = ngram_example[-1] ngram_to_watermark_lookup[ngram_example] = self._get_ngram_score_cached(prefix, target) diff --git a/src/transformers/image_processing_base.py b/src/transformers/image_processing_base.py index 9b40b0da7b..4d708efb7c 100644 --- a/src/transformers/image_processing_base.py +++ b/src/transformers/image_processing_base.py @@ -229,7 +229,7 @@ class ImageProcessingMixin(PushToHubMixin): "The `use_auth_token` argument is deprecated and will be removed in v5 of Transformers. Please use `token` instead.", FutureWarning, ) - if kwargs.get("token", None) is not None: + if kwargs.get("token") is not None: raise ValueError( "`token` and `use_auth_token` are both specified. Please set only the argument `token`." ) diff --git a/src/transformers/image_processing_utils_fast.py b/src/transformers/image_processing_utils_fast.py index 55bd84cf27..c575770a65 100644 --- a/src/transformers/image_processing_utils_fast.py +++ b/src/transformers/image_processing_utils_fast.py @@ -225,7 +225,7 @@ class BaseImageProcessorFast(BaseImageProcessor): ) crop_size = kwargs.pop("crop_size", self.crop_size) self.crop_size = get_size_dict(crop_size, param_name="crop_size") if crop_size is not None else None - for key in self.valid_kwargs.__annotations__.keys(): + for key in self.valid_kwargs.__annotations__: kwarg = kwargs.pop(key, None) if kwarg is not None: setattr(self, key, kwarg) diff --git a/src/transformers/image_transforms.py b/src/transformers/image_transforms.py index deb988171a..a0bcff7a18 100644 --- a/src/transformers/image_transforms.py +++ b/src/transformers/image_transforms.py @@ -860,14 +860,14 @@ def _group_images_by_shape(nested_images, is_nested: bool = False): def _reconstruct_nested_structure(indices, processed_images): """Helper function to reconstruct a single level nested structure.""" # Find the maximum outer index - max_outer_idx = max(idx[0] for idx in indices.keys()) + max_outer_idx = max(idx[0] for idx in indices) # Create the outer list result = [None] * (max_outer_idx + 1) # Group indices by outer index nested_indices = defaultdict(list) - for i, j in indices.keys(): + for i, j in indices: nested_indices[i].append(j) for i in range(max_outer_idx + 1): diff --git a/src/transformers/integrations/accelerate.py b/src/transformers/integrations/accelerate.py index a404af54b2..9464a4a675 100644 --- a/src/transformers/integrations/accelerate.py +++ b/src/transformers/integrations/accelerate.py @@ -130,7 +130,7 @@ def init_on_device(device: "torch.device", include_buffers: bool = False): nn.Module.register_parameter = register_empty_parameter if include_buffers: nn.Module.register_buffer = register_empty_buffer - for torch_function_name in tensor_constructors_to_patch.keys(): + for torch_function_name in tensor_constructors_to_patch: setattr(torch, torch_function_name, patch_tensor_constructor(getattr(torch, torch_function_name))) yield finally: diff --git a/src/transformers/integrations/bitsandbytes.py b/src/transformers/integrations/bitsandbytes.py index 0508477cd9..1dd32b4268 100644 --- a/src/transformers/integrations/bitsandbytes.py +++ b/src/transformers/integrations/bitsandbytes.py @@ -164,7 +164,7 @@ def _replace_with_bnb_linear( current_key_name = [] current_key_name.append(name) - if (isinstance(module, nn.Linear) or isinstance(module, Conv1D)) and name not in modules_to_not_convert: + if (isinstance(module, (nn.Linear, Conv1D))) and name not in modules_to_not_convert: # Check if the current key is not in the `modules_to_not_convert` current_key_name_str = ".".join(current_key_name) if not any( @@ -382,7 +382,7 @@ def _create_accelerate_new_hook(old_hook): old_hook_attr = old_hook.__dict__ filtered_old_hook_attr = {} old_hook_init_signature = inspect.signature(old_hook_cls.__init__) - for k in old_hook_attr.keys(): + for k in old_hook_attr: if k in old_hook_init_signature.parameters: filtered_old_hook_attr[k] = old_hook_attr[k] new_hook = old_hook_cls(**filtered_old_hook_attr) diff --git a/src/transformers/integrations/deepspeed.py b/src/transformers/integrations/deepspeed.py index dabc9a6ef9..24e8546a50 100644 --- a/src/transformers/integrations/deepspeed.py +++ b/src/transformers/integrations/deepspeed.py @@ -333,7 +333,7 @@ def _load_state_dict_into_zero3_model(model_to_load, state_dict): # In sharded models, each shard has only part of the full state_dict, so only gather # parameters that are in the current state_dict. named_parameters = dict(module.named_parameters(prefix=prefix[:-1], recurse=False)) - params_to_gather = [named_parameters[k] for k in state_dict.keys() if k in named_parameters] + params_to_gather = [named_parameters[k] for k in state_dict if k in named_parameters] if len(params_to_gather) > 0: # because zero3 puts placeholders in model params, this context # manager gathers (unpartitions) the params of the current layer, then loads from diff --git a/src/transformers/integrations/flash_attention.py b/src/transformers/integrations/flash_attention.py index 43c65b46c8..9af6eba11f 100644 --- a/src/transformers/integrations/flash_attention.py +++ b/src/transformers/integrations/flash_attention.py @@ -23,7 +23,7 @@ def flash_attention_forward( softcap: Optional[float] = None, **kwargs, ) -> tuple[torch.Tensor, None]: - if kwargs.get("output_attentions", False) or kwargs.get("head_mask", None) is not None: + if kwargs.get("output_attentions", False) or kwargs.get("head_mask") is not None: logger.warning_once( "`flash_attention_2` does not support `output_attentions=True` or `head_mask`." " Please set your attention to `eager` if you want any of these features." diff --git a/src/transformers/integrations/flex_attention.py b/src/transformers/integrations/flex_attention.py index 9abff30e39..e310ff8ac5 100644 --- a/src/transformers/integrations/flex_attention.py +++ b/src/transformers/integrations/flex_attention.py @@ -282,7 +282,7 @@ def flex_attention_forward( value = repeat_kv(value, query.shape[1] // value.shape[1]) enable_gqa = False - kernel_options = kwargs.get("kernel_options", None) + kernel_options = kwargs.get("kernel_options") attn_output, attention_weights = compile_friendly_flex_attention( query, key, diff --git a/src/transformers/integrations/hqq.py b/src/transformers/integrations/hqq.py index fb0f604bfe..083ec53a2f 100755 --- a/src/transformers/integrations/hqq.py +++ b/src/transformers/integrations/hqq.py @@ -104,7 +104,7 @@ def prepare_for_hqq_linear(model, quantization_config=None, modules_to_not_conve quant_config = quantization_config.quant_config linear_tags = list(set(linear_tags) - set(skip_modules) - set(modules_to_not_convert)) - if any(key in linear_tags for key in quant_config.keys()): + if any(key in linear_tags for key in quant_config): # If the user doesn't specify a key from get_linear_tags, the layer is not quantized via (key, None) patch_params = dict.fromkeys(linear_tags) patch_params.update(quant_config) diff --git a/src/transformers/integrations/sdpa_attention.py b/src/transformers/integrations/sdpa_attention.py index bd56e462e3..f46c6f0f70 100644 --- a/src/transformers/integrations/sdpa_attention.py +++ b/src/transformers/integrations/sdpa_attention.py @@ -51,7 +51,7 @@ def sdpa_attention_forward( is_causal: Optional[bool] = None, **kwargs, ) -> tuple[torch.Tensor, None]: - if kwargs.get("output_attentions", False) or kwargs.get("head_mask", None) is not None: + if kwargs.get("output_attentions", False) or kwargs.get("head_mask") is not None: logger.warning_once( "`sdpa` attention does not support `output_attentions=True` or `head_mask`." " Please set your attention to `eager` if you want any of these features." diff --git a/src/transformers/keras_callbacks.py b/src/transformers/keras_callbacks.py index 77c8fe428c..b6669a7b45 100644 --- a/src/transformers/keras_callbacks.py +++ b/src/transformers/keras_callbacks.py @@ -162,7 +162,7 @@ class KerasMetricCallback(keras.callbacks.Callback): def _postprocess_predictions_or_labels(self, inputs): if isinstance(inputs[0], dict): outputs = {} - for key in inputs[0].keys(): + for key in inputs[0]: outputs[key] = self._concatenate_batches([batch[key] for batch in inputs]) # If it's a dict with only one key, just return the array if len(outputs) == 1: @@ -242,7 +242,7 @@ class KerasMetricCallback(keras.callbacks.Callback): labels = {key: batch[key].numpy() for key in self.label_cols} elif isinstance(labels, dict): labels = {key: array.numpy() for key, array in labels.items()} - elif isinstance(labels, list) or isinstance(labels, tuple): + elif isinstance(labels, (list, tuple)): labels = [array.numpy() for array in labels] elif isinstance(labels, tf.Tensor): labels = labels.numpy() diff --git a/src/transformers/loss/loss_deformable_detr.py b/src/transformers/loss/loss_deformable_detr.py index 62080bcb3f..37a93a319d 100644 --- a/src/transformers/loss/loss_deformable_detr.py +++ b/src/transformers/loss/loss_deformable_detr.py @@ -136,7 +136,7 @@ def DeformableDetrForSegmentationLoss( aux_weight_dict.update({k + f"_{i}": v for k, v in weight_dict.items()}) weight_dict.update(aux_weight_dict) - loss = sum(loss_dict[k] * weight_dict[k] for k in loss_dict.keys() if k in weight_dict) + loss = sum(loss_dict[k] * weight_dict[k] for k in loss_dict if k in weight_dict) return loss, loss_dict, auxiliary_outputs @@ -174,5 +174,5 @@ def DeformableDetrForObjectDetectionLoss( for i in range(config.decoder_layers - 1): aux_weight_dict.update({k + f"_{i}": v for k, v in weight_dict.items()}) weight_dict.update(aux_weight_dict) - loss = sum(loss_dict[k] * weight_dict[k] for k in loss_dict.keys() if k in weight_dict) + loss = sum(loss_dict[k] * weight_dict[k] for k in loss_dict if k in weight_dict) return loss, loss_dict, auxiliary_outputs diff --git a/src/transformers/loss/loss_for_object_detection.py b/src/transformers/loss/loss_for_object_detection.py index 20b16f58bd..a6b2f9646e 100644 --- a/src/transformers/loss/loss_for_object_detection.py +++ b/src/transformers/loss/loss_for_object_detection.py @@ -522,7 +522,7 @@ def ForSegmentationLoss( for i in range(config.decoder_layers - 1): aux_weight_dict.update({k + f"_{i}": v for k, v in weight_dict.items()}) weight_dict.update(aux_weight_dict) - loss = sum(loss_dict[k] * weight_dict[k] for k in loss_dict.keys() if k in weight_dict) + loss = sum(loss_dict[k] * weight_dict[k] for k in loss_dict if k in weight_dict) return loss, loss_dict, auxiliary_outputs @@ -558,5 +558,5 @@ def ForObjectDetectionLoss( for i in range(config.decoder_layers - 1): aux_weight_dict.update({k + f"_{i}": v for k, v in weight_dict.items()}) weight_dict.update(aux_weight_dict) - loss = sum(loss_dict[k] * weight_dict[k] for k in loss_dict.keys() if k in weight_dict) + loss = sum(loss_dict[k] * weight_dict[k] for k in loss_dict if k in weight_dict) return loss, loss_dict, auxiliary_outputs diff --git a/src/transformers/loss/loss_grounding_dino.py b/src/transformers/loss/loss_grounding_dino.py index 17ea065e84..3e18fac866 100644 --- a/src/transformers/loss/loss_grounding_dino.py +++ b/src/transformers/loss/loss_grounding_dino.py @@ -267,5 +267,5 @@ def GroundingDinoForObjectDetectionLoss( aux_weight_dict.update({k + f"_{i}": v for k, v in weight_dict.items()}) weight_dict.update(aux_weight_dict) - loss = sum(loss_dict[k] * weight_dict[k] for k in loss_dict.keys() if k in weight_dict) + loss = sum(loss_dict[k] * weight_dict[k] for k in loss_dict if k in weight_dict) return loss, loss_dict, auxiliary_outputs diff --git a/src/transformers/modelcard.py b/src/transformers/modelcard.py index 2005390c85..8c68d8b8af 100644 --- a/src/transformers/modelcard.py +++ b/src/transformers/modelcard.py @@ -322,7 +322,7 @@ def infer_metric_tags_from_eval_results(eval_results): if eval_results is None: return {} result = {} - for key in eval_results.keys(): + for key in eval_results: if key.lower().replace(" ", "_") in METRIC_TAGS: result[key.lower().replace(" ", "_")] = key elif key.lower() == "rouge1": @@ -839,7 +839,7 @@ def make_markdown_table(lines): """ if lines is None or len(lines) == 0: return "" - col_widths = {key: len(str(key)) for key in lines[0].keys()} + col_widths = {key: len(str(key)) for key in lines[0]} for line in lines: for key, value in line.items(): if col_widths[key] < len(_maybe_round(value)): diff --git a/src/transformers/modeling_flax_pytorch_utils.py b/src/transformers/modeling_flax_pytorch_utils.py index 7d9f189799..07d83d5e4a 100644 --- a/src/transformers/modeling_flax_pytorch_utils.py +++ b/src/transformers/modeling_flax_pytorch_utils.py @@ -179,10 +179,10 @@ def convert_pytorch_state_dict_to_flax(pt_state_dict, flax_model): flax_state_dict = {} load_model_with_head_into_base_model = (model_prefix not in flax_model_params) and ( - model_prefix in {k.split(".")[0] for k in pt_state_dict.keys()} + model_prefix in {k.split(".")[0] for k in pt_state_dict} ) load_base_model_into_model_with_head = (model_prefix in flax_model_params) and ( - model_prefix not in {k.split(".")[0] for k in pt_state_dict.keys()} + model_prefix not in {k.split(".")[0] for k in pt_state_dict} ) # Need to change some parameters name to match Flax names @@ -267,10 +267,10 @@ def convert_pytorch_sharded_state_dict_to_flax(shard_filenames, flax_model): random_flax_state_dict = flatten_dict(flax_model_params) load_model_with_head_into_base_model = (model_prefix not in flax_model_params) and ( - model_prefix in {k.split(".")[0] for k in pt_state_dict.keys()} + model_prefix in {k.split(".")[0] for k in pt_state_dict} ) load_base_model_into_model_with_head = (model_prefix in flax_model_params) and ( - model_prefix not in {k.split(".")[0] for k in pt_state_dict.keys()} + model_prefix not in {k.split(".")[0] for k in pt_state_dict} ) # Need to change some parameters name to match Flax names for pt_key, pt_tensor in pt_state_dict.items(): @@ -381,10 +381,10 @@ def load_flax_weights_in_pytorch_model(pt_model, flax_state): pt_model_dict = pt_model.state_dict() load_model_with_head_into_base_model = (pt_model.base_model_prefix in flax_state) and ( - pt_model.base_model_prefix not in {k.split(".")[0] for k in pt_model_dict.keys()} + pt_model.base_model_prefix not in {k.split(".")[0] for k in pt_model_dict} ) load_base_model_into_model_with_head = (pt_model.base_model_prefix not in flax_state) and ( - pt_model.base_model_prefix in {k.split(".")[0] for k in pt_model_dict.keys()} + pt_model.base_model_prefix in {k.split(".")[0] for k in pt_model_dict} ) # keep track of unexpected & missing keys diff --git a/src/transformers/modeling_flax_utils.py b/src/transformers/modeling_flax_utils.py index 3f94a3c6ce..bc9a4d473f 100644 --- a/src/transformers/modeling_flax_utils.py +++ b/src/transformers/modeling_flax_utils.py @@ -138,7 +138,7 @@ def flax_shard_checkpoint(params, max_shard_size="10GB"): for idx, shard in enumerate(sharded_state_dicts): shard_file = FLAX_WEIGHTS_NAME.replace(".msgpack", f"-{idx + 1:05d}-of-{len(sharded_state_dicts):05d}.msgpack") shards[shard_file] = shard - for weight_name in shard.keys(): + for weight_name in shard: weight_map[weight_name] = shard_file # Add the metadata @@ -963,7 +963,7 @@ class FlaxPreTrainedModel(PushToHubMixin, FlaxGenerationMixin): # Mismatched keys contains tuples key/shape1/shape2 of weights in the checkpoint that have a shape not # matching the weights in the model. mismatched_keys = [] - for key in state.keys(): + for key in state: if key in random_state and state[key].shape != random_state[key].shape: if ignore_mismatched_sizes: mismatched_keys.append((key, state[key].shape, random_state[key].shape)) @@ -1169,11 +1169,7 @@ class FlaxPreTrainedModel(PushToHubMixin, FlaxGenerationMixin): for filename in os.listdir(save_directory): full_filename = os.path.join(save_directory, filename) weights_no_suffix = weights_name.replace(".bin", "").replace(".safetensors", "") - if ( - filename.startswith(weights_no_suffix) - and os.path.isfile(full_filename) - and filename not in shards.keys() - ): + if filename.startswith(weights_no_suffix) and os.path.isfile(full_filename) and filename not in shards: os.remove(full_filename) if index is None: diff --git a/src/transformers/modeling_gguf_pytorch_utils.py b/src/transformers/modeling_gguf_pytorch_utils.py index f736e37d4e..feb6c6c391 100644 --- a/src/transformers/modeling_gguf_pytorch_utils.py +++ b/src/transformers/modeling_gguf_pytorch_utils.py @@ -315,7 +315,7 @@ def get_gguf_hf_weights_map( # hf => gguf and gguf => hf mappings are reversed gguf_to_hf_name_map = {} state_dict = hf_model.state_dict() - for hf_name in state_dict.keys(): + for hf_name in state_dict: # An exception for qwen2moe model, where the expert layers are packed if model_type == "qwen2moe" and "mlp.experts." in hf_name: hf_name = re.sub(r"mlp.experts.\d+.", "mlp.experts.", hf_name) diff --git a/src/transformers/modeling_tf_pytorch_utils.py b/src/transformers/modeling_tf_pytorch_utils.py index 5b9f38e1bc..8f688af7be 100644 --- a/src/transformers/modeling_tf_pytorch_utils.py +++ b/src/transformers/modeling_tf_pytorch_utils.py @@ -330,7 +330,7 @@ def load_pytorch_state_dict_in_tf2_model( tf_model(tf_inputs, training=False) # Make sure model is built # Convert old format to new format if needed from a PyTorch state_dict tf_keys_to_pt_keys = {} - for key in pt_state_dict.keys(): + for key in pt_state_dict: new_key = None if "gamma" in key: new_key = key.replace("gamma", "weight") @@ -361,7 +361,7 @@ def load_pytorch_state_dict_in_tf2_model( # and there is no MainLayer class. This means that TF base classes have one # extra layer in their weight names, corresponding to the MainLayer class. This code block compensates for that. start_prefix_to_remove = "" - if not any(s.startswith(tf_model.base_model_prefix) for s in tf_keys_to_pt_keys.keys()): + if not any(s.startswith(tf_model.base_model_prefix) for s in tf_keys_to_pt_keys): start_prefix_to_remove = tf_model.base_model_prefix + "." symbolic_weights = tf_model.trainable_weights + tf_model.non_trainable_weights @@ -573,7 +573,7 @@ def load_tf2_state_dict_in_pytorch_model(pt_model, tf_state_dict, allow_missing_ # Make sure we are able to load PyTorch base models as well as derived models (with heads) # TF models always have a prefix, some of PyTorch models (base ones) don't start_prefix_to_remove = "" - if not any(s.startswith(pt_model.base_model_prefix) for s in current_pt_params_dict.keys()): + if not any(s.startswith(pt_model.base_model_prefix) for s in current_pt_params_dict): start_prefix_to_remove = pt_model.base_model_prefix + "." # Build a map from potential PyTorch weight names to TF 2.0 Variables diff --git a/src/transformers/modeling_tf_utils.py b/src/transformers/modeling_tf_utils.py index 3e2564bc7a..c7bb80656d 100644 --- a/src/transformers/modeling_tf_utils.py +++ b/src/transformers/modeling_tf_utils.py @@ -991,13 +991,13 @@ def load_tf_weights_from_h5(model, resolved_archive_file, ignore_mismatched_size # here we check if the current weight is among the weights from the H5 file # If yes, get the weight_value of the corresponding weight from the H5 file # If not, make the value to None - saved_weight_value = saved_weights.get(symbolic_weight_name, None) + saved_weight_value = saved_weights.get(symbolic_weight_name) # Retrocompatibility patch: some embeddings are stored with the weights name (e.g. Bart's # `model.shared/embeddings:0` are stored as `model.shared/weights:0`) if saved_weight_value is None and symbolic_weight_name.endswith("embeddings:0"): symbolic_weight_name = symbolic_weight_name[:-12] + "weight:0" - saved_weight_value = saved_weights.get(symbolic_weight_name, None) + saved_weight_value = saved_weights.get(symbolic_weight_name) # Add the updated name to the final list for computing missing/unexpected values symbolic_weights_names.add(symbolic_weight_name) @@ -1637,7 +1637,7 @@ class TFPreTrainedModel(keras.Model, TFModelUtilsMixin, TFGenerationMixin, PushT for key, val in y.items(): if key in arg_names and key not in x: x[key] = val - elif output_to_label.get(key, None) in arg_names and key not in x: + elif output_to_label.get(key) in arg_names and key not in x: x[output_to_label[key]] = val if y is None: y = {key: val for key, val in x.items() if key in label_kwargs} @@ -1662,7 +1662,7 @@ class TFPreTrainedModel(keras.Model, TFModelUtilsMixin, TFGenerationMixin, PushT # This next block matches outputs to label keys. Tensorflow's standard method for doing this # can get very confused if any of the keys contain nested values (e.g. lists/tuples of Tensors) if isinstance(y, dict) and len(y) == 1: - if list(y.keys())[0] in y_pred.keys(): + if list(y.keys())[0] in y_pred: y_pred = y_pred[list(y.keys())[0]] elif list(y_pred.keys())[0] == "loss": y_pred = y_pred[1] @@ -1672,7 +1672,7 @@ class TFPreTrainedModel(keras.Model, TFModelUtilsMixin, TFGenerationMixin, PushT elif isinstance(y, dict): # If the labels are a dict, match keys from the output by name y_pred = {key: val for key, val in y_pred.items() if key in y} - elif isinstance(y, tuple) or isinstance(y, list): + elif isinstance(y, (tuple, list)): # If the labels are a tuple/list, match keys to the output by order, skipping the loss. if list(y_pred.keys())[0] == "loss": y_pred = y_pred.to_tuple()[1:] @@ -1745,7 +1745,7 @@ class TFPreTrainedModel(keras.Model, TFModelUtilsMixin, TFGenerationMixin, PushT for key, val in y.items(): if key in arg_names and key not in x: x[key] = val - elif output_to_label.get(key, None) in arg_names and key not in x: + elif output_to_label.get(key) in arg_names and key not in x: x[output_to_label[key]] = val if y is None: y = {key: val for key, val in x.items() if key in label_kwargs} @@ -1769,7 +1769,7 @@ class TFPreTrainedModel(keras.Model, TFModelUtilsMixin, TFGenerationMixin, PushT # This next block matches outputs to label keys. Tensorflow's standard method for doing this # can get very confused if any of the keys contain nested values (e.g. lists/tuples of Tensors) if isinstance(y, dict) and len(y) == 1: - if list(y.keys())[0] in y_pred.keys(): + if list(y.keys())[0] in y_pred: y_pred = y_pred[list(y.keys())[0]] elif list(y_pred.keys())[0] == "loss": y_pred = y_pred[1] @@ -1779,7 +1779,7 @@ class TFPreTrainedModel(keras.Model, TFModelUtilsMixin, TFGenerationMixin, PushT elif isinstance(y, dict): # If the labels are a dict, match keys from the output by name y_pred = {key: val for key, val in y_pred.items() if key in y} - elif isinstance(y, tuple) or isinstance(y, list): + elif isinstance(y, (tuple, list)): # If the labels are a tuple/list, match keys to the output by order, skipping the loss. if list(y_pred.keys())[0] == "loss": y_pred = y_pred.to_tuple()[1:] @@ -2464,11 +2464,7 @@ class TFPreTrainedModel(keras.Model, TFModelUtilsMixin, TFGenerationMixin, PushT # If we have a shard file that is not going to be replaced, we delete it, but only from the main process # in distributed settings to avoid race conditions. weights_no_suffix = weights_name.replace(".bin", "").replace(".safetensors", "") - if ( - filename.startswith(weights_no_suffix) - and os.path.isfile(full_filename) - and filename not in shards.keys() - ): + if filename.startswith(weights_no_suffix) and os.path.isfile(full_filename) and filename not in shards: os.remove(full_filename) if index is None: diff --git a/src/transformers/modeling_utils.py b/src/transformers/modeling_utils.py index a97b446c96..5a7c6eb087 100644 --- a/src/transformers/modeling_utils.py +++ b/src/transformers/modeling_utils.py @@ -1357,12 +1357,12 @@ def _get_torch_dtype( elif hasattr(torch, torch_dtype): torch_dtype = getattr(torch, torch_dtype) config.torch_dtype = torch_dtype - for sub_config_key in config.sub_configs.keys(): + for sub_config_key in config.sub_configs: sub_config = getattr(config, sub_config_key) sub_config.torch_dtype = torch_dtype elif isinstance(torch_dtype, torch.dtype): config.torch_dtype = torch_dtype - for sub_config_key in config.sub_configs.keys(): + for sub_config_key in config.sub_configs: sub_config = getattr(config, sub_config_key) sub_config.torch_dtype = torch_dtype elif isinstance(torch_dtype, dict): @@ -1388,7 +1388,7 @@ def _get_torch_dtype( # set fp32 as the default dtype for BC default_dtype = torch.get_default_dtype() config.torch_dtype = default_dtype - for key in config.sub_configs.keys(): + for key in config.sub_configs: value = getattr(config, key) value.torch_dtype = default_dtype @@ -1446,7 +1446,7 @@ def _get_device_map( # `inferred_max_memory` contains non-reserved memory. There may be *unused* reserved memory in the GPU, # which we can use to allocate parameters. - for device_name in inferred_max_memory.keys(): + for device_name in inferred_max_memory: if isinstance(device_name, int): # it's a GPU device if is_torch_xpu_available(): unused_memory = torch.xpu.memory_reserved(device_name) - torch.xpu.memory_allocated(device_name) @@ -3002,9 +3002,9 @@ class PreTrainedModel(nn.Module, EmbeddingAccessMixin, ModuleUtilsMixin, PushToH f"Encoder module {encoder_pointer} does not match decoder module {decoder_pointer}" ) - all_encoder_weights = {module_name + "/" + sub_name for sub_name in encoder_modules.keys()} + all_encoder_weights = {module_name + "/" + sub_name for sub_name in encoder_modules} encoder_layer_pos = 0 - for name in decoder_modules.keys(): + for name in decoder_modules: if name.isdigit(): encoder_name = str(int(name) + encoder_layer_pos) decoder_name = name @@ -3942,7 +3942,7 @@ class PreTrainedModel(nn.Module, EmbeddingAccessMixin, ModuleUtilsMixin, PushToH # Handle the case where some state_dict keys shouldn't be saved if self._keys_to_ignore_on_save is not None: for ignore_key in self._keys_to_ignore_on_save: - if ignore_key in state_dict.keys(): + if ignore_key in state_dict: del state_dict[ignore_key] # Rename state_dict keys before saving to file. Do nothing unless overridden in a particular model. @@ -4057,7 +4057,7 @@ class PreTrainedModel(nn.Module, EmbeddingAccessMixin, ModuleUtilsMixin, PushToH if ( filename.startswith(weights_no_suffix) and os.path.isfile(full_filename) - and filename not in state_dict_split.filename_to_tensors.keys() + and filename not in state_dict_split.filename_to_tensors and is_main_process and reg.fullmatch(filename_no_suffix) is not None ): @@ -5334,7 +5334,7 @@ class PreTrainedModel(nn.Module, EmbeddingAccessMixin, ModuleUtilsMixin, PushToH if device_map is not None: device_map = {k[len(_prefix) :] if k.startswith(_prefix) else k: v for k, v in device_map.items()} # small sanity check: the base model should not contain task-specific head keys - task_specific_expected_keys = [s for s in model.state_dict().keys() if not s.startswith(_prefix)] + task_specific_expected_keys = [s for s in model.state_dict() if not s.startswith(_prefix)] base_model_expected_keys = list(model_to_load.state_dict().keys()) if any( key in task_specific_expected_keys and key not in base_model_expected_keys for key in checkpoint_keys diff --git a/src/transformers/models/aria/modeling_aria.py b/src/transformers/models/aria/modeling_aria.py index 9144cc6bdd..9e6125720d 100644 --- a/src/transformers/models/aria/modeling_aria.py +++ b/src/transformers/models/aria/modeling_aria.py @@ -191,7 +191,7 @@ class AriaProjector(nn.Module): """ batch_size, num_patches = key_value_states.shape[0], key_value_states.shape[1] - if num_patches not in self.patch_to_query_dict.keys(): + if num_patches not in self.patch_to_query_dict: raise KeyError( f"Number of patches {num_patches} not found in patch_to_query_dict amongst possible values {self.patch_to_query_dict.keys()}." ) diff --git a/src/transformers/models/aria/modular_aria.py b/src/transformers/models/aria/modular_aria.py index a531bc43b3..c27916a0df 100644 --- a/src/transformers/models/aria/modular_aria.py +++ b/src/transformers/models/aria/modular_aria.py @@ -438,7 +438,7 @@ class AriaProjector(nn.Module): """ batch_size, num_patches = key_value_states.shape[0], key_value_states.shape[1] - if num_patches not in self.patch_to_query_dict.keys(): + if num_patches not in self.patch_to_query_dict: raise KeyError( f"Number of patches {num_patches} not found in patch_to_query_dict amongst possible values {self.patch_to_query_dict.keys()}." ) diff --git a/src/transformers/models/audio_spectrogram_transformer/convert_audio_spectrogram_transformer_original_to_pytorch.py b/src/transformers/models/audio_spectrogram_transformer/convert_audio_spectrogram_transformer_original_to_pytorch.py index 119114033c..325e0f65b4 100644 --- a/src/transformers/models/audio_spectrogram_transformer/convert_audio_spectrogram_transformer_original_to_pytorch.py +++ b/src/transformers/models/audio_spectrogram_transformer/convert_audio_spectrogram_transformer_original_to_pytorch.py @@ -105,7 +105,7 @@ def rename_key(name): def convert_state_dict(orig_state_dict, config): - for key in orig_state_dict.copy().keys(): + for key in orig_state_dict.copy(): val = orig_state_dict.pop(key) if "qkv" in key: diff --git a/src/transformers/models/auto/auto_factory.py b/src/transformers/models/auto/auto_factory.py index eeb959de18..8439affd6d 100644 --- a/src/transformers/models/auto/auto_factory.py +++ b/src/transformers/models/auto/auto_factory.py @@ -425,7 +425,7 @@ class _BaseAutoModelClass: def from_config(cls, config, **kwargs): trust_remote_code = kwargs.pop("trust_remote_code", None) has_remote_code = hasattr(config, "auto_map") and cls.__name__ in config.auto_map - has_local_code = type(config) in cls._model_mapping.keys() + has_local_code = type(config) in cls._model_mapping if has_remote_code: class_ref = config.auto_map[cls.__name__] if "--" in class_ref: @@ -451,13 +451,13 @@ class _BaseAutoModelClass: _ = kwargs.pop("code_revision", None) model_class = add_generation_mixin_to_remote_model(model_class) return model_class._from_config(config, **kwargs) - elif type(config) in cls._model_mapping.keys(): + elif type(config) in cls._model_mapping: model_class = _get_model_class(config, cls._model_mapping) return model_class._from_config(config, **kwargs) raise ValueError( f"Unrecognized configuration class {config.__class__} for this kind of AutoModel: {cls.__name__}.\n" - f"Model type should be one of {', '.join(c.__name__ for c in cls._model_mapping.keys())}." + f"Model type should be one of {', '.join(c.__name__ for c in cls._model_mapping)}." ) @classmethod @@ -468,7 +468,7 @@ class _BaseAutoModelClass: @classmethod def from_pretrained(cls, pretrained_model_name_or_path: Union[str, os.PathLike[str]], *model_args, **kwargs): config = kwargs.pop("config", None) - trust_remote_code = kwargs.get("trust_remote_code", None) + trust_remote_code = kwargs.get("trust_remote_code") kwargs["_from_auto"] = True hub_kwargs_names = [ "cache_dir", @@ -538,10 +538,10 @@ class _BaseAutoModelClass: kwargs_orig = copy.deepcopy(kwargs) # ensure not to pollute the config object with torch_dtype="auto" - since it's # meaningless in the context of the config object - torch.dtype values are acceptable - if kwargs.get("torch_dtype", None) == "auto": + if kwargs.get("torch_dtype") == "auto": _ = kwargs.pop("torch_dtype") # to not overwrite the quantization_config if config has a quantization_config - if kwargs.get("quantization_config", None) is not None: + if kwargs.get("quantization_config") is not None: _ = kwargs.pop("quantization_config") config, kwargs = AutoConfig.from_pretrained( @@ -560,7 +560,7 @@ class _BaseAutoModelClass: kwargs["quantization_config"] = kwargs_orig["quantization_config"] has_remote_code = hasattr(config, "auto_map") and cls.__name__ in config.auto_map - has_local_code = type(config) in cls._model_mapping.keys() + has_local_code = type(config) in cls._model_mapping upstream_repo = None if has_remote_code: class_ref = config.auto_map[cls.__name__] @@ -593,7 +593,7 @@ class _BaseAutoModelClass: return model_class.from_pretrained( pretrained_model_name_or_path, *model_args, config=config, **hub_kwargs, **kwargs ) - elif type(config) in cls._model_mapping.keys(): + elif type(config) in cls._model_mapping: model_class = _get_model_class(config, cls._model_mapping) if model_class.config_class == config.sub_configs.get("text_config", None): config = config.get_text_config() @@ -602,7 +602,7 @@ class _BaseAutoModelClass: ) raise ValueError( f"Unrecognized configuration class {config.__class__} for this kind of AutoModel: {cls.__name__}.\n" - f"Model type should be one of {', '.join(c.__name__ for c in cls._model_mapping.keys())}." + f"Model type should be one of {', '.join(c.__name__ for c in cls._model_mapping)}." ) @classmethod @@ -636,7 +636,7 @@ class _BaseAutoBackboneClass(_BaseAutoModelClass): config = kwargs.pop("config", TimmBackboneConfig()) - if kwargs.get("out_features", None) is not None: + if kwargs.get("out_features") is not None: raise ValueError("Cannot specify `out_features` for timm backbones") if kwargs.get("output_loading_info", False): @@ -820,7 +820,7 @@ class _LazyAutoMapping(OrderedDict[type[PretrainedConfig], _LazyAutoMappingValue mapping_keys = [ self._load_attr_from_module(key, name) for key, name in self._config_mapping.items() - if key in self._model_mapping.keys() + if key in self._model_mapping ] return mapping_keys + list(self._extra_content.keys()) @@ -837,7 +837,7 @@ class _LazyAutoMapping(OrderedDict[type[PretrainedConfig], _LazyAutoMappingValue mapping_values = [ self._load_attr_from_module(key, name) for key, name in self._model_mapping.items() - if key in self._config_mapping.keys() + if key in self._config_mapping ] return mapping_values + list(self._extra_content.values()) @@ -847,8 +847,8 @@ class _LazyAutoMapping(OrderedDict[type[PretrainedConfig], _LazyAutoMappingValue self._load_attr_from_module(key, self._config_mapping[key]), self._load_attr_from_module(key, self._model_mapping[key]), ) - for key in self._model_mapping.keys() - if key in self._config_mapping.keys() + for key in self._model_mapping + if key in self._config_mapping ] return mapping_items + list(self._extra_content.items()) @@ -869,7 +869,7 @@ class _LazyAutoMapping(OrderedDict[type[PretrainedConfig], _LazyAutoMappingValue """ if hasattr(key, "__name__") and key.__name__ in self._reverse_config_mapping: model_type = self._reverse_config_mapping[key.__name__] - if model_type in self._model_mapping.keys() and not exist_ok: + if model_type in self._model_mapping and not exist_ok: raise ValueError(f"'{key}' is already used by a Transformers model.") self._extra_content[key] = value diff --git a/src/transformers/models/auto/configuration_auto.py b/src/transformers/models/auto/configuration_auto.py index 12f8f4f4c5..99ba934986 100644 --- a/src/transformers/models/auto/configuration_auto.py +++ b/src/transformers/models/auto/configuration_auto.py @@ -974,10 +974,10 @@ class _LazyConfigMapping(OrderedDict[str, type[PretrainedConfig]]): return list(self._mapping.keys()) + list(self._extra_content.keys()) def values(self) -> list[type[PretrainedConfig]]: - return [self[k] for k in self._mapping.keys()] + list(self._extra_content.values()) + return [self[k] for k in self._mapping] + list(self._extra_content.values()) def items(self) -> list[tuple[str, type[PretrainedConfig]]]: - return [(k, self[k]) for k in self._mapping.keys()] + list(self._extra_content.items()) + return [(k, self[k]) for k in self._mapping] + list(self._extra_content.items()) def __iter__(self) -> Iterator[str]: return iter(list(self._mapping.keys()) + list(self._extra_content.keys())) @@ -989,7 +989,7 @@ class _LazyConfigMapping(OrderedDict[str, type[PretrainedConfig]]): """ Register a new configuration in this mapping. """ - if key in self._mapping.keys() and not exist_ok: + if key in self._mapping and not exist_ok: raise ValueError(f"'{key}' is already used by a Transformers config, pick another name.") self._extra_content[key] = value @@ -1230,7 +1230,7 @@ class AutoConfig: "The `use_auth_token` argument is deprecated and will be removed in v5 of Transformers. Please use `token` instead.", FutureWarning, ) - if kwargs.get("token", None) is not None: + if kwargs.get("token") is not None: raise ValueError( "`token` and `use_auth_token` are both specified. Please set only the argument `token`." ) diff --git a/src/transformers/models/auto/feature_extraction_auto.py b/src/transformers/models/auto/feature_extraction_auto.py index 0878c5ce30..7816e739c6 100644 --- a/src/transformers/models/auto/feature_extraction_auto.py +++ b/src/transformers/models/auto/feature_extraction_auto.py @@ -342,7 +342,7 @@ class AutoFeatureExtractor: "The `use_auth_token` argument is deprecated and will be removed in v5 of Transformers. Please use `token` instead.", FutureWarning, ) - if kwargs.get("token", None) is not None: + if kwargs.get("token") is not None: raise ValueError( "`token` and `use_auth_token` are both specified. Please set only the argument `token`." ) @@ -400,7 +400,7 @@ class AutoFeatureExtractor: raise ValueError( f"Unrecognized feature extractor in {pretrained_model_name_or_path}. Should have a " f"`feature_extractor_type` key in its {FEATURE_EXTRACTOR_NAME} of {CONFIG_NAME}, or one of the following " - f"`model_type` keys in its {CONFIG_NAME}: {', '.join(c for c in FEATURE_EXTRACTOR_MAPPING_NAMES.keys())}" + f"`model_type` keys in its {CONFIG_NAME}: {', '.join(c for c in FEATURE_EXTRACTOR_MAPPING_NAMES)}" ) @staticmethod diff --git a/src/transformers/models/auto/image_processing_auto.py b/src/transformers/models/auto/image_processing_auto.py index ec55316484..cefa1335eb 100644 --- a/src/transformers/models/auto/image_processing_auto.py +++ b/src/transformers/models/auto/image_processing_auto.py @@ -451,7 +451,7 @@ class AutoImageProcessor: "The `use_auth_token` argument is deprecated and will be removed in v5 of Transformers. Please use `token` instead.", FutureWarning, ) - if kwargs.get("token", None) is not None: + if kwargs.get("token") is not None: raise ValueError( "`token` and `use_auth_token` are both specified. Please set only the argument `token`." ) @@ -625,7 +625,7 @@ class AutoImageProcessor: raise ValueError( f"Unrecognized image processor in {pretrained_model_name_or_path}. Should have a " f"`image_processor_type` key in its {IMAGE_PROCESSOR_NAME} of {CONFIG_NAME}, or one of the following " - f"`model_type` keys in its {CONFIG_NAME}: {', '.join(c for c in IMAGE_PROCESSOR_MAPPING_NAMES.keys())}" + f"`model_type` keys in its {CONFIG_NAME}: {', '.join(c for c in IMAGE_PROCESSOR_MAPPING_NAMES)}" ) @staticmethod diff --git a/src/transformers/models/auto/processing_auto.py b/src/transformers/models/auto/processing_auto.py index cc2be544f4..0d711cee06 100644 --- a/src/transformers/models/auto/processing_auto.py +++ b/src/transformers/models/auto/processing_auto.py @@ -261,7 +261,7 @@ class AutoProcessor: "The `use_auth_token` argument is deprecated and will be removed in v5 of Transformers. Please use `token` instead.", FutureWarning, ) - if kwargs.get("token", None) is not None: + if kwargs.get("token") is not None: raise ValueError( "`token` and `use_auth_token` are both specified. Please set only the argument `token`." ) @@ -276,9 +276,7 @@ class AutoProcessor: # First, let's see if we have a processor or preprocessor config. # Filter the kwargs for `cached_file`. - cached_file_kwargs = { - key: kwargs[key] for key in inspect.signature(cached_file).parameters.keys() if key in kwargs - } + cached_file_kwargs = {key: kwargs[key] for key in inspect.signature(cached_file).parameters if key in kwargs} # We don't want to raise cached_file_kwargs.update( { diff --git a/src/transformers/models/auto/tokenization_auto.py b/src/transformers/models/auto/tokenization_auto.py index 29849fdb24..f9832df525 100644 --- a/src/transformers/models/auto/tokenization_auto.py +++ b/src/transformers/models/auto/tokenization_auto.py @@ -875,7 +875,7 @@ def get_tokenizer_config( raise ValueError("`token` and `use_auth_token` are both specified. Please set only the argument `token`.") token = use_auth_token - commit_hash = kwargs.get("_commit_hash", None) + commit_hash = kwargs.get("_commit_hash") resolved_config_file = cached_file( pretrained_model_name_or_path, TOKENIZER_CONFIG_FILE, @@ -1000,7 +1000,7 @@ class AutoTokenizer: "The `use_auth_token` argument is deprecated and will be removed in v5 of Transformers. Please use `token` instead.", FutureWarning, ) - if kwargs.get("token", None) is not None: + if kwargs.get("token") is not None: raise ValueError( "`token` and `use_auth_token` are both specified. Please set only the argument `token`." ) @@ -1012,7 +1012,7 @@ class AutoTokenizer: use_fast = kwargs.pop("use_fast", True) tokenizer_type = kwargs.pop("tokenizer_type", None) trust_remote_code = kwargs.pop("trust_remote_code", None) - gguf_file = kwargs.get("gguf_file", None) + gguf_file = kwargs.get("gguf_file") # First, let's see whether the tokenizer_type is passed so that we can leverage it if tokenizer_type is not None: @@ -1022,7 +1022,7 @@ class AutoTokenizer: if tokenizer_class_tuple is None: raise ValueError( f"Passed `tokenizer_type` {tokenizer_type} does not exist. `tokenizer_type` should be one of " - f"{', '.join(c for c in TOKENIZER_MAPPING_NAMES.keys())}." + f"{', '.join(c for c in TOKENIZER_MAPPING_NAMES)}." ) tokenizer_class_name, tokenizer_fast_class_name = tokenizer_class_tuple @@ -1142,7 +1142,7 @@ class AutoTokenizer: raise ValueError( f"Unrecognized configuration class {config.__class__} to build an AutoTokenizer.\n" - f"Model type should be one of {', '.join(c.__name__ for c in TOKENIZER_MAPPING.keys())}." + f"Model type should be one of {', '.join(c.__name__ for c in TOKENIZER_MAPPING)}." ) @staticmethod diff --git a/src/transformers/models/auto/video_processing_auto.py b/src/transformers/models/auto/video_processing_auto.py index 77a8c458bd..545fcc4d92 100644 --- a/src/transformers/models/auto/video_processing_auto.py +++ b/src/transformers/models/auto/video_processing_auto.py @@ -291,7 +291,7 @@ class AutoVideoProcessor: "The `use_auth_token` argument is deprecated and will be removed in v5 of Transformers. Please use `token` instead.", FutureWarning, ) - if kwargs.get("token", None) is not None: + if kwargs.get("token") is not None: raise ValueError( "`token` and `use_auth_token` are both specified. Please set only the argument `token`." ) @@ -364,7 +364,7 @@ class AutoVideoProcessor: raise ValueError( f"Unrecognized video processor in {pretrained_model_name_or_path}. Should have a " f"`video_processor_type` key in its {VIDEO_PROCESSOR_NAME} of {CONFIG_NAME}, or one of the following " - f"`model_type` keys in its {CONFIG_NAME}: {', '.join(c for c in VIDEO_PROCESSOR_MAPPING_NAMES.keys())}" + f"`model_type` keys in its {CONFIG_NAME}: {', '.join(c for c in VIDEO_PROCESSOR_MAPPING_NAMES)}" ) @staticmethod diff --git a/src/transformers/models/bit/convert_bit_to_pytorch.py b/src/transformers/models/bit/convert_bit_to_pytorch.py index abc24290ab..814db3ca4f 100644 --- a/src/transformers/models/bit/convert_bit_to_pytorch.py +++ b/src/transformers/models/bit/convert_bit_to_pytorch.py @@ -94,7 +94,7 @@ def convert_bit_checkpoint(model_name, pytorch_dump_folder_path, push_to_hub=Fal # load state_dict of original model state_dict = timm_model.state_dict() - for key in state_dict.copy().keys(): + for key in state_dict.copy(): val = state_dict.pop(key) state_dict[rename_key(key)] = val.squeeze() if "head" in key else val diff --git a/src/transformers/models/blip/modeling_blip.py b/src/transformers/models/blip/modeling_blip.py index 3644c71cc0..267b0ffcb0 100644 --- a/src/transformers/models/blip/modeling_blip.py +++ b/src/transformers/models/blip/modeling_blip.py @@ -443,7 +443,7 @@ class BlipPreTrainedModel(PreTrainedModel): def _init_weights(self, module): """Initialize the weights""" factor = self.config.initializer_range - if isinstance(module, nn.Conv2d) or isinstance(module, nn.Embedding) or isinstance(module, nn.Linear): + if isinstance(module, (nn.Conv2d, nn.Embedding, nn.Linear)): module.weight.data.normal_(mean=0.0, std=factor) if hasattr(module, "bias") and module.bias is not None: module.bias.data.zero_() diff --git a/src/transformers/models/blip/modeling_tf_blip_text.py b/src/transformers/models/blip/modeling_tf_blip_text.py index ec86f6d26a..7dae1126e0 100644 --- a/src/transformers/models/blip/modeling_tf_blip_text.py +++ b/src/transformers/models/blip/modeling_tf_blip_text.py @@ -1096,8 +1096,8 @@ class TFBlipTextLMHeadModel(TFBlipTextPreTrainedModel): "input_ids": input_ids, "attention_mask": attention_mask, "past_key_values": past_key_values, - "encoder_hidden_states": model_kwargs.get("encoder_hidden_states", None), - "encoder_attention_mask": model_kwargs.get("encoder_attention_mask", None), + "encoder_hidden_states": model_kwargs.get("encoder_hidden_states"), + "encoder_attention_mask": model_kwargs.get("encoder_attention_mask"), "is_decoder": True, } diff --git a/src/transformers/models/bloom/convert_bloom_original_checkpoint_to_pytorch.py b/src/transformers/models/bloom/convert_bloom_original_checkpoint_to_pytorch.py index b9c9b27bd3..26be31dcbb 100644 --- a/src/transformers/models/bloom/convert_bloom_original_checkpoint_to_pytorch.py +++ b/src/transformers/models/bloom/convert_bloom_original_checkpoint_to_pytorch.py @@ -114,7 +114,7 @@ def convert_bloom_checkpoint_to_pytorch( if tensors is None: tensors = temp else: - for key in tensors.keys(): + for key in tensors: if any(key.endswith(end) for end in WEIGHTS_TO_AVERAGE_ENDSWITH): # We average (sum and then divide) some weights across TP ranks (see https://github.com/bigscience-workshop/Megatron-DeepSpeed/blob/olruwase/sync_layer_norms/megatron/training.py#L425) tensors[key] += temp[key] @@ -125,7 +125,7 @@ def convert_bloom_checkpoint_to_pytorch( tensors[key] = torch.cat([tensors[key], temp[key]], dim=cat_dim) # Divide by the number of TP the weights we want to average - for key in tensors.keys(): + for key in tensors: if any(key.endswith(end) for end in WEIGHTS_TO_AVERAGE_ENDSWITH): tensors[key] = tensors[key] / pretraining_tp torch.save( @@ -136,7 +136,7 @@ def convert_bloom_checkpoint_to_pytorch( ), ) - for key in tensors.keys(): + for key in tensors: value = tensors[key] total_size += value.numel() * get_dtype_size(value.dtype) if key not in index_dict["weight_map"]: @@ -174,7 +174,7 @@ def convert_bloom_checkpoint_to_pytorch( if tensors is None: tensors = temp else: - for key in tensors.keys(): + for key in tensors: # We average (sum and then divide) some weights across TP ranks (see https://github.com/bigscience-workshop/Megatron-DeepSpeed/blob/olruwase/sync_layer_norms/megatron/training.py#L425) if any(key.endswith(end) for end in WEIGHTS_TO_AVERAGE_ENDSWITH): tensors[key] += temp[key] @@ -185,7 +185,7 @@ def convert_bloom_checkpoint_to_pytorch( tensors[key] = torch.cat([tensors[key], temp[key]], dim=cat_dim) # Divide by the number of TP the weights we want to average - for key in tensors.keys(): + for key in tensors: if any(key.endswith(end) for end in WEIGHTS_TO_AVERAGE_ENDSWITH): tensors[key] = tensors[key] / pretraining_tp diff --git a/src/transformers/models/bros/convert_bros_to_pytorch.py b/src/transformers/models/bros/convert_bros_to_pytorch.py index c0984f2c74..35c89a88da 100644 --- a/src/transformers/models/bros/convert_bros_to_pytorch.py +++ b/src/transformers/models/bros/convert_bros_to_pytorch.py @@ -55,7 +55,7 @@ def rename_key(name): def convert_state_dict(orig_state_dict, model): # rename keys - for key in orig_state_dict.copy().keys(): + for key in orig_state_dict.copy(): val = orig_state_dict.pop(key) orig_state_dict[rename_key(key)] = val diff --git a/src/transformers/models/clipseg/convert_clipseg_original_pytorch_to_hf.py b/src/transformers/models/clipseg/convert_clipseg_original_pytorch_to_hf.py index 6f4a60737b..86b766eabf 100644 --- a/src/transformers/models/clipseg/convert_clipseg_original_pytorch_to_hf.py +++ b/src/transformers/models/clipseg/convert_clipseg_original_pytorch_to_hf.py @@ -112,7 +112,7 @@ def rename_key(name): def convert_state_dict(orig_state_dict, config): - for key in orig_state_dict.copy().keys(): + for key in orig_state_dict.copy(): val = orig_state_dict.pop(key) if key.startswith("clip_model") and "attn.in_proj" in key: @@ -172,7 +172,7 @@ def convert_clipseg_checkpoint(model_name, checkpoint_path, pytorch_dump_folder_ state_dict = torch.load(checkpoint_path, map_location="cpu", weights_only=True) # remove some keys - for key in state_dict.copy().keys(): + for key in state_dict.copy(): if key.startswith("model"): state_dict.pop(key, None) diff --git a/src/transformers/models/clvp/tokenization_clvp.py b/src/transformers/models/clvp/tokenization_clvp.py index 61bd5964ec..4b0b285561 100644 --- a/src/transformers/models/clvp/tokenization_clvp.py +++ b/src/transformers/models/clvp/tokenization_clvp.py @@ -300,7 +300,7 @@ class ClvpTokenizer(PreTrainedTokenizer): # if the token is "Ġ" we replace it with "[SPACE]" (if "[SPACE]" is present in the vocab), otherwise we keep the "Ġ". bpe_tokens.extend( - "[SPACE]" if bpe_token == "\u0120" and "[SPACE]" in self.encoder.keys() else bpe_token + "[SPACE]" if bpe_token == "\u0120" and "[SPACE]" in self.encoder else bpe_token for bpe_token in self.bpe(token).split(" ") ) diff --git a/src/transformers/models/conditional_detr/convert_conditional_detr_original_pytorch_checkpoint_to_pytorch.py b/src/transformers/models/conditional_detr/convert_conditional_detr_original_pytorch_checkpoint_to_pytorch.py index 91f00668be..22658419eb 100644 --- a/src/transformers/models/conditional_detr/convert_conditional_detr_original_pytorch_checkpoint_to_pytorch.py +++ b/src/transformers/models/conditional_detr/convert_conditional_detr_original_pytorch_checkpoint_to_pytorch.py @@ -267,7 +267,7 @@ def convert_conditional_detr_checkpoint(model_name, pytorch_dump_folder_path): read_in_q_k_v(state_dict, is_panoptic=is_panoptic) # important: we need to prepend a prefix to each of the base model keys as the head models use different attributes for them prefix = "conditional_detr.model." if is_panoptic else "model." - for key in state_dict.copy().keys(): + for key in state_dict.copy(): if is_panoptic: if ( key.startswith("conditional_detr") diff --git a/src/transformers/models/conditional_detr/image_processing_conditional_detr_fast.py b/src/transformers/models/conditional_detr/image_processing_conditional_detr_fast.py index c63598e0d6..f48af23ccf 100644 --- a/src/transformers/models/conditional_detr/image_processing_conditional_detr_fast.py +++ b/src/transformers/models/conditional_detr/image_processing_conditional_detr_fast.py @@ -327,8 +327,8 @@ class ConditionalDetrImageProcessorFast(BaseImageProcessorFast): self.size = get_size_dict(size, max_size=max_size, default_to_square=False) # Backwards compatibility - do_convert_annotations = kwargs.get("do_convert_annotations", None) - do_normalize = kwargs.get("do_normalize", None) + do_convert_annotations = kwargs.get("do_convert_annotations") + do_normalize = kwargs.get("do_normalize") if do_convert_annotations is None and getattr(self, "do_convert_annotations", None) is None: self.do_convert_annotations = do_normalize if do_normalize is not None else self.do_normalize diff --git a/src/transformers/models/convnext/convert_convnext_to_pytorch.py b/src/transformers/models/convnext/convert_convnext_to_pytorch.py index 27315ed73f..426ed98b88 100644 --- a/src/transformers/models/convnext/convert_convnext_to_pytorch.py +++ b/src/transformers/models/convnext/convert_convnext_to_pytorch.py @@ -128,11 +128,11 @@ def convert_convnext_checkpoint(checkpoint_url, pytorch_dump_folder_path): # load original state_dict from URL state_dict = torch.hub.load_state_dict_from_url(checkpoint_url)["model"] # rename keys - for key in state_dict.copy().keys(): + for key in state_dict.copy(): val = state_dict.pop(key) state_dict[rename_key(key)] = val # add prefix to all keys expect classifier head - for key in state_dict.copy().keys(): + for key in state_dict.copy(): val = state_dict.pop(key) if not key.startswith("classifier"): key = "convnext." + key diff --git a/src/transformers/models/convnextv2/convert_convnextv2_to_pytorch.py b/src/transformers/models/convnextv2/convert_convnextv2_to_pytorch.py index 8094ecf0d6..d23f248816 100644 --- a/src/transformers/models/convnextv2/convert_convnextv2_to_pytorch.py +++ b/src/transformers/models/convnextv2/convert_convnextv2_to_pytorch.py @@ -153,11 +153,11 @@ def convert_convnextv2_checkpoint(checkpoint_url, pytorch_dump_folder_path, save print("Converting model parameters...") # rename keys - for key in state_dict.copy().keys(): + for key in state_dict.copy(): val = state_dict.pop(key) state_dict[rename_key(key)] = val # add prefix to all keys expect classifier head - for key in state_dict.copy().keys(): + for key in state_dict.copy(): val = state_dict.pop(key) if not key.startswith("classifier"): key = "convnextv2." + key diff --git a/src/transformers/models/ctrl/modeling_tf_ctrl.py b/src/transformers/models/ctrl/modeling_tf_ctrl.py index e774621e19..1dce90147b 100644 --- a/src/transformers/models/ctrl/modeling_tf_ctrl.py +++ b/src/transformers/models/ctrl/modeling_tf_ctrl.py @@ -686,15 +686,15 @@ class TFCTRLLMHeadModel(TFCTRLPreTrainedModel, TFCausalLanguageModelingLoss): # Copied from transformers.models.gpt2.modeling_tf_gpt2.TFGPT2LMHeadModel.prepare_inputs_for_generation def prepare_inputs_for_generation(self, inputs, past_key_values=None, use_cache=None, **kwargs): - token_type_ids = kwargs.get("token_type_ids", None) + token_type_ids = kwargs.get("token_type_ids") # only last token for inputs_ids if past is defined in kwargs if past_key_values: inputs = tf.expand_dims(inputs[:, -1], -1) if token_type_ids is not None: token_type_ids = tf.expand_dims(token_type_ids[:, -1], -1) - position_ids = kwargs.get("position_ids", None) - attention_mask = kwargs.get("attention_mask", None) + position_ids = kwargs.get("position_ids") + attention_mask = kwargs.get("attention_mask") if attention_mask is not None and position_ids is None: position_ids = tf.math.cumsum(attention_mask, axis=-1, exclusive=True) diff --git a/src/transformers/models/d_fine/convert_d_fine_original_pytorch_checkpoint_to_hf.py b/src/transformers/models/d_fine/convert_d_fine_original_pytorch_checkpoint_to_hf.py index 8a344c520e..0b77ee3557 100644 --- a/src/transformers/models/d_fine/convert_d_fine_original_pytorch_checkpoint_to_hf.py +++ b/src/transformers/models/d_fine/convert_d_fine_original_pytorch_checkpoint_to_hf.py @@ -158,7 +158,7 @@ def load_original_state_dict(repo_id, model_name): original_state_dict = {} model = torch.load(directory_path, map_location="cpu")["model"] - for key in model.keys(): + for key in model: original_state_dict[key] = model[key] return original_state_dict @@ -406,7 +406,7 @@ def convert_d_fine_checkpoint(model_name, pytorch_dump_folder_path, push_to_hub, # query, key and value matrices need special treatment read_in_q_k_v(state_dict, config, model_name) # important: we need to prepend a prefix to each of the base model keys as the head models use different attributes for them - for key in state_dict.copy().keys(): + for key in state_dict.copy(): if key.endswith("num_batches_tracked"): del state_dict[key] # for two_stage diff --git a/src/transformers/models/dab_detr/convert_dab_detr_original_pytorch_checkpoint_to_pytorch.py b/src/transformers/models/dab_detr/convert_dab_detr_original_pytorch_checkpoint_to_pytorch.py index 3d8cf3e279..efaac368f6 100644 --- a/src/transformers/models/dab_detr/convert_dab_detr_original_pytorch_checkpoint_to_pytorch.py +++ b/src/transformers/models/dab_detr/convert_dab_detr_original_pytorch_checkpoint_to_pytorch.py @@ -180,7 +180,7 @@ def write_model(model_name, pretrained_model_weights_path, pytorch_dump_folder_p gc.collect() # important: we need to prepend a prefix to each of the base model keys as the head models use different attributes for them prefix = "model." - for key in state_dict.copy().keys(): + for key in state_dict.copy(): if not key.startswith("class_embed") and not key.startswith("bbox_predictor"): val = state_dict.pop(key) state_dict[prefix + key] = val diff --git a/src/transformers/models/deformable_detr/convert_deformable_detr_to_pytorch.py b/src/transformers/models/deformable_detr/convert_deformable_detr_to_pytorch.py index c88582eacc..dbd7fa3f4d 100644 --- a/src/transformers/models/deformable_detr/convert_deformable_detr_to_pytorch.py +++ b/src/transformers/models/deformable_detr/convert_deformable_detr_to_pytorch.py @@ -127,14 +127,14 @@ def convert_deformable_detr_checkpoint( # load original state dict state_dict = torch.load(checkpoint_path, map_location="cpu", weights_only=True)["model"] # rename keys - for key in state_dict.copy().keys(): + for key in state_dict.copy(): val = state_dict.pop(key) state_dict[rename_key(key)] = val # query, key and value matrices need special treatment read_in_q_k_v(state_dict) # important: we need to prepend a prefix to each of the base model keys as the head models use different attributes for them prefix = "model." - for key in state_dict.copy().keys(): + for key in state_dict.copy(): if not key.startswith("class_embed") and not key.startswith("bbox_embed"): val = state_dict.pop(key) state_dict[prefix + key] = val diff --git a/src/transformers/models/deformable_detr/image_processing_deformable_detr_fast.py b/src/transformers/models/deformable_detr/image_processing_deformable_detr_fast.py index 172bdf80d4..b78ae6ee66 100644 --- a/src/transformers/models/deformable_detr/image_processing_deformable_detr_fast.py +++ b/src/transformers/models/deformable_detr/image_processing_deformable_detr_fast.py @@ -318,8 +318,8 @@ class DeformableDetrImageProcessorFast(BaseImageProcessorFast): self.size = get_size_dict(size, max_size=max_size, default_to_square=False) # Backwards compatibility - do_convert_annotations = kwargs.get("do_convert_annotations", None) - do_normalize = kwargs.get("do_normalize", None) + do_convert_annotations = kwargs.get("do_convert_annotations") + do_normalize = kwargs.get("do_normalize") if do_convert_annotations is None and getattr(self, "do_convert_annotations", None) is None: self.do_convert_annotations = do_normalize if do_normalize is not None else self.do_normalize diff --git a/src/transformers/models/deprecated/deta/convert_deta_resnet_to_pytorch.py b/src/transformers/models/deprecated/deta/convert_deta_resnet_to_pytorch.py index 6436451190..2a38bc05cc 100644 --- a/src/transformers/models/deprecated/deta/convert_deta_resnet_to_pytorch.py +++ b/src/transformers/models/deprecated/deta/convert_deta_resnet_to_pytorch.py @@ -238,7 +238,7 @@ def convert_deta_checkpoint(model_name, pytorch_dump_folder_path, push_to_hub): read_in_decoder_q_k_v(state_dict, config) # fix some prefixes - for key in state_dict.copy().keys(): + for key in state_dict.copy(): if "transformer.decoder.class_embed" in key or "transformer.decoder.bbox_embed" in key: val = state_dict.pop(key) state_dict[key.replace("transformer.decoder", "model.decoder")] = val diff --git a/src/transformers/models/deprecated/deta/convert_deta_swin_to_pytorch.py b/src/transformers/models/deprecated/deta/convert_deta_swin_to_pytorch.py index c2e1ae6001..a72c8c5422 100644 --- a/src/transformers/models/deprecated/deta/convert_deta_swin_to_pytorch.py +++ b/src/transformers/models/deprecated/deta/convert_deta_swin_to_pytorch.py @@ -244,7 +244,7 @@ def convert_deta_checkpoint(model_name, pytorch_dump_folder_path, push_to_hub): read_in_decoder_q_k_v(state_dict, config) # fix some prefixes - for key in state_dict.copy().keys(): + for key in state_dict.copy(): if "transformer.decoder.class_embed" in key or "transformer.decoder.bbox_embed" in key: val = state_dict.pop(key) state_dict[key.replace("transformer.decoder", "model.decoder")] = val diff --git a/src/transformers/models/deprecated/deta/modeling_deta.py b/src/transformers/models/deprecated/deta/modeling_deta.py index e109b84938..edc0f2598a 100644 --- a/src/transformers/models/deprecated/deta/modeling_deta.py +++ b/src/transformers/models/deprecated/deta/modeling_deta.py @@ -2042,7 +2042,7 @@ class DetaForObjectDetection(DetaPreTrainedModel): aux_weight_dict.update({k + f"_{i}": v for k, v in weight_dict.items()}) aux_weight_dict.update({k + "_enc": v for k, v in weight_dict.items()}) weight_dict.update(aux_weight_dict) - loss = sum(loss_dict[k] * weight_dict[k] for k in loss_dict.keys() if k in weight_dict) + loss = sum(loss_dict[k] * weight_dict[k] for k in loss_dict if k in weight_dict) if not return_dict: if auxiliary_outputs is not None: diff --git a/src/transformers/models/deprecated/efficientformer/convert_efficientformer_original_pytorch_checkpoint_to_pytorch.py b/src/transformers/models/deprecated/efficientformer/convert_efficientformer_original_pytorch_checkpoint_to_pytorch.py index 80f16881b5..7b1a4aa5f2 100644 --- a/src/transformers/models/deprecated/efficientformer/convert_efficientformer_original_pytorch_checkpoint_to_pytorch.py +++ b/src/transformers/models/deprecated/efficientformer/convert_efficientformer_original_pytorch_checkpoint_to_pytorch.py @@ -105,7 +105,7 @@ def rename_key(old_name, num_meta4D_last_stage): def convert_torch_checkpoint(checkpoint, num_meta4D_last_stage): - for key in checkpoint.copy().keys(): + for key in checkpoint.copy(): val = checkpoint.pop(key) checkpoint[rename_key(key, num_meta4D_last_stage)] = val diff --git a/src/transformers/models/deprecated/gptsan_japanese/convert_gptsan_tf_checkpoint_to_pytorch.py b/src/transformers/models/deprecated/gptsan_japanese/convert_gptsan_tf_checkpoint_to_pytorch.py index 8aa927d821..76b9c9cf32 100644 --- a/src/transformers/models/deprecated/gptsan_japanese/convert_gptsan_tf_checkpoint_to_pytorch.py +++ b/src/transformers/models/deprecated/gptsan_japanese/convert_gptsan_tf_checkpoint_to_pytorch.py @@ -38,7 +38,7 @@ def convert_tf_gptsan_to_pt(args): with tf.device("/CPU:0"): reader = tf.train.load_checkpoint(args.tf_model_dir) shapes = reader.get_variable_to_shape_map() - for key_name in shapes.keys(): + for key_name in shapes: vnp = reader.get_tensor(key_name).astype(np.float16) if key_name.endswith("/adam_m") or key_name.endswith("/adam_v"): continue diff --git a/src/transformers/models/deprecated/gptsan_japanese/tokenization_gptsan_japanese.py b/src/transformers/models/deprecated/gptsan_japanese/tokenization_gptsan_japanese.py index 5eab135c05..c67b27f64f 100644 --- a/src/transformers/models/deprecated/gptsan_japanese/tokenization_gptsan_japanese.py +++ b/src/transformers/models/deprecated/gptsan_japanese/tokenization_gptsan_japanese.py @@ -398,7 +398,7 @@ class SubWordJapaneseTokenizer: self.vocab = vocab # same as swe self.ids_to_tokens = ids_to_tokens # same as bpe self.emoji = emoji - self.maxlen = np.max([len(w) for w in self.vocab.keys()]) + self.maxlen = np.max([len(w) for w in self.vocab]) self.content_repatter1 = re.compile(r"(https?|ftp)(:\/\/[-_\.!~*\'()a-zA-Z0-9;\/?:\@&=\+$,%#]+)") self.content_repatter2 = re.compile(r"[A-Za-z0-9\._+]*@[\-_0-9A-Za-z]+(\.[A-Za-z]+)*") self.content_repatter3 = re.compile(r"[\(]{0,1}[0-9]{2,4}[\)\-\(]{0,1}[0-9]{2,4}[\)\-]{0,1}[0-9]{3,4}") diff --git a/src/transformers/models/deprecated/graphormer/collating_graphormer.py b/src/transformers/models/deprecated/graphormer/collating_graphormer.py index a5300fe8a5..19bcaac3f5 100644 --- a/src/transformers/models/deprecated/graphormer/collating_graphormer.py +++ b/src/transformers/models/deprecated/graphormer/collating_graphormer.py @@ -27,12 +27,12 @@ def convert_to_single_emb(x, offset: int = 512): def preprocess_item(item, keep_features=True): requires_backends(preprocess_item, ["cython"]) - if keep_features and "edge_attr" in item.keys(): # edge_attr + if keep_features and "edge_attr" in item: # edge_attr edge_attr = np.asarray(item["edge_attr"], dtype=np.int64) else: edge_attr = np.ones((len(item["edge_index"][0]), 1), dtype=np.int64) # same embedding for all - if keep_features and "node_feat" in item.keys(): # input_nodes + if keep_features and "node_feat" in item: # input_nodes node_feature = np.asarray(item["node_feat"], dtype=np.int64) else: node_feature = np.ones((item["num_nodes"], 1), dtype=np.int64) # same embedding for all diff --git a/src/transformers/models/deprecated/jukebox/convert_jukebox.py b/src/transformers/models/deprecated/jukebox/convert_jukebox.py index 3380e38693..29763daaa3 100644 --- a/src/transformers/models/deprecated/jukebox/convert_jukebox.py +++ b/src/transformers/models/deprecated/jukebox/convert_jukebox.py @@ -231,7 +231,7 @@ def convert_openai_checkpoint(model_name=None, pytorch_dump_folder_path=None): old_dic = torch.load(f"{pytorch_dump_folder_path}/{dict_name.split('/')[-1]}", weights_only=True)["model"] new_dic = {} - for k in old_dic.keys(): + for k in old_dic: if k.endswith(".b"): new_dic[k.replace("b", "bias")] = old_dic[k] elif k.endswith(".w"): diff --git a/src/transformers/models/deprecated/jukebox/modeling_jukebox.py b/src/transformers/models/deprecated/jukebox/modeling_jukebox.py index 4dfd1c6924..f928d49cf5 100755 --- a/src/transformers/models/deprecated/jukebox/modeling_jukebox.py +++ b/src/transformers/models/deprecated/jukebox/modeling_jukebox.py @@ -2269,7 +2269,7 @@ class JukeboxPreTrainedModel(PreTrainedModel): supports_gradient_checkpointing = False def _init_weights(self, module): - if isinstance(module, JukeboxPrior) or isinstance(module, JukeboxVQVAE): + if isinstance(module, (JukeboxPrior, JukeboxVQVAE)): module.apply(module._init_weights) def __init__(self, *inputs, **kwargs): diff --git a/src/transformers/models/deprecated/mega/convert_mega_original_pytorch_checkpoint_to_pytorch.py b/src/transformers/models/deprecated/mega/convert_mega_original_pytorch_checkpoint_to_pytorch.py index c6dbb12890..6ac5dd4df1 100644 --- a/src/transformers/models/deprecated/mega/convert_mega_original_pytorch_checkpoint_to_pytorch.py +++ b/src/transformers/models/deprecated/mega/convert_mega_original_pytorch_checkpoint_to_pytorch.py @@ -188,7 +188,7 @@ def convert_checkpoint_to_huggingface(pretrained_checkpoint_path, output_path, i # also renaming previously confusing parameter names original_state_dict = original_mlm.mega.encoders.state_dict() updated_keys = {} - for module_name in original_state_dict.keys(): + for module_name in original_state_dict: new_module_name = None # have to handle gamma, beta, and alpha differently due to their use # in multiple modules within the original repository; diff --git a/src/transformers/models/deprecated/open_llama/modeling_open_llama.py b/src/transformers/models/deprecated/open_llama/modeling_open_llama.py index abdcc2b40e..235cb81717 100644 --- a/src/transformers/models/deprecated/open_llama/modeling_open_llama.py +++ b/src/transformers/models/deprecated/open_llama/modeling_open_llama.py @@ -787,7 +787,7 @@ class OpenLlamaForCausalLM(OpenLlamaPreTrainedModel): input_ids = input_ids[:, remove_prefix_length:] - position_ids = kwargs.get("position_ids", None) + position_ids = kwargs.get("position_ids") if attention_mask is not None and position_ids is None: # create position_ids on the fly for batch generation position_ids = attention_mask.long().cumsum(-1) - 1 diff --git a/src/transformers/models/deprecated/tapex/tokenization_tapex.py b/src/transformers/models/deprecated/tapex/tokenization_tapex.py index 4ad87dd4e7..b32383ddd4 100644 --- a/src/transformers/models/deprecated/tapex/tokenization_tapex.py +++ b/src/transformers/models/deprecated/tapex/tokenization_tapex.py @@ -1354,12 +1354,12 @@ class TapexTokenizer(PreTrainedTokenizer): # modify the answer list if answer is not None: for i, case in enumerate(answer): - if case in cell_mapping.keys(): + if case in cell_mapping: answer[i] = cell_mapping[case] def truncate_cell(self, cell_value): # do not process on these cases - if isinstance(cell_value, int) or isinstance(cell_value, float): + if isinstance(cell_value, (int, float)): return cell_value if cell_value.strip() != "": try_tokens = self.tokenize(cell_value) diff --git a/src/transformers/models/deprecated/van/convert_van_to_pytorch.py b/src/transformers/models/deprecated/van/convert_van_to_pytorch.py index bdd357f04a..ec43af68d7 100644 --- a/src/transformers/models/deprecated/van/convert_van_to_pytorch.py +++ b/src/transformers/models/deprecated/van/convert_van_to_pytorch.py @@ -45,7 +45,7 @@ class Tracker: handles: list = field(default_factory=list) def _forward_hook(self, m, inputs: Tensor, outputs: Tensor): - has_not_submodules = len(list(m.modules())) == 1 or isinstance(m, nn.Conv2d) or isinstance(m, nn.BatchNorm2d) + has_not_submodules = len(list(m.modules())) == 1 or isinstance(m, (nn.Conv2d, nn.BatchNorm2d)) if has_not_submodules: if not isinstance(m, VanLayerScaling): self.traced.append(m) diff --git a/src/transformers/models/depth_pro/configuration_depth_pro.py b/src/transformers/models/depth_pro/configuration_depth_pro.py index bbd2eea60b..6bc14a0e15 100644 --- a/src/transformers/models/depth_pro/configuration_depth_pro.py +++ b/src/transformers/models/depth_pro/configuration_depth_pro.py @@ -158,7 +158,7 @@ class DepthProConfig(PretrainedConfig): self.patch_model_config = patch_model_config self.fov_model_config = fov_model_config - for sub_config_key in self.sub_configs.keys(): + for sub_config_key in self.sub_configs: sub_config = getattr(self, sub_config_key) if sub_config is None: diff --git a/src/transformers/models/detr/convert_detr_original_pytorch_checkpoint_to_pytorch.py b/src/transformers/models/detr/convert_detr_original_pytorch_checkpoint_to_pytorch.py index ba98514501..8a7a2e0e0a 100644 --- a/src/transformers/models/detr/convert_detr_original_pytorch_checkpoint_to_pytorch.py +++ b/src/transformers/models/detr/convert_detr_original_pytorch_checkpoint_to_pytorch.py @@ -224,7 +224,7 @@ def convert_detr_checkpoint(model_name, pytorch_dump_folder_path): read_in_q_k_v(state_dict, is_panoptic=is_panoptic) # important: we need to prepend a prefix to each of the base model keys as the head models use different attributes for them prefix = "detr.model." if is_panoptic else "model." - for key in state_dict.copy().keys(): + for key in state_dict.copy(): if is_panoptic: if ( key.startswith("detr") diff --git a/src/transformers/models/detr/convert_detr_to_pytorch.py b/src/transformers/models/detr/convert_detr_to_pytorch.py index 6ba6a0e292..ffc755074d 100644 --- a/src/transformers/models/detr/convert_detr_to_pytorch.py +++ b/src/transformers/models/detr/convert_detr_to_pytorch.py @@ -310,7 +310,7 @@ def convert_detr_checkpoint(model_name, pytorch_dump_folder_path=None, push_to_h read_in_q_k_v(state_dict, is_panoptic=is_panoptic) # important: we need to prepend a prefix to each of the base model keys as the head models use different attributes for them prefix = "detr.model." if is_panoptic else "model." - for key in state_dict.copy().keys(): + for key in state_dict.copy(): if is_panoptic: if ( key.startswith("detr") diff --git a/src/transformers/models/detr/image_processing_detr_fast.py b/src/transformers/models/detr/image_processing_detr_fast.py index 8a3235a41d..70ae52fef4 100644 --- a/src/transformers/models/detr/image_processing_detr_fast.py +++ b/src/transformers/models/detr/image_processing_detr_fast.py @@ -339,8 +339,8 @@ class DetrImageProcessorFast(BaseImageProcessorFast): self.size = get_size_dict(size, max_size=max_size, default_to_square=False) # Backwards compatibility - do_convert_annotations = kwargs.get("do_convert_annotations", None) - do_normalize = kwargs.get("do_normalize", None) + do_convert_annotations = kwargs.get("do_convert_annotations") + do_normalize = kwargs.get("do_normalize") if do_convert_annotations is None and getattr(self, "do_convert_annotations", None) is None: self.do_convert_annotations = do_normalize if do_normalize is not None else self.do_normalize diff --git a/src/transformers/models/dia/generation_dia.py b/src/transformers/models/dia/generation_dia.py index 0ca5998bf2..5111e77644 100644 --- a/src/transformers/models/dia/generation_dia.py +++ b/src/transformers/models/dia/generation_dia.py @@ -421,7 +421,7 @@ class DiaGenerationMixin(GenerationMixin): **kwargs, ) -> Union[GenerateOutput, torch.LongTensor]: # We expect the initial input ids to be the complete mask (delayed input) - delay_mask = kwargs.get("decoder_input_ids", None) + delay_mask = kwargs.get("decoder_input_ids") if delay_mask is not None: delay_mask = delay_mask.clone() diff --git a/src/transformers/models/donut/convert_donut_to_pytorch.py b/src/transformers/models/donut/convert_donut_to_pytorch.py index f6f14f6d08..d58cdd6224 100644 --- a/src/transformers/models/donut/convert_donut_to_pytorch.py +++ b/src/transformers/models/donut/convert_donut_to_pytorch.py @@ -93,7 +93,7 @@ def rename_key(name): def convert_state_dict(orig_state_dict, model): - for key in orig_state_dict.copy().keys(): + for key in orig_state_dict.copy(): val = orig_state_dict.pop(key) if "qkv" in key: diff --git a/src/transformers/models/dpt/convert_dpt_hybrid_to_pytorch.py b/src/transformers/models/dpt/convert_dpt_hybrid_to_pytorch.py index ceae9b8471..ce53018a76 100644 --- a/src/transformers/models/dpt/convert_dpt_hybrid_to_pytorch.py +++ b/src/transformers/models/dpt/convert_dpt_hybrid_to_pytorch.py @@ -230,7 +230,7 @@ def convert_dpt_checkpoint(checkpoint_url, pytorch_dump_folder_path, push_to_hub # remove certain keys remove_ignore_keys_(state_dict) # rename keys - for key in state_dict.copy().keys(): + for key in state_dict.copy(): val = state_dict.pop(key) state_dict[rename_key(key)] = val # read in qkv matrices diff --git a/src/transformers/models/dpt/convert_dpt_to_pytorch.py b/src/transformers/models/dpt/convert_dpt_to_pytorch.py index 55e0a444e8..1341f8908b 100644 --- a/src/transformers/models/dpt/convert_dpt_to_pytorch.py +++ b/src/transformers/models/dpt/convert_dpt_to_pytorch.py @@ -197,7 +197,7 @@ def convert_dpt_checkpoint(checkpoint_url, pytorch_dump_folder_path, push_to_hub # remove certain keys remove_ignore_keys_(state_dict) # rename keys - for key in state_dict.copy().keys(): + for key in state_dict.copy(): val = state_dict.pop(key) state_dict[rename_key(key)] = val # read in qkv matrices diff --git a/src/transformers/models/encoder_decoder/modeling_encoder_decoder.py b/src/transformers/models/encoder_decoder/modeling_encoder_decoder.py index 44d8e6cd7e..b065266795 100644 --- a/src/transformers/models/encoder_decoder/modeling_encoder_decoder.py +++ b/src/transformers/models/encoder_decoder/modeling_encoder_decoder.py @@ -373,9 +373,9 @@ class EncoderDecoderModel(PreTrainedModel, GenerationMixin): } # remove encoder, decoder kwargs from kwargs - for key in kwargs_encoder.keys(): + for key in kwargs_encoder: del kwargs["encoder_" + key] - for key in kwargs_decoder.keys(): + for key in kwargs_decoder: del kwargs["decoder_" + key] # Load and initialize the encoder and decoder diff --git a/src/transformers/models/encoder_decoder/modeling_flax_encoder_decoder.py b/src/transformers/models/encoder_decoder/modeling_flax_encoder_decoder.py index 13da12b1aa..67fa50ac36 100644 --- a/src/transformers/models/encoder_decoder/modeling_flax_encoder_decoder.py +++ b/src/transformers/models/encoder_decoder/modeling_flax_encoder_decoder.py @@ -818,9 +818,9 @@ class FlaxEncoderDecoderModel(FlaxPreTrainedModel): } # remove encoder, decoder kwargs from kwargs - for key in kwargs_encoder.keys(): + for key in kwargs_encoder: del kwargs["encoder_" + key] - for key in kwargs_decoder.keys(): + for key in kwargs_decoder: del kwargs["decoder_" + key] # Load and initialize the encoder and decoder diff --git a/src/transformers/models/encoder_decoder/modeling_tf_encoder_decoder.py b/src/transformers/models/encoder_decoder/modeling_tf_encoder_decoder.py index 05cd02e3bc..da0dde6915 100644 --- a/src/transformers/models/encoder_decoder/modeling_tf_encoder_decoder.py +++ b/src/transformers/models/encoder_decoder/modeling_tf_encoder_decoder.py @@ -374,9 +374,9 @@ class TFEncoderDecoderModel(TFPreTrainedModel, TFCausalLanguageModelingLoss): } # remove encoder, decoder kwargs from kwargs - for key in kwargs_encoder.keys(): + for key in kwargs_encoder: del kwargs["encoder_" + key] - for key in kwargs_decoder.keys(): + for key in kwargs_decoder: del kwargs["decoder_" + key] # Load and initialize the encoder and decoder diff --git a/src/transformers/models/eomt/image_processing_eomt.py b/src/transformers/models/eomt/image_processing_eomt.py index e63a1be95f..37b9b11103 100644 --- a/src/transformers/models/eomt/image_processing_eomt.py +++ b/src/transformers/models/eomt/image_processing_eomt.py @@ -239,7 +239,7 @@ def compute_segments( def get_target_size(size_dict: dict[str, int]) -> tuple[int, int]: """Returns the height and width from a size dict.""" target_height = size_dict["shortest_edge"] - target_width = size_dict.get("longest_edge", None) or target_height + target_width = size_dict.get("longest_edge") or target_height return target_height, target_width diff --git a/src/transformers/models/falcon/modeling_falcon.py b/src/transformers/models/falcon/modeling_falcon.py index 5cd2bd5058..a6b73b80ce 100644 --- a/src/transformers/models/falcon/modeling_falcon.py +++ b/src/transformers/models/falcon/modeling_falcon.py @@ -650,7 +650,7 @@ class FalconPreTrainedModel(PreTrainedModel): def _init_weights(self, module: nn.Module): """Initialize the weights.""" - if isinstance(module, nn.Linear) or isinstance(module, FalconLinear): + if isinstance(module, (nn.Linear, FalconLinear)): # Slightly different from the TF version which uses truncated_normal for initialization # cf https://github.com/pytorch/pytorch/pull/5617 module.weight.data.normal_(mean=0.0, std=self.config.initializer_range) diff --git a/src/transformers/models/focalnet/convert_focalnet_to_hf_format.py b/src/transformers/models/focalnet/convert_focalnet_to_hf_format.py index 4aed159280..d6af117e56 100644 --- a/src/transformers/models/focalnet/convert_focalnet_to_hf_format.py +++ b/src/transformers/models/focalnet/convert_focalnet_to_hf_format.py @@ -141,7 +141,7 @@ def convert_focalnet_checkpoint(model_name, pytorch_dump_folder_path, push_to_hu state_dict = torch.hub.load_state_dict_from_url(checkpoint_url, map_location="cpu")["model"] # rename keys - for key in state_dict.copy().keys(): + for key in state_dict.copy(): val = state_dict.pop(key) state_dict[rename_key(key)] = val diff --git a/src/transformers/models/fsmt/convert_fsmt_original_pytorch_checkpoint_to_pytorch.py b/src/transformers/models/fsmt/convert_fsmt_original_pytorch_checkpoint_to_pytorch.py index c86afddc79..35e8265850 100755 --- a/src/transformers/models/fsmt/convert_fsmt_original_pytorch_checkpoint_to_pytorch.py +++ b/src/transformers/models/fsmt/convert_fsmt_original_pytorch_checkpoint_to_pytorch.py @@ -134,7 +134,7 @@ def convert_fsmt_checkpoint_to_pytorch(fsmt_checkpoint_path, pytorch_dump_folder # detect whether this is a do_lower_case situation, which can be derived by checking whether we # have at least one uppercase letter in the source vocab do_lower_case = True - for k in src_vocab.keys(): + for k in src_vocab: if not k.islower(): do_lower_case = False break diff --git a/src/transformers/models/fuyu/processing_fuyu.py b/src/transformers/models/fuyu/processing_fuyu.py index f13869a146..54e5054efb 100644 --- a/src/transformers/models/fuyu/processing_fuyu.py +++ b/src/transformers/models/fuyu/processing_fuyu.py @@ -607,7 +607,7 @@ class FuyuProcessor(ProcessorMixin): vision_data = {} if image_sizes is not None: - size = kwargs.get("size", None) or self.image_processor.size + size = kwargs.get("size") or self.image_processor.size padded_height, padded_width = size["height"], size["width"] num_image_tokens = [] diff --git a/src/transformers/models/gemma3n/configuration_gemma3n.py b/src/transformers/models/gemma3n/configuration_gemma3n.py index 19013e7fdf..978df567c7 100644 --- a/src/transformers/models/gemma3n/configuration_gemma3n.py +++ b/src/transformers/models/gemma3n/configuration_gemma3n.py @@ -509,7 +509,7 @@ class Gemma3nVisionConfig(PretrainedConfig): @classmethod def from_dict(cls, config_dict: dict[str, Any], **kwargs): - label_names = config_dict.get("label_names", None) + label_names = config_dict.get("label_names") is_custom_model = "num_labels" in kwargs or "id2label" in kwargs # if no labels added to config, use imagenet labeller in timm diff --git a/src/transformers/models/git/modeling_git.py b/src/transformers/models/git/modeling_git.py index 3312e80c79..5a195b06ec 100644 --- a/src/transformers/models/git/modeling_git.py +++ b/src/transformers/models/git/modeling_git.py @@ -1451,7 +1451,7 @@ class GitForCausalLM(GitPreTrainedModel, GenerationMixin): return { "input_ids": input_ids, "attention_mask": attention_mask, - "pixel_values": kwargs.get("pixel_values", None), + "pixel_values": kwargs.get("pixel_values"), "past_key_values": past_key_values, "use_cache": use_cache, } diff --git a/src/transformers/models/glm4v/convert_glm4v_mgt_weights_to_hf.py b/src/transformers/models/glm4v/convert_glm4v_mgt_weights_to_hf.py index a1e09375dc..a9398805e9 100644 --- a/src/transformers/models/glm4v/convert_glm4v_mgt_weights_to_hf.py +++ b/src/transformers/models/glm4v/convert_glm4v_mgt_weights_to_hf.py @@ -232,7 +232,7 @@ def save_sharded_model(state_dict, output_path, max_shard_size_gb=5, num_layers= shard_filename = f"model-{i + 1:05d}-of-{len(shards):05d}.safetensors" shard_path = os.path.join(output_path, shard_filename) - for param_name in shard.keys(): + for param_name in shard: index_dict["weight_map"][param_name] = shard_filename save_file(shard, shard_path, metadata={"format": "pt"}) diff --git a/src/transformers/models/gpt2/modeling_gpt2.py b/src/transformers/models/gpt2/modeling_gpt2.py index c3155b17ea..7e1100d64e 100644 --- a/src/transformers/models/gpt2/modeling_gpt2.py +++ b/src/transformers/models/gpt2/modeling_gpt2.py @@ -718,7 +718,7 @@ class GPT2Model(GPT2PreTrainedModel): ) assert_device_map(self.device_map, len(self.h)) self.model_parallel = True - self.first_device = "cpu" if "cpu" in self.device_map.keys() else "cuda:" + str(min(self.device_map.keys())) + self.first_device = "cpu" if "cpu" in self.device_map else "cuda:" + str(min(self.device_map.keys())) self.last_device = "cuda:" + str(max(self.device_map.keys())) self.wte = self.wte.to(self.first_device) self.wpe = self.wpe.to(self.first_device) diff --git a/src/transformers/models/gpt2/modeling_tf_gpt2.py b/src/transformers/models/gpt2/modeling_tf_gpt2.py index 27b2e20c37..42e23fc290 100644 --- a/src/transformers/models/gpt2/modeling_tf_gpt2.py +++ b/src/transformers/models/gpt2/modeling_tf_gpt2.py @@ -845,15 +845,15 @@ class TFGPT2LMHeadModel(TFGPT2PreTrainedModel, TFCausalLanguageModelingLoss): self.set_input_embeddings(value) def prepare_inputs_for_generation(self, inputs, past_key_values=None, use_cache=None, **kwargs): - token_type_ids = kwargs.get("token_type_ids", None) + token_type_ids = kwargs.get("token_type_ids") # only last token for inputs_ids if past is defined in kwargs if past_key_values: inputs = tf.expand_dims(inputs[:, -1], -1) if token_type_ids is not None: token_type_ids = tf.expand_dims(token_type_ids[:, -1], -1) - position_ids = kwargs.get("position_ids", None) - attention_mask = kwargs.get("attention_mask", None) + position_ids = kwargs.get("position_ids") + attention_mask = kwargs.get("attention_mask") if attention_mask is not None and position_ids is None: position_ids = tf.math.cumsum(attention_mask, axis=-1, exclusive=True) diff --git a/src/transformers/models/gpt2/tokenization_gpt2_tf.py b/src/transformers/models/gpt2/tokenization_gpt2_tf.py index a1facd9570..145a45da0d 100644 --- a/src/transformers/models/gpt2/tokenization_gpt2_tf.py +++ b/src/transformers/models/gpt2/tokenization_gpt2_tf.py @@ -61,7 +61,7 @@ class TFGPT2Tokenizer(keras.layers.Layer): tf_tokenizer = TFGPT2Tokenizer.from_tokenizer(tokenizer) ``` """ - merges = [" ".join(m) for m in tokenizer.bpe_ranks.keys()] + merges = [" ".join(m) for m in tokenizer.bpe_ranks] vocab = tokenizer.get_vocab() return cls(vocab, merges, *args, **kwargs) diff --git a/src/transformers/models/gpt_neox_japanese/tokenization_gpt_neox_japanese.py b/src/transformers/models/gpt_neox_japanese/tokenization_gpt_neox_japanese.py index b65d9b53be..891f77ece3 100644 --- a/src/transformers/models/gpt_neox_japanese/tokenization_gpt_neox_japanese.py +++ b/src/transformers/models/gpt_neox_japanese/tokenization_gpt_neox_japanese.py @@ -221,7 +221,7 @@ class SubWordJapaneseTokenizer: self.vocab = vocab # same as swe self.ids_to_tokens = ids_to_tokens # same as bpe self.emoji = emoji - self.maxlen = np.max([len(w) for w in self.vocab.keys()]) + self.maxlen = np.max([len(w) for w in self.vocab]) self.content_repatter1 = re.compile(r"(https?|ftp)(:\/\/[-_\.!~*\'()a-zA-Z0-9;\/?:\@&=\+$,%#]+)") self.content_repatter2 = re.compile(r"[A-Za-z0-9\._+]*@[\-_0-9A-Za-z]+(\.[A-Za-z]+)*") self.content_repatter3 = re.compile(r"[\(]{0,1}[0-9]{2,4}[\)\-\(]{0,1}[0-9]{2,4}[\)\-]{0,1}[0-9]{3,4}") diff --git a/src/transformers/models/gpt_sw3/convert_megatron_to_pytorch.py b/src/transformers/models/gpt_sw3/convert_megatron_to_pytorch.py index c4e2ff67c5..27ec2f20d8 100644 --- a/src/transformers/models/gpt_sw3/convert_megatron_to_pytorch.py +++ b/src/transformers/models/gpt_sw3/convert_megatron_to_pytorch.py @@ -34,7 +34,7 @@ def recursive_print(name, val, spaces=0): if isinstance(val, dict): if msg is not None: print(msg) - for k in val.keys(): + for k in val: recursive_print(k, val[k], spaces + 2) elif isinstance(val, torch.Tensor): print(msg, ":", val.size()) diff --git a/src/transformers/models/gptj/modeling_gptj.py b/src/transformers/models/gptj/modeling_gptj.py index 0622bf5ed0..d95d83d2c9 100644 --- a/src/transformers/models/gptj/modeling_gptj.py +++ b/src/transformers/models/gptj/modeling_gptj.py @@ -581,7 +581,7 @@ class GPTJModel(GPTJPreTrainedModel): ) assert_device_map(self.device_map, len(self.h)) self.model_parallel = True - self.first_device = "cpu" if "cpu" in self.device_map.keys() else "cuda:" + str(min(self.device_map.keys())) + self.first_device = "cpu" if "cpu" in self.device_map else "cuda:" + str(min(self.device_map.keys())) self.last_device = "cuda:" + str(max(self.device_map.keys())) self.wte = self.wte.to(self.first_device) # Load onto devices diff --git a/src/transformers/models/gptj/modeling_tf_gptj.py b/src/transformers/models/gptj/modeling_tf_gptj.py index d9327bb50a..0ec3225822 100644 --- a/src/transformers/models/gptj/modeling_tf_gptj.py +++ b/src/transformers/models/gptj/modeling_tf_gptj.py @@ -752,15 +752,15 @@ class TFGPTJForCausalLM(TFGPTJPreTrainedModel, TFCausalLanguageModelingLoss): self.config = config def prepare_inputs_for_generation(self, inputs, past_key_values=None, use_cache=None, **kwargs): - token_type_ids = kwargs.get("token_type_ids", None) + token_type_ids = kwargs.get("token_type_ids") # only last token for inputs_ids if past is defined in kwargs if past_key_values: inputs = tf.expand_dims(inputs[:, -1], -1) if token_type_ids is not None: token_type_ids = tf.expand_dims(token_type_ids[:, -1], -1) - position_ids = kwargs.get("position_ids", None) - attention_mask = kwargs.get("attention_mask", None) + position_ids = kwargs.get("position_ids") + attention_mask = kwargs.get("attention_mask") if attention_mask is not None and position_ids is None: position_ids = tf.math.cumsum(attention_mask, axis=-1, exclusive=True) diff --git a/src/transformers/models/grounding_dino/convert_grounding_dino_to_hf.py b/src/transformers/models/grounding_dino/convert_grounding_dino_to_hf.py index 9f3f70bc57..b7358e2a01 100644 --- a/src/transformers/models/grounding_dino/convert_grounding_dino_to_hf.py +++ b/src/transformers/models/grounding_dino/convert_grounding_dino_to_hf.py @@ -239,7 +239,7 @@ def create_rename_keys(state_dict, config): ########################################## DECODER - END ########################################## Additional - START - for layer_name in state_dict.keys(): + for layer_name in state_dict: #### TEXT BACKBONE if "bert" in layer_name: rename_keys.append((layer_name, layer_name.replace("bert", "model.text_backbone"))) diff --git a/src/transformers/models/grounding_dino/image_processing_grounding_dino_fast.py b/src/transformers/models/grounding_dino/image_processing_grounding_dino_fast.py index 6505ddd896..ff291b3d67 100644 --- a/src/transformers/models/grounding_dino/image_processing_grounding_dino_fast.py +++ b/src/transformers/models/grounding_dino/image_processing_grounding_dino_fast.py @@ -349,8 +349,8 @@ class GroundingDinoImageProcessorFast(BaseImageProcessorFast): self.size = get_size_dict(size, max_size=max_size, default_to_square=False) # Backwards compatibility - do_convert_annotations = kwargs.get("do_convert_annotations", None) - do_normalize = kwargs.get("do_normalize", None) + do_convert_annotations = kwargs.get("do_convert_annotations") + do_normalize = kwargs.get("do_normalize") if do_convert_annotations is None and getattr(self, "do_convert_annotations", None) is None: self.do_convert_annotations = do_normalize if do_normalize is not None else self.do_normalize diff --git a/src/transformers/models/groupvit/convert_groupvit_nvlab_to_hf.py b/src/transformers/models/groupvit/convert_groupvit_nvlab_to_hf.py index 6bc2818498..ac6844bd34 100644 --- a/src/transformers/models/groupvit/convert_groupvit_nvlab_to_hf.py +++ b/src/transformers/models/groupvit/convert_groupvit_nvlab_to_hf.py @@ -85,7 +85,7 @@ def rename_key(name): def convert_state_dict(orig_state_dict, config): - for key in orig_state_dict.copy().keys(): + for key in orig_state_dict.copy(): val = orig_state_dict.pop(key) if "qkv" in key: diff --git a/src/transformers/models/hubert/modeling_tf_hubert.py b/src/transformers/models/hubert/modeling_tf_hubert.py index 0b7ed531db..d3b462692a 100644 --- a/src/transformers/models/hubert/modeling_tf_hubert.py +++ b/src/transformers/models/hubert/modeling_tf_hubert.py @@ -1276,7 +1276,7 @@ class TFHubertMainLayer(keras.layers.Layer): hidden_states = self.feature_projection(hidden_states, training=training) - mask_time_indices = kwargs.get("mask_time_indices", None) + mask_time_indices = kwargs.get("mask_time_indices") if training: hidden_states = self._mask_hidden_states(hidden_states, mask_time_indices=mask_time_indices) diff --git a/src/transformers/models/idefics/modeling_idefics.py b/src/transformers/models/idefics/modeling_idefics.py index ac8b7776c5..e47ddbe273 100644 --- a/src/transformers/models/idefics/modeling_idefics.py +++ b/src/transformers/models/idefics/modeling_idefics.py @@ -132,10 +132,10 @@ def expand_inputs_for_generation( torch.arange(input_ids.shape[0]).view(-1, 1).repeat(1, expand_size).view(-1).to(input_ids.device) ) input_ids = input_ids.index_select(0, expanded_return_idx) - model_kwargs["pixel_values"] = model_kwargs.get("pixel_values", None) - model_kwargs["image_encoder_embeddings"] = model_kwargs.get("image_encoder_embeddings", None) - model_kwargs["perceiver_embeddings"] = model_kwargs.get("perceiver_embeddings", None) - model_kwargs["image_attention_mask"] = model_kwargs.get("image_attention_mask", None) + model_kwargs["pixel_values"] = model_kwargs.get("pixel_values") + model_kwargs["image_encoder_embeddings"] = model_kwargs.get("image_encoder_embeddings") + model_kwargs["perceiver_embeddings"] = model_kwargs.get("perceiver_embeddings") + model_kwargs["image_attention_mask"] = model_kwargs.get("image_attention_mask") if "token_type_ids" in model_kwargs: token_type_ids = model_kwargs["token_type_ids"] diff --git a/src/transformers/models/idefics/modeling_tf_idefics.py b/src/transformers/models/idefics/modeling_tf_idefics.py index f006862284..28bffb7921 100644 --- a/src/transformers/models/idefics/modeling_tf_idefics.py +++ b/src/transformers/models/idefics/modeling_tf_idefics.py @@ -149,10 +149,10 @@ def expand_inputs_for_generation( ): expanded_return_idx = tf.reshape(tf.repeat(tf.range(tf.shape(input_ids)[0]), expand_size), [-1]) input_ids = tf.gather(input_ids, expanded_return_idx) - model_kwargs["pixel_values"] = model_kwargs.get("pixel_values", None) - model_kwargs["image_encoder_embeddings"] = model_kwargs.get("image_encoder_embeddings", None) - model_kwargs["perceiver_embeddings"] = model_kwargs.get("perceiver_embeddings", None) - model_kwargs["image_attention_mask"] = model_kwargs.get("image_attention_mask", None) + model_kwargs["pixel_values"] = model_kwargs.get("pixel_values") + model_kwargs["image_encoder_embeddings"] = model_kwargs.get("image_encoder_embeddings") + model_kwargs["perceiver_embeddings"] = model_kwargs.get("perceiver_embeddings") + model_kwargs["image_attention_mask"] = model_kwargs.get("image_attention_mask") if "token_type_ids" in model_kwargs: token_type_ids = model_kwargs["token_type_ids"] @@ -208,15 +208,15 @@ def update_model_kwargs_for_generation(outputs, model_kwargs): def prepare_inputs_for_generation(input_ids, past_key_values=None, **kwargs): - token_type_ids = kwargs.get("token_type_ids", None) + token_type_ids = kwargs.get("token_type_ids") # only last token for inputs_ids if past is defined in kwargs if past_key_values is not None: input_ids = input_ids[:, -1:] if token_type_ids is not None: token_type_ids = token_type_ids[:, -1:] - attention_mask = kwargs.get("attention_mask", None) - position_ids = kwargs.get("position_ids", None) + attention_mask = kwargs.get("attention_mask") + position_ids = kwargs.get("position_ids") if attention_mask is not None and position_ids is None: # create position_ids on the fly for batch generation @@ -225,10 +225,10 @@ def prepare_inputs_for_generation(input_ids, past_key_values=None, **kwargs): if past_key_values is not None: position_ids = position_ids[:, -1:] - pixel_values = kwargs.get("pixel_values", None) - image_encoder_embeddings = kwargs.get("image_encoder_embeddings", None) - perceiver_embeddings = kwargs.get("perceiver_embeddings", None) - image_attention_mask = kwargs.get("image_attention_mask", None) + pixel_values = kwargs.get("pixel_values") + image_encoder_embeddings = kwargs.get("image_encoder_embeddings") + perceiver_embeddings = kwargs.get("perceiver_embeddings") + image_attention_mask = kwargs.get("image_attention_mask") interpolate_pos_encoding = kwargs.get("interpolate_pos_encoding", False) return { diff --git a/src/transformers/models/internvl/convert_internvl_weights_to_hf.py b/src/transformers/models/internvl/convert_internvl_weights_to_hf.py index a390166a04..e20fcf4f36 100644 --- a/src/transformers/models/internvl/convert_internvl_weights_to_hf.py +++ b/src/transformers/models/internvl/convert_internvl_weights_to_hf.py @@ -128,7 +128,7 @@ def get_lm_type(path: str) -> Literal["qwen2", "llama"]: """ Determine the type of language model (either 'qwen2' or 'llama') based on a given model path. """ - if path not in LM_TYPE_CORRESPONDENCE.keys(): + if path not in LM_TYPE_CORRESPONDENCE: base_config = AutoModel.from_pretrained(path, trust_remote_code=True).config lm_arch = base_config.llm_config.architectures[0] diff --git a/src/transformers/models/layoutlmv2/tokenization_layoutlmv2_fast.py b/src/transformers/models/layoutlmv2/tokenization_layoutlmv2_fast.py index a7c1502058..8e324ee0b8 100644 --- a/src/transformers/models/layoutlmv2/tokenization_layoutlmv2_fast.py +++ b/src/transformers/models/layoutlmv2/tokenization_layoutlmv2_fast.py @@ -519,7 +519,7 @@ class LayoutLMv2TokenizerFast(PreTrainedTokenizerFast): # To match each overflowing sample with the original sample in the batch # we add an overflow_to_sample_mapping array (see below) sanitized_tokens = {} - for key in tokens_and_encodings[0][0].keys(): + for key in tokens_and_encodings[0][0]: stack = [e for item, _ in tokens_and_encodings for e in item[key]] sanitized_tokens[key] = stack sanitized_encodings = [e for _, item in tokens_and_encodings for e in item] diff --git a/src/transformers/models/layoutlmv3/tokenization_layoutlmv3_fast.py b/src/transformers/models/layoutlmv3/tokenization_layoutlmv3_fast.py index aa2fb67b13..d040763859 100644 --- a/src/transformers/models/layoutlmv3/tokenization_layoutlmv3_fast.py +++ b/src/transformers/models/layoutlmv3/tokenization_layoutlmv3_fast.py @@ -566,7 +566,7 @@ class LayoutLMv3TokenizerFast(PreTrainedTokenizerFast): # To match each overflowing sample with the original sample in the batch # we add an overflow_to_sample_mapping array (see below) sanitized_tokens = {} - for key in tokens_and_encodings[0][0].keys(): + for key in tokens_and_encodings[0][0]: stack = [e for item, _ in tokens_and_encodings for e in item[key]] sanitized_tokens[key] = stack sanitized_encodings = [e for _, item in tokens_and_encodings for e in item] diff --git a/src/transformers/models/layoutxlm/tokenization_layoutxlm_fast.py b/src/transformers/models/layoutxlm/tokenization_layoutxlm_fast.py index cca7a805fe..6710c6c8cb 100644 --- a/src/transformers/models/layoutxlm/tokenization_layoutxlm_fast.py +++ b/src/transformers/models/layoutxlm/tokenization_layoutxlm_fast.py @@ -503,7 +503,7 @@ class LayoutXLMTokenizerFast(PreTrainedTokenizerFast): # To match each overflowing sample with the original sample in the batch # we add an overflow_to_sample_mapping array (see below) sanitized_tokens = {} - for key in tokens_and_encodings[0][0].keys(): + for key in tokens_and_encodings[0][0]: stack = [e for item, _ in tokens_and_encodings for e in item[key]] sanitized_tokens[key] = stack sanitized_encodings = [e for _, item in tokens_and_encodings for e in item] diff --git a/src/transformers/models/luke/tokenization_luke.py b/src/transformers/models/luke/tokenization_luke.py index 8b33055877..efbc757e86 100644 --- a/src/transformers/models/luke/tokenization_luke.py +++ b/src/transformers/models/luke/tokenization_luke.py @@ -1450,7 +1450,7 @@ class LukeTokenizer(PreTrainedTokenizer): # If we have a list of dicts, let's convert it in a dict of lists # We do this to allow using this method as a collate_fn function in PyTorch Dataloader if isinstance(encoded_inputs, (list, tuple)) and isinstance(encoded_inputs[0], Mapping): - encoded_inputs = {key: [example[key] for example in encoded_inputs] for key in encoded_inputs[0].keys()} + encoded_inputs = {key: [example[key] for example in encoded_inputs] for key in encoded_inputs[0]} # The model's main input name, usually `input_ids`, has be passed for padding if self.model_input_names[0] not in encoded_inputs: diff --git a/src/transformers/models/markuplm/tokenization_markuplm_fast.py b/src/transformers/models/markuplm/tokenization_markuplm_fast.py index f844893641..4033ef319f 100644 --- a/src/transformers/models/markuplm/tokenization_markuplm_fast.py +++ b/src/transformers/models/markuplm/tokenization_markuplm_fast.py @@ -625,7 +625,7 @@ class MarkupLMTokenizerFast(PreTrainedTokenizerFast): # To match each overflowing sample with the original sample in the batch # we add an overflow_to_sample_mapping array (see below) sanitized_tokens = {} - for key in tokens_and_encodings[0][0].keys(): + for key in tokens_and_encodings[0][0]: stack = [e for item, _ in tokens_and_encodings for e in item[key]] sanitized_tokens[key] = stack sanitized_encodings = [e for _, item in tokens_and_encodings for e in item] diff --git a/src/transformers/models/mask2former/convert_mask2former_original_pytorch_checkpoint_to_pytorch.py b/src/transformers/models/mask2former/convert_mask2former_original_pytorch_checkpoint_to_pytorch.py index 09acc3b97a..33cba259ee 100644 --- a/src/transformers/models/mask2former/convert_mask2former_original_pytorch_checkpoint_to_pytorch.py +++ b/src/transformers/models/mask2former/convert_mask2former_original_pytorch_checkpoint_to_pytorch.py @@ -804,7 +804,7 @@ class OriginalMask2FormerCheckpointToOursConverter: logger.info(f"Not copied keys are {pformat(src_state_dict.keys())}") logger.info("🙌 Done") - state_dict = {key: dst_state_dict[key] for key in dst_state_dict.to_track.keys()} + state_dict = {key: dst_state_dict[key] for key in dst_state_dict.to_track} mask2former.load_state_dict(state_dict) return mask2former @@ -816,7 +816,7 @@ class OriginalMask2FormerCheckpointToOursConverter: self.replace_universal_segmentation_module(dst_state_dict, src_state_dict) - state_dict = {key: dst_state_dict[key] for key in dst_state_dict.to_track.keys()} + state_dict = {key: dst_state_dict[key] for key in dst_state_dict.to_track} mask2former.load_state_dict(state_dict) return mask2former diff --git a/src/transformers/models/megatron_bert/convert_megatron_bert_checkpoint.py b/src/transformers/models/megatron_bert/convert_megatron_bert_checkpoint.py index cd29b7ad78..69ebed5aa8 100644 --- a/src/transformers/models/megatron_bert/convert_megatron_bert_checkpoint.py +++ b/src/transformers/models/megatron_bert/convert_megatron_bert_checkpoint.py @@ -57,7 +57,7 @@ def recursive_print(name, val, spaces=0): if isinstance(val, dict): if msg is not None: print(msg) - for k in val.keys(): + for k in val: recursive_print(k, val[k], spaces + 2) elif isinstance(val, torch.Tensor): print(msg, ":", val.size()) @@ -116,7 +116,7 @@ def convert_megatron_checkpoint(args, input_state_dict, config): # The hidden_size per head. hidden_size_per_head = config.hidden_size // heads # Megatron-LM checkpoint version - if "checkpoint_version" in input_state_dict.keys(): + if "checkpoint_version" in input_state_dict: checkpoint_version = input_state_dict["checkpoint_version"] else: checkpoint_version = 0.0 @@ -147,7 +147,7 @@ def convert_megatron_checkpoint(args, input_state_dict, config): output_state_dict["bert.embeddings.token_type_embeddings.weight"] = tokentype_embeddings # The transformer. - transformer = lm["transformer"] if "transformer" in lm.keys() else lm["encoder"] + transformer = lm["transformer"] if "transformer" in lm else lm["encoder"] # The regex to extract layer names. layer_re = re.compile(r"layers\.(\d+)\.([a-z0-9_.]+)\.([a-z]+)") diff --git a/src/transformers/models/megatron_gpt2/checkpoint_reshaping_and_interoperability.py b/src/transformers/models/megatron_gpt2/checkpoint_reshaping_and_interoperability.py index 2e50a3b6b6..ab866834a2 100644 --- a/src/transformers/models/megatron_gpt2/checkpoint_reshaping_and_interoperability.py +++ b/src/transformers/models/megatron_gpt2/checkpoint_reshaping_and_interoperability.py @@ -189,7 +189,7 @@ def recursive_print(name, val, spaces=0): if isinstance(val, dict): if msg is not None: print(msg) - for k in val.keys(): + for k in val: recursive_print(k, val[k], spaces + 2) elif isinstance(val, torch.Tensor): print(msg, ":", val.size()) @@ -448,7 +448,7 @@ def convert_checkpoint_from_megatron_to_transformers(args): # The transformer. path = ( "model.language_model.transformer" - if "transformer" in get_element_from_dict_by_path(tp_state_dicts[0], "model.language_model").keys() + if "transformer" in get_element_from_dict_by_path(tp_state_dicts[0], "model.language_model") else "model.language_model.encoder" ) # Extract the layers. @@ -793,9 +793,7 @@ def convert_checkpoint_from_transformers_to_megatron(args): for layer in range(num_layers): pp_layer_id = layer + layer_offset layers_to_copy = [ - layer_name - for layer_name in state_dict.keys() - if layer_name.startswith(f"transformer.h.{pp_layer_id}.") + layer_name for layer_name in state_dict if layer_name.startswith(f"transformer.h.{pp_layer_id}.") ] for layer_name in layers_to_copy: @@ -844,7 +842,7 @@ def convert_checkpoint_from_transformers_to_megatron(args): # handle attention and mlp weights elif weight_or_bias == "weight": - out_name = transformers_to_megatron.get(op_name, None) + out_name = transformers_to_megatron.get(op_name) if out_name is None: continue params = params.transpose(0, 1) @@ -852,7 +850,7 @@ def convert_checkpoint_from_transformers_to_megatron(args): # handle attention and mlp bias elif weight_or_bias == "bias": - out_name = transformers_to_megatron.get(op_name, None) + out_name = transformers_to_megatron.get(op_name) if out_name is None: continue layer_name = f"layers.{layer}.{out_name}.{weight_or_bias}" diff --git a/src/transformers/models/megatron_gpt2/convert_megatron_gpt2_checkpoint.py b/src/transformers/models/megatron_gpt2/convert_megatron_gpt2_checkpoint.py index c0054fab3f..d1953f50ba 100644 --- a/src/transformers/models/megatron_gpt2/convert_megatron_gpt2_checkpoint.py +++ b/src/transformers/models/megatron_gpt2/convert_megatron_gpt2_checkpoint.py @@ -57,7 +57,7 @@ def recursive_print(name, val, spaces=0): if isinstance(val, dict): if msg is not None: print(msg) - for k in val.keys(): + for k in val: recursive_print(k, val[k], spaces + 2) elif isinstance(val, torch.Tensor): print(msg, ":", val.size()) @@ -115,7 +115,7 @@ def convert_megatron_checkpoint(args, input_state_dict, config): # The hidden_size per head. hidden_size_per_head = config.n_embd // config.n_head # Megatron-LM checkpoint version - if "checkpoint_version" in input_state_dict.keys(): + if "checkpoint_version" in input_state_dict: checkpoint_version = input_state_dict["checkpoint_version"] else: checkpoint_version = 0.0 @@ -145,7 +145,7 @@ def convert_megatron_checkpoint(args, input_state_dict, config): output_state_dict["transformer.wpe.weight"] = pos_embeddings # The transformer. - transformer = lm["transformer"] if "transformer" in lm.keys() else lm["encoder"] + transformer = lm["transformer"] if "transformer" in lm else lm["encoder"] # The regex to extract layer names. layer_re = re.compile(r"layers\.(\d+)\.([a-z0-9_.]+)\.([a-z0-9_]+)") diff --git a/src/transformers/models/mistral/convert_mistral_weights_to_hf.py b/src/transformers/models/mistral/convert_mistral_weights_to_hf.py index 0f9cf597d5..a790fed81d 100644 --- a/src/transformers/models/mistral/convert_mistral_weights_to_hf.py +++ b/src/transformers/models/mistral/convert_mistral_weights_to_hf.py @@ -176,7 +176,7 @@ def convert_config(original_config: dict, max_position_embeddings: int = 32768): new_config_kwargs.update({k: v for k, v in original_config.items() if k in similar_keys_to_keep}) # These are not always defined depending on `params.json` - new_config_kwargs["sliding_window"] = original_config.get("sliding_window", None) + new_config_kwargs["sliding_window"] = original_config.get("sliding_window") new_config_kwargs["num_key_value_heads"] = original_config.get( "n_kv_heads", new_config_kwargs["num_attention_heads"] ) diff --git a/src/transformers/models/mistral/modeling_tf_mistral.py b/src/transformers/models/mistral/modeling_tf_mistral.py index b9afd8ab26..30ca8d5769 100644 --- a/src/transformers/models/mistral/modeling_tf_mistral.py +++ b/src/transformers/models/mistral/modeling_tf_mistral.py @@ -880,7 +880,7 @@ class TFMistralForCausalLM(TFMistralPreTrainedModel, TFCausalLanguageModelingLos if past_key_values: input_ids = tf.expand_dims(input_ids[:, -1], -1) - position_ids = kwargs.get("position_ids", None) + position_ids = kwargs.get("position_ids") if attention_mask is not None and position_ids is None: position_ids = tf.math.cumsum(attention_mask, axis=-1, exclusive=True) if past_key_values: diff --git a/src/transformers/models/mluke/convert_mluke_original_pytorch_checkpoint_to_pytorch.py b/src/transformers/models/mluke/convert_mluke_original_pytorch_checkpoint_to_pytorch.py index 1edcae80f6..be0f52a70e 100644 --- a/src/transformers/models/mluke/convert_mluke_original_pytorch_checkpoint_to_pytorch.py +++ b/src/transformers/models/mluke/convert_mluke_original_pytorch_checkpoint_to_pytorch.py @@ -100,7 +100,7 @@ def convert_luke_checkpoint(checkpoint_path, metadata_path, entity_vocab_path, p state_dict.pop("lm_head.decoder.weight") state_dict.pop("lm_head.decoder.bias") state_dict_for_hugging_face = OrderedDict() - for key in state_dict.keys(): + for key in state_dict: if not (key.startswith("lm_head") or key.startswith("entity_predictions")): state_dict_for_hugging_face[f"luke.{key}"] = state_dict[key] else: diff --git a/src/transformers/models/mluke/tokenization_mluke.py b/src/transformers/models/mluke/tokenization_mluke.py index f858eca12d..15f4db5328 100644 --- a/src/transformers/models/mluke/tokenization_mluke.py +++ b/src/transformers/models/mluke/tokenization_mluke.py @@ -1288,7 +1288,7 @@ class MLukeTokenizer(PreTrainedTokenizer): # If we have a list of dicts, let's convert it in a dict of lists # We do this to allow using this method as a collate_fn function in PyTorch Dataloader if isinstance(encoded_inputs, (list, tuple)) and isinstance(encoded_inputs[0], Mapping): - encoded_inputs = {key: [example[key] for example in encoded_inputs] for key in encoded_inputs[0].keys()} + encoded_inputs = {key: [example[key] for example in encoded_inputs] for key in encoded_inputs[0]} # The model's main input name, usually `input_ids`, has be passed for padding if self.model_input_names[0] not in encoded_inputs: diff --git a/src/transformers/models/mobilevit/convert_mlcvnets_to_pytorch.py b/src/transformers/models/mobilevit/convert_mlcvnets_to_pytorch.py index 7dc6dfa288..a8159b446f 100644 --- a/src/transformers/models/mobilevit/convert_mlcvnets_to_pytorch.py +++ b/src/transformers/models/mobilevit/convert_mlcvnets_to_pytorch.py @@ -155,7 +155,7 @@ def convert_state_dict(orig_state_dict, model, base_model=False): else: model_prefix = "mobilevit." - for key in orig_state_dict.copy().keys(): + for key in orig_state_dict.copy(): val = orig_state_dict.pop(key) if key[:8] == "encoder.": diff --git a/src/transformers/models/mobilevitv2/convert_mlcvnets_to_pytorch.py b/src/transformers/models/mobilevitv2/convert_mlcvnets_to_pytorch.py index 1fc473e093..8d462c7dd4 100644 --- a/src/transformers/models/mobilevitv2/convert_mlcvnets_to_pytorch.py +++ b/src/transformers/models/mobilevitv2/convert_mlcvnets_to_pytorch.py @@ -134,7 +134,7 @@ def create_rename_keys(state_dict, base_model=False): model_prefix = "mobilevitv2." rename_keys = [] - for k in state_dict.keys(): + for k in state_dict: if k[:8] == "encoder.": k_new = k[8:] else: @@ -216,7 +216,7 @@ def create_rename_keys(state_dict, base_model=False): def remove_unused_keys(state_dict): """remove unused keys (e.g.: seg_head.aux_head)""" keys_to_ignore = [] - for k in state_dict.keys(): + for k in state_dict: if k.startswith("seg_head.aux_head."): keys_to_ignore.append(k) for k in keys_to_ignore: diff --git a/src/transformers/models/moonshine/convert_usefulsensors_to_hf.py b/src/transformers/models/moonshine/convert_usefulsensors_to_hf.py index 39d8df0f3f..f29da8c8e2 100644 --- a/src/transformers/models/moonshine/convert_usefulsensors_to_hf.py +++ b/src/transformers/models/moonshine/convert_usefulsensors_to_hf.py @@ -33,7 +33,7 @@ def _get_weights(model_name): def _read_h5_weights(group, current_key="", weights={}): - for key in group.keys(): + for key in group: full_key = f"{current_key}.{key}" if current_key else key if isinstance(group[key], h5py.Dataset): w = np.array(group[key]) diff --git a/src/transformers/models/mra/convert_mra_pytorch_to_pytorch.py b/src/transformers/models/mra/convert_mra_pytorch_to_pytorch.py index cdee89efda..6330e2fe92 100644 --- a/src/transformers/models/mra/convert_mra_pytorch_to_pytorch.py +++ b/src/transformers/models/mra/convert_mra_pytorch_to_pytorch.py @@ -62,7 +62,7 @@ def rename_key(orig_key): def convert_checkpoint_helper(max_position_embeddings, orig_state_dict): - for key in orig_state_dict.copy().keys(): + for key in orig_state_dict.copy(): val = orig_state_dict.pop(key) if ("pooler" in key) or ("sen_class" in key): diff --git a/src/transformers/models/mt5/modeling_mt5.py b/src/transformers/models/mt5/modeling_mt5.py index 071010abf1..0a414cdf11 100644 --- a/src/transformers/models/mt5/modeling_mt5.py +++ b/src/transformers/models/mt5/modeling_mt5.py @@ -898,7 +898,7 @@ class MT5Stack(MT5PreTrainedModel): ) assert_device_map(self.device_map, len(self.block)) self.model_parallel = True - self.first_device = "cpu" if "cpu" in self.device_map.keys() else "cuda:" + str(min(self.device_map.keys())) + self.first_device = "cpu" if "cpu" in self.device_map else "cuda:" + str(min(self.device_map.keys())) self.last_device = "cuda:" + str(max(self.device_map.keys())) # Load onto devices for k, v in self.device_map.items(): diff --git a/src/transformers/models/musicgen/modeling_musicgen.py b/src/transformers/models/musicgen/modeling_musicgen.py index 91c505c636..bc0dc7122b 100644 --- a/src/transformers/models/musicgen/modeling_musicgen.py +++ b/src/transformers/models/musicgen/modeling_musicgen.py @@ -1589,11 +1589,11 @@ class MusicgenForConditionalGeneration(MusicgenPreTrainedModel, GenerationMixin) } # remove text encoder, audio encoder and decoder kwargs from kwargs - for key in kwargs_text_encoder.keys(): + for key in kwargs_text_encoder: del kwargs["text_encoder_" + key] - for key in kwargs_audio_encoder.keys(): + for key in kwargs_audio_encoder: del kwargs["audio_encoder_" + key] - for key in kwargs_decoder.keys(): + for key in kwargs_decoder: del kwargs["decoder_" + key] # Load and initialize the encoder and decoder diff --git a/src/transformers/models/musicgen_melody/modeling_musicgen_melody.py b/src/transformers/models/musicgen_melody/modeling_musicgen_melody.py index 23c5314c54..aca49d8e83 100644 --- a/src/transformers/models/musicgen_melody/modeling_musicgen_melody.py +++ b/src/transformers/models/musicgen_melody/modeling_musicgen_melody.py @@ -1472,11 +1472,11 @@ class MusicgenMelodyForConditionalGeneration(PreTrainedModel, GenerationMixin): } # remove text encoder, audio encoder and decoder kwargs from kwargs - for key in kwargs_text_encoder.keys(): + for key in kwargs_text_encoder: del kwargs["text_encoder_" + key] - for key in kwargs_audio_encoder.keys(): + for key in kwargs_audio_encoder: del kwargs["audio_encoder_" + key] - for key in kwargs_decoder.keys(): + for key in kwargs_decoder: del kwargs["decoder_" + key] # Load and initialize the encoder and decoder diff --git a/src/transformers/models/nllb_moe/convert_nllb_moe_sharded_original_checkpoint_to_pytorch.py b/src/transformers/models/nllb_moe/convert_nllb_moe_sharded_original_checkpoint_to_pytorch.py index 317c5c713c..ef2e3d0d90 100644 --- a/src/transformers/models/nllb_moe/convert_nllb_moe_sharded_original_checkpoint_to_pytorch.py +++ b/src/transformers/models/nllb_moe/convert_nllb_moe_sharded_original_checkpoint_to_pytorch.py @@ -46,7 +46,7 @@ def make_linear_from_emb(emb): def rename_fairseq_keys(state_dict, expert_idx=None): new_dict = {} - for old_key in state_dict.keys(): + for old_key in state_dict: key = old_key if "moe_layer.experts." in key: if expert_idx is not None: diff --git a/src/transformers/models/nougat/convert_nougat_to_hf.py b/src/transformers/models/nougat/convert_nougat_to_hf.py index e42f8553ac..d8096ad864 100644 --- a/src/transformers/models/nougat/convert_nougat_to_hf.py +++ b/src/transformers/models/nougat/convert_nougat_to_hf.py @@ -100,7 +100,7 @@ def rename_key(name): # Copied from transformers.models.donut.convert_donut_to_pytorch.convert_state_dict def convert_state_dict(orig_state_dict, model): - for key in orig_state_dict.copy().keys(): + for key in orig_state_dict.copy(): val = orig_state_dict.pop(key) if "qkv" in key: diff --git a/src/transformers/models/nystromformer/convert_nystromformer_original_pytorch_checkpoint_to_pytorch.py b/src/transformers/models/nystromformer/convert_nystromformer_original_pytorch_checkpoint_to_pytorch.py index 7be4869181..934a23e010 100644 --- a/src/transformers/models/nystromformer/convert_nystromformer_original_pytorch_checkpoint_to_pytorch.py +++ b/src/transformers/models/nystromformer/convert_nystromformer_original_pytorch_checkpoint_to_pytorch.py @@ -61,7 +61,7 @@ def rename_key(orig_key): def convert_checkpoint_helper(config, orig_state_dict): - for key in orig_state_dict.copy().keys(): + for key in orig_state_dict.copy(): val = orig_state_dict.pop(key) if ("pooler" in key) or ("sen_class" in key) or ("conv.bias" in key): diff --git a/src/transformers/models/omdet_turbo/convert_omdet_turbo_to_hf.py b/src/transformers/models/omdet_turbo/convert_omdet_turbo_to_hf.py index e4e31e4d8a..da358d7119 100644 --- a/src/transformers/models/omdet_turbo/convert_omdet_turbo_to_hf.py +++ b/src/transformers/models/omdet_turbo/convert_omdet_turbo_to_hf.py @@ -66,7 +66,7 @@ def create_rename_keys_vision(state_dict, config): rename_keys = [] # fmt: off ########################################## VISION BACKBONE - START - for layer_name in state_dict.keys(): + for layer_name in state_dict: if layer_name.startswith("backbone") and not layer_name.startswith("backbone.norm"): if config.use_timm_backbone: layer_name_replace = layer_name.replace("backbone", "vision_backbone.vision_backbone._backbone") @@ -100,7 +100,7 @@ def create_rename_keys_vision(state_dict, config): ########################################## VISION BACKBONE - END ########################################## ENCODER - START - for layer_name in state_dict.keys(): + for layer_name in state_dict: if "neck" in layer_name: layer_name_replace = layer_name.replace("neck", "encoder") layer_name_replace = layer_name_replace.replace("input_proj", "channel_projection_layers") @@ -117,7 +117,7 @@ def create_rename_keys_vision(state_dict, config): ########################################## ENCODER - END ########################################## DECODER - START - for layer_name in state_dict.keys(): + for layer_name in state_dict: if layer_name.startswith("decoder"): layer_name_replace = layer_name.replace("decoder.decoder.layers", "decoder.layers") layer_name_replace = layer_name_replace.replace("input_proj", "channel_projection_layers") @@ -136,7 +136,7 @@ def create_rename_keys_vision(state_dict, config): def create_rename_keys_language(state_dict): rename_keys = [] # fmt: off - for layer_name in state_dict.keys(): + for layer_name in state_dict: if layer_name.startswith("language_backbone") and not layer_name.startswith("language_backbone.text_projection"): layer_name_replace = layer_name.replace("language_backbone", "language_backbone.model.text_model") layer_name_replace = layer_name_replace.replace("transformer.resblocks", "encoder.layers") diff --git a/src/transformers/models/opt/convert_opt_original_pytorch_checkpoint_to_pytorch.py b/src/transformers/models/opt/convert_opt_original_pytorch_checkpoint_to_pytorch.py index 9a9b0c306c..98731ed212 100644 --- a/src/transformers/models/opt/convert_opt_original_pytorch_checkpoint_to_pytorch.py +++ b/src/transformers/models/opt/convert_opt_original_pytorch_checkpoint_to_pytorch.py @@ -30,7 +30,7 @@ logger = logging.get_logger(__name__) def load_checkpoint(checkpoint_path): """Checkpoint path should end in model.pt""" sd = torch.load(checkpoint_path, map_location="cpu", weights_only=True) - if "model" in sd.keys(): + if "model" in sd: sd = torch.load(checkpoint_path, map_location="cpu", weights_only=True)["model"] # pop unnecessary weights diff --git a/src/transformers/models/opt/modeling_tf_opt.py b/src/transformers/models/opt/modeling_tf_opt.py index b7c817a034..f996256063 100644 --- a/src/transformers/models/opt/modeling_tf_opt.py +++ b/src/transformers/models/opt/modeling_tf_opt.py @@ -934,7 +934,7 @@ class TFOPTForCausalLM(TFOPTPreTrainedModel, TFCausalLanguageModelingLoss): return self.model.get_input_embeddings() def prepare_inputs_for_generation(self, inputs, past_key_values=None, use_cache=None, **kwargs): - attention_mask = kwargs.get("attention_mask", None) + attention_mask = kwargs.get("attention_mask") # only last token for inputs_ids if past is defined in kwargs if past_key_values: diff --git a/src/transformers/models/owlvit/convert_owlvit_original_flax_to_hf.py b/src/transformers/models/owlvit/convert_owlvit_original_flax_to_hf.py index 49ce578d00..ea766c366f 100644 --- a/src/transformers/models/owlvit/convert_owlvit_original_flax_to_hf.py +++ b/src/transformers/models/owlvit/convert_owlvit_original_flax_to_hf.py @@ -220,7 +220,7 @@ def copy_class_box_heads(hf_model, flax_params): # Copy flax params to PyTorch params for name, param in new_params.items(): - if name in pt_params.keys(): + if name in pt_params: pt_params[name].copy_(param) @@ -313,7 +313,7 @@ def convert_clip_backbone(flax_params, torch_config): # Copy flax CLIP backbone params to PyTorch params for name, param in new_torch_params.items(): - if name in torch_clip_params.keys(): + if name in torch_clip_params: new_param = torch.from_numpy(param) torch_clip_params[name].copy_(new_param) else: diff --git a/src/transformers/models/paligemma/convert_paligemma2_weights_to_hf.py b/src/transformers/models/paligemma/convert_paligemma2_weights_to_hf.py index 62619ea1d3..630ff7b0b0 100644 --- a/src/transformers/models/paligemma/convert_paligemma2_weights_to_hf.py +++ b/src/transformers/models/paligemma/convert_paligemma2_weights_to_hf.py @@ -263,7 +263,7 @@ def slice_state_dict(state_dict, config): state_dict[f"language_model.model.layers.{i}.post_feedforward_layernorm.weight"] = llm_post_feedforward_layernorm[i] state_dict["language_model.model.norm.weight"] = state_dict.pop("llm/final_norm/scale") state_dict["language_model.lm_head.weight"] = embedding_vector # weights are tied. - [k for k in state_dict.keys() if not k.startswith('vision') and not k.startswith('language')] + [k for k in state_dict if not k.startswith('vision') and not k.startswith('language')] # fmt: on for key, value in state_dict.items(): if not isinstance(value, torch.Tensor): diff --git a/src/transformers/models/perceiver/modeling_perceiver.py b/src/transformers/models/perceiver/modeling_perceiver.py index bd3a2d9e8c..d07fa49ddc 100755 --- a/src/transformers/models/perceiver/modeling_perceiver.py +++ b/src/transformers/models/perceiver/modeling_perceiver.py @@ -577,7 +577,7 @@ class PerceiverPreTrainedModel(PreTrainedModel): elif hasattr(module, "position_embeddings") and isinstance(module, PerceiverTrainablePositionEncoding): module.position_embeddings.data.normal_(mean=0.0, std=self.config.initializer_range) elif isinstance(module, nn.ParameterDict): - for modality in module.keys(): + for modality in module: module[modality].data.normal_(mean=0.0, std=self.config.initializer_range) elif isinstance(module, nn.Embedding): module.weight.data.normal_(mean=0.0, std=self.config.initializer_range) diff --git a/src/transformers/models/perception_lm/image_processing_perception_lm_fast.py b/src/transformers/models/perception_lm/image_processing_perception_lm_fast.py index b775f2f0fd..919a120312 100644 --- a/src/transformers/models/perception_lm/image_processing_perception_lm_fast.py +++ b/src/transformers/models/perception_lm/image_processing_perception_lm_fast.py @@ -218,7 +218,7 @@ class PerceptionLMImageProcessorFast(BaseImageProcessorFast): closest_aspect_ratio = None if target_aspect_ratio >= 1: closest_aspect_ratio = min( - [k for k in asp_dict.keys() if k <= target_aspect_ratio], + [k for k in asp_dict if k <= target_aspect_ratio], key=lambda x: abs(x - target_aspect_ratio), ) tiles_given_aspect_ratio = asp_dict[closest_aspect_ratio] @@ -226,7 +226,7 @@ class PerceptionLMImageProcessorFast(BaseImageProcessorFast): return max(tiles_given_aspect_ratio, key=lambda x: x[0]) else: closest_aspect_ratio = min( - [k for k in asp_dict.keys() if k > target_aspect_ratio], + [k for k in asp_dict if k > target_aspect_ratio], key=lambda x: abs(1 / x - 1 / target_aspect_ratio), ) tiles_given_aspect_ratio = asp_dict[closest_aspect_ratio] diff --git a/src/transformers/models/phi/convert_phi_weights_to_hf.py b/src/transformers/models/phi/convert_phi_weights_to_hf.py index 0974233661..bbaa9b4c0c 100644 --- a/src/transformers/models/phi/convert_phi_weights_to_hf.py +++ b/src/transformers/models/phi/convert_phi_weights_to_hf.py @@ -105,7 +105,7 @@ def _download(url: str, root: str): def convert_phi_weights( model_name, checkpoint_path, pytorch_dump_folder_path, use_cuda, save_weights_directly, _MODELS ): - _MODELS = _MODELS if model_name not in _MODELS.keys() else {model_name: _MODELS.get(model_name)} + _MODELS = _MODELS if model_name not in _MODELS else {model_name: _MODELS.get(model_name)} device = "cuda" if torch.cuda.is_available() and use_cuda else "cpu" for model_name, model_url in _MODELS.items(): converted_checkpoint = {} diff --git a/src/transformers/models/pix2struct/convert_pix2struct_original_pytorch_to_hf.py b/src/transformers/models/pix2struct/convert_pix2struct_original_pytorch_to_hf.py index f19d864f46..bf3bf2b723 100644 --- a/src/transformers/models/pix2struct/convert_pix2struct_original_pytorch_to_hf.py +++ b/src/transformers/models/pix2struct/convert_pix2struct_original_pytorch_to_hf.py @@ -67,7 +67,7 @@ def rename_and_convert_flax_params(flax_dict): "decoder.logits_dense.weight": "decoder.lm_head.weight", } - for key in flax_dict.keys(): + for key in flax_dict: if "target" in key: # remove the first prefix from the key new_key = ".".join(key[1:]) diff --git a/src/transformers/models/pix2struct/image_processing_pix2struct.py b/src/transformers/models/pix2struct/image_processing_pix2struct.py index e434c01fe1..06dadb8a9f 100644 --- a/src/transformers/models/pix2struct/image_processing_pix2struct.py +++ b/src/transformers/models/pix2struct/image_processing_pix2struct.py @@ -404,7 +404,7 @@ class Pix2StructImageProcessor(BaseImageProcessor): max_patches = max_patches if max_patches is not None else self.max_patches is_vqa = self.is_vqa - if kwargs.get("data_format", None) is not None: + if kwargs.get("data_format") is not None: raise ValueError("data_format is not an accepted input as the outputs are ") images = make_list_of_images(images) diff --git a/src/transformers/models/plbart/tokenization_plbart.py b/src/transformers/models/plbart/tokenization_plbart.py index 7adeb11d6a..821c4c8172 100644 --- a/src/transformers/models/plbart/tokenization_plbart.py +++ b/src/transformers/models/plbart/tokenization_plbart.py @@ -425,7 +425,7 @@ class PLBartTokenizer(PreTrainedTokenizer): def _convert_lang_code_special_format(self, lang: str) -> str: """Convert Language Codes to format tokenizer uses if required""" - lang = FAIRSEQ_LANGUAGE_CODES_MAP[lang] if lang in FAIRSEQ_LANGUAGE_CODES_MAP.keys() else lang + lang = FAIRSEQ_LANGUAGE_CODES_MAP[lang] if lang in FAIRSEQ_LANGUAGE_CODES_MAP else lang return lang diff --git a/src/transformers/models/pop2piano/modeling_pop2piano.py b/src/transformers/models/pop2piano/modeling_pop2piano.py index 71a1397cc3..9676c39455 100644 --- a/src/transformers/models/pop2piano/modeling_pop2piano.py +++ b/src/transformers/models/pop2piano/modeling_pop2piano.py @@ -1072,7 +1072,7 @@ class Pop2PianoForConditionalGeneration(Pop2PianoPreTrainedModel, GenerationMixi - 0 for tokens that are **padded**. """ composer_to_feature_token = generation_config.composer_to_feature_token - if composer not in composer_to_feature_token.keys(): + if composer not in composer_to_feature_token: raise ValueError( f"Please choose a composer from {list(composer_to_feature_token.keys())}. Composer received - {composer}" ) diff --git a/src/transformers/models/pop2piano/tokenization_pop2piano.py b/src/transformers/models/pop2piano/tokenization_pop2piano.py index bf0c1afdfa..4356b5d93f 100644 --- a/src/transformers/models/pop2piano/tokenization_pop2piano.py +++ b/src/transformers/models/pop2piano/tokenization_pop2piano.py @@ -265,7 +265,7 @@ class Pop2PianoTokenizer(PreTrainedTokenizer): current_idx = start_idx current_velocity = 0 - note_onsets_ready = [None for i in range(sum([k.endswith("NOTE") for k in self.encoder.keys()]) + 1)] + note_onsets_ready = [None for i in range(sum([k.endswith("NOTE") for k in self.encoder]) + 1)] notes = [] for token_type, number in words: if token_type == "TOKEN_SPECIAL": diff --git a/src/transformers/models/qwen2_5_omni/modeling_qwen2_5_omni.py b/src/transformers/models/qwen2_5_omni/modeling_qwen2_5_omni.py index f690ca5108..a10a0955f8 100644 --- a/src/transformers/models/qwen2_5_omni/modeling_qwen2_5_omni.py +++ b/src/transformers/models/qwen2_5_omni/modeling_qwen2_5_omni.py @@ -3872,7 +3872,7 @@ class Qwen2_5OmniForConditionalGeneration(Qwen2_5OmniPreTrainedModel, Generation # 2. Generate speech tokens from talker module embeds_to_talker = thinker_result.hidden_states[0][0].clone().to(input_ids.device) - if thinker_kwargs.get("input_features", None) is not None: + if thinker_kwargs.get("input_features") is not None: audio_ids_mask = input_ids == self.config.thinker_config.audio_token_index audio_mask = audio_ids_mask.unsqueeze(-1).expand_as(embeds_to_talker) audio_mask_tensor = torch.zeros( @@ -3881,7 +3881,7 @@ class Qwen2_5OmniForConditionalGeneration(Qwen2_5OmniPreTrainedModel, Generation device=input_ids.device, ) embeds_to_talker.masked_scatter_(audio_mask, audio_mask_tensor) - if thinker_kwargs.get("pixel_values", None) is not None: + if thinker_kwargs.get("pixel_values") is not None: image_ids_mask = input_ids == self.config.thinker_config.image_token_index image_mask = image_ids_mask.unsqueeze(-1).expand_as(embeds_to_talker) image_mask_tensor = torch.zeros( @@ -3890,7 +3890,7 @@ class Qwen2_5OmniForConditionalGeneration(Qwen2_5OmniPreTrainedModel, Generation device=input_ids.device, ) embeds_to_talker.masked_scatter_(image_mask, image_mask_tensor) - if thinker_kwargs.get("pixel_values_videos", None) is not None: + if thinker_kwargs.get("pixel_values_videos") is not None: video_ids_mask = input_ids == self.config.thinker_config.video_token_index video_mask = video_ids_mask.unsqueeze(-1).expand_as(embeds_to_talker) video_mask_tensor = torch.zeros( diff --git a/src/transformers/models/qwen2_5_omni/modular_qwen2_5_omni.py b/src/transformers/models/qwen2_5_omni/modular_qwen2_5_omni.py index b8e5bc6216..56fdff57e5 100644 --- a/src/transformers/models/qwen2_5_omni/modular_qwen2_5_omni.py +++ b/src/transformers/models/qwen2_5_omni/modular_qwen2_5_omni.py @@ -4169,7 +4169,7 @@ class Qwen2_5OmniForConditionalGeneration(Qwen2_5OmniPreTrainedModel, Generation # 2. Generate speech tokens from talker module embeds_to_talker = thinker_result.hidden_states[0][0].clone().to(input_ids.device) - if thinker_kwargs.get("input_features", None) is not None: + if thinker_kwargs.get("input_features") is not None: audio_ids_mask = input_ids == self.config.thinker_config.audio_token_index audio_mask = audio_ids_mask.unsqueeze(-1).expand_as(embeds_to_talker) audio_mask_tensor = torch.zeros( @@ -4178,7 +4178,7 @@ class Qwen2_5OmniForConditionalGeneration(Qwen2_5OmniPreTrainedModel, Generation device=input_ids.device, ) embeds_to_talker.masked_scatter_(audio_mask, audio_mask_tensor) - if thinker_kwargs.get("pixel_values", None) is not None: + if thinker_kwargs.get("pixel_values") is not None: image_ids_mask = input_ids == self.config.thinker_config.image_token_index image_mask = image_ids_mask.unsqueeze(-1).expand_as(embeds_to_talker) image_mask_tensor = torch.zeros( @@ -4187,7 +4187,7 @@ class Qwen2_5OmniForConditionalGeneration(Qwen2_5OmniPreTrainedModel, Generation device=input_ids.device, ) embeds_to_talker.masked_scatter_(image_mask, image_mask_tensor) - if thinker_kwargs.get("pixel_values_videos", None) is not None: + if thinker_kwargs.get("pixel_values_videos") is not None: video_ids_mask = input_ids == self.config.thinker_config.video_token_index video_mask = video_ids_mask.unsqueeze(-1).expand_as(embeds_to_talker) video_mask_tensor = torch.zeros( diff --git a/src/transformers/models/rag/modeling_rag.py b/src/transformers/models/rag/modeling_rag.py index ffc1f60c8f..367b4dc456 100644 --- a/src/transformers/models/rag/modeling_rag.py +++ b/src/transformers/models/rag/modeling_rag.py @@ -318,9 +318,9 @@ class RagPreTrainedModel(PreTrainedModel): } # remove question_encoder, generator kwargs from kwargs - for key in kwargs_question_encoder.keys(): + for key in kwargs_question_encoder: del kwargs["question_encoder_" + key] - for key in kwargs_generator.keys(): + for key in kwargs_generator: del kwargs["generator_" + key] # Load and initialize the question_encoder and generator @@ -370,7 +370,7 @@ class RagPreTrainedModel(PreTrainedModel): ) # instantiate config with corresponding kwargs - config = kwargs.get("config", None) + config = kwargs.get("config") if config is None: config = RagConfig.from_question_encoder_generator_configs( question_encoder.config, generator.config, **kwargs diff --git a/src/transformers/models/rag/modeling_tf_rag.py b/src/transformers/models/rag/modeling_tf_rag.py index 1d8c3f1fc8..f430239e0d 100644 --- a/src/transformers/models/rag/modeling_tf_rag.py +++ b/src/transformers/models/rag/modeling_tf_rag.py @@ -322,9 +322,9 @@ class TFRagPreTrainedModel(TFPreTrainedModel): } # remove question_encoder, generator kwargs from kwargs - for key in kwargs_question_encoder.keys(): + for key in kwargs_question_encoder: del kwargs["question_encoder_" + key] - for key in kwargs_generator.keys(): + for key in kwargs_generator: del kwargs["generator_" + key] # Load and initialize the question_encoder and generator @@ -376,7 +376,7 @@ class TFRagPreTrainedModel(TFPreTrainedModel): ) # instantiate config with corresponding kwargs - config = kwargs.get("config", None) + config = kwargs.get("config") if config is None: config = RagConfig.from_question_encoder_generator_configs( question_encoder.config, generator.config, **kwargs diff --git a/src/transformers/models/regnet/convert_regnet_seer_10b_to_pytorch.py b/src/transformers/models/regnet/convert_regnet_seer_10b_to_pytorch.py index 9bf3a17e91..ed4bc48035 100644 --- a/src/transformers/models/regnet/convert_regnet_seer_10b_to_pytorch.py +++ b/src/transformers/models/regnet/convert_regnet_seer_10b_to_pytorch.py @@ -51,7 +51,7 @@ class Tracker: name2module: dict[str, nn.Module] = field(default_factory=OrderedDict) def _forward_hook(self, m, inputs: Tensor, outputs: Tensor, name: str): - has_not_submodules = len(list(m.modules())) == 1 or isinstance(m, nn.Conv2d) or isinstance(m, nn.BatchNorm2d) + has_not_submodules = len(list(m.modules())) == 1 or isinstance(m, (nn.Conv2d, nn.BatchNorm2d)) if has_not_submodules: self.traced.append(m) self.name2module[name] = m @@ -217,7 +217,7 @@ def convert_weights_and_push(save_directory: Path, model_name: Optional[str] = N not_used_keys = list(from_state_dict.keys()) regex = r"\.block.-part." # this is "interesting", so the original checkpoints have `block[0,1]-part` in each key name, we remove it - for key in from_state_dict.keys(): + for key in from_state_dict: # remove the weird "block[0,1]-part" from the key src_key = re.sub(regex, "", key) # now src_key from the model checkpoints is the one we got from the original model after tracing, so use it to get the correct destination key diff --git a/src/transformers/models/regnet/convert_regnet_to_pytorch.py b/src/transformers/models/regnet/convert_regnet_to_pytorch.py index b7b48568a1..9d6659d768 100644 --- a/src/transformers/models/regnet/convert_regnet_to_pytorch.py +++ b/src/transformers/models/regnet/convert_regnet_to_pytorch.py @@ -44,7 +44,7 @@ class Tracker: handles: list = field(default_factory=list) def _forward_hook(self, m, inputs: Tensor, outputs: Tensor): - has_not_submodules = len(list(m.modules())) == 1 or isinstance(m, nn.Conv2d) or isinstance(m, nn.BatchNorm2d) + has_not_submodules = len(list(m.modules())) == 1 or isinstance(m, (nn.Conv2d, nn.BatchNorm2d)) if has_not_submodules: self.traced.append(m) diff --git a/src/transformers/models/resnet/convert_resnet_to_pytorch.py b/src/transformers/models/resnet/convert_resnet_to_pytorch.py index 7443b9cf50..11b09c372c 100644 --- a/src/transformers/models/resnet/convert_resnet_to_pytorch.py +++ b/src/transformers/models/resnet/convert_resnet_to_pytorch.py @@ -42,7 +42,7 @@ class Tracker: handles: list = field(default_factory=list) def _forward_hook(self, m, inputs: Tensor, outputs: Tensor): - has_not_submodules = len(list(m.modules())) == 1 or isinstance(m, nn.Conv2d) or isinstance(m, nn.BatchNorm2d) + has_not_submodules = len(list(m.modules())) == 1 or isinstance(m, (nn.Conv2d, nn.BatchNorm2d)) if has_not_submodules: self.traced.append(m) diff --git a/src/transformers/models/rt_detr/convert_rt_detr_original_pytorch_checkpoint_to_hf.py b/src/transformers/models/rt_detr/convert_rt_detr_original_pytorch_checkpoint_to_hf.py index 9f2271930e..8a76fa4b4d 100644 --- a/src/transformers/models/rt_detr/convert_rt_detr_original_pytorch_checkpoint_to_hf.py +++ b/src/transformers/models/rt_detr/convert_rt_detr_original_pytorch_checkpoint_to_hf.py @@ -574,7 +574,7 @@ def convert_rt_detr_checkpoint(model_name, pytorch_dump_folder_path, push_to_hub # query, key and value matrices need special treatment read_in_q_k_v(state_dict, config) # important: we need to prepend a prefix to each of the base model keys as the head models use different attributes for them - for key in state_dict.copy().keys(): + for key in state_dict.copy(): if key.endswith("num_batches_tracked"): del state_dict[key] # for two_stage diff --git a/src/transformers/models/rt_detr/image_processing_rt_detr_fast.py b/src/transformers/models/rt_detr/image_processing_rt_detr_fast.py index 76321cc22e..b6f52c28f8 100644 --- a/src/transformers/models/rt_detr/image_processing_rt_detr_fast.py +++ b/src/transformers/models/rt_detr/image_processing_rt_detr_fast.py @@ -160,8 +160,8 @@ class RTDetrImageProcessorFast(BaseImageProcessorFast): def __init__(self, **kwargs: Unpack[RTDetrFastImageProcessorKwargs]) -> None: # Backwards compatibility - do_convert_annotations = kwargs.get("do_convert_annotations", None) - do_normalize = kwargs.get("do_normalize", None) + do_convert_annotations = kwargs.get("do_convert_annotations") + do_normalize = kwargs.get("do_normalize") if do_convert_annotations is None and getattr(self, "do_convert_annotations", None) is None: self.do_convert_annotations = do_normalize if do_normalize is not None else self.do_normalize diff --git a/src/transformers/models/rt_detr/modular_rt_detr.py b/src/transformers/models/rt_detr/modular_rt_detr.py index 641519fafe..9a77a92b5a 100644 --- a/src/transformers/models/rt_detr/modular_rt_detr.py +++ b/src/transformers/models/rt_detr/modular_rt_detr.py @@ -123,8 +123,8 @@ class RTDetrImageProcessorFast(DetrImageProcessorFast, BaseImageProcessorFast): def __init__(self, **kwargs: Unpack[RTDetrFastImageProcessorKwargs]) -> None: # Backwards compatibility - do_convert_annotations = kwargs.get("do_convert_annotations", None) - do_normalize = kwargs.get("do_normalize", None) + do_convert_annotations = kwargs.get("do_convert_annotations") + do_normalize = kwargs.get("do_normalize") if do_convert_annotations is None and getattr(self, "do_convert_annotations", None) is None: self.do_convert_annotations = do_normalize if do_normalize is not None else self.do_normalize diff --git a/src/transformers/models/rt_detr_v2/convert_rt_detr_v2_weights_to_hf.py b/src/transformers/models/rt_detr_v2/convert_rt_detr_v2_weights_to_hf.py index 1a86ac02af..d2f9b200df 100644 --- a/src/transformers/models/rt_detr_v2/convert_rt_detr_v2_weights_to_hf.py +++ b/src/transformers/models/rt_detr_v2/convert_rt_detr_v2_weights_to_hf.py @@ -239,13 +239,13 @@ def write_model_and_image_processor(model_name, output_dir, push_to_hub, repo_id ]["module"] # rename keys state_dict = convert_old_keys_to_new_keys(state_dict) - for key in state_dict.copy().keys(): + for key in state_dict.copy(): if key.endswith("num_batches_tracked"): del state_dict[key] # query, key and value matrices need special treatment read_in_q_k_v(state_dict, config) # important: we need to prepend a prefix to each of the base model keys as the head models use different attributes for them - for key in state_dict.copy().keys(): + for key in state_dict.copy(): if key.endswith("num_batches_tracked"): del state_dict[key] # for two_stage diff --git a/src/transformers/models/seamless_m4t/modeling_seamless_m4t.py b/src/transformers/models/seamless_m4t/modeling_seamless_m4t.py index 1c47aec148..619b6448b4 100755 --- a/src/transformers/models/seamless_m4t/modeling_seamless_m4t.py +++ b/src/transformers/models/seamless_m4t/modeling_seamless_m4t.py @@ -3910,7 +3910,7 @@ class SeamlessM4TModel(SeamlessM4TPreTrainedModel, GenerationMixin): shape `(batch_size, sequence_length)` and `waveform_lengths` which gives the length of each sample. - If `generate_speech=False`, it will returns `ModelOutput`. """ - if input_ids is None and input_features is None and kwargs.get("inputs_embeds", None) is None: + if input_ids is None and input_features is None and kwargs.get("inputs_embeds") is None: raise ValueError( "`input_ids`,`input_features` and `inputs_embeds` are all empty. Make sure at least one of them is not." ) diff --git a/src/transformers/models/seamless_m4t_v2/modeling_seamless_m4t_v2.py b/src/transformers/models/seamless_m4t_v2/modeling_seamless_m4t_v2.py index 6160d311c8..950c0d3b8f 100644 --- a/src/transformers/models/seamless_m4t_v2/modeling_seamless_m4t_v2.py +++ b/src/transformers/models/seamless_m4t_v2/modeling_seamless_m4t_v2.py @@ -4208,7 +4208,7 @@ class SeamlessM4Tv2Model(SeamlessM4Tv2PreTrainedModel, GenerationMixin): shape `(batch_size, sequence_length)` and `waveform_lengths` which gives the length of each sample. - If `generate_speech=False`, it will returns `ModelOutput`. """ - if input_ids is None and input_features is None and kwargs.get("inputs_embeds", None) is None: + if input_ids is None and input_features is None and kwargs.get("inputs_embeds") is None: raise ValueError( "`input_ids`,`input_features` and `inputs_embeds` are all empty. Make sure at least one of them is not." ) diff --git a/src/transformers/models/speech_encoder_decoder/modeling_flax_speech_encoder_decoder.py b/src/transformers/models/speech_encoder_decoder/modeling_flax_speech_encoder_decoder.py index c809b804c8..bc541262c7 100644 --- a/src/transformers/models/speech_encoder_decoder/modeling_flax_speech_encoder_decoder.py +++ b/src/transformers/models/speech_encoder_decoder/modeling_flax_speech_encoder_decoder.py @@ -844,9 +844,9 @@ class FlaxSpeechEncoderDecoderModel(FlaxPreTrainedModel): } # remove encoder, decoder kwargs from kwargs - for key in kwargs_encoder.keys(): + for key in kwargs_encoder: del kwargs["encoder_" + key] - for key in kwargs_decoder.keys(): + for key in kwargs_decoder: del kwargs["decoder_" + key] # Load and initialize the encoder and decoder diff --git a/src/transformers/models/speech_encoder_decoder/modeling_speech_encoder_decoder.py b/src/transformers/models/speech_encoder_decoder/modeling_speech_encoder_decoder.py index 0b17ce1548..f4e804dac2 100644 --- a/src/transformers/models/speech_encoder_decoder/modeling_speech_encoder_decoder.py +++ b/src/transformers/models/speech_encoder_decoder/modeling_speech_encoder_decoder.py @@ -242,9 +242,9 @@ class SpeechEncoderDecoderModel(PreTrainedModel, GenerationMixin): } # remove encoder, decoder kwargs from kwargs - for key in kwargs_encoder.keys(): + for key in kwargs_encoder: del kwargs["encoder_" + key] - for key in kwargs_decoder.keys(): + for key in kwargs_decoder: del kwargs["decoder_" + key] # Load and initialize the encoder and decoder diff --git a/src/transformers/models/swiftformer/convert_swiftformer_original_to_hf.py b/src/transformers/models/swiftformer/convert_swiftformer_original_to_hf.py index 3567bb674e..a2d8893b5d 100644 --- a/src/transformers/models/swiftformer/convert_swiftformer_original_to_hf.py +++ b/src/transformers/models/swiftformer/convert_swiftformer_original_to_hf.py @@ -65,7 +65,7 @@ def rename_key(dct, old, new): def create_rename_keys(state_dict): rename_keys = [] - for k in state_dict.keys(): + for k in state_dict: k_new = k if ".pwconv" in k: k_new = k_new.replace(".pwconv", ".point_wise_conv") diff --git a/src/transformers/models/swin/convert_swin_simmim_to_pytorch.py b/src/transformers/models/swin/convert_swin_simmim_to_pytorch.py index 9a87ff693a..dbaeeb31ef 100644 --- a/src/transformers/models/swin/convert_swin_simmim_to_pytorch.py +++ b/src/transformers/models/swin/convert_swin_simmim_to_pytorch.py @@ -83,7 +83,7 @@ def rename_key(name): def convert_state_dict(orig_state_dict, model): - for key in orig_state_dict.copy().keys(): + for key in orig_state_dict.copy(): val = orig_state_dict.pop(key) if "attn_mask" in key: diff --git a/src/transformers/models/swin/convert_swin_timm_to_pytorch.py b/src/transformers/models/swin/convert_swin_timm_to_pytorch.py index c91249b272..9971da844a 100644 --- a/src/transformers/models/swin/convert_swin_timm_to_pytorch.py +++ b/src/transformers/models/swin/convert_swin_timm_to_pytorch.py @@ -90,7 +90,7 @@ def rename_key(name): def convert_state_dict(orig_state_dict, model): - for key in orig_state_dict.copy().keys(): + for key in orig_state_dict.copy(): val = orig_state_dict.pop(key) if "mask" in key: diff --git a/src/transformers/models/swin2sr/convert_swin2sr_original_to_pytorch.py b/src/transformers/models/swin2sr/convert_swin2sr_original_to_pytorch.py index 192e58d9db..e827070ed5 100644 --- a/src/transformers/models/swin2sr/convert_swin2sr_original_to_pytorch.py +++ b/src/transformers/models/swin2sr/convert_swin2sr_original_to_pytorch.py @@ -124,7 +124,7 @@ def rename_key(name, config): def convert_state_dict(orig_state_dict, config): - for key in orig_state_dict.copy().keys(): + for key in orig_state_dict.copy(): val = orig_state_dict.pop(key) if "qkv" in key: diff --git a/src/transformers/models/swinv2/convert_swinv2_timm_to_pytorch.py b/src/transformers/models/swinv2/convert_swinv2_timm_to_pytorch.py index 0e6e837a7e..60ea55edee 100644 --- a/src/transformers/models/swinv2/convert_swinv2_timm_to_pytorch.py +++ b/src/transformers/models/swinv2/convert_swinv2_timm_to_pytorch.py @@ -130,7 +130,7 @@ def rename_key(name): def convert_state_dict(orig_state_dict, model): - for key in orig_state_dict.copy().keys(): + for key in orig_state_dict.copy(): val = orig_state_dict.pop(key) if "mask" in key: diff --git a/src/transformers/models/switch_transformers/convert_big_switch.py b/src/transformers/models/switch_transformers/convert_big_switch.py index 8d19daf1c1..e6ef99a310 100644 --- a/src/transformers/models/switch_transformers/convert_big_switch.py +++ b/src/transformers/models/switch_transformers/convert_big_switch.py @@ -80,7 +80,7 @@ def shard_on_the_fly(switch_checkpoint_path, dump_path, max_shard_size, dtype, w checkpoint_info = flatten_dict(checkpoint_info, sep="/") all_layers = {} - for layer in checkpoint_info.keys(): + for layer in checkpoint_info: curr_real_layer_name, split_layer, content = get_key_and_tensorstore_dict( layer, checkpoint_info, switch_checkpoint_path ) diff --git a/src/transformers/models/t5/modeling_t5.py b/src/transformers/models/t5/modeling_t5.py index e39c4b2f99..aeb8de0424 100644 --- a/src/transformers/models/t5/modeling_t5.py +++ b/src/transformers/models/t5/modeling_t5.py @@ -911,7 +911,7 @@ class T5Stack(T5PreTrainedModel): ) assert_device_map(self.device_map, len(self.block)) self.model_parallel = True - self.first_device = "cpu" if "cpu" in self.device_map.keys() else "cuda:" + str(min(self.device_map.keys())) + self.first_device = "cpu" if "cpu" in self.device_map else "cuda:" + str(min(self.device_map.keys())) self.last_device = "cuda:" + str(max(self.device_map.keys())) # Load onto devices for k, v in self.device_map.items(): diff --git a/src/transformers/models/table_transformer/convert_table_transformer_to_hf.py b/src/transformers/models/table_transformer/convert_table_transformer_to_hf.py index 487cdc4819..aeba012ad0 100644 --- a/src/transformers/models/table_transformer/convert_table_transformer_to_hf.py +++ b/src/transformers/models/table_transformer/convert_table_transformer_to_hf.py @@ -203,7 +203,7 @@ def convert_table_transformer_checkpoint(checkpoint_url, pytorch_dump_folder_pat read_in_q_k_v(state_dict) # important: we need to prepend a prefix to each of the base model keys as the head models use different attributes for them prefix = "model." - for key in state_dict.copy().keys(): + for key in state_dict.copy(): if not key.startswith("class_labels_classifier") and not key.startswith("bbox_predictor"): val = state_dict.pop(key) state_dict[prefix + key] = val diff --git a/src/transformers/models/table_transformer/convert_table_transformer_to_hf_no_timm.py b/src/transformers/models/table_transformer/convert_table_transformer_to_hf_no_timm.py index 1073d48877..f9964369bf 100644 --- a/src/transformers/models/table_transformer/convert_table_transformer_to_hf_no_timm.py +++ b/src/transformers/models/table_transformer/convert_table_transformer_to_hf_no_timm.py @@ -335,7 +335,7 @@ def convert_table_transformer_checkpoint(checkpoint_url, pytorch_dump_folder_pat read_in_q_k_v(state_dict) # important: we need to prepend a prefix to each of the base model keys as the head models use different attributes for them prefix = "model." - for key in state_dict.copy().keys(): + for key in state_dict.copy(): if not key.startswith("class_labels_classifier") and not key.startswith("bbox_predictor"): val = state_dict.pop(key) state_dict[prefix + key] = val diff --git a/src/transformers/models/timesformer/convert_timesformer_to_pytorch.py b/src/transformers/models/timesformer/convert_timesformer_to_pytorch.py index cda9b0c182..5db24e6367 100644 --- a/src/transformers/models/timesformer/convert_timesformer_to_pytorch.py +++ b/src/transformers/models/timesformer/convert_timesformer_to_pytorch.py @@ -101,7 +101,7 @@ def rename_key(name): def convert_state_dict(orig_state_dict, config): - for key in orig_state_dict.copy().keys(): + for key in orig_state_dict.copy(): val = orig_state_dict.pop(key) if key.startswith("model."): diff --git a/src/transformers/models/timm_wrapper/configuration_timm_wrapper.py b/src/transformers/models/timm_wrapper/configuration_timm_wrapper.py index 39ed2098d6..5fa115a054 100644 --- a/src/transformers/models/timm_wrapper/configuration_timm_wrapper.py +++ b/src/transformers/models/timm_wrapper/configuration_timm_wrapper.py @@ -77,7 +77,7 @@ class TimmWrapperConfig(PretrainedConfig): @classmethod def from_dict(cls, config_dict: dict[str, Any], **kwargs): - label_names = config_dict.get("label_names", None) + label_names = config_dict.get("label_names") is_custom_model = "num_labels" in kwargs or "id2label" in kwargs # if no labels added to config, use imagenet labeller in timm diff --git a/src/transformers/models/udop/tokenization_udop_fast.py b/src/transformers/models/udop/tokenization_udop_fast.py index a9f18c1b2b..a8878b9b51 100644 --- a/src/transformers/models/udop/tokenization_udop_fast.py +++ b/src/transformers/models/udop/tokenization_udop_fast.py @@ -591,7 +591,7 @@ class UdopTokenizerFast(PreTrainedTokenizerFast): # To match each overflowing sample with the original sample in the batch # we add an overflow_to_sample_mapping array (see below) sanitized_tokens = {} - for key in tokens_and_encodings[0][0].keys(): + for key in tokens_and_encodings[0][0]: stack = [e for item, _ in tokens_and_encodings for e in item[key]] sanitized_tokens[key] = stack sanitized_encodings = [e for _, item in tokens_and_encodings for e in item] diff --git a/src/transformers/models/upernet/convert_convnext_upernet_to_pytorch.py b/src/transformers/models/upernet/convert_convnext_upernet_to_pytorch.py index eeb3ab5fc9..17d110c577 100644 --- a/src/transformers/models/upernet/convert_convnext_upernet_to_pytorch.py +++ b/src/transformers/models/upernet/convert_convnext_upernet_to_pytorch.py @@ -134,7 +134,7 @@ def convert_upernet_checkpoint(model_name, pytorch_dump_folder_path, push_to_hub model.eval() # replace "bn" => "batch_norm" - for key in state_dict.copy().keys(): + for key in state_dict.copy(): val = state_dict.pop(key) if "bn" in key: key = key.replace("bn", "batch_norm") diff --git a/src/transformers/models/upernet/convert_swin_upernet_to_pytorch.py b/src/transformers/models/upernet/convert_swin_upernet_to_pytorch.py index 9580af7c46..edf0e142da 100644 --- a/src/transformers/models/upernet/convert_swin_upernet_to_pytorch.py +++ b/src/transformers/models/upernet/convert_swin_upernet_to_pytorch.py @@ -208,7 +208,7 @@ def convert_upernet_checkpoint(model_name, pytorch_dump_folder_path, push_to_hub model.eval() # replace "bn" => "batch_norm" - for key in state_dict.copy().keys(): + for key in state_dict.copy(): val = state_dict.pop(key) if "bn" in key: key = key.replace("bn", "batch_norm") diff --git a/src/transformers/models/videomae/convert_videomae_to_pytorch.py b/src/transformers/models/videomae/convert_videomae_to_pytorch.py index 011c1862eb..2a1ab62c6a 100644 --- a/src/transformers/models/videomae/convert_videomae_to_pytorch.py +++ b/src/transformers/models/videomae/convert_videomae_to_pytorch.py @@ -136,7 +136,7 @@ def rename_key(name): def convert_state_dict(orig_state_dict, config): - for key in orig_state_dict.copy().keys(): + for key in orig_state_dict.copy(): val = orig_state_dict.pop(key) if key.startswith("encoder."): diff --git a/src/transformers/models/vision_encoder_decoder/modeling_flax_vision_encoder_decoder.py b/src/transformers/models/vision_encoder_decoder/modeling_flax_vision_encoder_decoder.py index c5467852c2..4c7869184e 100644 --- a/src/transformers/models/vision_encoder_decoder/modeling_flax_vision_encoder_decoder.py +++ b/src/transformers/models/vision_encoder_decoder/modeling_flax_vision_encoder_decoder.py @@ -785,9 +785,9 @@ class FlaxVisionEncoderDecoderModel(FlaxPreTrainedModel): } # remove encoder, decoder kwargs from kwargs - for key in kwargs_encoder.keys(): + for key in kwargs_encoder: del kwargs["encoder_" + key] - for key in kwargs_decoder.keys(): + for key in kwargs_decoder: del kwargs["decoder_" + key] # Load and initialize the encoder and decoder diff --git a/src/transformers/models/vision_encoder_decoder/modeling_tf_vision_encoder_decoder.py b/src/transformers/models/vision_encoder_decoder/modeling_tf_vision_encoder_decoder.py index e1274cacfb..5818a74271 100644 --- a/src/transformers/models/vision_encoder_decoder/modeling_tf_vision_encoder_decoder.py +++ b/src/transformers/models/vision_encoder_decoder/modeling_tf_vision_encoder_decoder.py @@ -375,9 +375,9 @@ class TFVisionEncoderDecoderModel(TFPreTrainedModel, TFCausalLanguageModelingLos } # remove encoder, decoder kwargs from kwargs - for key in kwargs_encoder.keys(): + for key in kwargs_encoder: del kwargs["encoder_" + key] - for key in kwargs_decoder.keys(): + for key in kwargs_decoder: del kwargs["decoder_" + key] # Load and initialize the encoder and decoder diff --git a/src/transformers/models/vision_encoder_decoder/modeling_vision_encoder_decoder.py b/src/transformers/models/vision_encoder_decoder/modeling_vision_encoder_decoder.py index 96d8228ea5..541c8bc9b9 100644 --- a/src/transformers/models/vision_encoder_decoder/modeling_vision_encoder_decoder.py +++ b/src/transformers/models/vision_encoder_decoder/modeling_vision_encoder_decoder.py @@ -257,7 +257,7 @@ class VisionEncoderDecoderModel(PreTrainedModel, GenerationMixin): del tf_model gc.collect() - attn_implementation = kwargs.get("attn_implementation", None) + attn_implementation = kwargs.get("attn_implementation") kwargs_encoder_decoder = {} if attn_implementation: kwargs_encoder_decoder = { @@ -360,9 +360,9 @@ class VisionEncoderDecoderModel(PreTrainedModel, GenerationMixin): } # remove encoder, decoder kwargs from kwargs - for key in kwargs_encoder.keys(): + for key in kwargs_encoder: del kwargs["encoder_" + key] - for key in kwargs_decoder.keys(): + for key in kwargs_decoder: del kwargs["decoder_" + key] # Load and initialize the encoder and decoder diff --git a/src/transformers/models/vision_text_dual_encoder/modeling_flax_vision_text_dual_encoder.py b/src/transformers/models/vision_text_dual_encoder/modeling_flax_vision_text_dual_encoder.py index 44c5e6c172..663d6944c6 100644 --- a/src/transformers/models/vision_text_dual_encoder/modeling_flax_vision_text_dual_encoder.py +++ b/src/transformers/models/vision_text_dual_encoder/modeling_flax_vision_text_dual_encoder.py @@ -480,9 +480,9 @@ class FlaxVisionTextDualEncoderModel(FlaxPreTrainedModel): } # remove text, vision kwargs from kwargs - for key in kwargs_vision.keys(): + for key in kwargs_vision: del kwargs["vision_" + key] - for key in kwargs_text.keys(): + for key in kwargs_text: del kwargs["text_" + key] # Load and initialize the text and vision model diff --git a/src/transformers/models/vision_text_dual_encoder/modeling_tf_vision_text_dual_encoder.py b/src/transformers/models/vision_text_dual_encoder/modeling_tf_vision_text_dual_encoder.py index 62ec6c955b..42ff0be7a9 100644 --- a/src/transformers/models/vision_text_dual_encoder/modeling_tf_vision_text_dual_encoder.py +++ b/src/transformers/models/vision_text_dual_encoder/modeling_tf_vision_text_dual_encoder.py @@ -525,9 +525,9 @@ class TFVisionTextDualEncoderModel(TFPreTrainedModel): } # remove vision, text kwargs from kwargs - for key in kwargs_vision.keys(): + for key in kwargs_vision: del kwargs["vision_" + key] - for key in kwargs_text.keys(): + for key in kwargs_text: del kwargs["text_" + key] # Load and initialize the vision and text model diff --git a/src/transformers/models/vision_text_dual_encoder/modeling_vision_text_dual_encoder.py b/src/transformers/models/vision_text_dual_encoder/modeling_vision_text_dual_encoder.py index fb1abc020a..fd36c7afaf 100755 --- a/src/transformers/models/vision_text_dual_encoder/modeling_vision_text_dual_encoder.py +++ b/src/transformers/models/vision_text_dual_encoder/modeling_vision_text_dual_encoder.py @@ -366,9 +366,9 @@ class VisionTextDualEncoderModel(PreTrainedModel): } # remove vision, text kwargs from kwargs - for key in kwargs_vision.keys(): + for key in kwargs_vision: del kwargs["vision_" + key] - for key in kwargs_text.keys(): + for key in kwargs_text: del kwargs["text_" + key] # Load and initialize the vision and text model diff --git a/src/transformers/models/vit_mae/convert_vit_mae_to_pytorch.py b/src/transformers/models/vit_mae/convert_vit_mae_to_pytorch.py index 47e77593f6..c7e4a7dc3b 100644 --- a/src/transformers/models/vit_mae/convert_vit_mae_to_pytorch.py +++ b/src/transformers/models/vit_mae/convert_vit_mae_to_pytorch.py @@ -67,7 +67,7 @@ def rename_key(name): def convert_state_dict(orig_state_dict, config): - for key in orig_state_dict.copy().keys(): + for key in orig_state_dict.copy(): val = orig_state_dict.pop(key) if "qkv" in key: diff --git a/src/transformers/models/vitmatte/convert_vitmatte_to_hf.py b/src/transformers/models/vitmatte/convert_vitmatte_to_hf.py index 5153e1faf5..e9b171876a 100644 --- a/src/transformers/models/vitmatte/convert_vitmatte_to_hf.py +++ b/src/transformers/models/vitmatte/convert_vitmatte_to_hf.py @@ -85,7 +85,7 @@ def convert_vitmatte_checkpoint(model_name, pytorch_dump_folder_path, push_to_hu state_dict = torch.load(filepath, map_location="cpu", weights_only=True) # rename keys - for key in state_dict.copy().keys(): + for key in state_dict.copy(): val = state_dict.pop(key) if "backbone.blocks" in key: key = key.replace("backbone.blocks", "backbone.encoder.layer") diff --git a/src/transformers/models/vivit/convert_vivit_flax_to_pytorch.py b/src/transformers/models/vivit/convert_vivit_flax_to_pytorch.py index f4b5e1cfda..bf6aa8e4a3 100644 --- a/src/transformers/models/vivit/convert_vivit_flax_to_pytorch.py +++ b/src/transformers/models/vivit/convert_vivit_flax_to_pytorch.py @@ -129,7 +129,7 @@ def transform_state_encoder_block(state_dict, i): def get_n_layers(state_dict): - return sum([1 if "encoderblock_" in k else 0 for k in state_dict["optimizer"]["target"]["Transformer"].keys()]) + return sum([1 if "encoderblock_" in k else 0 for k in state_dict["optimizer"]["target"]["Transformer"]]) def transform_state(state_dict, classification_head=False): diff --git a/src/transformers/models/voxtral/processing_voxtral.py b/src/transformers/models/voxtral/processing_voxtral.py index 598529bf5c..7867827322 100644 --- a/src/transformers/models/voxtral/processing_voxtral.py +++ b/src/transformers/models/voxtral/processing_voxtral.py @@ -166,7 +166,7 @@ class VoxtralProcessor(ProcessorMixin): } for kwarg_type in processed_kwargs: - for key in AllKwargsForChatTemplate.__annotations__[kwarg_type].__annotations__.keys(): + for key in AllKwargsForChatTemplate.__annotations__[kwarg_type].__annotations__: kwarg_type_defaults = AllKwargsForChatTemplate.__annotations__[kwarg_type] default_value = getattr(kwarg_type_defaults, key, None) value = kwargs.pop(key, default_value) diff --git a/src/transformers/models/wav2vec2/convert_wav2vec2_original_pytorch_checkpoint_to_pytorch.py b/src/transformers/models/wav2vec2/convert_wav2vec2_original_pytorch_checkpoint_to_pytorch.py index 1e0f00f477..361684319c 100644 --- a/src/transformers/models/wav2vec2/convert_wav2vec2_original_pytorch_checkpoint_to_pytorch.py +++ b/src/transformers/models/wav2vec2/convert_wav2vec2_original_pytorch_checkpoint_to_pytorch.py @@ -89,7 +89,7 @@ def set_recursively(key, value, full_name, weight_type, hf_pointer): hf_pointer = getattr(hf_pointer, attribute) hf_param_name = None - for param_key in PARAM_MAPPING.keys(): + for param_key in PARAM_MAPPING: if full_name.endswith(param_key): hf_param_name = PARAM_MAPPING[full_name.split(".")[-1]] weight_type = "param" @@ -148,7 +148,7 @@ def set_recursively(key, value, full_name, weight_type, hf_pointer): def rename_dict(key, value, full_name, weight_type, hf_dict): hf_param_name = None - for param_key in PARAM_MAPPING.keys(): + for param_key in PARAM_MAPPING: if full_name.endswith(param_key): hf_param_name = PARAM_MAPPING[full_name.split(".")[-1]] weight_type = "param" diff --git a/src/transformers/models/wav2vec2/modeling_tf_wav2vec2.py b/src/transformers/models/wav2vec2/modeling_tf_wav2vec2.py index dd116a7c80..427cd7261f 100644 --- a/src/transformers/models/wav2vec2/modeling_tf_wav2vec2.py +++ b/src/transformers/models/wav2vec2/modeling_tf_wav2vec2.py @@ -1300,7 +1300,7 @@ class TFWav2Vec2MainLayer(keras.layers.Layer): hidden_states, extract_features = self.feature_projection(extract_features, training=training) - mask_time_indices = kwargs.get("mask_time_indices", None) + mask_time_indices = kwargs.get("mask_time_indices") if training: hidden_states = self._mask_hidden_states(hidden_states, mask_time_indices=mask_time_indices) diff --git a/src/transformers/models/wav2vec2/tokenization_wav2vec2.py b/src/transformers/models/wav2vec2/tokenization_wav2vec2.py index 14e61ec513..7fab737064 100644 --- a/src/transformers/models/wav2vec2/tokenization_wav2vec2.py +++ b/src/transformers/models/wav2vec2/tokenization_wav2vec2.py @@ -191,7 +191,7 @@ class Wav2Vec2CTCTokenizer(PreTrainedTokenizer): # make sure that tokens made of several # characters are not split at tokenization - for token in self.encoder.keys(): + for token in self.encoder: if len(token) > 1: self.add_tokens(AddedToken(token, rstrip=True, lstrip=True, normalized=False)) @@ -212,7 +212,7 @@ class Wav2Vec2CTCTokenizer(PreTrainedTokenizer): # make sure that tokens made of several # characters are not split at tokenization - for token in self.encoder.keys(): + for token in self.encoder: if len(token) > 1: self.add_tokens(AddedToken(token, rstrip=True, lstrip=True, normalized=False)) diff --git a/src/transformers/models/whisper/generation_whisper.py b/src/transformers/models/whisper/generation_whisper.py index 1cdca9ba03..a5cbcd9534 100644 --- a/src/transformers/models/whisper/generation_whisper.py +++ b/src/transformers/models/whisper/generation_whisper.py @@ -1181,7 +1181,7 @@ class WhisperGenerationMixin(GenerationMixin): def _stack_split_outputs(self, seek_outputs, model_output_type, device, kwargs): # Stack back seek_outputs tensors after splitting them with the split_by_batch_index method outputs = {} - for key in seek_outputs[0].keys(): + for key in seek_outputs[0]: if key in ["sequences", "beam_indices", "token_timestamps"]: outputs[key] = torch.stack([v[key] for v in seek_outputs], dim=0).to(device) elif key in ["scores", "encoder_attentions", "encoder_hidden_states", "logits"]: @@ -1212,7 +1212,7 @@ class WhisperGenerationMixin(GenerationMixin): else: outputs[key] = None - token_timestamps = outputs.get("token_timestamps", None) + token_timestamps = outputs.get("token_timestamps") if token_timestamps is not None: model_output_type = dict @@ -1442,9 +1442,9 @@ class WhisperGenerationMixin(GenerationMixin): def language_to_id(language: str) -> int: language = language.lower() - if language in generation_config.lang_to_id.keys(): + if language in generation_config.lang_to_id: language_token = language - elif language in TO_LANGUAGE_CODE.keys(): + elif language in TO_LANGUAGE_CODE: language_token = f"<|{TO_LANGUAGE_CODE[language]}|>" elif language in TO_LANGUAGE_CODE.values(): language_token = f"<|{language}|>" diff --git a/src/transformers/models/whisper/modeling_tf_whisper.py b/src/transformers/models/whisper/modeling_tf_whisper.py index a3c71745c5..b11df907b3 100644 --- a/src/transformers/models/whisper/modeling_tf_whisper.py +++ b/src/transformers/models/whisper/modeling_tf_whisper.py @@ -1592,14 +1592,14 @@ class TFWhisperForConditionalGeneration(TFWhisperPreTrainedModel, TFCausalLangua ): forced_decoder_ids = self.generation_config.forced_decoder_ids else: - forced_decoder_ids = kwargs.get("forced_decoder_ids", None) + forced_decoder_ids = kwargs.get("forced_decoder_ids") if task is not None or language is not None or (forced_decoder_ids is None and prompt_ids is not None): forced_decoder_ids = [] if hasattr(generation_config, "language"): - if generation_config.language in generation_config.lang_to_id.keys(): + if generation_config.language in generation_config.lang_to_id: language_token = generation_config.language - elif generation_config.language in TO_LANGUAGE_CODE.keys(): + elif generation_config.language in TO_LANGUAGE_CODE: language_token = f"<|{TO_LANGUAGE_CODE[generation_config.language]}|>" elif generation_config.language in TO_LANGUAGE_CODE.values(): language_token = f"<|{generation_config.language}|>" diff --git a/src/transformers/models/whisper/tokenization_whisper.py b/src/transformers/models/whisper/tokenization_whisper.py index 2d9dd6845c..4147f14d86 100644 --- a/src/transformers/models/whisper/tokenization_whisper.py +++ b/src/transformers/models/whisper/tokenization_whisper.py @@ -1007,7 +1007,7 @@ def _decode_asr(tokenizer, model_outputs, *, return_timestamps, return_language, text = tokenizer.decode([token]) # Removing outer shell <|XX|> text = text[2:-2] - language = LANGUAGES.get(text, None) + language = LANGUAGES.get(text) if language is not None: # 1/ Indeed some language # TODO Handle when language is different from the previous diff --git a/src/transformers/models/x_clip/convert_x_clip_original_pytorch_to_hf.py b/src/transformers/models/x_clip/convert_x_clip_original_pytorch_to_hf.py index 6f36b19055..fbd2762cef 100644 --- a/src/transformers/models/x_clip/convert_x_clip_original_pytorch_to_hf.py +++ b/src/transformers/models/x_clip/convert_x_clip_original_pytorch_to_hf.py @@ -118,7 +118,7 @@ def rename_key(name): def convert_state_dict(orig_state_dict, config): - for key in orig_state_dict.copy().keys(): + for key in orig_state_dict.copy(): val = orig_state_dict.pop(key) if "attn.in_proj" in key: diff --git a/src/transformers/models/xglm/modeling_tf_xglm.py b/src/transformers/models/xglm/modeling_tf_xglm.py index 5915e2e9ae..d799ced792 100644 --- a/src/transformers/models/xglm/modeling_tf_xglm.py +++ b/src/transformers/models/xglm/modeling_tf_xglm.py @@ -890,8 +890,8 @@ class TFXGLMForCausalLM(TFXGLMPreTrainedModel, TFCausalLanguageModelingLoss): if past_key_values: inputs = tf.expand_dims(inputs[:, -1], -1) - position_ids = kwargs.get("position_ids", None) - attention_mask = kwargs.get("attention_mask", None) + position_ids = kwargs.get("position_ids") + attention_mask = kwargs.get("attention_mask") if attention_mask is not None and position_ids is None: position_ids = tf.math.cumsum(attention_mask, axis=-1, exclusive=True) diff --git a/src/transformers/models/xmod/convert_xmod_original_pytorch_checkpoint_to_pytorch.py b/src/transformers/models/xmod/convert_xmod_original_pytorch_checkpoint_to_pytorch.py index d43c05cd62..c110c005af 100644 --- a/src/transformers/models/xmod/convert_xmod_original_pytorch_checkpoint_to_pytorch.py +++ b/src/transformers/models/xmod/convert_xmod_original_pytorch_checkpoint_to_pytorch.py @@ -144,7 +144,7 @@ def convert_xmod_checkpoint_to_pytorch( if sorted(bert_output.adapter_modules.keys()) != sorted(xmod_layer.adapter_modules.keys()): raise AssertionError("Lists of language adapters do not match.") - for lang_code in xmod_layer.adapter_modules.keys(): + for lang_code in xmod_layer.adapter_modules: to_adapter = bert_output.adapter_modules[lang_code] from_adapter = xmod_layer.adapter_modules[lang_code] to_adapter.dense1.weight = from_adapter.fc1.weight diff --git a/src/transformers/models/yolos/convert_yolos_to_pytorch.py b/src/transformers/models/yolos/convert_yolos_to_pytorch.py index 907a11d067..54fbd18e06 100644 --- a/src/transformers/models/yolos/convert_yolos_to_pytorch.py +++ b/src/transformers/models/yolos/convert_yolos_to_pytorch.py @@ -123,7 +123,7 @@ def rename_key(name: str) -> str: def convert_state_dict(orig_state_dict: dict, model: YolosForObjectDetection) -> dict: - for key in orig_state_dict.copy().keys(): + for key in orig_state_dict.copy(): val = orig_state_dict.pop(key) if "qkv" in key: diff --git a/src/transformers/models/yolos/image_processing_yolos_fast.py b/src/transformers/models/yolos/image_processing_yolos_fast.py index 85fb1d142d..d7efb99e46 100644 --- a/src/transformers/models/yolos/image_processing_yolos_fast.py +++ b/src/transformers/models/yolos/image_processing_yolos_fast.py @@ -366,8 +366,8 @@ class YolosImageProcessorFast(BaseImageProcessorFast): self.size = get_size_dict(size, max_size=max_size, default_to_square=False) # Backwards compatibility - do_convert_annotations = kwargs.get("do_convert_annotations", None) - do_normalize = kwargs.get("do_normalize", None) + do_convert_annotations = kwargs.get("do_convert_annotations") + do_normalize = kwargs.get("do_normalize") if do_convert_annotations is None and getattr(self, "do_convert_annotations", None) is None: self.do_convert_annotations = do_normalize if do_normalize is not None else self.do_normalize diff --git a/src/transformers/models/yoso/convert_yoso_pytorch_to_pytorch.py b/src/transformers/models/yoso/convert_yoso_pytorch_to_pytorch.py index 07652ed021..b1d3e96859 100644 --- a/src/transformers/models/yoso/convert_yoso_pytorch_to_pytorch.py +++ b/src/transformers/models/yoso/convert_yoso_pytorch_to_pytorch.py @@ -60,7 +60,7 @@ def rename_key(orig_key): def convert_checkpoint_helper(max_position_embeddings, orig_state_dict): - for key in orig_state_dict.copy().keys(): + for key in orig_state_dict.copy(): val = orig_state_dict.pop(key) if ("pooler" in key) or ("sen_class" in key): diff --git a/src/transformers/models/zoedepth/convert_zoedepth_to_hf.py b/src/transformers/models/zoedepth/convert_zoedepth_to_hf.py index 81fcb66afa..03f2145418 100644 --- a/src/transformers/models/zoedepth/convert_zoedepth_to_hf.py +++ b/src/transformers/models/zoedepth/convert_zoedepth_to_hf.py @@ -255,7 +255,7 @@ def read_in_q_k_v_metric_head(state_dict): def convert_state_dict(orig_state_dict): - for key in orig_state_dict.copy().keys(): + for key in orig_state_dict.copy(): val = orig_state_dict.pop(key) # rename key @@ -266,7 +266,7 @@ def convert_state_dict(orig_state_dict): def remove_ignore_keys(state_dict): - for key in state_dict.copy().keys(): + for key in state_dict.copy(): if ( "fc_norm" in key or "relative_position_index" in key diff --git a/src/transformers/onnx/convert.py b/src/transformers/onnx/convert.py index 27a47c4ea9..152bc98d6d 100644 --- a/src/transformers/onnx/convert.py +++ b/src/transformers/onnx/convert.py @@ -457,5 +457,5 @@ def ensure_model_and_config_inputs_match( # Make sure the input order match (VERY IMPORTANT !!!!) matching_inputs = forward_inputs_set.intersection(model_inputs_set) - ordered_inputs = [parameter for parameter in forward_parameters.keys() if parameter in matching_inputs] + ordered_inputs = [parameter for parameter in forward_parameters if parameter in matching_inputs] return is_ok, ordered_inputs diff --git a/src/transformers/optimization.py b/src/transformers/optimization.py index 1c71487dbb..688d0f8db5 100644 --- a/src/transformers/optimization.py +++ b/src/transformers/optimization.py @@ -625,7 +625,7 @@ def get_scheduler( optimizer_dict = optimizer.optimizer_dict scheduler_dict = {} - for param in optimizer_dict.keys(): + for param in optimizer_dict: scheduler_dict[param] = get_scheduler( name, optimizer=optimizer_dict[param], @@ -639,7 +639,7 @@ def get_scheduler( # attach the scheduler hook, the gradients have been zeroed here scheduler_dict[param].step() - for param in optimizer_dict.keys(): + for param in optimizer_dict: if param.requires_grad: param.register_post_accumulate_grad_hook(scheduler_hook) diff --git a/src/transformers/pipelines/base.py b/src/transformers/pipelines/base.py index 238467ee29..3e73980095 100644 --- a/src/transformers/pipelines/base.py +++ b/src/transformers/pipelines/base.py @@ -1141,7 +1141,7 @@ class Pipeline(_ScikitCompat, PushToHubMixin): "The `use_auth_token` argument is deprecated and will be removed in v5 of Transformers. Please use `token` instead.", FutureWarning, ) - if kwargs.get("token", None) is not None: + if kwargs.get("token") is not None: raise ValueError( "`token` and `use_auth_token` are both specified. Please set only the argument `token`." ) diff --git a/src/transformers/pipelines/image_to_image.py b/src/transformers/pipelines/image_to_image.py index 52fc99bd05..d87f6928c9 100644 --- a/src/transformers/pipelines/image_to_image.py +++ b/src/transformers/pipelines/image_to_image.py @@ -136,7 +136,7 @@ class ImageToImagePipeline(Pipeline): def postprocess(self, model_outputs): images = [] - if "reconstruction" in model_outputs.keys(): + if "reconstruction" in model_outputs: outputs = model_outputs.reconstruction for output in outputs: output = output.data.squeeze().float().cpu().clamp_(0, 1).numpy() diff --git a/src/transformers/pipelines/question_answering.py b/src/transformers/pipelines/question_answering.py index 2eee80a907..ed21e39084 100644 --- a/src/transformers/pipelines/question_answering.py +++ b/src/transformers/pipelines/question_answering.py @@ -524,7 +524,7 @@ class QuestionAnsweringPipeline(ChunkPipeline): model_inputs = {k: inputs[k] for k in self.tokenizer.model_input_names} # `XXXForSequenceClassification` models should not use `use_cache=True` even if it's supported model_forward = self.model.forward if self.framework == "pt" else self.model.call - if "use_cache" in inspect.signature(model_forward).parameters.keys(): + if "use_cache" in inspect.signature(model_forward).parameters: model_inputs["use_cache"] = False output = self.model(**model_inputs) if isinstance(output, dict): diff --git a/src/transformers/pipelines/text_classification.py b/src/transformers/pipelines/text_classification.py index 367d867d0e..6f11f3bc97 100644 --- a/src/transformers/pipelines/text_classification.py +++ b/src/transformers/pipelines/text_classification.py @@ -194,7 +194,7 @@ class TextClassificationPipeline(Pipeline): def _forward(self, model_inputs): # `XXXForSequenceClassification` models should not use `use_cache=True` even if it's supported model_forward = self.model.forward if self.framework == "pt" else self.model.call - if "use_cache" in inspect.signature(model_forward).parameters.keys(): + if "use_cache" in inspect.signature(model_forward).parameters: model_inputs["use_cache"] = False return self.model(**model_inputs) diff --git a/src/transformers/pipelines/token_classification.py b/src/transformers/pipelines/token_classification.py index d7663eb170..a9d1467482 100644 --- a/src/transformers/pipelines/token_classification.py +++ b/src/transformers/pipelines/token_classification.py @@ -31,7 +31,7 @@ class TokenClassificationArgumentHandler(ArgumentHandler): def __call__(self, inputs: Union[str, list[str]], **kwargs): is_split_into_words = kwargs.get("is_split_into_words", False) - delimiter = kwargs.get("delimiter", None) + delimiter = kwargs.get("delimiter") if inputs is not None and isinstance(inputs, (list, tuple)) and len(inputs) > 0: inputs = list(inputs) diff --git a/src/transformers/pipelines/zero_shot_classification.py b/src/transformers/pipelines/zero_shot_classification.py index b571a7896b..20675d4a29 100644 --- a/src/transformers/pipelines/zero_shot_classification.py +++ b/src/transformers/pipelines/zero_shot_classification.py @@ -146,7 +146,7 @@ class ZeroShotClassificationPipeline(ChunkPipeline): return inputs def _sanitize_parameters(self, **kwargs): - if kwargs.get("multi_class", None) is not None: + if kwargs.get("multi_class") is not None: kwargs["multi_label"] = kwargs["multi_class"] logger.warning( "The `multi_class` argument has been deprecated and renamed to `multi_label`. " @@ -227,7 +227,7 @@ class ZeroShotClassificationPipeline(ChunkPipeline): model_inputs = {k: inputs[k] for k in self.tokenizer.model_input_names} # `XXXForSequenceClassification` models should not use `use_cache=True` even if it's supported model_forward = self.model.forward if self.framework == "pt" else self.model.call - if "use_cache" in inspect.signature(model_forward).parameters.keys(): + if "use_cache" in inspect.signature(model_forward).parameters: model_inputs["use_cache"] = False outputs = self.model(**model_inputs) diff --git a/src/transformers/processing_utils.py b/src/transformers/processing_utils.py index a40f2ad7fd..7f77daa297 100644 --- a/src/transformers/processing_utils.py +++ b/src/transformers/processing_utils.py @@ -678,7 +678,7 @@ class ProcessorMixin(PushToHubMixin): "The `use_auth_token` argument is deprecated and will be removed in v5 of Transformers. Please use `token` instead.", FutureWarning, ) - if kwargs.get("token", None) is not None: + if kwargs.get("token") is not None: raise ValueError( "`token` and `use_auth_token` are both specified. Please set only the argument `token`." ) @@ -1187,7 +1187,7 @@ class ProcessorMixin(PushToHubMixin): for modality in default_kwargs: # noqa: PLC0206 default_kwargs[modality] = ModelProcessorKwargs._defaults.get(modality, {}).copy() # update defaults with arguments from tokenizer init - for modality_key in ModelProcessorKwargs.__annotations__[modality].__annotations__.keys(): + for modality_key in ModelProcessorKwargs.__annotations__[modality].__annotations__: # init with tokenizer init kwargs if necessary if tokenizer_init_kwargs is not None and modality_key in tokenizer_init_kwargs: value = ( @@ -1203,7 +1203,7 @@ class ProcessorMixin(PushToHubMixin): # update modality kwargs with passed kwargs non_modality_kwargs = set(kwargs) - set(output_kwargs) for modality, output_kwarg in output_kwargs.items(): - for modality_key in ModelProcessorKwargs.__annotations__[modality].__annotations__.keys(): + for modality_key in ModelProcessorKwargs.__annotations__[modality].__annotations__: # check if we received a structured kwarg dict or not to handle it correctly if modality in kwargs: kwarg_value = kwargs[modality].pop(modality_key, "__empty__") @@ -1236,7 +1236,7 @@ class ProcessorMixin(PushToHubMixin): # kwargs is a flat dictionary for key, kwarg in kwargs.items(): if key not in used_keys: - if key in ModelProcessorKwargs.__annotations__["common_kwargs"].__annotations__.keys(): + if key in ModelProcessorKwargs.__annotations__["common_kwargs"].__annotations__: output_kwargs["common_kwargs"][key] = kwarg elif key not in possible_modality_keywords: logger.warning_once( @@ -1350,7 +1350,7 @@ class ProcessorMixin(PushToHubMixin): classes = tuple(cls.get_possibly_dynamic_module(n) if n is not None else None for n in class_name) if attribute_name == "image_processor": # TODO: @yoni, change logic in v4.52 (when use_fast set to True by default) - use_fast = kwargs.get("use_fast", None) + use_fast = kwargs.get("use_fast") if use_fast is None: logger.warning_once( "Using a slow image processor as `use_fast` is unset and a slow processor was saved with this model. " @@ -1495,7 +1495,7 @@ class ProcessorMixin(PushToHubMixin): } for kwarg_type in processed_kwargs: - for key in AllKwargsForChatTemplate.__annotations__[kwarg_type].__annotations__.keys(): + for key in AllKwargsForChatTemplate.__annotations__[kwarg_type].__annotations__: kwarg_type_defaults = AllKwargsForChatTemplate.__annotations__[kwarg_type] default_value = getattr(kwarg_type_defaults, key, None) value = kwargs.pop(key, default_value) @@ -1641,7 +1641,7 @@ class ProcessorMixin(PushToHubMixin): current_mask[token_id] = 1 assistant_masks.append(current_mask) out["assistant_masks"] = assistant_masks - out.convert_to_tensors(tensor_type=kwargs.get("return_tensors", None)) + out.convert_to_tensors(tensor_type=kwargs.get("return_tensors")) return out else: return out["input_ids"] diff --git a/src/transformers/quantizers/auto.py b/src/transformers/quantizers/auto.py index e4fbaadb5d..161951d340 100644 --- a/src/transformers/quantizers/auto.py +++ b/src/transformers/quantizers/auto.py @@ -116,7 +116,7 @@ class AutoQuantizationConfig: @classmethod def from_dict(cls, quantization_config_dict: dict): - quant_method = quantization_config_dict.get("quant_method", None) + quant_method = quantization_config_dict.get("quant_method") # We need a special care for bnb models to make sure everything is BC .. if quantization_config_dict.get("load_in_8bit", False) or quantization_config_dict.get("load_in_4bit", False): suffix = "_4bit" if quantization_config_dict.get("load_in_4bit", False) else "_8bit" @@ -126,7 +126,7 @@ class AutoQuantizationConfig: "The model's quantization config from the arguments has no `quant_method` attribute. Make sure that the model has been correctly quantized" ) - if quant_method not in AUTO_QUANTIZATION_CONFIG_MAPPING.keys(): + if quant_method not in AUTO_QUANTIZATION_CONFIG_MAPPING: raise ValueError( f"Unknown quantization type, got {quant_method} - supported types are:" f" {list(AUTO_QUANTIZER_MAPPING.keys())}" @@ -171,7 +171,7 @@ class AutoHfQuantizer: else: quant_method += "_4bit" - if quant_method not in AUTO_QUANTIZER_MAPPING.keys(): + if quant_method not in AUTO_QUANTIZER_MAPPING: raise ValueError( f"Unknown quantization type, got {quant_method} - supported types are:" f" {list(AUTO_QUANTIZER_MAPPING.keys())}" @@ -238,7 +238,7 @@ class AutoHfQuantizer: "The model's quantization config from the arguments has no `quant_method` attribute. Make sure that the model has been correctly quantized" ) - if quant_method not in AUTO_QUANTIZATION_CONFIG_MAPPING.keys(): + if quant_method not in AUTO_QUANTIZATION_CONFIG_MAPPING: logger.warning( f"Unknown quantization type, got {quant_method} - supported types are:" f" {list(AUTO_QUANTIZER_MAPPING.keys())}. Hence, we will skip the quantization. " diff --git a/src/transformers/quantizers/base.py b/src/transformers/quantizers/base.py index 0a4ddf6804..fb53c4c0f6 100644 --- a/src/transformers/quantizers/base.py +++ b/src/transformers/quantizers/base.py @@ -319,7 +319,7 @@ class HfQuantizer(ABC): for name, module in model.named_modules(): module_class_name = module.__class__.__name__ - if module_class_name in MODULES_TO_PATCH_FOR_QUANTIZATION.keys() and ( + if module_class_name in MODULES_TO_PATCH_FOR_QUANTIZATION and ( self.quantization_config.quant_method in MODULES_TO_PATCH_FOR_QUANTIZATION[module_class_name]["quantization_methods"] ): diff --git a/src/transformers/quantizers/quantizer_auto_round.py b/src/transformers/quantizers/quantizer_auto_round.py index 9f6783daee..2f73567860 100644 --- a/src/transformers/quantizers/quantizer_auto_round.py +++ b/src/transformers/quantizers/quantizer_auto_round.py @@ -42,7 +42,7 @@ class AutoRoundQuantizer(HfQuantizer): super().__init__(quantization_config, **kwargs) def validate_environment(self, *args, **kwargs): - self.device_map = kwargs.get("device_map", None) + self.device_map = kwargs.get("device_map") if not is_auto_round_available(): raise ImportError( "Loading an AutoRound quantized model requires auto-round library (`pip install 'auto-round>=0.5'`)" diff --git a/src/transformers/quantizers/quantizer_bitnet.py b/src/transformers/quantizers/quantizer_bitnet.py index 90cbab4303..a57e732b98 100644 --- a/src/transformers/quantizers/quantizer_bitnet.py +++ b/src/transformers/quantizers/quantizer_bitnet.py @@ -62,7 +62,7 @@ class BitNetHfQuantizer(HfQuantizer): ) return - device_map = kwargs.get("device_map", None) + device_map = kwargs.get("device_map") if device_map is None: logger.warning_once( "You have loaded a BitNet model on CPU and have a CUDA device available, make sure to set " diff --git a/src/transformers/quantizers/quantizer_bnb_4bit.py b/src/transformers/quantizers/quantizer_bnb_4bit.py index 409c5d47b0..fb268777cd 100644 --- a/src/transformers/quantizers/quantizer_bnb_4bit.py +++ b/src/transformers/quantizers/quantizer_bnb_4bit.py @@ -102,14 +102,14 @@ class Bnb4BitHfQuantizer(HfQuantizer): " sure the weights are in PyTorch format." ) - device_map = kwargs.get("device_map", None) + device_map = kwargs.get("device_map") if ( device_map is not None and isinstance(device_map, dict) and not self.quantization_config.llm_int8_enable_fp32_cpu_offload ): device_map_without_lm_head = { - key: device_map[key] for key in device_map.keys() if key not in self.modules_to_not_convert + key: device_map[key] for key in device_map if key not in self.modules_to_not_convert } if set(device_map.values()) == {"cpu"} and bnb_multibackend_is_enabled: pass diff --git a/src/transformers/quantizers/quantizer_bnb_8bit.py b/src/transformers/quantizers/quantizer_bnb_8bit.py index e7d914170e..3dc759dfd2 100644 --- a/src/transformers/quantizers/quantizer_bnb_8bit.py +++ b/src/transformers/quantizers/quantizer_bnb_8bit.py @@ -99,14 +99,14 @@ class Bnb8BitHfQuantizer(HfQuantizer): " sure the weights are in PyTorch format." ) - device_map = kwargs.get("device_map", None) + device_map = kwargs.get("device_map") if ( device_map is not None and isinstance(device_map, dict) and not self.quantization_config.llm_int8_enable_fp32_cpu_offload ): device_map_without_lm_head = { - key: device_map[key] for key in device_map.keys() if key not in self.modules_to_not_convert + key: device_map[key] for key in device_map if key not in self.modules_to_not_convert } if set(device_map.values()) == {"cpu"} and bnb_multibackend_is_enabled: pass @@ -177,7 +177,7 @@ class Bnb8BitHfQuantizer(HfQuantizer): module, tensor_name = get_module_from_name(model, param_name) if isinstance(module._parameters.get(tensor_name, None), bnb.nn.Int8Params): if self.pre_quantized: - if param_name.replace("weight", "SCB") not in state_dict.keys(): + if param_name.replace("weight", "SCB") not in state_dict: raise ValueError("Missing quantization component `SCB`") if param_value.dtype != torch.int8: raise ValueError( @@ -204,8 +204,8 @@ class Bnb8BitHfQuantizer(HfQuantizer): fp16_statistics_key = param_name.replace("weight", "SCB") fp16_weights_format_key = param_name.replace("weight", "weight_format") - fp16_statistics = state_dict.get(fp16_statistics_key, None) - fp16_weights_format = state_dict.get(fp16_weights_format_key, None) + fp16_statistics = state_dict.get(fp16_statistics_key) + fp16_weights_format = state_dict.get(fp16_weights_format_key) module, tensor_name = get_module_from_name(model, param_name) if tensor_name not in module._parameters: diff --git a/src/transformers/quantizers/quantizer_compressed_tensors.py b/src/transformers/quantizers/quantizer_compressed_tensors.py index 7df4fb7ca9..0e94d87f32 100644 --- a/src/transformers/quantizers/quantizer_compressed_tensors.py +++ b/src/transformers/quantizers/quantizer_compressed_tensors.py @@ -130,7 +130,7 @@ class CompressedTensorsHfQuantizer(HfQuantizer): if ( self.quantization_config.is_quantization_compressed and not self.run_compressed ) or self.quantization_config.is_sparsification_compressed: - config = kwargs.get("config", None) + config = kwargs.get("config") cache_path = config._name_or_path if not os.path.exists(cache_path): diff --git a/src/transformers/quantizers/quantizer_eetq.py b/src/transformers/quantizers/quantizer_eetq.py index 61c9390900..bb7a78c536 100644 --- a/src/transformers/quantizers/quantizer_eetq.py +++ b/src/transformers/quantizers/quantizer_eetq.py @@ -79,7 +79,7 @@ class EetqHfQuantizer(HfQuantizer): if not torch.cuda.is_available(): raise RuntimeError("No GPU found. A GPU is needed for quantization.") - device_map = kwargs.get("device_map", None) + device_map = kwargs.get("device_map") if device_map is None: logger.warning_once( "You have loaded an EETQ model on CPU and have a CUDA device available, make sure to set " diff --git a/src/transformers/quantizers/quantizer_fbgemm_fp8.py b/src/transformers/quantizers/quantizer_fbgemm_fp8.py index 003ea515b3..c00a141d67 100644 --- a/src/transformers/quantizers/quantizer_fbgemm_fp8.py +++ b/src/transformers/quantizers/quantizer_fbgemm_fp8.py @@ -71,7 +71,7 @@ class FbgemmFp8HfQuantizer(HfQuantizer): "FP8 quantized models is only supported on GPUs with compute capability >= 9.0 (e.g H100)" ) - device_map = kwargs.get("device_map", None) + device_map = kwargs.get("device_map") if device_map is None: logger.warning_once( "You have loaded an FP8 model on CPU and have a CUDA device available, make sure to set " @@ -231,7 +231,7 @@ class FbgemmFp8HfQuantizer(HfQuantizer): not_missing_keys = [] for name, module in model.named_modules(): - if isinstance(module, FbgemmFp8Linear) or isinstance(module, FbgemmFp8Llama4TextExperts): + if isinstance(module, (FbgemmFp8Linear, FbgemmFp8Llama4TextExperts)): for missing in missing_keys: if ( (name in missing or name in f"{prefix}.{missing}") diff --git a/src/transformers/quantizers/quantizer_finegrained_fp8.py b/src/transformers/quantizers/quantizer_finegrained_fp8.py index 1a2ab58d98..58c747effa 100644 --- a/src/transformers/quantizers/quantizer_finegrained_fp8.py +++ b/src/transformers/quantizers/quantizer_finegrained_fp8.py @@ -56,7 +56,7 @@ class FineGrainedFP8HfQuantizer(HfQuantizer): f", actual = `{major}.{minor}`" ) - device_map = kwargs.get("device_map", None) + device_map = kwargs.get("device_map") if device_map is None: logger.warning_once( "You have loaded an FP8 model on CPU and have a CUDA device available, make sure to set " diff --git a/src/transformers/quantizers/quantizer_hqq.py b/src/transformers/quantizers/quantizer_hqq.py index 4c6a64705b..f52ac36a8d 100755 --- a/src/transformers/quantizers/quantizer_hqq.py +++ b/src/transformers/quantizers/quantizer_hqq.py @@ -78,7 +78,7 @@ class HqqHfQuantizer(HfQuantizer): self.torch_dtype = torch.float32 logger.info("Setting torch_dtype to torch.float32 as the default value since it was not specified.") - device_map = kwargs.get("device_map", None) + device_map = kwargs.get("device_map") if isinstance(device_map, dict): if "cpu" in device_map.values() or "disk" in device_map.values(): raise ValueError( @@ -171,7 +171,7 @@ class HqqHfQuantizer(HfQuantizer): module, tensor_name = get_module_from_name(model, param_name) if self.pre_quantized: - return (isinstance(module, torch.nn.Linear) or isinstance(module, HQQLinear)) and tensor_name != "weight" + return (isinstance(module, (torch.nn.Linear, HQQLinear))) and tensor_name != "weight" else: return ( isinstance(module, torch.nn.Linear) diff --git a/src/transformers/quantizers/quantizer_quanto.py b/src/transformers/quantizers/quantizer_quanto.py index 675d54ed2f..d273ddda59 100644 --- a/src/transformers/quantizers/quantizer_quanto.py +++ b/src/transformers/quantizers/quantizer_quanto.py @@ -117,8 +117,8 @@ class QuantoHfQuantizer(HfQuantizer): if is_optimum_quanto_available(): from optimum.quanto import QModuleMixin - device_map = kwargs.get("device_map", None) - param_device = kwargs.get("param_device", None) + device_map = kwargs.get("device_map") + param_device = kwargs.get("param_device") # we don't quantize the model if the module is going to be offloaded to the cpu if device_map is not None and param_device is not None: device_map_values = set(device_map.values()) diff --git a/src/transformers/quantizers/quantizer_torchao.py b/src/transformers/quantizers/quantizer_torchao.py index c0b337b566..23c9d7e327 100644 --- a/src/transformers/quantizers/quantizer_torchao.py +++ b/src/transformers/quantizers/quantizer_torchao.py @@ -98,7 +98,7 @@ class TorchAoHfQuantizer(HfQuantizer): raise ImportError("Loading an torchao quantized model requires torchao library (`pip install torchao`)") self.offload = False - device_map = kwargs.get("device_map", None) + device_map = kwargs.get("device_map") if isinstance(device_map, dict): if "cpu" in device_map.values() or "disk" in device_map.values(): if self.pre_quantized: @@ -109,7 +109,7 @@ class TorchAoHfQuantizer(HfQuantizer): else: self.offload = True if self.pre_quantized: - weights_only = kwargs.get("weights_only", None) + weights_only = kwargs.get("weights_only") if weights_only: torch_version = version.parse(importlib.metadata.version("torch")) if torch_version < version.parse("2.5.0"): diff --git a/src/transformers/testing_utils.py b/src/transformers/testing_utils.py index fd5f62ec28..a41ea8166e 100644 --- a/src/transformers/testing_utils.py +++ b/src/transformers/testing_utils.py @@ -3304,7 +3304,7 @@ def compare_pipeline_output_to_hub_spec(output, hub_spec): missing_keys = [] unexpected_keys = [] all_field_names = {field.name for field in fields(hub_spec)} - matching_keys = sorted([key for key in output.keys() if key in all_field_names]) + matching_keys = sorted([key for key in output if key in all_field_names]) # Fields with a MISSING default are required and must be in the output for field in fields(hub_spec): diff --git a/src/transformers/tokenization_mistral_common.py b/src/transformers/tokenization_mistral_common.py index 95bd64049b..cd85a641cb 100644 --- a/src/transformers/tokenization_mistral_common.py +++ b/src/transformers/tokenization_mistral_common.py @@ -1203,7 +1203,7 @@ class MistralCommonTokenizer(PushToHubMixin): # If we have a list of dicts, let's convert it in a dict of lists # We do this to allow using this method as a collate_fn function in PyTorch Dataloader if isinstance(encoded_inputs, (list, tuple)) and isinstance(encoded_inputs[0], Mapping): - encoded_inputs = {key: [example[key] for example in encoded_inputs] for key in encoded_inputs[0].keys()} + encoded_inputs = {key: [example[key] for example in encoded_inputs] for key in encoded_inputs[0]} # The model's main input name, usually `input_ids`, has been passed for padding if self.model_input_names[0] not in encoded_inputs: @@ -1449,7 +1449,7 @@ class MistralCommonTokenizer(PushToHubMixin): if not isinstance(message, dict): return maybe_list_content: Optional[Union[str, list[dict[str, Union[str, dict[str, Any]]]]]] = message.get( - "content", None + "content" ) if not maybe_list_content or isinstance(maybe_list_content, str): return diff --git a/src/transformers/tokenization_utils_base.py b/src/transformers/tokenization_utils_base.py index a8d0336b46..beb91a9472 100644 --- a/src/transformers/tokenization_utils_base.py +++ b/src/transformers/tokenization_utils_base.py @@ -271,7 +271,7 @@ class BatchEncoding(UserDict): elif self._encodings is not None: return self._encodings[item] elif isinstance(item, slice): - return {key: self.data[key][item] for key in self.data.keys()} + return {key: self.data[key][item] for key in self.data} else: raise KeyError( "Invalid key. Only three types of key are available: " @@ -1846,7 +1846,7 @@ class PreTrainedTokenizerBase(SpecialTokensMixin, PushToHubMixin): from_pipeline = kwargs.pop("_from_pipeline", None) from_auto_class = kwargs.pop("_from_auto", False) commit_hash = kwargs.pop("_commit_hash", None) - gguf_file = kwargs.get("gguf_file", None) + gguf_file = kwargs.get("gguf_file") if use_auth_token is not None: warnings.warn( @@ -2042,7 +2042,7 @@ class PreTrainedTokenizerBase(SpecialTokensMixin, PushToHubMixin): # We instantiate fast tokenizers based on a slow tokenizer if we don't have access to the tokenizer.json # file or if `from_slow` is set to True. from_slow = kwargs.get("from_slow", False) - gguf_file = kwargs.get("gguf_file", None) + gguf_file = kwargs.get("gguf_file") has_tokenizer_file = resolved_vocab_files.get("tokenizer_file", None) is not None # If one passes a GGUF file path to `gguf_file` there is no need for this check as the tokenizer will be @@ -2136,7 +2136,7 @@ class PreTrainedTokenizerBase(SpecialTokensMixin, PushToHubMixin): else: # Fallback: use pattern matching on the string. model_type = None - for pattern in TOKENIZER_MAPPING_NAMES.keys(): + for pattern in TOKENIZER_MAPPING_NAMES: if pattern in str(pretrained_model_name_or_path): model_type = pattern break @@ -2415,7 +2415,7 @@ class PreTrainedTokenizerBase(SpecialTokensMixin, PushToHubMixin): "The `use_auth_token` argument is deprecated and will be removed in v5 of Transformers. Please use `token` instead.", FutureWarning, ) - if kwargs.get("token", None) is not None: + if kwargs.get("token") is not None: raise ValueError( "`token` and `use_auth_token` are both specified. Please set only the argument `token`." ) @@ -2464,7 +2464,7 @@ class PreTrainedTokenizerBase(SpecialTokensMixin, PushToHubMixin): if len(self.init_inputs) > 0: tokenizer_config["init_inputs"] = copy.deepcopy(self.init_inputs) - for file_id in self.vocab_files_names.keys(): + for file_id in self.vocab_files_names: tokenizer_config.pop(file_id, None) # no typefields, this way old fast and slow can load it @@ -3283,7 +3283,7 @@ class PreTrainedTokenizerBase(SpecialTokensMixin, PushToHubMixin): # If we have a list of dicts, let's convert it in a dict of lists # We do this to allow using this method as a collate_fn function in PyTorch Dataloader if isinstance(encoded_inputs, (list, tuple)) and isinstance(encoded_inputs[0], Mapping): - encoded_inputs = {key: [example[key] for example in encoded_inputs] for key in encoded_inputs[0].keys()} + encoded_inputs = {key: [example[key] for example in encoded_inputs] for key in encoded_inputs[0]} # The model's main input name, usually `input_ids`, has been passed for padding if self.model_input_names[0] not in encoded_inputs: diff --git a/src/transformers/tokenization_utils_fast.py b/src/transformers/tokenization_utils_fast.py index 3fecfa0e1d..4ca5bed605 100644 --- a/src/transformers/tokenization_utils_fast.py +++ b/src/transformers/tokenization_utils_fast.py @@ -134,7 +134,7 @@ class PreTrainedTokenizerFast(PreTrainedTokenizerBase): fast_tokenizer = convert_slow_tokenizer(slow_tokenizer) elif not slow_tokenizer: # We tried loading a slow_tokenizer with spm and failed, try to load with tiktoken - self.vocab_file = kwargs.get("vocab_file", None) + self.vocab_file = kwargs.get("vocab_file") self.additional_special_tokens = kwargs.get("additional_special_tokens", []) fast_tokenizer = convert_slow_tokenizer(self, from_tiktoken=True) slow_tokenizer = None @@ -583,7 +583,7 @@ class PreTrainedTokenizerFast(PreTrainedTokenizerBase): # To match each overflowing sample with the original sample in the batch # we add an overflow_to_sample_mapping array (see below) sanitized_tokens = {} - for key in tokens_and_encodings[0][0].keys(): + for key in tokens_and_encodings[0][0]: stack = [e for item, _ in tokens_and_encodings for e in item[key]] sanitized_tokens[key] = stack sanitized_encodings = [e for _, item in tokens_and_encodings for e in item] diff --git a/src/transformers/trainer.py b/src/transformers/trainer.py index 52dc9c3557..c0d4c12db5 100755 --- a/src/transformers/trainer.py +++ b/src/transformers/trainer.py @@ -438,7 +438,7 @@ class Trainer: logger.info(f"No `TrainingArguments` passed, using `output_dir={output_dir}`.") args = TrainingArguments(output_dir=output_dir) if args.batch_eval_metrics and compute_metrics is not None: - if "compute_result" not in inspect.signature(compute_metrics).parameters.keys(): + if "compute_result" not in inspect.signature(compute_metrics).parameters: raise ValueError( "When using `batch_eval_metrics`, your `compute_metrics` function must take a `compute_result`" " boolean argument which will be triggered after the last batch of the eval set to signal that the" @@ -4614,7 +4614,7 @@ class Trainer: # For CLIP-like models capable of returning loss values. # If `return_loss` is not specified or being `None` in `inputs`, we check if the default value of `return_loss` # is `True` in `model.forward`. - return_loss = inputs.get("return_loss", None) + return_loss = inputs.get("return_loss") if return_loss is None: return_loss = self.can_return_loss loss_without_labels = True if len(self.label_names) == 0 and return_loss else False @@ -5259,7 +5259,7 @@ class Trainer: # some Trainer classes need to use `gather` instead of `gather_for_metrics`, thus we store a flag self.gather_function = self.accelerator.gather_for_metrics - if "use_gather_object" in inspect.signature(self.gather_function).parameters.keys(): + if "use_gather_object" in inspect.signature(self.gather_function).parameters: self.gather_function = functools.partial( self.gather_function, use_gather_object=self.args.eval_use_gather_object ) diff --git a/src/transformers/trainer_pt_utils.py b/src/transformers/trainer_pt_utils.py index 84f65c9544..e93e279052 100644 --- a/src/transformers/trainer_pt_utils.py +++ b/src/transformers/trainer_pt_utils.py @@ -1047,7 +1047,7 @@ def log_metrics(self, split, metrics): print(f"***** {split} metrics *****") metrics_formatted = self.metrics_format(metrics) - k_width = max(len(str(x)) for x in metrics_formatted.keys()) + k_width = max(len(str(x)) for x in metrics_formatted) v_width = max(len(str(x)) for x in metrics_formatted.values()) for key in sorted(metrics_formatted.keys()): print(f" {key: <{k_width}} = {metrics_formatted[key]:>{v_width}}") @@ -1139,9 +1139,7 @@ def get_parameter_names(model, forbidden_layer_types, forbidden_layer_names=None ] # Add model specific parameters that are not in any child result += [ - k - for k in model._parameters.keys() - if not any(pattern.search(k.lower()) for pattern in forbidden_layer_patterns) + k for k in model._parameters if not any(pattern.search(k.lower()) for pattern in forbidden_layer_patterns) ] return result @@ -1333,7 +1331,7 @@ class AcceleratorConfig: with open_file(json_file, "r", encoding="utf-8") as f: config_dict = json.load(f) # Check for keys and load sensible defaults - extra_keys = sorted(key for key in config_dict.keys() if key not in cls.__dataclass_fields__.keys()) + extra_keys = sorted(key for key in config_dict if key not in cls.__dataclass_fields__) if len(extra_keys) > 0: raise ValueError( f"The config file at {json_file} had unknown keys ({extra_keys}), please try upgrading your `transformers`" diff --git a/src/transformers/trainer_utils.py b/src/transformers/trainer_utils.py index 317e50fb68..c435c1f6df 100644 --- a/src/transformers/trainer_utils.py +++ b/src/transformers/trainer_utils.py @@ -288,9 +288,7 @@ def default_compute_objective(metrics: dict[str, float]) -> float: _ = metrics.pop("epoch", None) # Remove speed metrics speed_metrics = [ - m - for m in metrics.keys() - if m.endswith("_runtime") or m.endswith("_per_second") or m.endswith("_compilation_time") + m for m in metrics if m.endswith("_runtime") or m.endswith("_per_second") or m.endswith("_compilation_time") ] for sm in speed_metrics: _ = metrics.pop(sm, None) diff --git a/src/transformers/training_args.py b/src/transformers/training_args.py index 8314e27134..a9bbebab94 100644 --- a/src/transformers/training_args.py +++ b/src/transformers/training_args.py @@ -2532,7 +2532,7 @@ class TrainingArguments: converts torch.dtype to a string of just the type. For example, `torch.float32` get converted into *"float32"* string, which can then be stored in the json format. """ - if d.get("torch_dtype", None) is not None and not isinstance(d["torch_dtype"], str): + if d.get("torch_dtype") is not None and not isinstance(d["torch_dtype"], str): d["torch_dtype"] = str(d["torch_dtype"]).split(".")[1] for value in d.values(): if isinstance(value, dict): diff --git a/src/transformers/utils/attention_visualizer.py b/src/transformers/utils/attention_visualizer.py index d9a4041f7f..8f5b9bc827 100644 --- a/src/transformers/utils/attention_visualizer.py +++ b/src/transformers/utils/attention_visualizer.py @@ -233,7 +233,7 @@ class AttentionMaskVisualizer: attention_mask, img_token=self.image_token, sliding_window=getattr(self.config, "sliding_window", None), - token_type_ids=kwargs.get("token_type_ids", None), + token_type_ids=kwargs.get("token_type_ids"), image_seq_length=image_seq_length, ) print(f_string) diff --git a/src/transformers/utils/generic.py b/src/transformers/utils/generic.py index e13d3bbe28..335a5d5717 100644 --- a/src/transformers/utils/generic.py +++ b/src/transformers/utils/generic.py @@ -830,7 +830,7 @@ def filter_out_non_signature_kwargs(extra: Optional[list] = None): invalid_kwargs[k] = v if invalid_kwargs: - invalid_kwargs_names = [f"'{k}'" for k in invalid_kwargs.keys()] + invalid_kwargs_names = [f"'{k}'" for k in invalid_kwargs] invalid_kwargs_names = ", ".join(invalid_kwargs_names) # Get the class name for better warning message @@ -993,7 +993,7 @@ def check_model_inputs(func): @wraps(func) def wrapper(self, *args, **kwargs): - use_cache = kwargs.get("use_cache", None) + use_cache = kwargs.get("use_cache") if use_cache is None: use_cache = getattr(self.config, "use_cache", False) diff --git a/src/transformers/utils/import_utils.py b/src/transformers/utils/import_utils.py index fb9d56e160..106e8abe11 100644 --- a/src/transformers/utils/import_utils.py +++ b/src/transformers/utils/import_utils.py @@ -939,9 +939,9 @@ def is_torch_hpu_available(): original_compile = torch.compile def hpu_backend_compile(*args, **kwargs): - if kwargs.get("backend", None) not in ["hpu_backend", "eager"]: + if kwargs.get("backend") not in ["hpu_backend", "eager"]: logger.warning( - f"Calling torch.compile with backend={kwargs.get('backend', None)} on a Gaudi device is not supported. " + f"Calling torch.compile with backend={kwargs.get('backend')} on a Gaudi device is not supported. " "We will override the backend with 'hpu_backend' to avoid errors." ) kwargs["backend"] = "hpu_backend" @@ -2149,7 +2149,7 @@ class _LazyModule(ModuleType): self._object_missing_backend = {} self._explicit_import_shortcut = explicit_import_shortcut if explicit_import_shortcut else {} - if any(isinstance(key, frozenset) for key in import_structure.keys()): + if any(isinstance(key, frozenset) for key in import_structure): self._modules = set() self._class_to_module = {} self.__all__ = [] @@ -2247,7 +2247,7 @@ class _LazyModule(ModuleType): def __getattr__(self, name: str) -> Any: if name in self._objects: return self._objects[name] - if name in self._object_missing_backend.keys(): + if name in self._object_missing_backend: missing_backends = self._object_missing_backend[name] class Placeholder(metaclass=DummyObject): @@ -2271,7 +2271,7 @@ class _LazyModule(ModuleType): Placeholder.__module__ = module_name value = Placeholder - elif name in self._class_to_module.keys(): + elif name in self._class_to_module: try: module = self._get_module(self._class_to_module[name]) value = getattr(module, name) @@ -2726,7 +2726,7 @@ def spread_import_structure(nested_import_structure): if not isinstance(_value, dict): frozenset_first_import_structure[_key] = _value - elif any(isinstance(v, frozenset) for v in _value.keys()): + elif any(isinstance(v, frozenset) for v in _value): for k, v in _value.items(): if isinstance(k, frozenset): # Here we want to switch around _key and k to propagate k upstream if it is a frozenset diff --git a/src/transformers/utils/notebook.py b/src/transformers/utils/notebook.py index 42e24c7781..b270a14105 100644 --- a/src/transformers/utils/notebook.py +++ b/src/transformers/utils/notebook.py @@ -244,7 +244,7 @@ class NotebookTrainingTracker(NotebookProgressBar): self.inner_table = [list(values.keys()), list(values.values())] else: columns = self.inner_table[0] - for key in values.keys(): + for key in values: if key not in columns: columns.append(key) self.inner_table[0] = columns @@ -258,7 +258,7 @@ class NotebookTrainingTracker(NotebookProgressBar): # update last line new_values = values for c in columns: - if c not in new_values.keys(): + if c not in new_values: new_values[c] = last_values[columns.index(c)] self.inner_table[-1] = [new_values[c] for c in columns] else: diff --git a/src/transformers/video_processing_utils.py b/src/transformers/video_processing_utils.py index 0db55024e6..b59b387485 100644 --- a/src/transformers/video_processing_utils.py +++ b/src/transformers/video_processing_utils.py @@ -553,7 +553,7 @@ class BaseVideoProcessor(BaseImageProcessorFast): "The `use_auth_token` argument is deprecated and will be removed in v5 of Transformers. Please use `token` instead.", FutureWarning, ) - if kwargs.get("token", None) is not None: + if kwargs.get("token") is not None: raise ValueError( "`token` and `use_auth_token` are both specified. Please set only the argument `token`." ) diff --git a/tests/extended/test_trainer_ext.py b/tests/extended/test_trainer_ext.py index dc087559d4..a31c23adfe 100644 --- a/tests/extended/test_trainer_ext.py +++ b/tests/extended/test_trainer_ext.py @@ -83,7 +83,7 @@ class TestTrainerExt(TestCasePlus): if not do_eval: self.skipTest(reason="do_eval is False") - eval_metrics = [log for log in logs if "eval_loss" in log.keys()] + eval_metrics = [log for log in logs if "eval_loss" in log] first_step_stats = eval_metrics[0] if predict_with_generate: @@ -168,7 +168,7 @@ class TestTrainerExt(TestCasePlus): # Check metrics logs = TrainerState.load_from_json(os.path.join(output_dir, "trainer_state.json")).log_history - eval_metrics = [log for log in logs if "eval_loss" in log.keys()] + eval_metrics = [log for log in logs if "eval_loss" in log] first_step_stats = eval_metrics[0] last_step_stats = eval_metrics[-1] diff --git a/tests/generation/test_utils.py b/tests/generation/test_utils.py index 0f94837b78..d73bfa2274 100644 --- a/tests/generation/test_utils.py +++ b/tests/generation/test_utils.py @@ -1440,7 +1440,7 @@ class GenerationTesterMixin: # added support for it yet. We skip these models for now. has_encoder_attributes = any( attr_name - for attr_name in config.to_dict().keys() + for attr_name in config.to_dict() if attr_name.startswith("encoder") and attr_name != "encoder_no_repeat_ngram_size" ) if has_encoder_attributes: @@ -1644,7 +1644,7 @@ class GenerationTesterMixin: config.is_decoder = True model = model_class(config).to(torch_device).eval() - if "inputs_embeds" not in inspect.signature(model.prepare_inputs_for_generation).parameters.keys(): + if "inputs_embeds" not in inspect.signature(model.prepare_inputs_for_generation).parameters: continue # No easy fix, let's skip the test for now @@ -1701,7 +1701,7 @@ class GenerationTesterMixin: # Skip models without explicit support model = model_class(config).to(torch_device).eval() set_model_for_less_flaky_test(model) - if "inputs_embeds" not in inspect.signature(model.prepare_inputs_for_generation).parameters.keys(): + if "inputs_embeds" not in inspect.signature(model.prepare_inputs_for_generation).parameters: continue # There are a few exception patterns in this test: @@ -1773,7 +1773,7 @@ class GenerationTesterMixin: self.skipTest(reason="This model is encoder-decoder and has Encoder-Decoder Cache") model = model_class(config).to(torch_device).eval() - if "inputs_embeds" not in inspect.signature(model.prepare_inputs_for_generation).parameters.keys(): + if "inputs_embeds" not in inspect.signature(model.prepare_inputs_for_generation).parameters: self.skipTest(reason="This model does not support `inputs_embeds` in generation") input_ids = inputs_dict.pop("input_ids") @@ -1929,7 +1929,7 @@ class GenerationTesterMixin: model = model_class(config).to(torch_device).eval() - if "inputs_embeds" not in inspect.signature(model.prepare_inputs_for_generation).parameters.keys(): + if "inputs_embeds" not in inspect.signature(model.prepare_inputs_for_generation).parameters: self.skipTest(reason="This model does not support `inputs_embeds` in generation") # If "past_key_values" is not returned, skip the test (e.g. RWKV uses a different cache name and format) diff --git a/tests/models/align/test_modeling_align.py b/tests/models/align/test_modeling_align.py index 4e897a554c..167cb1ff7c 100644 --- a/tests/models/align/test_modeling_align.py +++ b/tests/models/align/test_modeling_align.py @@ -562,8 +562,8 @@ class AlignModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase): loaded_model_state_dict = loaded_model.state_dict() non_persistent_buffers = {} - for key in loaded_model_state_dict.keys(): - if key not in model_state_dict.keys(): + for key in loaded_model_state_dict: + if key not in model_state_dict: non_persistent_buffers[key] = loaded_model_state_dict[key] loaded_model_state_dict = { diff --git a/tests/models/align/test_processor_align.py b/tests/models/align/test_processor_align.py index 73e0d1df91..9205e5f4f2 100644 --- a/tests/models/align/test_processor_align.py +++ b/tests/models/align/test_processor_align.py @@ -135,7 +135,7 @@ class AlignProcessorTest(ProcessorTesterMixin, unittest.TestCase): input_image_proc = image_processor(image_input, return_tensors="np") input_processor = processor(images=image_input, return_tensors="np") - for key in input_image_proc.keys(): + for key in input_image_proc: self.assertAlmostEqual(input_image_proc[key].sum(), input_processor[key].sum(), delta=1e-2) def test_tokenizer(self): @@ -149,7 +149,7 @@ class AlignProcessorTest(ProcessorTesterMixin, unittest.TestCase): encoded_processor = processor(text=input_str) encoded_tok = tokenizer(input_str, padding="max_length", max_length=64) - for key in encoded_tok.keys(): + for key in encoded_tok: self.assertListEqual(encoded_tok[key], encoded_processor[key]) def test_processor(self): diff --git a/tests/models/altclip/test_modeling_altclip.py b/tests/models/altclip/test_modeling_altclip.py index d1656d06fe..2a36470051 100755 --- a/tests/models/altclip/test_modeling_altclip.py +++ b/tests/models/altclip/test_modeling_altclip.py @@ -532,8 +532,8 @@ class AltCLIPModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase) loaded_model_state_dict = loaded_model.state_dict() non_persistent_buffers = {} - for key in loaded_model_state_dict.keys(): - if key not in model_state_dict.keys(): + for key in loaded_model_state_dict: + if key not in model_state_dict: non_persistent_buffers[key] = loaded_model_state_dict[key] loaded_model_state_dict = { diff --git a/tests/models/bamba/test_modeling_bamba.py b/tests/models/bamba/test_modeling_bamba.py index cf660b7fa0..245163d672 100644 --- a/tests/models/bamba/test_modeling_bamba.py +++ b/tests/models/bamba/test_modeling_bamba.py @@ -448,7 +448,7 @@ class BambaModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMixi # added support for it yet. We skip these models for now. has_encoder_attributes = any( attr_name - for attr_name in config.to_dict().keys() + for attr_name in config.to_dict() if attr_name.startswith("encoder") and attr_name != "encoder_no_repeat_ngram_size" ) if has_encoder_attributes: diff --git a/tests/models/bark/test_processor_bark.py b/tests/models/bark/test_processor_bark.py index 15b0871d81..447d38b956 100644 --- a/tests/models/bark/test_processor_bark.py +++ b/tests/models/bark/test_processor_bark.py @@ -123,5 +123,5 @@ class BarkProcessorTest(unittest.TestCase): return_token_type_ids=False, ) - for key in encoded_tok.keys(): + for key in encoded_tok: self.assertListEqual(encoded_tok[key], encoded_processor[key].squeeze().tolist()) diff --git a/tests/models/biogpt/test_modeling_biogpt.py b/tests/models/biogpt/test_modeling_biogpt.py index 232f7176b2..26f2053a93 100644 --- a/tests/models/biogpt/test_modeling_biogpt.py +++ b/tests/models/biogpt/test_modeling_biogpt.py @@ -224,7 +224,7 @@ class BioGptModelTester: def create_and_check_biogpt_weight_initialization(self, config, *args): model = BioGptModel(config) model_std = model.config.initializer_range / math.sqrt(2 * model.config.num_hidden_layers) - for key in model.state_dict().keys(): + for key in model.state_dict(): if "c_proj" in key and "weight" in key: self.parent.assertLessEqual(abs(torch.std(model.state_dict()[key]) - model_std), 0.001) self.parent.assertLessEqual(abs(torch.mean(model.state_dict()[key]) - 0.0), 0.01) diff --git a/tests/models/blip/test_modeling_blip.py b/tests/models/blip/test_modeling_blip.py index d340d3e569..e8cdb26cc4 100644 --- a/tests/models/blip/test_modeling_blip.py +++ b/tests/models/blip/test_modeling_blip.py @@ -536,8 +536,8 @@ class BlipModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase): loaded_model_state_dict = loaded_model.state_dict() non_persistent_buffers = {} - for key in loaded_model_state_dict.keys(): - if key not in model_state_dict.keys(): + for key in loaded_model_state_dict: + if key not in model_state_dict: non_persistent_buffers[key] = loaded_model_state_dict[key] loaded_model_state_dict = { @@ -1056,8 +1056,8 @@ class BlipTextRetrievalModelTest(ModelTesterMixin, unittest.TestCase): loaded_model_state_dict = loaded_model.state_dict() non_persistent_buffers = {} - for key in loaded_model_state_dict.keys(): - if key not in model_state_dict.keys(): + for key in loaded_model_state_dict: + if key not in model_state_dict: non_persistent_buffers[key] = loaded_model_state_dict[key] loaded_model_state_dict = { @@ -1274,8 +1274,8 @@ class BlipTextImageModelTest(ModelTesterMixin, unittest.TestCase): loaded_model_state_dict = loaded_model.state_dict() non_persistent_buffers = {} - for key in loaded_model_state_dict.keys(): - if key not in model_state_dict.keys(): + for key in loaded_model_state_dict: + if key not in model_state_dict: non_persistent_buffers[key] = loaded_model_state_dict[key] loaded_model_state_dict = { diff --git a/tests/models/blip/test_processor_blip.py b/tests/models/blip/test_processor_blip.py index 6ff81e991a..254eede275 100644 --- a/tests/models/blip/test_processor_blip.py +++ b/tests/models/blip/test_processor_blip.py @@ -81,7 +81,7 @@ class BlipProcessorTest(ProcessorTesterMixin, unittest.TestCase): input_feat_extract = image_processor(image_input, return_tensors="np") input_processor = processor(images=image_input, return_tensors="np") - for key in input_feat_extract.keys(): + for key in input_feat_extract: self.assertAlmostEqual(input_feat_extract[key].sum(), input_processor[key].sum(), delta=1e-2) def test_tokenizer(self): @@ -96,7 +96,7 @@ class BlipProcessorTest(ProcessorTesterMixin, unittest.TestCase): encoded_tok = tokenizer(input_str, return_token_type_ids=False) - for key in encoded_tok.keys(): + for key in encoded_tok: self.assertListEqual(encoded_tok[key], encoded_processor[key]) def test_processor(self): diff --git a/tests/models/blip_2/test_modeling_blip_2.py b/tests/models/blip_2/test_modeling_blip_2.py index d5606ac705..952595bbe6 100644 --- a/tests/models/blip_2/test_modeling_blip_2.py +++ b/tests/models/blip_2/test_modeling_blip_2.py @@ -627,7 +627,7 @@ class Blip2ForConditionalGenerationDecoderOnlyTest(ModelTesterMixin, GenerationT # added support for it yet. We skip these models for now. has_encoder_attributes = any( attr_name - for attr_name in config.to_dict().keys() + for attr_name in config.to_dict() if attr_name.startswith("encoder") and attr_name != "encoder_no_repeat_ngram_size" ) if has_encoder_attributes: diff --git a/tests/models/blip_2/test_processor_blip_2.py b/tests/models/blip_2/test_processor_blip_2.py index d2b63f8e40..a24ccc4aeb 100644 --- a/tests/models/blip_2/test_processor_blip_2.py +++ b/tests/models/blip_2/test_processor_blip_2.py @@ -84,7 +84,7 @@ class Blip2ProcessorTest(ProcessorTesterMixin, unittest.TestCase): input_feat_extract = image_processor(image_input, return_tensors="np") input_processor = processor(images=image_input, return_tensors="np") - for key in input_feat_extract.keys(): + for key in input_feat_extract: self.assertAlmostEqual(input_feat_extract[key].sum(), input_processor[key].sum(), delta=1e-2) def test_processor(self): diff --git a/tests/models/bloom/test_modeling_bloom.py b/tests/models/bloom/test_modeling_bloom.py index 787a99c932..0c685f0536 100644 --- a/tests/models/bloom/test_modeling_bloom.py +++ b/tests/models/bloom/test_modeling_bloom.py @@ -290,7 +290,7 @@ class BloomModelTester: def create_and_check_bloom_weight_initialization(self, config, *args): model = BloomModel(config) model_std = model.config.initializer_range / math.sqrt(2 * model.config.n_layer) - for key in model.state_dict().keys(): + for key in model.state_dict(): if "c_proj" in key and "weight" in key: self.parent.assertLessEqual(abs(torch.std(model.state_dict()[key]) - model_std), 0.001) self.parent.assertLessEqual(abs(torch.mean(model.state_dict()[key]) - 0.0), 0.01) @@ -747,7 +747,7 @@ class BloomEmbeddingTest(unittest.TestCase): output_dict["max"][idx] = embeddings.max(dim=-1).values[0][i].item() output_dict["mean"][idx] = embeddings.mean(dim=-1)[0][i].item() - for key in TEST_EMBEDDINGS[str(model.dtype)].keys(): + for key in TEST_EMBEDDINGS[str(model.dtype)]: self.assertDictEqual(TEST_EMBEDDINGS[str(model.dtype)][key], output_dict[key]) output_dict_norm = {"min": {}, "max": {}, "mean": {}} diff --git a/tests/models/chinese_clip/test_modeling_chinese_clip.py b/tests/models/chinese_clip/test_modeling_chinese_clip.py index 7ec9dd42dc..dc8e9a145b 100644 --- a/tests/models/chinese_clip/test_modeling_chinese_clip.py +++ b/tests/models/chinese_clip/test_modeling_chinese_clip.py @@ -649,8 +649,8 @@ class ChineseCLIPModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestC loaded_model_state_dict = loaded_model.state_dict() non_persistent_buffers = {} - for key in loaded_model_state_dict.keys(): - if key not in model_state_dict.keys(): + for key in loaded_model_state_dict: + if key not in model_state_dict: non_persistent_buffers[key] = loaded_model_state_dict[key] loaded_model_state_dict = { diff --git a/tests/models/chinese_clip/test_processor_chinese_clip.py b/tests/models/chinese_clip/test_processor_chinese_clip.py index 3c2a2247c5..83f68c0361 100644 --- a/tests/models/chinese_clip/test_processor_chinese_clip.py +++ b/tests/models/chinese_clip/test_processor_chinese_clip.py @@ -155,7 +155,7 @@ class ChineseCLIPProcessorTest(ProcessorTesterMixin, unittest.TestCase): input_feat_extract = image_processor(image_input, return_tensors="np") input_processor = processor(images=image_input, return_tensors="np") - for key in input_feat_extract.keys(): + for key in input_feat_extract: self.assertAlmostEqual(input_feat_extract[key].sum(), input_processor[key].sum(), delta=1e-2) def test_tokenizer(self): @@ -170,7 +170,7 @@ class ChineseCLIPProcessorTest(ProcessorTesterMixin, unittest.TestCase): encoded_tok = tokenizer(input_str) - for key in encoded_tok.keys(): + for key in encoded_tok: self.assertListEqual(encoded_tok[key], encoded_processor[key]) def test_processor(self): diff --git a/tests/models/clap/test_modeling_clap.py b/tests/models/clap/test_modeling_clap.py index 22579fa99f..0dab34123d 100644 --- a/tests/models/clap/test_modeling_clap.py +++ b/tests/models/clap/test_modeling_clap.py @@ -594,8 +594,8 @@ class ClapModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase): loaded_model_state_dict = loaded_model.state_dict() non_persistent_buffers = {} - for key in loaded_model_state_dict.keys(): - if key not in model_state_dict.keys(): + for key in loaded_model_state_dict: + if key not in model_state_dict: non_persistent_buffers[key] = loaded_model_state_dict[key] loaded_model_state_dict = { diff --git a/tests/models/clap/test_processor_clap.py b/tests/models/clap/test_processor_clap.py index 49e9972ea0..43192cee2e 100644 --- a/tests/models/clap/test_processor_clap.py +++ b/tests/models/clap/test_processor_clap.py @@ -81,7 +81,7 @@ class ClapProcessorTest(unittest.TestCase): input_feat_extract = feature_extractor(raw_speech, return_tensors="np") input_processor = processor(audios=raw_speech, return_tensors="np") - for key in input_feat_extract.keys(): + for key in input_feat_extract: self.assertAlmostEqual(input_feat_extract[key].sum(), input_processor[key].sum(), delta=1e-2) def test_tokenizer(self): @@ -96,7 +96,7 @@ class ClapProcessorTest(unittest.TestCase): encoded_tok = tokenizer(input_str) - for key in encoded_tok.keys(): + for key in encoded_tok: self.assertListEqual(encoded_tok[key], encoded_processor[key]) def test_tokenizer_decode(self): diff --git a/tests/models/clip/test_modeling_clip.py b/tests/models/clip/test_modeling_clip.py index 92de6d4583..ad7d817f96 100644 --- a/tests/models/clip/test_modeling_clip.py +++ b/tests/models/clip/test_modeling_clip.py @@ -641,8 +641,8 @@ class CLIPModelTest(CLIPModelTesterMixin, PipelineTesterMixin, unittest.TestCase loaded_model_state_dict = loaded_model.state_dict() non_persistent_buffers = {} - for key in loaded_model_state_dict.keys(): - if key not in model_state_dict.keys(): + for key in loaded_model_state_dict: + if key not in model_state_dict: non_persistent_buffers[key] = loaded_model_state_dict[key] loaded_model_state_dict = { diff --git a/tests/models/clip/test_processor_clip.py b/tests/models/clip/test_processor_clip.py index 1b6eed7534..bb7fae4a86 100644 --- a/tests/models/clip/test_processor_clip.py +++ b/tests/models/clip/test_processor_clip.py @@ -137,7 +137,7 @@ class CLIPProcessorTest(ProcessorTesterMixin, unittest.TestCase): input_image_proc = image_processor(image_input, return_tensors="np") input_processor = processor(images=image_input, return_tensors="np") - for key in input_image_proc.keys(): + for key in input_image_proc: self.assertAlmostEqual(input_image_proc[key].sum(), input_processor[key].sum(), delta=1e-2) def test_tokenizer(self): @@ -152,7 +152,7 @@ class CLIPProcessorTest(ProcessorTesterMixin, unittest.TestCase): encoded_tok = tokenizer(input_str) - for key in encoded_tok.keys(): + for key in encoded_tok: self.assertListEqual(encoded_tok[key], encoded_processor[key]) def test_processor(self): diff --git a/tests/models/clipseg/test_modeling_clipseg.py b/tests/models/clipseg/test_modeling_clipseg.py index dcbaeeb661..08a21f9dcf 100644 --- a/tests/models/clipseg/test_modeling_clipseg.py +++ b/tests/models/clipseg/test_modeling_clipseg.py @@ -562,8 +562,8 @@ class CLIPSegModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase) loaded_model_state_dict = loaded_model.state_dict() non_persistent_buffers = {} - for key in loaded_model_state_dict.keys(): - if key not in model_state_dict.keys(): + for key in loaded_model_state_dict: + if key not in model_state_dict: non_persistent_buffers[key] = loaded_model_state_dict[key] loaded_model_state_dict = { diff --git a/tests/models/clipseg/test_processor_clipseg.py b/tests/models/clipseg/test_processor_clipseg.py index 5147ed3975..f7255838ca 100644 --- a/tests/models/clipseg/test_processor_clipseg.py +++ b/tests/models/clipseg/test_processor_clipseg.py @@ -128,7 +128,7 @@ class CLIPSegProcessorTest(ProcessorTesterMixin, unittest.TestCase): input_feat_extract = image_processor(image_input, return_tensors="np") input_processor = processor(images=image_input, return_tensors="np") - for key in input_feat_extract.keys(): + for key in input_feat_extract: self.assertAlmostEqual(input_feat_extract[key].sum(), input_processor[key].sum(), delta=1e-2) def test_tokenizer(self): @@ -143,7 +143,7 @@ class CLIPSegProcessorTest(ProcessorTesterMixin, unittest.TestCase): encoded_tok = tokenizer(input_str) - for key in encoded_tok.keys(): + for key in encoded_tok: self.assertListEqual(encoded_tok[key], encoded_processor[key]) def test_processor_text(self): diff --git a/tests/models/clvp/test_processor_clvp.py b/tests/models/clvp/test_processor_clvp.py index f751ab92d0..817cbbff88 100644 --- a/tests/models/clvp/test_processor_clvp.py +++ b/tests/models/clvp/test_processor_clvp.py @@ -71,7 +71,7 @@ class ClvpProcessorTest(unittest.TestCase): input_feat_extract = feature_extractor(raw_speech, return_tensors="np") input_processor = processor(raw_speech=raw_speech, return_tensors="np") - for key in input_feat_extract.keys(): + for key in input_feat_extract: self.assertAlmostEqual(input_feat_extract[key].sum(), input_processor[key].sum(), delta=1e-2) # Copied from transformers.tests.models.whisper.test_processor_whisper.WhisperProcessorTest.test_tokenizer with Whisper->Clvp @@ -87,7 +87,7 @@ class ClvpProcessorTest(unittest.TestCase): encoded_tok = tokenizer(input_str) - for key in encoded_tok.keys(): + for key in encoded_tok: self.assertListEqual(encoded_tok[key], encoded_processor[key]) # Copied from transformers.tests.models.whisper.test_processor_whisper.WhisperProcessorTest.test_tokenizer_decode with Whisper->Clvp diff --git a/tests/models/csm/test_processor_csm.py b/tests/models/csm/test_processor_csm.py index 2abb7eb2d6..910a0e7868 100644 --- a/tests/models/csm/test_processor_csm.py +++ b/tests/models/csm/test_processor_csm.py @@ -57,7 +57,7 @@ class CsmProcessorTest(ProcessorTesterMixin, unittest.TestCase): processor_loaded = self.processor_class.from_pretrained(self.tmpdirname) processor_dict_loaded = json.loads(processor_loaded.to_json_string()) # chat templates aren't serialized to json in processors - self.assertFalse("chat_template" in processor_dict_loaded.keys()) + self.assertFalse("chat_template" in processor_dict_loaded) # they have to be saved as separate file and loaded back from that file # so we check if the same template is loaded diff --git a/tests/models/d_fine/test_modeling_d_fine.py b/tests/models/d_fine/test_modeling_d_fine.py index b26db579d0..94429260ec 100644 --- a/tests/models/d_fine/test_modeling_d_fine.py +++ b/tests/models/d_fine/test_modeling_d_fine.py @@ -647,7 +647,7 @@ class DFineModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase): # Skip the check for the backbone for name, module in model.named_modules(): if module.__class__.__name__ == "DFineConvEncoder": - backbone_params = [f"{name}.{key}" for key in module.state_dict().keys()] + backbone_params = [f"{name}.{key}" for key in module.state_dict()] break for name, param in model.named_parameters(): diff --git a/tests/models/dac/test_modeling_dac.py b/tests/models/dac/test_modeling_dac.py index 93f61f4186..bfd6e7416b 100644 --- a/tests/models/dac/test_modeling_dac.py +++ b/tests/models/dac/test_modeling_dac.py @@ -228,8 +228,8 @@ class DacModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase): loaded_model_state_dict = loaded_model.state_dict() non_persistent_buffers = {} - for key in loaded_model_state_dict.keys(): - if key not in model_state_dict.keys(): + for key in loaded_model_state_dict: + if key not in model_state_dict: non_persistent_buffers[key] = loaded_model_state_dict[key] loaded_model_state_dict = { diff --git a/tests/models/deformable_detr/test_modeling_deformable_detr.py b/tests/models/deformable_detr/test_modeling_deformable_detr.py index fc30b10e14..df4ebd3614 100644 --- a/tests/models/deformable_detr/test_modeling_deformable_detr.py +++ b/tests/models/deformable_detr/test_modeling_deformable_detr.py @@ -770,7 +770,7 @@ class DeformableDetrModelIntegrationTests(unittest.TestCase): # 3. assert equivalence # (on A10, the differences get larger than on T4) - for key in cpu_outputs.keys(): + for key in cpu_outputs: torch.testing.assert_close(cpu_outputs[key], gpu_outputs[key].cpu(), atol=2e-2, rtol=2e-2) expected_logits = torch.tensor( diff --git a/tests/models/dia/test_processor_dia.py b/tests/models/dia/test_processor_dia.py index 8ce15f4330..b015e3c197 100644 --- a/tests/models/dia/test_processor_dia.py +++ b/tests/models/dia/test_processor_dia.py @@ -135,7 +135,7 @@ class DiaProcessorTest(unittest.TestCase): input_tokenizer = tokenizer(random_text, padding=True, return_tensors="pt") input_processor = self.processor(random_text) - for key in input_tokenizer.keys(): + for key in input_tokenizer: self.assertTrue((input_tokenizer[key] == input_processor[key]).all()) def test_no_audio(self): diff --git a/tests/models/dpt/test_modeling_dpt.py b/tests/models/dpt/test_modeling_dpt.py index eb968ad9f6..8b47a18a71 100644 --- a/tests/models/dpt/test_modeling_dpt.py +++ b/tests/models/dpt/test_modeling_dpt.py @@ -268,7 +268,7 @@ class DPTModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase): backbone_params = [] for name, module in model.named_modules(): if module.__class__.__name__ == "DPTViTHybridEmbeddings": - backbone_params = [f"{name}.{key}" for key in module.state_dict().keys()] + backbone_params = [f"{name}.{key}" for key in module.state_dict()] break for name, param in model.named_parameters(): diff --git a/tests/models/dpt/test_modeling_dpt_auto_backbone.py b/tests/models/dpt/test_modeling_dpt_auto_backbone.py index 1505be27cf..b9068631bd 100644 --- a/tests/models/dpt/test_modeling_dpt_auto_backbone.py +++ b/tests/models/dpt/test_modeling_dpt_auto_backbone.py @@ -205,7 +205,7 @@ class DPTModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase): backbone_params = [] for name, module in model.named_modules(): if module.__class__.__name__ == "DPTViTHybridEmbeddings": - backbone_params = [f"{name}.{key}" for key in module.state_dict().keys()] + backbone_params = [f"{name}.{key}" for key in module.state_dict()] break for name, param in model.named_parameters(): diff --git a/tests/models/dpt/test_modeling_dpt_hybrid.py b/tests/models/dpt/test_modeling_dpt_hybrid.py index 79cad886db..e7a184c400 100644 --- a/tests/models/dpt/test_modeling_dpt_hybrid.py +++ b/tests/models/dpt/test_modeling_dpt_hybrid.py @@ -281,7 +281,7 @@ class DPTModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase): backbone_params = [] for name, module in model.named_modules(): if module.__class__.__name__ == "DPTViTHybridEmbeddings": - backbone_params = [f"{name}.{key}" for key in module.state_dict().keys()] + backbone_params = [f"{name}.{key}" for key in module.state_dict()] break for name, param in model.named_parameters(): diff --git a/tests/models/encodec/test_modeling_encodec.py b/tests/models/encodec/test_modeling_encodec.py index 157e174631..6dae1643d2 100644 --- a/tests/models/encodec/test_modeling_encodec.py +++ b/tests/models/encodec/test_modeling_encodec.py @@ -258,8 +258,8 @@ class EncodecModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase) loaded_model_state_dict = loaded_model.state_dict() non_persistent_buffers = {} - for key in loaded_model_state_dict.keys(): - if key not in model_state_dict.keys(): + for key in loaded_model_state_dict: + if key not in model_state_dict: non_persistent_buffers[key] = loaded_model_state_dict[key] loaded_model_state_dict = { diff --git a/tests/models/falcon_h1/test_modeling_falcon_h1.py b/tests/models/falcon_h1/test_modeling_falcon_h1.py index 5c538ac4b4..37afc2cceb 100644 --- a/tests/models/falcon_h1/test_modeling_falcon_h1.py +++ b/tests/models/falcon_h1/test_modeling_falcon_h1.py @@ -460,7 +460,7 @@ class FalconH1ModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterM # added support for it yet. We skip these models for now. has_encoder_attributes = any( attr_name - for attr_name in config.to_dict().keys() + for attr_name in config.to_dict() if attr_name.startswith("encoder") and attr_name != "encoder_no_repeat_ngram_size" ) if has_encoder_attributes: diff --git a/tests/models/falcon_mamba/test_modeling_falcon_mamba.py b/tests/models/falcon_mamba/test_modeling_falcon_mamba.py index 855d0f2103..9686b1660f 100644 --- a/tests/models/falcon_mamba/test_modeling_falcon_mamba.py +++ b/tests/models/falcon_mamba/test_modeling_falcon_mamba.py @@ -292,7 +292,7 @@ class FalconMambaModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTest """ if isinstance(member, torch.Tensor): max_value, min_value = member.max().item(), member.min().item() - elif isinstance(member, list) or isinstance(member, tuple): + elif isinstance(member, (list, tuple)): max_value, min_value = max(member), min(member) if not isinstance(container, list): diff --git a/tests/models/flava/test_modeling_flava.py b/tests/models/flava/test_modeling_flava.py index f5cafdc957..896ce25695 100644 --- a/tests/models/flava/test_modeling_flava.py +++ b/tests/models/flava/test_modeling_flava.py @@ -1001,8 +1001,8 @@ class FlavaModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase): loaded_model_state_dict.pop("text_model.embeddings.token_type_ids", None) non_persistent_buffers = {} - for key in loaded_model_state_dict.keys(): - if key not in model_state_dict.keys(): + for key in loaded_model_state_dict: + if key not in model_state_dict: non_persistent_buffers[key] = loaded_model_state_dict[key] loaded_model_state_dict = { diff --git a/tests/models/flava/test_processor_flava.py b/tests/models/flava/test_processor_flava.py index 8489322efd..afba82b383 100644 --- a/tests/models/flava/test_processor_flava.py +++ b/tests/models/flava/test_processor_flava.py @@ -144,7 +144,7 @@ class FlavaProcessorTest(ProcessorTesterMixin, unittest.TestCase): input_feat_extract = image_processor(image_input, return_tensors="np") input_processor = processor(images=image_input, return_tensors="np") - for key in input_feat_extract.keys(): + for key in input_feat_extract: self.assertAlmostEqual(input_feat_extract[key].sum(), input_processor[key].sum(), delta=1e-2) # With rest of the args @@ -157,7 +157,7 @@ class FlavaProcessorTest(ProcessorTesterMixin, unittest.TestCase): images=image_input, return_image_mask=True, return_codebook_pixels=True, return_tensors="np" ) - for key in input_feat_extract.keys(): + for key in input_feat_extract: self.assertAlmostEqual(input_feat_extract[key].sum(), input_processor[key].sum(), delta=1e-2) def test_tokenizer(self): @@ -172,7 +172,7 @@ class FlavaProcessorTest(ProcessorTesterMixin, unittest.TestCase): encoded_tok = tokenizer(input_str) - for key in encoded_tok.keys(): + for key in encoded_tok: self.assertListEqual(encoded_tok[key], encoded_processor[key]) def test_processor(self): diff --git a/tests/models/gemma3n/test_processing_gemma3n.py b/tests/models/gemma3n/test_processing_gemma3n.py index ffedb2b98a..5590be01ba 100644 --- a/tests/models/gemma3n/test_processing_gemma3n.py +++ b/tests/models/gemma3n/test_processing_gemma3n.py @@ -116,7 +116,7 @@ class Gemma3nProcessorTest(unittest.TestCase): input_image_processor = image_processor(raw_image, return_tensors="pt") input_processor = processor(text="Describe:", images=raw_image, return_tensors="pt") - for key in input_image_processor.keys(): + for key in input_image_processor: self.assertAlmostEqual(input_image_processor[key].sum(), input_processor[key].sum(), delta=1e-2) if "pixel_values" in key: # NOTE: all images should be re-scaled to 768x768 @@ -135,7 +135,7 @@ class Gemma3nProcessorTest(unittest.TestCase): input_feat_extract = feature_extractor(raw_speech, return_tensors="pt") input_processor = processor(text="Transcribe:", audio=raw_speech, return_tensors="pt") - for key in input_feat_extract.keys(): + for key in input_feat_extract: self.assertAlmostEqual(input_feat_extract[key].sum(), input_processor[key].sum(), delta=1e-2) def test_tokenizer(self): @@ -152,7 +152,7 @@ class Gemma3nProcessorTest(unittest.TestCase): encoded_tok = tokenizer(input_str) - for key in encoded_tok.keys(): + for key in encoded_tok: self.assertListEqual(encoded_tok[key], encoded_processor[key][0]) def test_tokenizer_decode(self): diff --git a/tests/models/git/test_processor_git.py b/tests/models/git/test_processor_git.py index c15301a587..65b7819319 100644 --- a/tests/models/git/test_processor_git.py +++ b/tests/models/git/test_processor_git.py @@ -83,7 +83,7 @@ class GitProcessorTest(ProcessorTesterMixin, unittest.TestCase): input_feat_extract = image_processor(image_input, return_tensors="np") input_processor = processor(images=image_input, return_tensors="np") - for key in input_feat_extract.keys(): + for key in input_feat_extract: self.assertAlmostEqual(input_feat_extract[key].sum(), input_processor[key].sum(), delta=1e-2) def test_tokenizer(self): @@ -98,7 +98,7 @@ class GitProcessorTest(ProcessorTesterMixin, unittest.TestCase): encoded_tok = tokenizer(input_str, return_token_type_ids=False) - for key in encoded_tok.keys(): + for key in encoded_tok: self.assertListEqual(encoded_tok[key], encoded_processor[key]) def test_processor(self): diff --git a/tests/models/gpt2/test_modeling_gpt2.py b/tests/models/gpt2/test_modeling_gpt2.py index d0796468c3..5155f6d9a0 100644 --- a/tests/models/gpt2/test_modeling_gpt2.py +++ b/tests/models/gpt2/test_modeling_gpt2.py @@ -419,7 +419,7 @@ class GPT2ModelTester: def create_and_check_gpt2_weight_initialization(self, config, *args): model = GPT2Model(config) model_std = model.config.initializer_range / math.sqrt(2 * model.config.n_layer) - for key in model.state_dict().keys(): + for key in model.state_dict(): if "c_proj" in key and "weight" in key: self.parent.assertLessEqual(abs(torch.std(model.state_dict()[key]) - model_std), 0.001) self.parent.assertLessEqual(abs(torch.mean(model.state_dict()[key]) - 0.0), 0.01) diff --git a/tests/models/gpt_bigcode/test_modeling_gpt_bigcode.py b/tests/models/gpt_bigcode/test_modeling_gpt_bigcode.py index 26737a6473..21d1764c76 100644 --- a/tests/models/gpt_bigcode/test_modeling_gpt_bigcode.py +++ b/tests/models/gpt_bigcode/test_modeling_gpt_bigcode.py @@ -344,7 +344,7 @@ class GPTBigCodeModelTester: def create_and_check_gpt_bigcode_weight_initialization(self, config, *args): model = GPTBigCodeModel(config) model_std = model.config.initializer_range / math.sqrt(2 * model.config.n_layer) - for key in model.state_dict().keys(): + for key in model.state_dict(): if "c_proj" in key and "weight" in key: self.parent.assertLessEqual(abs(torch.std(model.state_dict()[key]) - model_std), 0.001) self.parent.assertLessEqual(abs(torch.mean(model.state_dict()[key]) - 0.0), 0.01) diff --git a/tests/models/grounding_dino/test_modeling_grounding_dino.py b/tests/models/grounding_dino/test_modeling_grounding_dino.py index 8d94a42100..041d9af20b 100644 --- a/tests/models/grounding_dino/test_modeling_grounding_dino.py +++ b/tests/models/grounding_dino/test_modeling_grounding_dino.py @@ -763,7 +763,7 @@ class GroundingDinoModelIntegrationTests(unittest.TestCase): gpu_outputs = model(**encoding) # 3. assert equivalence - for key in cpu_outputs.keys(): + for key in cpu_outputs: torch.testing.assert_close(cpu_outputs[key], gpu_outputs[key].cpu(), rtol=1e-3, atol=1e-3) expected_logits = torch.tensor( diff --git a/tests/models/grounding_dino/test_processor_grounding_dino.py b/tests/models/grounding_dino/test_processor_grounding_dino.py index 35b77c39f2..89f14153b1 100644 --- a/tests/models/grounding_dino/test_processor_grounding_dino.py +++ b/tests/models/grounding_dino/test_processor_grounding_dino.py @@ -206,7 +206,7 @@ class GroundingDinoProcessorTest(ProcessorTesterMixin, unittest.TestCase): input_image_proc = image_processor(image_input, return_tensors="np") input_processor = processor(images=image_input, return_tensors="np") - for key in input_image_proc.keys(): + for key in input_image_proc: self.assertAlmostEqual(input_image_proc[key].sum(), input_processor[key].sum(), delta=1e-2) # Copied from tests.models.clip.test_processor_clip.CLIPProcessorTest.test_tokenizer with CLIP->GroundingDino @@ -222,7 +222,7 @@ class GroundingDinoProcessorTest(ProcessorTesterMixin, unittest.TestCase): encoded_tok = tokenizer(input_str) - for key in encoded_tok.keys(): + for key in encoded_tok: self.assertListEqual(encoded_tok[key], encoded_processor[key]) def test_processor(self): diff --git a/tests/models/groupvit/test_modeling_groupvit.py b/tests/models/groupvit/test_modeling_groupvit.py index 73af5e4b4b..a4d521ff2a 100644 --- a/tests/models/groupvit/test_modeling_groupvit.py +++ b/tests/models/groupvit/test_modeling_groupvit.py @@ -635,8 +635,8 @@ class GroupViTModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase loaded_model_state_dict = loaded_model.state_dict() non_persistent_buffers = {} - for key in loaded_model_state_dict.keys(): - if key not in model_state_dict.keys(): + for key in loaded_model_state_dict: + if key not in model_state_dict: non_persistent_buffers[key] = loaded_model_state_dict[key] loaded_model_state_dict = { diff --git a/tests/models/instructblip/test_modeling_instructblip.py b/tests/models/instructblip/test_modeling_instructblip.py index fe309e700c..fae6092eca 100644 --- a/tests/models/instructblip/test_modeling_instructblip.py +++ b/tests/models/instructblip/test_modeling_instructblip.py @@ -597,7 +597,7 @@ class InstructBlipForConditionalGenerationDecoderOnlyTest(ModelTesterMixin, Gene # added support for it yet. We skip these models for now. has_encoder_attributes = any( attr_name - for attr_name in config.to_dict().keys() + for attr_name in config.to_dict() if attr_name.startswith("encoder") and attr_name != "encoder_no_repeat_ngram_size" ) if has_encoder_attributes: diff --git a/tests/models/instructblip/test_processor_instructblip.py b/tests/models/instructblip/test_processor_instructblip.py index 984826aaa2..82da35ce79 100644 --- a/tests/models/instructblip/test_processor_instructblip.py +++ b/tests/models/instructblip/test_processor_instructblip.py @@ -107,7 +107,7 @@ class InstructBlipProcessorTest(ProcessorTesterMixin, unittest.TestCase): input_feat_extract = image_processor(image_input, return_tensors="np") input_processor = processor(images=image_input, return_tensors="np") - for key in input_feat_extract.keys(): + for key in input_feat_extract: self.assertAlmostEqual(input_feat_extract[key].sum(), input_processor[key].sum(), delta=1e-2) def test_processor(self): diff --git a/tests/models/instructblipvideo/test_modeling_instructblipvideo.py b/tests/models/instructblipvideo/test_modeling_instructblipvideo.py index 5f056d4464..129a2d045c 100644 --- a/tests/models/instructblipvideo/test_modeling_instructblipvideo.py +++ b/tests/models/instructblipvideo/test_modeling_instructblipvideo.py @@ -609,7 +609,7 @@ class InstructBlipVideoForConditionalGenerationDecoderOnlyTest( # added support for it yet. We skip these models for now. has_encoder_attributes = any( attr_name - for attr_name in config.to_dict().keys() + for attr_name in config.to_dict() if attr_name.startswith("encoder") and attr_name != "encoder_no_repeat_ngram_size" ) if has_encoder_attributes: diff --git a/tests/models/instructblipvideo/test_processor_instructblipvideo.py b/tests/models/instructblipvideo/test_processor_instructblipvideo.py index 3d3633365a..2c1b897dd1 100644 --- a/tests/models/instructblipvideo/test_processor_instructblipvideo.py +++ b/tests/models/instructblipvideo/test_processor_instructblipvideo.py @@ -110,7 +110,7 @@ class InstructBlipVideoProcessorTest(ProcessorTesterMixin, unittest.TestCase): input_feat_extract = video_processor(image_input, return_tensors="pt") input_processor = processor(images=image_input, return_tensors="pt") - for key in input_feat_extract.keys(): + for key in input_feat_extract: self.assertAlmostEqual(input_feat_extract[key].sum(), input_processor[key].sum(), delta=1e-2) def test_tokenizer(self): @@ -131,10 +131,10 @@ class InstructBlipVideoProcessorTest(ProcessorTesterMixin, unittest.TestCase): encoded_tokens = tokenizer(input_str, return_token_type_ids=False) encoded_tokens_qformer = qformer_tokenizer(input_str, return_token_type_ids=False) - for key in encoded_tokens.keys(): + for key in encoded_tokens: self.assertListEqual(encoded_tokens[key], encoded_processor[key]) - for key in encoded_tokens_qformer.keys(): + for key in encoded_tokens_qformer: self.assertListEqual(encoded_tokens_qformer[key], encoded_processor["qformer_" + key]) def test_processor(self): diff --git a/tests/models/kosmos2/test_modeling_kosmos2.py b/tests/models/kosmos2/test_modeling_kosmos2.py index 6bb86406ce..6e58c13f03 100644 --- a/tests/models/kosmos2/test_modeling_kosmos2.py +++ b/tests/models/kosmos2/test_modeling_kosmos2.py @@ -579,8 +579,8 @@ class Kosmos2ModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMi loaded_model_state_dict = loaded_model.state_dict() non_persistent_buffers = {} - for key in loaded_model_state_dict.keys(): - if key not in model_state_dict.keys(): + for key in loaded_model_state_dict: + if key not in model_state_dict: non_persistent_buffers[key] = loaded_model_state_dict[key] loaded_model_state_dict = { diff --git a/tests/models/kosmos2/test_processor_kosmos2.py b/tests/models/kosmos2/test_processor_kosmos2.py index 587db26ef1..6f539e866b 100644 --- a/tests/models/kosmos2/test_processor_kosmos2.py +++ b/tests/models/kosmos2/test_processor_kosmos2.py @@ -135,7 +135,7 @@ class Kosmos2ProcessorTest(ProcessorTesterMixin, unittest.TestCase): input_image_processor = image_processor(image_input, return_tensors="np") input_processor = processor(images=image_input, return_tensors="np") - for key in input_image_processor.keys(): + for key in input_image_processor: self.assertAlmostEqual(input_image_processor[key].sum(), input_processor[key].sum(), delta=1e-2) def test_tokenizer(self): @@ -150,7 +150,7 @@ class Kosmos2ProcessorTest(ProcessorTesterMixin, unittest.TestCase): encoded_tok = tokenizer(input_str, return_token_type_ids=False) - for key in encoded_tok.keys(): + for key in encoded_tok: self.assertListEqual(encoded_tok[key], encoded_processor[key]) def test_processor(self): diff --git a/tests/models/kyutai_speech_to_text/test_modeling_kyutai_speech_to_text.py b/tests/models/kyutai_speech_to_text/test_modeling_kyutai_speech_to_text.py index 6be8879afb..35898887fa 100644 --- a/tests/models/kyutai_speech_to_text/test_modeling_kyutai_speech_to_text.py +++ b/tests/models/kyutai_speech_to_text/test_modeling_kyutai_speech_to_text.py @@ -390,7 +390,7 @@ class KyutaiSpeechToTextModelTest(ModelTesterMixin, GenerationTesterMixin, Pipel # added support for it yet. We skip these models for now. has_encoder_attributes = any( attr_name - for attr_name in config.to_dict().keys() + for attr_name in config.to_dict() if attr_name.startswith("encoder") and attr_name != "encoder_no_repeat_ngram_size" ) if has_encoder_attributes: diff --git a/tests/models/layoutlmv2/test_tokenization_layoutlmv2.py b/tests/models/layoutlmv2/test_tokenization_layoutlmv2.py index 6eb8abf0b5..ab34b1007d 100644 --- a/tests/models/layoutlmv2/test_tokenization_layoutlmv2.py +++ b/tests/models/layoutlmv2/test_tokenization_layoutlmv2.py @@ -860,7 +860,7 @@ class LayoutLMv2TokenizationTest(TokenizerTesterMixin, unittest.TestCase): encoded_sequences_batch_padded_2 = tokenizer.batch_encode_plus( words, is_pair=False, boxes=boxes, max_length=maximum_length + 10, padding="longest" ) - for key in encoded_sequences_batch_padded_1.keys(): + for key in encoded_sequences_batch_padded_1: self.assertListEqual( encoded_sequences_batch_padded_1[key], encoded_sequences_batch_padded_2[key], @@ -873,7 +873,7 @@ class LayoutLMv2TokenizationTest(TokenizerTesterMixin, unittest.TestCase): encoded_sequences_batch_padded_2 = tokenizer.batch_encode_plus( words, is_pair=False, boxes=boxes, max_length=maximum_length + 10, padding=False ) - for key in encoded_sequences_batch_padded_1.keys(): + for key in encoded_sequences_batch_padded_1: self.assertListEqual( encoded_sequences_batch_padded_1[key], encoded_sequences_batch_padded_2[key], @@ -1358,7 +1358,7 @@ class LayoutLMv2TokenizationTest(TokenizerTesterMixin, unittest.TestCase): add_special_tokens=True, ) - for key in tokens_p.keys(): + for key in tokens_p: self.assertEqual(tokens_r[key], tokens_p[key]) if "token_type_ids" in tokens_r: @@ -1389,7 +1389,7 @@ class LayoutLMv2TokenizationTest(TokenizerTesterMixin, unittest.TestCase): # encode_plus() no_special_tokens = tokenizer_r.encode_plus(words, boxes=boxes, add_special_tokens=False) with_special_tokens = tokenizer_r.encode_plus(words, boxes=boxes, add_special_tokens=True) - for key in no_special_tokens.keys(): + for key in no_special_tokens: self.assertEqual( len(no_special_tokens[key]), len(with_special_tokens[key]) - simple_num_special_tokens_to_add, @@ -1400,7 +1400,7 @@ class LayoutLMv2TokenizationTest(TokenizerTesterMixin, unittest.TestCase): no_special_tokens = tokenizer_r.batch_encode_plus(words, boxes=boxes, add_special_tokens=False) with_special_tokens = tokenizer_r.batch_encode_plus(words, boxes=boxes, add_special_tokens=True) - for key in no_special_tokens.keys(): + for key in no_special_tokens: for i_no, i_with in zip(no_special_tokens[key], with_special_tokens[key]): self.assertEqual(len(i_no), len(i_with) - simple_num_special_tokens_to_add) diff --git a/tests/models/layoutlmv3/test_tokenization_layoutlmv3.py b/tests/models/layoutlmv3/test_tokenization_layoutlmv3.py index deee8d31d2..fcc997c769 100644 --- a/tests/models/layoutlmv3/test_tokenization_layoutlmv3.py +++ b/tests/models/layoutlmv3/test_tokenization_layoutlmv3.py @@ -753,7 +753,7 @@ class LayoutLMv3TokenizationTest(TokenizerTesterMixin, unittest.TestCase): encoded_sequences_batch_padded_2 = tokenizer.batch_encode_plus( words, is_pair=False, boxes=boxes, max_length=maximum_length + 10, padding="longest" ) - for key in encoded_sequences_batch_padded_1.keys(): + for key in encoded_sequences_batch_padded_1: self.assertListEqual( encoded_sequences_batch_padded_1[key], encoded_sequences_batch_padded_2[key], @@ -766,7 +766,7 @@ class LayoutLMv3TokenizationTest(TokenizerTesterMixin, unittest.TestCase): encoded_sequences_batch_padded_2 = tokenizer.batch_encode_plus( words, is_pair=False, boxes=boxes, max_length=maximum_length + 10, padding=False ) - for key in encoded_sequences_batch_padded_1.keys(): + for key in encoded_sequences_batch_padded_1: self.assertListEqual( encoded_sequences_batch_padded_1[key], encoded_sequences_batch_padded_2[key], @@ -1249,7 +1249,7 @@ class LayoutLMv3TokenizationTest(TokenizerTesterMixin, unittest.TestCase): add_special_tokens=True, ) - for key in tokens_p.keys(): + for key in tokens_p: self.assertEqual(tokens_r[key], tokens_p[key]) if "token_type_ids" in tokens_r: @@ -1280,7 +1280,7 @@ class LayoutLMv3TokenizationTest(TokenizerTesterMixin, unittest.TestCase): # encode_plus() no_special_tokens = tokenizer_r.encode_plus(words, boxes=boxes, add_special_tokens=False) with_special_tokens = tokenizer_r.encode_plus(words, boxes=boxes, add_special_tokens=True) - for key in no_special_tokens.keys(): + for key in no_special_tokens: self.assertEqual( len(no_special_tokens[key]), len(with_special_tokens[key]) - simple_num_special_tokens_to_add, @@ -1291,7 +1291,7 @@ class LayoutLMv3TokenizationTest(TokenizerTesterMixin, unittest.TestCase): no_special_tokens = tokenizer_r.batch_encode_plus(words, boxes=boxes, add_special_tokens=False) with_special_tokens = tokenizer_r.batch_encode_plus(words, boxes=boxes, add_special_tokens=True) - for key in no_special_tokens.keys(): + for key in no_special_tokens: for i_no, i_with in zip(no_special_tokens[key], with_special_tokens[key]): self.assertEqual(len(i_no), len(i_with) - simple_num_special_tokens_to_add) diff --git a/tests/models/layoutxlm/test_tokenization_layoutxlm.py b/tests/models/layoutxlm/test_tokenization_layoutxlm.py index 056726f004..506ddd1f24 100644 --- a/tests/models/layoutxlm/test_tokenization_layoutxlm.py +++ b/tests/models/layoutxlm/test_tokenization_layoutxlm.py @@ -791,7 +791,7 @@ class LayoutXLMTokenizationTest(TokenizerTesterMixin, unittest.TestCase): encoded_sequences_batch_padded_2 = tokenizer.batch_encode_plus( words, is_pair=False, boxes=boxes, max_length=maximum_length + 10, padding="longest" ) - for key in encoded_sequences_batch_padded_1.keys(): + for key in encoded_sequences_batch_padded_1: self.assertListEqual( encoded_sequences_batch_padded_1[key], encoded_sequences_batch_padded_2[key], @@ -804,7 +804,7 @@ class LayoutXLMTokenizationTest(TokenizerTesterMixin, unittest.TestCase): encoded_sequences_batch_padded_2 = tokenizer.batch_encode_plus( words, is_pair=False, boxes=boxes, max_length=maximum_length + 10, padding=False ) - for key in encoded_sequences_batch_padded_1.keys(): + for key in encoded_sequences_batch_padded_1: self.assertListEqual( encoded_sequences_batch_padded_1[key], encoded_sequences_batch_padded_2[key], @@ -1287,7 +1287,7 @@ class LayoutXLMTokenizationTest(TokenizerTesterMixin, unittest.TestCase): add_special_tokens=True, ) - for key in tokens_p.keys(): + for key in tokens_p: self.assertEqual(tokens_r[key], tokens_p[key]) if "token_type_ids" in tokens_r: @@ -1318,7 +1318,7 @@ class LayoutXLMTokenizationTest(TokenizerTesterMixin, unittest.TestCase): # encode_plus() no_special_tokens = tokenizer_r.encode_plus(words, boxes=boxes, add_special_tokens=False) with_special_tokens = tokenizer_r.encode_plus(words, boxes=boxes, add_special_tokens=True) - for key in no_special_tokens.keys(): + for key in no_special_tokens: self.assertEqual( len(no_special_tokens[key]), len(with_special_tokens[key]) - simple_num_special_tokens_to_add, @@ -1329,7 +1329,7 @@ class LayoutXLMTokenizationTest(TokenizerTesterMixin, unittest.TestCase): no_special_tokens = tokenizer_r.batch_encode_plus(words, boxes=boxes, add_special_tokens=False) with_special_tokens = tokenizer_r.batch_encode_plus(words, boxes=boxes, add_special_tokens=True) - for key in no_special_tokens.keys(): + for key in no_special_tokens: for i_no, i_with in zip(no_special_tokens[key], with_special_tokens[key]): self.assertEqual(len(i_no), len(i_with) - simple_num_special_tokens_to_add) diff --git a/tests/models/llava/test_processor_llava.py b/tests/models/llava/test_processor_llava.py index 41b9d8a09e..7e3be530d4 100644 --- a/tests/models/llava/test_processor_llava.py +++ b/tests/models/llava/test_processor_llava.py @@ -77,7 +77,7 @@ class LlavaProcessorTest(ProcessorTesterMixin, unittest.TestCase): processor_loaded = self.processor_class.from_pretrained(self.tmpdirname) processor_dict_loaded = json.loads(processor_loaded.to_json_string()) # chat templates aren't serialized to json in processors - self.assertFalse("chat_template" in processor_dict_loaded.keys()) + self.assertFalse("chat_template" in processor_dict_loaded) # they have to be saved as separate file and loaded back from that file # so we check if the same template is loaded diff --git a/tests/models/llava_next/test_processor_llava_next.py b/tests/models/llava_next/test_processor_llava_next.py index d6156adb75..46d385d2d8 100644 --- a/tests/models/llava_next/test_processor_llava_next.py +++ b/tests/models/llava_next/test_processor_llava_next.py @@ -84,7 +84,7 @@ class LlavaNextProcessorTest(ProcessorTesterMixin, unittest.TestCase): processor_loaded = self.processor_class.from_pretrained(self.tmpdirname) processor_dict_loaded = json.loads(processor_loaded.to_json_string()) # chat templates aren't serialized to json in processors - self.assertFalse("chat_template" in processor_dict_loaded.keys()) + self.assertFalse("chat_template" in processor_dict_loaded) # they have to be saved as separate file and loaded back from that file # so we check if the same template is loaded diff --git a/tests/models/llava_next_video/test_processor_llava_next_video.py b/tests/models/llava_next_video/test_processor_llava_next_video.py index 17bcb3657d..3426d1e453 100644 --- a/tests/models/llava_next_video/test_processor_llava_next_video.py +++ b/tests/models/llava_next_video/test_processor_llava_next_video.py @@ -93,7 +93,7 @@ class LlavaNextVideoProcessorTest(ProcessorTesterMixin, unittest.TestCase): processor_loaded = self.processor_class.from_pretrained(self.tmpdirname) processor_dict_loaded = json.loads(processor_loaded.to_json_string()) # chat templates aren't serialized to json in processors - self.assertFalse("chat_template" in processor_dict_loaded.keys()) + self.assertFalse("chat_template" in processor_dict_loaded) # they have to be saved as separate file and loaded back from that file # so we check if the same template is loaded diff --git a/tests/models/llava_onevision/test_processor_llava_onevision.py b/tests/models/llava_onevision/test_processor_llava_onevision.py index 1eb3b0d0d4..577ba19cd8 100644 --- a/tests/models/llava_onevision/test_processor_llava_onevision.py +++ b/tests/models/llava_onevision/test_processor_llava_onevision.py @@ -97,7 +97,7 @@ class LlavaOnevisionProcessorTest(ProcessorTesterMixin, unittest.TestCase): processor_loaded = self.processor_class.from_pretrained(self.tmpdirname) processor_dict_loaded = json.loads(processor_loaded.to_json_string()) # chat templates aren't serialized to json in processors - self.assertFalse("chat_template" in processor_dict_loaded.keys()) + self.assertFalse("chat_template" in processor_dict_loaded) # they have to be saved as separate file and loaded back from that file # so we check if the same template is loaded diff --git a/tests/models/luke/test_modeling_luke.py b/tests/models/luke/test_modeling_luke.py index 7cc282e49a..5959b1877c 100644 --- a/tests/models/luke/test_modeling_luke.py +++ b/tests/models/luke/test_modeling_luke.py @@ -897,7 +897,7 @@ class LukeModelIntegrationTests(unittest.TestCase): encoding = tokenizer(text, entity_spans=[span], add_prefix_space=True, return_tensors="pt") # move all values to device - for key in encoding.keys(): + for key in encoding: encoding[key] = encoding[key].to(torch_device) outputs = model(**encoding) @@ -932,7 +932,7 @@ class LukeModelIntegrationTests(unittest.TestCase): encoding = tokenizer(text, entity_spans=[span], add_prefix_space=True, return_tensors="pt") # move all values to device - for key in encoding.keys(): + for key in encoding: encoding[key] = encoding[key].to(torch_device) outputs = model(**encoding) diff --git a/tests/models/mamba/test_modeling_mamba.py b/tests/models/mamba/test_modeling_mamba.py index e99c8b1e57..aff98558d5 100644 --- a/tests/models/mamba/test_modeling_mamba.py +++ b/tests/models/mamba/test_modeling_mamba.py @@ -260,7 +260,7 @@ class MambaModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMixi """ if isinstance(member, torch.Tensor): max_value, min_value = member.max().item(), member.min().item() - elif isinstance(member, list) or isinstance(member, tuple): + elif isinstance(member, (list, tuple)): max_value, min_value = max(member), min(member) if not isinstance(container, list): diff --git a/tests/models/markuplm/test_tokenization_markuplm.py b/tests/models/markuplm/test_tokenization_markuplm.py index 3cdbd4acf8..3bf526407f 100644 --- a/tests/models/markuplm/test_tokenization_markuplm.py +++ b/tests/models/markuplm/test_tokenization_markuplm.py @@ -627,7 +627,7 @@ class MarkupLMTokenizationTest(TokenizerTesterMixin, unittest.TestCase): encoded_sequences_batch_padded_2 = tokenizer.batch_encode_plus( nodes, is_pair=False, xpaths=xpaths, max_length=maximum_length + 10, padding="longest" ) - for key in encoded_sequences_batch_padded_1.keys(): + for key in encoded_sequences_batch_padded_1: self.assertListEqual( encoded_sequences_batch_padded_1[key], encoded_sequences_batch_padded_2[key], @@ -640,7 +640,7 @@ class MarkupLMTokenizationTest(TokenizerTesterMixin, unittest.TestCase): encoded_sequences_batch_padded_2 = tokenizer.batch_encode_plus( nodes, is_pair=False, xpaths=xpaths, max_length=maximum_length + 10, padding=False ) - for key in encoded_sequences_batch_padded_1.keys(): + for key in encoded_sequences_batch_padded_1: self.assertListEqual( encoded_sequences_batch_padded_1[key], encoded_sequences_batch_padded_2[key], @@ -1120,7 +1120,7 @@ class MarkupLMTokenizationTest(TokenizerTesterMixin, unittest.TestCase): tokens_r = tokenizer_r.encode_plus(nodes, xpaths=xpaths, add_special_tokens=True) tokens_p = tokenizer_p.encode_plus(nodes, xpaths=xpaths, add_special_tokens=True) - for key in tokens_p.keys(): + for key in tokens_p: self.assertEqual(tokens_r[key], tokens_p[key]) if "token_type_ids" in tokens_r: @@ -1151,7 +1151,7 @@ class MarkupLMTokenizationTest(TokenizerTesterMixin, unittest.TestCase): # encode_plus() no_special_tokens = tokenizer_r.encode_plus(nodes, xpaths=xpaths, add_special_tokens=False) with_special_tokens = tokenizer_r.encode_plus(nodes, xpaths=xpaths, add_special_tokens=True) - for key in no_special_tokens.keys(): + for key in no_special_tokens: self.assertEqual( len(no_special_tokens[key]), len(with_special_tokens[key]) - simple_num_special_tokens_to_add, @@ -1162,7 +1162,7 @@ class MarkupLMTokenizationTest(TokenizerTesterMixin, unittest.TestCase): no_special_tokens = tokenizer_r.batch_encode_plus(nodes, xpaths=xpaths, add_special_tokens=False) with_special_tokens = tokenizer_r.batch_encode_plus(nodes, xpaths=xpaths, add_special_tokens=True) - for key in no_special_tokens.keys(): + for key in no_special_tokens: for i_no, i_with in zip(no_special_tokens[key], with_special_tokens[key]): self.assertEqual(len(i_no), len(i_with) - simple_num_special_tokens_to_add) diff --git a/tests/models/mgp_str/test_processor_mgp_str.py b/tests/models/mgp_str/test_processor_mgp_str.py index ccfb6dba68..7157655abe 100644 --- a/tests/models/mgp_str/test_processor_mgp_str.py +++ b/tests/models/mgp_str/test_processor_mgp_str.py @@ -134,7 +134,7 @@ class MgpstrProcessorTest(unittest.TestCase): input_image_proc = image_processor(image_input, return_tensors="np") input_processor = processor(images=image_input, return_tensors="np") - for key in input_image_proc.keys(): + for key in input_image_proc: self.assertAlmostEqual(input_image_proc[key].sum(), input_processor[key].sum(), delta=1e-2) def test_tokenizer(self): @@ -148,7 +148,7 @@ class MgpstrProcessorTest(unittest.TestCase): encoded_processor = processor(text=input_str) encoded_tok = tokenizer(input_str) - for key in encoded_tok.keys(): + for key in encoded_tok: self.assertListEqual(encoded_tok[key], encoded_processor[key]) def test_processor(self): diff --git a/tests/models/mimi/test_modeling_mimi.py b/tests/models/mimi/test_modeling_mimi.py index d9b0216b15..2123e93c69 100644 --- a/tests/models/mimi/test_modeling_mimi.py +++ b/tests/models/mimi/test_modeling_mimi.py @@ -270,8 +270,8 @@ class MimiModelTest(ModelTesterMixin, unittest.TestCase): loaded_model_state_dict = loaded_model.state_dict() non_persistent_buffers = {} - for key in loaded_model_state_dict.keys(): - if key not in model_state_dict.keys(): + for key in loaded_model_state_dict: + if key not in model_state_dict: non_persistent_buffers[key] = loaded_model_state_dict[key] loaded_model_state_dict = { diff --git a/tests/models/mllama/test_processor_mllama.py b/tests/models/mllama/test_processor_mllama.py index e87a7ae751..f20dd5d277 100644 --- a/tests/models/mllama/test_processor_mllama.py +++ b/tests/models/mllama/test_processor_mllama.py @@ -60,7 +60,7 @@ class MllamaProcessorTest(ProcessorTesterMixin, unittest.TestCase): processor_loaded = self.processor_class.from_pretrained(self.tmpdirname) processor_dict_loaded = json.loads(processor_loaded.to_json_string()) # chat templates aren't serialized to json in processors - self.assertFalse("chat_template" in processor_dict_loaded.keys()) + self.assertFalse("chat_template" in processor_dict_loaded) # they have to be saved as separate file and loaded back from that file # so we check if the same template is loaded diff --git a/tests/models/moshi/test_modeling_moshi.py b/tests/models/moshi/test_modeling_moshi.py index 4f5b168959..f750f35cf3 100644 --- a/tests/models/moshi/test_modeling_moshi.py +++ b/tests/models/moshi/test_modeling_moshi.py @@ -69,7 +69,7 @@ if is_torch_available(): def _config_zero_init(config): configs_no_init = copy.deepcopy(config) - for key in configs_no_init.__dict__.keys(): + for key in configs_no_init.__dict__: if "_range" in key or "_std" in key or "initializer_factor" in key or "layer_scale" in key: setattr(configs_no_init, key, 1e-10) if isinstance(getattr(configs_no_init, key, None), PretrainedConfig): diff --git a/tests/models/mpt/test_modeling_mpt.py b/tests/models/mpt/test_modeling_mpt.py index 15d8fddb9f..75c9f65e92 100644 --- a/tests/models/mpt/test_modeling_mpt.py +++ b/tests/models/mpt/test_modeling_mpt.py @@ -310,7 +310,7 @@ class MptModelTester: def create_and_check_mpt_weight_initialization(self, config, *args): model = MptModel(config) model_std = model.config.initializer_range / math.sqrt(2 * model.config.n_layers) - for key in model.state_dict().keys(): + for key in model.state_dict(): if "c_proj" in key and "weight" in key: self.parent.assertLessEqual(abs(torch.std(model.state_dict()[key]) - model_std), 0.001) self.parent.assertLessEqual(abs(torch.mean(model.state_dict()[key]) - 0.0), 0.01) diff --git a/tests/models/musicgen/test_modeling_musicgen.py b/tests/models/musicgen/test_modeling_musicgen.py index 8a41c47d6f..bd4290f05c 100644 --- a/tests/models/musicgen/test_modeling_musicgen.py +++ b/tests/models/musicgen/test_modeling_musicgen.py @@ -64,7 +64,7 @@ if is_torch_available(): def _config_zero_init(config): configs_no_init = copy.deepcopy(config) - for key in configs_no_init.__dict__.keys(): + for key in configs_no_init.__dict__: if "_range" in key or "_std" in key or "initializer_factor" in key or "layer_scale" in key: setattr(configs_no_init, key, 1e-10) if isinstance(getattr(configs_no_init, key, None), PretrainedConfig): diff --git a/tests/models/musicgen/test_processor_musicgen.py b/tests/models/musicgen/test_processor_musicgen.py index be1e855725..9070b753e5 100644 --- a/tests/models/musicgen/test_processor_musicgen.py +++ b/tests/models/musicgen/test_processor_musicgen.py @@ -106,7 +106,7 @@ class MusicgenProcessorTest(unittest.TestCase): input_feat_extract = feature_extractor(raw_speech, return_tensors="np") input_processor = processor(raw_speech, return_tensors="np") - for key in input_feat_extract.keys(): + for key in input_feat_extract: self.assertAlmostEqual(input_feat_extract[key].sum(), input_processor[key].sum(), delta=1e-2) def test_tokenizer(self): @@ -121,7 +121,7 @@ class MusicgenProcessorTest(unittest.TestCase): encoded_tok = tokenizer(input_str) - for key in encoded_tok.keys(): + for key in encoded_tok: self.assertListEqual(encoded_tok[key], encoded_processor[key]) def test_tokenizer_decode(self): diff --git a/tests/models/musicgen_melody/test_modeling_musicgen_melody.py b/tests/models/musicgen_melody/test_modeling_musicgen_melody.py index 72b20f345b..a09f37d115 100644 --- a/tests/models/musicgen_melody/test_modeling_musicgen_melody.py +++ b/tests/models/musicgen_melody/test_modeling_musicgen_melody.py @@ -68,7 +68,7 @@ if is_torchaudio_available(): def _config_zero_init(config): configs_no_init = copy.deepcopy(config) - for key in configs_no_init.__dict__.keys(): + for key in configs_no_init.__dict__: if "_range" in key or "_std" in key or "initializer_factor" in key or "layer_scale" in key: setattr(configs_no_init, key, 1e-10) if isinstance(getattr(configs_no_init, key, None), PretrainedConfig): diff --git a/tests/models/musicgen_melody/test_processor_musicgen_melody.py b/tests/models/musicgen_melody/test_processor_musicgen_melody.py index 04fb94c64c..fbeee0543b 100644 --- a/tests/models/musicgen_melody/test_processor_musicgen_melody.py +++ b/tests/models/musicgen_melody/test_processor_musicgen_melody.py @@ -111,7 +111,7 @@ class MusicgenMelodyProcessorTest(unittest.TestCase): input_feat_extract = feature_extractor(raw_speech, return_tensors="np") input_processor = processor(raw_speech, return_tensors="np") - for key in input_feat_extract.keys(): + for key in input_feat_extract: self.assertAlmostEqual(input_feat_extract[key].sum(), input_processor[key].sum(), delta=1e-2) def test_tokenizer(self): @@ -126,7 +126,7 @@ class MusicgenMelodyProcessorTest(unittest.TestCase): encoded_tok = tokenizer(input_str) - for key in encoded_tok.keys(): + for key in encoded_tok: self.assertListEqual(encoded_tok[key], encoded_processor[key]) def test_tokenizer_decode(self): diff --git a/tests/models/omdet_turbo/test_processor_omdet_turbo.py b/tests/models/omdet_turbo/test_processor_omdet_turbo.py index 500e83c56b..262cff8947 100644 --- a/tests/models/omdet_turbo/test_processor_omdet_turbo.py +++ b/tests/models/omdet_turbo/test_processor_omdet_turbo.py @@ -142,7 +142,7 @@ class OmDetTurboProcessorTest(ProcessorTesterMixin, unittest.TestCase): input_image_proc = image_processor(image_input, return_tensors="np") input_processor = processor(images=image_input, return_tensors="np") - for key in input_image_proc.keys(): + for key in input_image_proc: self.assertAlmostEqual(input_image_proc[key].sum(), input_processor[key].sum(), delta=1e-2) def test_tokenizer(self): @@ -157,7 +157,7 @@ class OmDetTurboProcessorTest(ProcessorTesterMixin, unittest.TestCase): encoded_tok = tokenizer(input_str, padding="max_length", truncation=True, max_length=77) - for key in encoded_tok.keys(): + for key in encoded_tok: self.assertListEqual(encoded_tok[key], encoded_processor[key]) def test_processor(self): diff --git a/tests/models/owlv2/test_modeling_owlv2.py b/tests/models/owlv2/test_modeling_owlv2.py index bbbe6824be..d35cd77159 100644 --- a/tests/models/owlv2/test_modeling_owlv2.py +++ b/tests/models/owlv2/test_modeling_owlv2.py @@ -524,8 +524,8 @@ class Owlv2ModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase): loaded_model_state_dict = loaded_model.state_dict() non_persistent_buffers = {} - for key in loaded_model_state_dict.keys(): - if key not in model_state_dict.keys(): + for key in loaded_model_state_dict: + if key not in model_state_dict: non_persistent_buffers[key] = loaded_model_state_dict[key] loaded_model_state_dict = { @@ -738,8 +738,8 @@ class Owlv2ForObjectDetectionTest(ModelTesterMixin, unittest.TestCase): loaded_model_state_dict = loaded_model.state_dict() non_persistent_buffers = {} - for key in loaded_model_state_dict.keys(): - if key not in model_state_dict.keys(): + for key in loaded_model_state_dict: + if key not in model_state_dict: non_persistent_buffers[key] = loaded_model_state_dict[key] loaded_model_state_dict = { diff --git a/tests/models/owlvit/test_modeling_owlvit.py b/tests/models/owlvit/test_modeling_owlvit.py index d150ae7899..01d45d7e43 100644 --- a/tests/models/owlvit/test_modeling_owlvit.py +++ b/tests/models/owlvit/test_modeling_owlvit.py @@ -519,8 +519,8 @@ class OwlViTModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase): loaded_model_state_dict = loaded_model.state_dict() non_persistent_buffers = {} - for key in loaded_model_state_dict.keys(): - if key not in model_state_dict.keys(): + for key in loaded_model_state_dict: + if key not in model_state_dict: non_persistent_buffers[key] = loaded_model_state_dict[key] loaded_model_state_dict = { @@ -731,8 +731,8 @@ class OwlViTForObjectDetectionTest(ModelTesterMixin, unittest.TestCase): loaded_model_state_dict = loaded_model.state_dict() non_persistent_buffers = {} - for key in loaded_model_state_dict.keys(): - if key not in model_state_dict.keys(): + for key in loaded_model_state_dict: + if key not in model_state_dict: non_persistent_buffers[key] = loaded_model_state_dict[key] loaded_model_state_dict = { diff --git a/tests/models/owlvit/test_processor_owlvit.py b/tests/models/owlvit/test_processor_owlvit.py index f31dbaf9fb..069fa7b776 100644 --- a/tests/models/owlvit/test_processor_owlvit.py +++ b/tests/models/owlvit/test_processor_owlvit.py @@ -128,7 +128,7 @@ class OwlViTProcessorTest(ProcessorTesterMixin, unittest.TestCase): input_image_proc = image_processor(image_input, return_tensors="np") input_processor = processor(images=image_input, return_tensors="np") - for key in input_image_proc.keys(): + for key in input_image_proc: self.assertAlmostEqual(input_image_proc[key].sum(), input_processor[key].sum(), delta=1e-2) def test_tokenizer(self): @@ -143,7 +143,7 @@ class OwlViTProcessorTest(ProcessorTesterMixin, unittest.TestCase): encoded_tok = tokenizer(input_str, return_tensors="np") - for key in encoded_tok.keys(): + for key in encoded_tok: self.assertListEqual(encoded_tok[key][0].tolist(), encoded_processor[key][0].tolist()) def test_processor(self): diff --git a/tests/models/perceiver/test_modeling_perceiver.py b/tests/models/perceiver/test_modeling_perceiver.py index 6c2aceea53..3966f4a3a0 100644 --- a/tests/models/perceiver/test_modeling_perceiver.py +++ b/tests/models/perceiver/test_modeling_perceiver.py @@ -431,7 +431,7 @@ class PerceiverModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCas if model_class.__name__ == "PerceiverForMultimodalAutoencoding": # model outputs a dictionary with logits per modality, let's verify each modality - for modality in first.keys(): + for modality in first: out_1 = first[modality].cpu().numpy() out_2 = second[modality].cpu().numpy() out_1 = out_1[~np.isnan(out_1)] @@ -677,7 +677,7 @@ class PerceiverModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCas hidden_states_with_chunk = model(**self._prepare_for_class(inputs_dict, model_class))[0] if model_class.__name__ == "PerceiverForMultimodalAutoencoding": # model outputs a dictionary with logits for each modality - for modality in hidden_states_no_chunk.keys(): + for modality in hidden_states_no_chunk: self.assertTrue( torch.allclose(hidden_states_no_chunk[modality], hidden_states_with_chunk[modality], atol=1e-3) ) @@ -695,7 +695,7 @@ class PerceiverModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCas outputs = model(**self._prepare_for_class(inputs_dict, model_class)) if model_class.__name__ == "PerceiverForMultimodalAutoencoding": - for modality in outputs[0].keys(): + for modality in outputs[0]: out_2 = outputs[0][modality].cpu().numpy() out_2[np.isnan(out_2)] = 0 diff --git a/tests/models/perception_lm/test_processor_perception_lm.py b/tests/models/perception_lm/test_processor_perception_lm.py index 28f5a56e4c..c6384e4b45 100644 --- a/tests/models/perception_lm/test_processor_perception_lm.py +++ b/tests/models/perception_lm/test_processor_perception_lm.py @@ -83,7 +83,7 @@ class PerceptionLMProcessorTest(ProcessorTesterMixin, unittest.TestCase): processor_loaded = self.processor_class.from_pretrained(self.tmpdirname) processor_dict_loaded = json.loads(processor_loaded.to_json_string()) # chat templates aren't serialized to json in processors - self.assertFalse("chat_template" in processor_dict_loaded.keys()) + self.assertFalse("chat_template" in processor_dict_loaded) # they have to be saved as separate file and loaded back from that file # so we check if the same template is loaded diff --git a/tests/models/pix2struct/test_modeling_pix2struct.py b/tests/models/pix2struct/test_modeling_pix2struct.py index d6a0fa5f08..64ebdd4fb5 100644 --- a/tests/models/pix2struct/test_modeling_pix2struct.py +++ b/tests/models/pix2struct/test_modeling_pix2struct.py @@ -702,8 +702,8 @@ class Pix2StructModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTeste loaded_model_state_dict = loaded_model.state_dict() non_persistent_buffers = {} - for key in loaded_model_state_dict.keys(): - if key not in model_state_dict.keys(): + for key in loaded_model_state_dict: + if key not in model_state_dict: non_persistent_buffers[key] = loaded_model_state_dict[key] loaded_model_state_dict = { diff --git a/tests/models/pix2struct/test_processor_pix2struct.py b/tests/models/pix2struct/test_processor_pix2struct.py index d58605d444..5bc3a10336 100644 --- a/tests/models/pix2struct/test_processor_pix2struct.py +++ b/tests/models/pix2struct/test_processor_pix2struct.py @@ -90,7 +90,7 @@ class Pix2StructProcessorTest(ProcessorTesterMixin, unittest.TestCase): input_feat_extract = image_processor(image_input, return_tensors="np") input_processor = processor(images=image_input, return_tensors="np") - for key in input_feat_extract.keys(): + for key in input_feat_extract: self.assertAlmostEqual(input_feat_extract[key].sum(), input_processor[key].sum(), delta=1e-2) def test_tokenizer(self): @@ -105,7 +105,7 @@ class Pix2StructProcessorTest(ProcessorTesterMixin, unittest.TestCase): encoded_tok = tokenizer(input_str, return_token_type_ids=False, add_special_tokens=True) - for key in encoded_tok.keys(): + for key in encoded_tok: self.assertListEqual(encoded_tok[key], encoded_processor[key]) def test_processor(self): diff --git a/tests/models/pop2piano/test_processor_pop2piano.py b/tests/models/pop2piano/test_processor_pop2piano.py index b8cdf63ff9..41b37621ec 100644 --- a/tests/models/pop2piano/test_processor_pop2piano.py +++ b/tests/models/pop2piano/test_processor_pop2piano.py @@ -155,7 +155,7 @@ class Pop2PianoProcessorTest(unittest.TestCase): ) processor_outputs = processor(audio=input_speech, sampling_rate=sampling_rate, return_tensors="np") - for key in feature_extractor_outputs.keys(): + for key in feature_extractor_outputs: self.assertTrue(np.allclose(feature_extractor_outputs[key], processor_outputs[key], atol=1e-4)) def test_processor_batch_decode(self): diff --git a/tests/models/qwen2_5_omni/test_processor_qwen2_5_omni.py b/tests/models/qwen2_5_omni/test_processor_qwen2_5_omni.py index 0baea494cd..eaea49fb44 100644 --- a/tests/models/qwen2_5_omni/test_processor_qwen2_5_omni.py +++ b/tests/models/qwen2_5_omni/test_processor_qwen2_5_omni.py @@ -278,7 +278,7 @@ class Qwen2_5OmniProcessorTest(ProcessorTesterMixin, unittest.TestCase): input_image_proc = image_processor(image_input, return_tensors="pt") input_processor = processor(images=image_input, text="dummy", return_tensors="pt") - for key in input_image_proc.keys(): + for key in input_image_proc: self.assertAlmostEqual(input_image_proc[key].sum(), input_processor[key].sum(), delta=1e-2) def test_processor(self): diff --git a/tests/models/qwen2_5_vl/test_processor_qwen2_5_vl.py b/tests/models/qwen2_5_vl/test_processor_qwen2_5_vl.py index c3f478950f..dc15d5ef86 100644 --- a/tests/models/qwen2_5_vl/test_processor_qwen2_5_vl.py +++ b/tests/models/qwen2_5_vl/test_processor_qwen2_5_vl.py @@ -108,7 +108,7 @@ class Qwen2_5_VLProcessorTest(ProcessorTesterMixin, unittest.TestCase): input_image_proc = image_processor(image_input, return_tensors="pt") input_processor = processor(images=image_input, text="dummy", return_tensors="pt") - for key in input_image_proc.keys(): + for key in input_image_proc: self.assertAlmostEqual(input_image_proc[key].sum(), input_processor[key].sum(), delta=1e-2) def test_processor(self): diff --git a/tests/models/qwen2_vl/test_processor_qwen2_vl.py b/tests/models/qwen2_vl/test_processor_qwen2_vl.py index 69fae59595..09ed36beb6 100644 --- a/tests/models/qwen2_vl/test_processor_qwen2_vl.py +++ b/tests/models/qwen2_vl/test_processor_qwen2_vl.py @@ -112,7 +112,7 @@ class Qwen2VLProcessorTest(ProcessorTesterMixin, unittest.TestCase): input_image_proc = image_processor(image_input, return_tensors="pt") input_processor = processor(images=image_input, text="dummy", return_tensors="pt") - for key in input_image_proc.keys(): + for key in input_image_proc: self.assertAlmostEqual(input_image_proc[key].sum(), input_processor[key].sum(), delta=1e-2) def test_processor(self): diff --git a/tests/models/rt_detr/test_modeling_rt_detr.py b/tests/models/rt_detr/test_modeling_rt_detr.py index fad9093426..809f4acfde 100644 --- a/tests/models/rt_detr/test_modeling_rt_detr.py +++ b/tests/models/rt_detr/test_modeling_rt_detr.py @@ -603,7 +603,7 @@ class RTDetrModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase): # Skip the check for the backbone for name, module in model.named_modules(): if module.__class__.__name__ == "RTDetrConvEncoder": - backbone_params = [f"{name}.{key}" for key in module.state_dict().keys()] + backbone_params = [f"{name}.{key}" for key in module.state_dict()] break for name, param in model.named_parameters(): diff --git a/tests/models/rt_detr_v2/test_modeling_rt_detr_v2.py b/tests/models/rt_detr_v2/test_modeling_rt_detr_v2.py index 79202d3cf7..52cd5ac0c4 100644 --- a/tests/models/rt_detr_v2/test_modeling_rt_detr_v2.py +++ b/tests/models/rt_detr_v2/test_modeling_rt_detr_v2.py @@ -607,7 +607,7 @@ class RTDetrV2ModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase # Skip the check for the backbone for name, module in model.named_modules(): if module.__class__.__name__ == "RTDetrV2ConvEncoder": - backbone_params = [f"{name}.{key}" for key in module.state_dict().keys()] + backbone_params = [f"{name}.{key}" for key in module.state_dict()] break for name, param in model.named_parameters(): diff --git a/tests/models/rwkv/test_modeling_rwkv.py b/tests/models/rwkv/test_modeling_rwkv.py index 4e92baf2d3..8fe4070e9c 100644 --- a/tests/models/rwkv/test_modeling_rwkv.py +++ b/tests/models/rwkv/test_modeling_rwkv.py @@ -238,7 +238,7 @@ class RwkvModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMixin """ if isinstance(member, torch.Tensor): max_value, min_value = member.max().item(), member.min().item() - elif isinstance(member, list) or isinstance(member, tuple): + elif isinstance(member, (list, tuple)): max_value, min_value = max(member), min(member) if not isinstance(container, list): diff --git a/tests/models/sam/test_processor_sam.py b/tests/models/sam/test_processor_sam.py index 852df56f99..b609515205 100644 --- a/tests/models/sam/test_processor_sam.py +++ b/tests/models/sam/test_processor_sam.py @@ -98,7 +98,7 @@ class SamProcessorTest(ProcessorTesterMixin, unittest.TestCase): input_feat_extract = image_processor(image_input, return_tensors="np") input_processor = processor(images=image_input, return_tensors="np") - for key in input_feat_extract.keys(): + for key in input_feat_extract: self.assertAlmostEqual(input_feat_extract[key].sum(), input_processor[key].sum(), delta=1e-2) for image in input_feat_extract.pixel_values: @@ -123,7 +123,7 @@ class SamProcessorTest(ProcessorTesterMixin, unittest.TestCase): input_feat_extract = image_processor(images=image_input, segmentation_maps=mask_input, return_tensors="np") input_processor = processor(images=image_input, segmentation_maps=mask_input, return_tensors="np") - for key in input_feat_extract.keys(): + for key in input_feat_extract: self.assertAlmostEqual(input_feat_extract[key].sum(), input_processor[key].sum(), delta=1e-2) for label in input_feat_extract.labels: diff --git a/tests/models/sam_hq/test_processor_samhq.py b/tests/models/sam_hq/test_processor_samhq.py index 89a642ba60..36e00a9ce0 100644 --- a/tests/models/sam_hq/test_processor_samhq.py +++ b/tests/models/sam_hq/test_processor_samhq.py @@ -107,7 +107,7 @@ class SamHQProcessorTest(ProcessorTesterMixin, unittest.TestCase): input_feat_extract = image_processor(image_input, return_tensors="pt") input_processor = processor(images=image_input, return_tensors="pt") - for key in input_feat_extract.keys(): + for key in input_feat_extract: self.assertAlmostEqual(input_feat_extract[key].sum().item(), input_processor[key].sum().item(), delta=1e-2) for image in input_feat_extract.pixel_values: @@ -132,7 +132,7 @@ class SamHQProcessorTest(ProcessorTesterMixin, unittest.TestCase): input_feat_extract = image_processor(images=image_input, segmentation_maps=mask_input, return_tensors="pt") input_processor = processor(images=image_input, segmentation_maps=mask_input, return_tensors="pt") - for key in input_feat_extract.keys(): + for key in input_feat_extract: self.assertAlmostEqual(input_feat_extract[key].sum().item(), input_processor[key].sum().item(), delta=1e-2) for label in input_feat_extract.labels: diff --git a/tests/models/seamless_m4t/test_processor_seamless_m4t.py b/tests/models/seamless_m4t/test_processor_seamless_m4t.py index 7beefb16bd..f963fa2098 100644 --- a/tests/models/seamless_m4t/test_processor_seamless_m4t.py +++ b/tests/models/seamless_m4t/test_processor_seamless_m4t.py @@ -51,9 +51,7 @@ class SeamlessM4TProcessorTest(unittest.TestCase): processor = SeamlessM4TProcessor.from_pretrained(self.tmpdirname) self.assertEqual(processor.tokenizer.get_vocab(), tokenizer.get_vocab()) - tokenizer_instance = isinstance(processor.tokenizer, SeamlessM4TTokenizerFast) or isinstance( - processor.tokenizer, SeamlessM4TTokenizer - ) + tokenizer_instance = isinstance(processor.tokenizer, (SeamlessM4TTokenizerFast, SeamlessM4TTokenizer)) self.assertTrue(tokenizer_instance) self.assertEqual(processor.feature_extractor.to_json_string(), feature_extractor.to_json_string()) @@ -75,9 +73,7 @@ class SeamlessM4TProcessorTest(unittest.TestCase): self.assertIsInstance(processor.feature_extractor, SeamlessM4TFeatureExtractor) self.assertEqual(processor.tokenizer.get_vocab(), tokenizer_add_kwargs.get_vocab()) - tokenizer_instance = isinstance(processor.tokenizer, SeamlessM4TTokenizerFast) or isinstance( - processor.tokenizer, SeamlessM4TTokenizer - ) + tokenizer_instance = isinstance(processor.tokenizer, (SeamlessM4TTokenizerFast, SeamlessM4TTokenizer)) self.assertTrue(tokenizer_instance) # Copied from test.models.whisper.test_processor_whisper.WhisperProcessorTest.test_feature_extractor with Whisper->SeamlessM4T @@ -92,7 +88,7 @@ class SeamlessM4TProcessorTest(unittest.TestCase): input_feat_extract = feature_extractor(raw_speech, return_tensors="np") input_processor = processor(audios=raw_speech, return_tensors="np") - for key in input_feat_extract.keys(): + for key in input_feat_extract: self.assertAlmostEqual(input_feat_extract[key].sum(), input_processor[key].sum(), delta=1e-2) # Copied from test.models.whisper.test_processor_whisper.WhisperProcessorTest.test_tokenizer with Whisper->SeamlessM4T @@ -108,7 +104,7 @@ class SeamlessM4TProcessorTest(unittest.TestCase): encoded_tok = tokenizer(input_str) - for key in encoded_tok.keys(): + for key in encoded_tok: self.assertListEqual(encoded_tok[key], encoded_processor[key]) # Copied from test.models.whisper.test_processor_whisper.WhisperProcessorTest.test_tokenizer_decode with Whisper->SeamlessM4T diff --git a/tests/models/siglip/test_modeling_siglip.py b/tests/models/siglip/test_modeling_siglip.py index b5c14280bb..295c127b36 100644 --- a/tests/models/siglip/test_modeling_siglip.py +++ b/tests/models/siglip/test_modeling_siglip.py @@ -553,8 +553,8 @@ class SiglipModelTest(SiglipModelTesterMixin, PipelineTesterMixin, unittest.Test loaded_model_state_dict = loaded_model.state_dict() non_persistent_buffers = {} - for key in loaded_model_state_dict.keys(): - if key not in model_state_dict.keys(): + for key in loaded_model_state_dict: + if key not in model_state_dict: non_persistent_buffers[key] = loaded_model_state_dict[key] loaded_model_state_dict = { diff --git a/tests/models/speech_to_text/test_modeling_speech_to_text.py b/tests/models/speech_to_text/test_modeling_speech_to_text.py index 593aca1406..0ebbc6631e 100644 --- a/tests/models/speech_to_text/test_modeling_speech_to_text.py +++ b/tests/models/speech_to_text/test_modeling_speech_to_text.py @@ -664,8 +664,8 @@ class Speech2TextModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTest loaded_model_state_dict = loaded_model.state_dict() non_persistent_buffers = {} - for key in loaded_model_state_dict.keys(): - if key not in model_state_dict.keys(): + for key in loaded_model_state_dict: + if key not in model_state_dict: non_persistent_buffers[key] = loaded_model_state_dict[key] loaded_model_state_dict = { diff --git a/tests/models/speech_to_text/test_processor_speech_to_text.py b/tests/models/speech_to_text/test_processor_speech_to_text.py index 4f2c714295..f4794cca53 100644 --- a/tests/models/speech_to_text/test_processor_speech_to_text.py +++ b/tests/models/speech_to_text/test_processor_speech_to_text.py @@ -115,7 +115,7 @@ class Speech2TextProcessorTest(unittest.TestCase): input_feat_extract = feature_extractor(raw_speech, return_tensors="np") input_processor = processor(raw_speech, return_tensors="np") - for key in input_feat_extract.keys(): + for key in input_feat_extract: self.assertAlmostEqual(input_feat_extract[key].sum(), input_processor[key].sum(), delta=1e-2) def test_tokenizer(self): @@ -130,7 +130,7 @@ class Speech2TextProcessorTest(unittest.TestCase): encoded_tok = tokenizer(input_str) - for key in encoded_tok.keys(): + for key in encoded_tok: self.assertListEqual(encoded_tok[key], encoded_processor[key]) def test_tokenizer_decode(self): diff --git a/tests/models/speecht5/test_processor_speecht5.py b/tests/models/speecht5/test_processor_speecht5.py index 75340fa4e0..ea84104ac7 100644 --- a/tests/models/speecht5/test_processor_speecht5.py +++ b/tests/models/speecht5/test_processor_speecht5.py @@ -122,7 +122,7 @@ class SpeechT5ProcessorTest(unittest.TestCase): input_feat_extract = feature_extractor(audio=raw_speech, return_tensors="np") input_processor = processor(audio=raw_speech, return_tensors="np") - for key in input_feat_extract.keys(): + for key in input_feat_extract: self.assertAlmostEqual(input_feat_extract[key].sum(), input_processor[key].sum(), delta=1e-2) def test_feature_extractor_target(self): @@ -136,7 +136,7 @@ class SpeechT5ProcessorTest(unittest.TestCase): input_feat_extract = feature_extractor(audio_target=raw_speech, return_tensors="np") input_processor = processor(audio_target=raw_speech, return_tensors="np") - for key in input_feat_extract.keys(): + for key in input_feat_extract: self.assertAlmostEqual(input_feat_extract[key].sum(), input_processor[key].sum(), delta=1e-2) def test_tokenizer(self): @@ -150,7 +150,7 @@ class SpeechT5ProcessorTest(unittest.TestCase): encoded_processor = processor(text=input_str) encoded_tok = tokenizer(input_str) - for key in encoded_tok.keys(): + for key in encoded_tok: self.assertListEqual(encoded_tok[key], encoded_processor[key]) def test_tokenizer_target(self): @@ -164,7 +164,7 @@ class SpeechT5ProcessorTest(unittest.TestCase): encoded_processor = processor(text_target=input_str) encoded_tok = tokenizer(input_str) - for key in encoded_tok.keys(): + for key in encoded_tok: self.assertListEqual(encoded_tok[key], encoded_processor[key]) def test_tokenizer_decode(self): diff --git a/tests/models/tapas/test_tokenization_tapas.py b/tests/models/tapas/test_tokenization_tapas.py index 6f3b96166d..998ec823ef 100644 --- a/tests/models/tapas/test_tokenization_tapas.py +++ b/tests/models/tapas/test_tokenization_tapas.py @@ -691,7 +691,7 @@ class TapasTokenizationTest(TokenizerTesterMixin, unittest.TestCase): encoded_sequences_batch_padded_2 = tokenizer.batch_encode_plus( table, sequences, max_length=maximum_length + 10, padding="longest" ) - for key in encoded_sequences_batch_padded_1.keys(): + for key in encoded_sequences_batch_padded_1: self.assertListEqual( encoded_sequences_batch_padded_1[key], encoded_sequences_batch_padded_2[key], @@ -702,7 +702,7 @@ class TapasTokenizationTest(TokenizerTesterMixin, unittest.TestCase): encoded_sequences_batch_padded_2 = tokenizer.batch_encode_plus( table, sequences, max_length=maximum_length + 10, padding=False ) - for key in encoded_sequences_batch_padded_1.keys(): + for key in encoded_sequences_batch_padded_1: self.assertListEqual( encoded_sequences_batch_padded_1[key], encoded_sequences_batch_padded_2[key], diff --git a/tests/models/trocr/test_processor_trocr.py b/tests/models/trocr/test_processor_trocr.py index e2d64aeb0e..205f268fe6 100644 --- a/tests/models/trocr/test_processor_trocr.py +++ b/tests/models/trocr/test_processor_trocr.py @@ -91,7 +91,7 @@ class TrOCRProcessorTest(ProcessorTesterMixin, unittest.TestCase): input_feat_extract = image_processor(image_input, return_tensors="np") input_processor = processor(images=image_input, return_tensors="np") - for key in input_feat_extract.keys(): + for key in input_feat_extract: self.assertAlmostEqual(input_feat_extract[key].sum(), input_processor[key].sum(), delta=1e-2) def test_tokenizer(self): @@ -103,7 +103,7 @@ class TrOCRProcessorTest(ProcessorTesterMixin, unittest.TestCase): encoded_processor = processor(text=input_str) encoded_tok = tokenizer(input_str) - for key in encoded_tok.keys(): + for key in encoded_tok: self.assertListEqual(encoded_tok[key], encoded_processor[key]) def test_processor_text(self): diff --git a/tests/models/udop/test_tokenization_udop.py b/tests/models/udop/test_tokenization_udop.py index 581cee32e2..7badff1b4b 100644 --- a/tests/models/udop/test_tokenization_udop.py +++ b/tests/models/udop/test_tokenization_udop.py @@ -721,7 +721,7 @@ class UdopTokenizationTest(TokenizerTesterMixin, unittest.TestCase): encoded_sequences_batch_padded_2 = tokenizer.batch_encode_plus_boxes( words, is_pair=False, boxes=boxes, max_length=maximum_length + 10, padding="longest" ) - for key in encoded_sequences_batch_padded_1.keys(): + for key in encoded_sequences_batch_padded_1: self.assertListEqual( encoded_sequences_batch_padded_1[key], encoded_sequences_batch_padded_2[key], @@ -734,7 +734,7 @@ class UdopTokenizationTest(TokenizerTesterMixin, unittest.TestCase): encoded_sequences_batch_padded_2 = tokenizer.batch_encode_plus_boxes( words, is_pair=False, boxes=boxes, max_length=maximum_length + 10, padding=False ) - for key in encoded_sequences_batch_padded_1.keys(): + for key in encoded_sequences_batch_padded_1: self.assertListEqual( encoded_sequences_batch_padded_1[key], encoded_sequences_batch_padded_2[key], @@ -1231,7 +1231,7 @@ class UdopTokenizationTest(TokenizerTesterMixin, unittest.TestCase): add_special_tokens=True, ) - for key in tokens_p.keys(): + for key in tokens_p: self.assertEqual(tokens_r[key], tokens_p[key]) if "token_type_ids" in tokens_r: @@ -1262,7 +1262,7 @@ class UdopTokenizationTest(TokenizerTesterMixin, unittest.TestCase): # encode_plus() no_special_tokens = tokenizer_r.encode_plus_boxes(words, boxes=boxes, add_special_tokens=False) with_special_tokens = tokenizer_r.encode_plus_boxes(words, boxes=boxes, add_special_tokens=True) - for key in no_special_tokens.keys(): + for key in no_special_tokens: self.assertEqual( len(no_special_tokens[key]), len(with_special_tokens[key]) - simple_num_special_tokens_to_add, @@ -1273,7 +1273,7 @@ class UdopTokenizationTest(TokenizerTesterMixin, unittest.TestCase): no_special_tokens = tokenizer_r.batch_encode_plus_boxes(words, boxes=boxes, add_special_tokens=False) with_special_tokens = tokenizer_r.batch_encode_plus_boxes(words, boxes=boxes, add_special_tokens=True) - for key in no_special_tokens.keys(): + for key in no_special_tokens: for i_no, i_with in zip(no_special_tokens[key], with_special_tokens[key]): self.assertEqual(len(i_no), len(i_with) - simple_num_special_tokens_to_add) diff --git a/tests/models/vision_text_dual_encoder/test_processor_vision_text_dual_encoder.py b/tests/models/vision_text_dual_encoder/test_processor_vision_text_dual_encoder.py index fdae4bf8e1..814c202fdb 100644 --- a/tests/models/vision_text_dual_encoder/test_processor_vision_text_dual_encoder.py +++ b/tests/models/vision_text_dual_encoder/test_processor_vision_text_dual_encoder.py @@ -120,7 +120,7 @@ class VisionTextDualEncoderProcessorTest(ProcessorTesterMixin, unittest.TestCase input_feat_extract = image_processor(image_input, return_tensors="pt") input_processor = processor(images=image_input, return_tensors="pt") - for key in input_feat_extract.keys(): + for key in input_feat_extract: self.assertAlmostEqual(input_feat_extract[key].sum(), input_processor[key].sum(), delta=1e-2) def test_tokenizer(self): @@ -135,7 +135,7 @@ class VisionTextDualEncoderProcessorTest(ProcessorTesterMixin, unittest.TestCase encoded_tok = tokenizer(input_str) - for key in encoded_tok.keys(): + for key in encoded_tok: self.assertListEqual(encoded_tok[key], encoded_processor[key]) def test_processor(self): diff --git a/tests/models/visual_bert/test_modeling_visual_bert.py b/tests/models/visual_bert/test_modeling_visual_bert.py index 8b715137a3..09c96a2467 100644 --- a/tests/models/visual_bert/test_modeling_visual_bert.py +++ b/tests/models/visual_bert/test_modeling_visual_bert.py @@ -318,7 +318,7 @@ class VisualBertModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCa def _prepare_for_class(self, inputs_dict, model_class, return_labels=False): inputs_dict = copy.deepcopy(inputs_dict) if model_class == VisualBertForMultipleChoice: - for key in inputs_dict.keys(): + for key in inputs_dict: value = inputs_dict[key] if isinstance(value, torch.Tensor) and value.ndim > 1: if key != "visual_embeds": diff --git a/tests/models/vits/test_modeling_vits.py b/tests/models/vits/test_modeling_vits.py index ecd1a73f33..7f8e9d6e7f 100644 --- a/tests/models/vits/test_modeling_vits.py +++ b/tests/models/vits/test_modeling_vits.py @@ -55,7 +55,7 @@ GENERATION_CONFIG_NAME = "generation_config.json" def _config_zero_init(config): configs_no_init = copy.deepcopy(config) - for key in configs_no_init.__dict__.keys(): + for key in configs_no_init.__dict__: if "_range" in key or "_std" in key or "initializer_factor" in key or "layer_scale" in key: setattr(configs_no_init, key, 1e-10) if isinstance(getattr(configs_no_init, key, None), PretrainedConfig): diff --git a/tests/models/wav2vec2/test_modeling_wav2vec2.py b/tests/models/wav2vec2/test_modeling_wav2vec2.py index 560a8af6d9..3b38f2d8b6 100644 --- a/tests/models/wav2vec2/test_modeling_wav2vec2.py +++ b/tests/models/wav2vec2/test_modeling_wav2vec2.py @@ -1982,7 +1982,7 @@ class Wav2Vec2ModelIntegrationTest(unittest.TestCase): "en": "joe keton disapproved of films and buster also had reservations about the media", } - for lang in LANG_MAP.keys(): + for lang in LANG_MAP: assert run_model(lang) == TRANSCRIPTIONS[lang] @require_flash_attn diff --git a/tests/models/wav2vec2/test_processor_wav2vec2.py b/tests/models/wav2vec2/test_processor_wav2vec2.py index 4001421681..1ed5fa2982 100644 --- a/tests/models/wav2vec2/test_processor_wav2vec2.py +++ b/tests/models/wav2vec2/test_processor_wav2vec2.py @@ -126,7 +126,7 @@ class Wav2Vec2ProcessorTest(ProcessorTesterMixin, unittest.TestCase): input_feat_extract = feature_extractor(raw_speech, return_tensors="np") input_processor = processor(raw_speech, return_tensors="np") - for key in input_feat_extract.keys(): + for key in input_feat_extract: self.assertAlmostEqual(input_feat_extract[key].sum(), input_processor[key].sum(), delta=1e-2) def test_tokenizer(self): @@ -140,7 +140,7 @@ class Wav2Vec2ProcessorTest(ProcessorTesterMixin, unittest.TestCase): encoded_tok = tokenizer(input_str) - for key in encoded_tok.keys(): + for key in encoded_tok: self.assertListEqual(encoded_tok[key], encoded_processor[key]) def test_tokenizer_decode(self): diff --git a/tests/models/wav2vec2_bert/test_processor_wav2vec2_bert.py b/tests/models/wav2vec2_bert/test_processor_wav2vec2_bert.py index c269e15db3..5c4f062de7 100644 --- a/tests/models/wav2vec2_bert/test_processor_wav2vec2_bert.py +++ b/tests/models/wav2vec2_bert/test_processor_wav2vec2_bert.py @@ -127,7 +127,7 @@ class Wav2Vec2BertProcessorTest(ProcessorTesterMixin, unittest.TestCase): input_feat_extract = feature_extractor(raw_speech, return_tensors="np") input_processor = processor(raw_speech, return_tensors="np") - for key in input_feat_extract.keys(): + for key in input_feat_extract: self.assertAlmostEqual(input_feat_extract[key].sum(), input_processor[key].sum(), delta=1e-2) def test_tokenizer(self): @@ -141,7 +141,7 @@ class Wav2Vec2BertProcessorTest(ProcessorTesterMixin, unittest.TestCase): encoded_tok = tokenizer(input_str) - for key in encoded_tok.keys(): + for key in encoded_tok: self.assertListEqual(encoded_tok[key], encoded_processor[key]) def test_tokenizer_decode(self): diff --git a/tests/models/wav2vec2_with_lm/test_processor_wav2vec2_with_lm.py b/tests/models/wav2vec2_with_lm/test_processor_wav2vec2_with_lm.py index 66fc8665cb..97e83ff367 100644 --- a/tests/models/wav2vec2_with_lm/test_processor_wav2vec2_with_lm.py +++ b/tests/models/wav2vec2_with_lm/test_processor_wav2vec2_with_lm.py @@ -154,7 +154,7 @@ class Wav2Vec2ProcessorWithLMTest(unittest.TestCase): input_feat_extract = feature_extractor(raw_speech, return_tensors="np") input_processor = processor(raw_speech, return_tensors="np") - for key in input_feat_extract.keys(): + for key in input_feat_extract: self.assertAlmostEqual(input_feat_extract[key].sum(), input_processor[key].sum(), delta=1e-2) def test_another_feature_extractor(self): @@ -169,7 +169,7 @@ class Wav2Vec2ProcessorWithLMTest(unittest.TestCase): input_feat_extract = feature_extractor(raw_speech, return_tensors="np") input_processor = processor(raw_speech, return_tensors="np") - for key in input_feat_extract.keys(): + for key in input_feat_extract: self.assertAlmostEqual(input_feat_extract[key].sum(), input_processor[key].sum(), delta=1e-2) self.assertListEqual( @@ -199,7 +199,7 @@ class Wav2Vec2ProcessorWithLMTest(unittest.TestCase): encoded_tok = tokenizer(input_str) - for key in encoded_tok.keys(): + for key in encoded_tok: self.assertListEqual(encoded_tok[key], encoded_processor[key]) def _get_dummy_logits(self, shape=(2, 10, 16), seed=77): @@ -394,7 +394,7 @@ class Wav2Vec2ProcessorWithLMTest(unittest.TestCase): input_wav2vec2 = processor_wav2vec2(raw_speech, return_tensors="np") input_auto = processor_auto(raw_speech, return_tensors="np") - for key in input_wav2vec2.keys(): + for key in input_wav2vec2: self.assertAlmostEqual(input_wav2vec2[key].sum(), input_auto[key].sum(), delta=1e-2) logits = self._get_dummy_logits() diff --git a/tests/models/whisper/test_modeling_whisper.py b/tests/models/whisper/test_modeling_whisper.py index d0aeecf806..aa82a8108c 100644 --- a/tests/models/whisper/test_modeling_whisper.py +++ b/tests/models/whisper/test_modeling_whisper.py @@ -1016,8 +1016,8 @@ class WhisperModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMi loaded_model_state_dict = loaded_model.state_dict() non_persistent_buffers = {} - for key in loaded_model_state_dict.keys(): - if key not in model_state_dict.keys(): + for key in loaded_model_state_dict: + if key not in model_state_dict: non_persistent_buffers[key] = loaded_model_state_dict[key] loaded_model_state_dict = { diff --git a/tests/models/whisper/test_processor_whisper.py b/tests/models/whisper/test_processor_whisper.py index 86451b5412..a80e7950b2 100644 --- a/tests/models/whisper/test_processor_whisper.py +++ b/tests/models/whisper/test_processor_whisper.py @@ -93,7 +93,7 @@ class WhisperProcessorTest(unittest.TestCase): input_feat_extract = feature_extractor(raw_speech, return_tensors="np") input_processor = processor(raw_speech, return_tensors="np") - for key in input_feat_extract.keys(): + for key in input_feat_extract: self.assertAlmostEqual(input_feat_extract[key].sum(), input_processor[key].sum(), delta=1e-2) def test_tokenizer(self): @@ -108,7 +108,7 @@ class WhisperProcessorTest(unittest.TestCase): encoded_tok = tokenizer(input_str) - for key in encoded_tok.keys(): + for key in encoded_tok: self.assertListEqual(encoded_tok[key], encoded_processor[key]) def test_tokenizer_decode(self): diff --git a/tests/models/x_clip/test_modeling_x_clip.py b/tests/models/x_clip/test_modeling_x_clip.py index 5c125f0797..fe6e8728be 100644 --- a/tests/models/x_clip/test_modeling_x_clip.py +++ b/tests/models/x_clip/test_modeling_x_clip.py @@ -638,8 +638,8 @@ class XCLIPModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase): loaded_model_state_dict = loaded_model.state_dict() non_persistent_buffers = {} - for key in loaded_model_state_dict.keys(): - if key not in model_state_dict.keys(): + for key in loaded_model_state_dict: + if key not in model_state_dict: non_persistent_buffers[key] = loaded_model_state_dict[key] loaded_model_state_dict = { diff --git a/tests/models/xglm/test_modeling_xglm.py b/tests/models/xglm/test_modeling_xglm.py index ace2f0f511..caa1074282 100644 --- a/tests/models/xglm/test_modeling_xglm.py +++ b/tests/models/xglm/test_modeling_xglm.py @@ -258,7 +258,7 @@ class XGLMModelTester: def create_and_check_xglm_weight_initialization(self, config, *args): model = XGLMModel(config) model_std = model.config.initializer_range / math.sqrt(2 * model.config.num_hidden_layers) - for key in model.state_dict().keys(): + for key in model.state_dict(): if "c_proj" in key and "weight" in key: self.parent.assertLessEqual(abs(torch.std(model.state_dict()[key]) - model_std), 0.001) self.parent.assertLessEqual(abs(torch.mean(model.state_dict()[key]) - 0.0), 0.01) diff --git a/tests/peft_integration/test_peft_integration.py b/tests/peft_integration/test_peft_integration.py index 0ce14f0412..0cf97ea80d 100644 --- a/tests/peft_integration/test_peft_integration.py +++ b/tests/peft_integration/test_peft_integration.py @@ -115,7 +115,7 @@ class PeftIntegrationTester(unittest.TestCase, PeftTesterMixin): state_dict = peft_model.get_adapter_state_dict() - for key in state_dict.keys(): + for key in state_dict: self.assertTrue("lora" in key) def test_peft_save_pretrained(self): @@ -242,7 +242,7 @@ class PeftIntegrationTester(unittest.TestCase, PeftTesterMixin): self.assertTrue(_has_modules_to_save_wrapper) state_dict = model.get_adapter_state_dict() - self.assertTrue("lm_head.weight" in state_dict.keys()) + self.assertTrue("lm_head.weight" in state_dict) logits = model(dummy_input).logits loss = logits.mean() diff --git a/tests/pipelines/test_pipelines_common.py b/tests/pipelines/test_pipelines_common.py index a701f86f66..a6bceef116 100644 --- a/tests/pipelines/test_pipelines_common.py +++ b/tests/pipelines/test_pipelines_common.py @@ -568,7 +568,7 @@ class PipelineUtilsTest(unittest.TestCase): from transformers.pipelines import SUPPORTED_TASKS set_seed_fn = lambda: torch.manual_seed(0) # noqa: E731 - for task in SUPPORTED_TASKS.keys(): + for task in SUPPORTED_TASKS: if task == "table-question-answering": # test table in separate test due to more dependencies continue @@ -627,7 +627,7 @@ class PipelineUtilsTest(unittest.TestCase): model_ids = [] revisions = [] tasks = [] - for translation_pair in task_dict["default"].keys(): + for translation_pair in task_dict["default"]: model_id, revision = task_dict["default"][translation_pair]["model"][framework] model_ids.append(model_id) diff --git a/tests/quantization/bnb/test_4bit.py b/tests/quantization/bnb/test_4bit.py index 3c0977f21c..a5ba1a861f 100644 --- a/tests/quantization/bnb/test_4bit.py +++ b/tests/quantization/bnb/test_4bit.py @@ -726,7 +726,7 @@ class BaseSerializationTest(unittest.TestCase): d1 = dict(model_1.named_parameters()) self.assertTrue(d0.keys() == d1.keys()) - for k in d0.keys(): + for k in d0: self.assertTrue(d0[k].shape == d1[k].shape) self.assertTrue(d0[k].device.type == d1[k].device.type) self.assertTrue(d0[k].device == d1[k].device) diff --git a/tests/quantization/quanto_integration/test_quanto.py b/tests/quantization/quanto_integration/test_quanto.py index a4e0b47869..5cc0814332 100644 --- a/tests/quantization/quanto_integration/test_quanto.py +++ b/tests/quantization/quanto_integration/test_quanto.py @@ -251,7 +251,7 @@ class QuantoQuantizationTest(unittest.TestCase): d0 = dict(model1.named_parameters()) d1 = dict(model2.named_parameters()) self.assertTrue(d0.keys() == d1.keys()) - for k in d0.keys(): + for k in d0: self.assertTrue(d0[k].shape == d1[k].shape) self.assertTrue(d0[k].device.type == d1[k].device.type) self.assertTrue(d0[k].device == d1[k].device) diff --git a/tests/sagemaker/scripts/pytorch/run_glue_model_parallelism.py b/tests/sagemaker/scripts/pytorch/run_glue_model_parallelism.py index 2b1a19b9ab..d18484d1cd 100644 --- a/tests/sagemaker/scripts/pytorch/run_glue_model_parallelism.py +++ b/tests/sagemaker/scripts/pytorch/run_glue_model_parallelism.py @@ -136,7 +136,7 @@ class DataTrainingArguments: def __post_init__(self): if self.task_name is not None: self.task_name = self.task_name.lower() - if self.task_name not in task_to_keys.keys(): + if self.task_name not in task_to_keys: raise ValueError("Unknown task, you should pick one in " + ",".join(task_to_keys.keys())) elif self.train_file is None or self.validation_file is None: raise ValueError("Need either a GLUE task or a training/validation file.") @@ -271,7 +271,7 @@ def main(): else: raise ValueError("Need either a GLUE task or a test file for `do_predict`.") - for key in data_files.keys(): + for key in data_files: logger.info(f"load a local file for {key}: {data_files[key]}") if data_args.train_file.endswith(".csv"): diff --git a/tests/test_modeling_common.py b/tests/test_modeling_common.py index 9454f4b4e5..d0b2e902e6 100755 --- a/tests/test_modeling_common.py +++ b/tests/test_modeling_common.py @@ -489,7 +489,7 @@ def _test_eager_matches_sdpa_inference( def _config_zero_init(config): configs_no_init = copy.deepcopy(config) - for key in configs_no_init.__dict__.keys(): + for key in configs_no_init.__dict__: if "_range" in key or "_std" in key or "initializer_factor" in key or "layer_scale" in key: setattr(configs_no_init, key, 1e-10) if isinstance(getattr(configs_no_init, key, None), PretrainedConfig): @@ -949,7 +949,7 @@ class ModelTesterMixin: state_dict = model.state_dict() def check_equal(loaded): - for key in state_dict.keys(): + for key in state_dict: max_diff = torch.max( state_dict()[key] ^ loaded[key] if isinstance(state_dict[key], torch.BoolTensor) @@ -1482,8 +1482,8 @@ class ModelTesterMixin: loaded_model_state_dict = loaded_model.state_dict() non_persistent_buffers = {} - for key in loaded_model_state_dict.keys(): - if key not in model_state_dict.keys(): + for key in loaded_model_state_dict: + if key not in model_state_dict: non_persistent_buffers[key] = loaded_model_state_dict[key] loaded_model_state_dict = { @@ -3431,7 +3431,7 @@ class ModelTesterMixin: set_seed(0) new_model = MyClass.from_pretrained(tmpdirname, num_labels=4, ignore_mismatched_sizes=True) - for key in new_model.state_dict().keys(): + for key in new_model.state_dict(): # check weight values for weights with matched shapes are identical # (i.e. correctly loaded from the checkpoint) if key not in ["linear.weight", "linear.bias"]: @@ -3632,12 +3632,12 @@ class ModelTesterMixin: # set eager as it will be the one supported in all models # we just need to test if passing 'attn_implementation' as a dict fails or not attn_implementation_per_subconfig = {"": "eager"} - for key in config.sub_configs.keys(): + for key in config.sub_configs: attn_implementation_per_subconfig[key] = "eager" config._attn_implementation = attn_implementation_per_subconfig model = model_class(config) - for key in config.sub_configs.keys(): + for key in config.sub_configs: sub_config = getattr(model.config, key) self.assertTrue(sub_config._attn_implementation == "eager") diff --git a/tests/test_tokenization_common.py b/tests/test_tokenization_common.py index 3461721808..b70a778640 100644 --- a/tests/test_tokenization_common.py +++ b/tests/test_tokenization_common.py @@ -427,7 +427,7 @@ class TokenizerTesterMixin: # Switch from batch_encode_plus format: {'input_ids': [[...], [...]], ...} # to the list of examples/ encode_plus format: [{'input_ids': [...], ...}, {'input_ids': [...], ...}] return [ - {value: batch_encode_plus_sequences[value][i] for value in batch_encode_plus_sequences.keys()} + {value: batch_encode_plus_sequences[value][i] for value in batch_encode_plus_sequences} for i in range(len(batch_encode_plus_sequences["input_ids"])) ] @@ -2792,7 +2792,7 @@ class TokenizerTesterMixin: encoded_sequences_batch_padded_2 = tokenizer.batch_encode_plus( sequences, max_length=maximum_length + 10, padding="longest" ) - for key in encoded_sequences_batch_padded_1.keys(): + for key in encoded_sequences_batch_padded_1: self.assertListEqual( encoded_sequences_batch_padded_1[key], encoded_sequences_batch_padded_2[key], @@ -2803,7 +2803,7 @@ class TokenizerTesterMixin: encoded_sequences_batch_padded_2 = tokenizer.batch_encode_plus( sequences, max_length=maximum_length + 10, padding=False ) - for key in encoded_sequences_batch_padded_1.keys(): + for key in encoded_sequences_batch_padded_1: self.assertListEqual( encoded_sequences_batch_padded_1[key], encoded_sequences_batch_padded_2[key], @@ -2933,11 +2933,11 @@ class TokenizerTesterMixin: # Test encode_plus for pretokenized inputs output = tokenizer.encode_plus(token_sequence, is_split_into_words=True, add_special_tokens=False) output_sequence = tokenizer.encode_plus(sequence, add_special_tokens=False) - for key in output.keys(): + for key in output: self.assertEqual(output[key], output_sequence[key]) output = tokenizer.encode_plus(token_sequence, is_split_into_words=True, add_special_tokens=True) output_sequence = tokenizer.encode_plus(sequence, add_special_tokens=True) - for key in output.keys(): + for key in output: self.assertEqual(output[key], output_sequence[key]) # Test batch_encode_plus for pretokenized inputs @@ -2951,7 +2951,7 @@ class TokenizerTesterMixin: output_sequence = tokenizer.batch_encode_plus( sequence_batch_cleaned_up_spaces, add_special_tokens=False ) - for key in output.keys(): + for key in output: self.assertEqual(output[key], output_sequence[key]) output = tokenizer.batch_encode_plus( token_sequence_batch, is_split_into_words=True, add_special_tokens=True @@ -2959,7 +2959,7 @@ class TokenizerTesterMixin: output_sequence = tokenizer.batch_encode_plus( sequence_batch_cleaned_up_spaces, add_special_tokens=True ) - for key in output.keys(): + for key in output: self.assertEqual(output[key], output_sequence[key]) # Test encode for pretokenized inputs pairs @@ -2979,13 +2979,13 @@ class TokenizerTesterMixin: token_sequence, token_sequence, is_split_into_words=True, add_special_tokens=False ) output_sequence = tokenizer.encode_plus(sequence, sequence, add_special_tokens=False) - for key in output.keys(): + for key in output: self.assertEqual(output[key], output_sequence[key]) output = tokenizer.encode_plus( token_sequence, token_sequence, is_split_into_words=True, add_special_tokens=True ) output_sequence = tokenizer.encode_plus(sequence, sequence, add_special_tokens=True) - for key in output.keys(): + for key in output: self.assertEqual(output[key], output_sequence[key]) # Test batch_encode_plus for pretokenized inputs pairs @@ -3003,7 +3003,7 @@ class TokenizerTesterMixin: output_sequence = tokenizer.batch_encode_plus( sequence_pair_batch_cleaned_up_spaces, add_special_tokens=False ) - for key in output.keys(): + for key in output: self.assertEqual(output[key], output_sequence[key]) output = tokenizer.batch_encode_plus( token_sequence_pair_batch, is_split_into_words=True, add_special_tokens=True @@ -3011,7 +3011,7 @@ class TokenizerTesterMixin: output_sequence = tokenizer.batch_encode_plus( sequence_pair_batch_cleaned_up_spaces, add_special_tokens=True ) - for key in output.keys(): + for key in output: self.assertEqual(output[key], output_sequence[key]) def test_prepare_for_model(self): @@ -3703,14 +3703,14 @@ class TokenizerTesterMixin: # Test encode_plus for pretokenized inputs output_r = tokenizer_r.encode_plus(pretokenized_input_simple, **kwargs) output_p = tokenizer_p.encode_plus(pretokenized_input_simple, **kwargs) - for key in output_p.keys(): + for key in output_p: self.assertEqual(output_p[key], output_r[key]) # Test batch_encode_plus for pretokenized inputs input_batch = ([pretokenized_input_simple] * 2) + [pretokenized_input_simple + pretokenized_input_pair] output_r = tokenizer_r.batch_encode_plus(input_batch, **batch_kwargs) output_p = tokenizer_p.batch_encode_plus(input_batch, **batch_kwargs) - for key in output_p.keys(): + for key in output_p: self.assertEqual(output_p[key], output_r[key]) # Test encode for pretokenized inputs pairs @@ -3725,7 +3725,7 @@ class TokenizerTesterMixin: # Test encode_plus for pretokenized inputs output_r = tokenizer_r.encode_plus(pretokenized_input_simple, pretokenized_input_pair, **kwargs) output_p = tokenizer_p.encode_plus(pretokenized_input_simple, pretokenized_input_pair, **kwargs) - for key in output_p.keys(): + for key in output_p: self.assertEqual(output_p[key], output_r[key]) # Test batch_encode_plus for pretokenized inputs @@ -3735,7 +3735,7 @@ class TokenizerTesterMixin: ] output_r = tokenizer_r.batch_encode_plus(input_batch_pair, **batch_kwargs) output_p = tokenizer_p.batch_encode_plus(input_batch_pair, **batch_kwargs) - for key in output_p.keys(): + for key in output_p: self.assertEqual(output_p[key], output_r[key]) def test_create_token_type_ids(self): @@ -4125,7 +4125,7 @@ class TokenizerTesterMixin: add_special_tokens=True, ) - for key in tokens_p.keys(): + for key in tokens_p: self.assertEqual(tokens_r[key], tokens_p[key]) if "token_type_ids" in tokens_r: @@ -4161,7 +4161,7 @@ class TokenizerTesterMixin: # encode_plus() no_special_tokens = tokenizer_r.encode_plus(text, add_special_tokens=False) with_special_tokens = tokenizer_r.encode_plus(text, add_special_tokens=True) - for key in no_special_tokens.keys(): + for key in no_special_tokens: self.assertEqual( len(no_special_tokens[key]), len(with_special_tokens[key]) - simple_num_special_tokens_to_add, @@ -4170,7 +4170,7 @@ class TokenizerTesterMixin: # # batch_encode_plus no_special_tokens = tokenizer_r.batch_encode_plus([text, text], add_special_tokens=False) with_special_tokens = tokenizer_r.batch_encode_plus([text, text], add_special_tokens=True) - for key in no_special_tokens.keys(): + for key in no_special_tokens: for i_no, i_with in zip(no_special_tokens[key], with_special_tokens[key]): self.assertEqual(len(i_no), len(i_with) - simple_num_special_tokens_to_add) diff --git a/tests/trainer/test_trainer.py b/tests/trainer/test_trainer.py index 3c2cc0ce89..a0e2911789 100644 --- a/tests/trainer/test_trainer.py +++ b/tests/trainer/test_trainer.py @@ -555,7 +555,7 @@ if is_torch_available(): output_dir=None, **kwargs, ): - label_names = kwargs.get("label_names", None) + label_names = kwargs.get("label_names") gradient_checkpointing = kwargs.get("gradient_checkpointing", False) train_dataset = RegressionDataset(length=train_len, label_names=label_names) eval_dataset = RegressionDataset(length=eval_len, label_names=label_names) @@ -1340,7 +1340,7 @@ class TrainerIntegrationTest(TestCasePlus, TrainerIntegrationCommon): trainer.train() args = TrainingArguments(tmp_dir, report_to=[]) dict1, dict2 = args.to_dict(), trainer.args.to_dict() - for key in dict1.keys(): + for key in dict1: # Logging dir can be slightly different as they default to something with the time. if key != "logging_dir": self.assertEqual(dict1[key], dict2[key]) diff --git a/tests/utils/test_hf_argparser.py b/tests/utils/test_hf_argparser.py index 27ecb84306..2923deeb61 100644 --- a/tests/utils/test_hf_argparser.py +++ b/tests/utils/test_hf_argparser.py @@ -154,7 +154,7 @@ class HfArgumentParserTest(unittest.TestCase): # Choices with mixed type have custom function as "type" # So we need to compare results directly for equality - if xx.get("choices", None) and yy.get("choices", None): + if xx.get("choices") and yy.get("choices"): for expected_choice in yy["choices"] + xx["choices"]: self.assertEqual(xx["type"](expected_choice), yy["type"](expected_choice)) del xx["type"], yy["type"] diff --git a/tests/utils/test_import_structure.py b/tests/utils/test_import_structure.py index d6382a5fdf..1a4588a0d3 100644 --- a/tests/utils/test_import_structure.py +++ b/tests/utils/test_import_structure.py @@ -71,7 +71,7 @@ class TestImportStructures(unittest.TestCase): } self.assertEqual(len(import_structure.keys()), len(valid_frozensets.keys())) - for _frozenset in valid_frozensets.keys(): + for _frozenset in valid_frozensets: self.assertTrue(_frozenset in import_structure) self.assertListEqual(list(import_structure[_frozenset].keys()), list(valid_frozensets[_frozenset].keys())) for module, objects in valid_frozensets[_frozenset].items(): diff --git a/utils/add_pipeline_model_mapping_to_test.py b/utils/add_pipeline_model_mapping_to_test.py index f036e44495..636f018eb5 100644 --- a/utils/add_pipeline_model_mapping_to_test.py +++ b/utils/add_pipeline_model_mapping_to_test.py @@ -37,7 +37,7 @@ from tests.test_pipeline_mixin import pipeline_test_mapping PIPELINE_TEST_MAPPING = {} -for task in pipeline_test_mapping.keys(): +for task in pipeline_test_mapping: PIPELINE_TEST_MAPPING[task] = {"pt": None, "tf": None} diff --git a/utils/check_copies.py b/utils/check_copies.py index d53d1fc9ca..e48163421a 100644 --- a/utils/check_copies.py +++ b/utils/check_copies.py @@ -731,8 +731,8 @@ def is_copy_consistent( # Below, we change some names in `theoretical_code_blocks` and `observed_code_blocks`. These mappings map the # original names to the modified names: this is used to restore the original order of the code blocks. - name_mappings_1 = {k: k for k in theoretical_code_blocks.keys()} - name_mappings_2 = {k: k for k in observed_code_blocks.keys()} + name_mappings_1 = {k: k for k in theoretical_code_blocks} + name_mappings_2 = {k: k for k in observed_code_blocks} # Update code blocks' name and content: # If `"# Ignore copy"` is found in a block of the observed code: diff --git a/utils/check_docstrings.py b/utils/check_docstrings.py index dd10a7d9b1..a9ca4f1d56 100644 --- a/utils/check_docstrings.py +++ b/utils/check_docstrings.py @@ -913,7 +913,7 @@ def match_docstring_with_signature(obj: Any) -> Optional[tuple[str, str]]: missing = set(signature.keys()) - set(old_arguments) new_param_docs.extend([arguments[name] for name in missing if len(arguments[name]) > 0]) else: - new_param_docs = [arguments[name] for name in signature.keys() if len(arguments[name]) > 0] + new_param_docs = [arguments[name] for name in signature if len(arguments[name]) > 0] new_doc_arg = "\n".join(new_param_docs) return old_doc_arg, new_doc_arg diff --git a/utils/check_dummies.py b/utils/check_dummies.py index f429e8b14a..b9757bdb02 100644 --- a/utils/check_dummies.py +++ b/utils/check_dummies.py @@ -199,8 +199,7 @@ def check_dummies(overwrite: bool = False): # Locate actual dummy modules and read their content. path = os.path.join(PATH_TO_TRANSFORMERS, "utils") dummy_file_paths = { - backend: os.path.join(path, f"dummy_{short_names.get(backend, backend)}_objects.py") - for backend in dummy_files.keys() + backend: os.path.join(path, f"dummy_{short_names.get(backend, backend)}_objects.py") for backend in dummy_files } actual_dummies = {} @@ -212,7 +211,7 @@ def check_dummies(overwrite: bool = False): actual_dummies[backend] = "" # Compare actual with what they should be. - for backend in dummy_files.keys(): + for backend in dummy_files: if dummy_files[backend] != actual_dummies[backend]: if overwrite: print( diff --git a/utils/check_inits.py b/utils/check_inits.py index b392d09a89..bc211baa9e 100644 --- a/utils/check_inits.py +++ b/utils/check_inits.py @@ -257,7 +257,7 @@ def analyze_results(import_dict_objects: dict[str, list[str]], type_hint_objects errors = [] # Find all errors. - for key in import_dict_objects.keys(): + for key in import_dict_objects: # Duplicate imports in any half. duplicate_imports = find_duplicates(import_dict_objects[key]) if duplicate_imports: diff --git a/utils/check_modular_conversion.py b/utils/check_modular_conversion.py index 15dcf418d6..dad6b1f0a2 100644 --- a/utils/check_modular_conversion.py +++ b/utils/check_modular_conversion.py @@ -71,7 +71,7 @@ def compare_files(modular_file_path, show_diff=True): # Generate the expected modeling content generated_modeling_content = convert_modular_file(modular_file_path) diff = 0 - for file_type in generated_modeling_content.keys(): + for file_type in generated_modeling_content: diff += process_file(modular_file_path, generated_modeling_content, file_type, show_diff) return diff diff --git a/utils/create_dependency_mapping.py b/utils/create_dependency_mapping.py index 7590fda98c..84fec10e93 100644 --- a/utils/create_dependency_mapping.py +++ b/utils/create_dependency_mapping.py @@ -17,7 +17,7 @@ def topological_sort(dependencies: dict) -> list[list[str]]: """ # Nodes are the name of the models to convert (we only add those to the graph) - nodes = {node.rsplit("modular_", 1)[1].replace(".py", "") for node in dependencies.keys()} + nodes = {node.rsplit("modular_", 1)[1].replace(".py", "") for node in dependencies} # This will be a graph from models to convert, to models to convert that should be converted before (as they are a dependency) graph = {} name_mapping = {} diff --git a/utils/create_dummy_models.py b/utils/create_dummy_models.py index e151b37d52..c8e2bd4557 100644 --- a/utils/create_dummy_models.py +++ b/utils/create_dummy_models.py @@ -424,7 +424,7 @@ def get_tiny_config(config_class, model_class=None, **model_tester_kwargs): # `text_model_tester` via `text_kwargs`. The same trick is also necessary for `Flava`. if "vocab_size" in model_tester_kwargs: - if "text_kwargs" in inspect.signature(model_tester_class.__init__).parameters.keys(): + if "text_kwargs" in inspect.signature(model_tester_class.__init__).parameters: vocab_size = model_tester_kwargs.pop("vocab_size") model_tester_kwargs["text_kwargs"] = {"vocab_size": vocab_size} diff --git a/utils/modular_model_converter.py b/utils/modular_model_converter.py index 3b447dc7bf..ade3589868 100644 --- a/utils/modular_model_converter.py +++ b/utils/modular_model_converter.py @@ -57,7 +57,7 @@ def get_module_source_from_name(module_name: str) -> str: def preserve_case_replace(text, patterns: dict, default_name: str): # Create a regex pattern to match all variations - regex_pattern = "|".join(re.escape(key) for key in patterns.keys()) + regex_pattern = "|".join(re.escape(key) for key in patterns) compiled_regex = re.compile(f"(? import_ref_count[name] = max(ref_count, import_ref_count[name]) # Similar imports may be redefined, and only used between their 1st and 2nd definition so if we already have # a ref count > 0 at any point, the imports is actually used - unused_imports = {name for name, count in import_ref_count.items() if count <= 0 or name in body.keys()} + unused_imports = {name for name, count in import_ref_count.items() if count <= 0 or name in body} imports_to_keep = [] # We need to keep track of which names were already imported, because some import may be duplicated from multiple sources @@ -1494,8 +1492,8 @@ class ModularFileMapper(ModuleMapper): final_name_mapping[file] = get_lowercase_name(final_name) # Check we are not missing imported files - for file in self.model_specific_modules.keys(): - if file not in final_name_mapping.keys(): + for file in self.model_specific_modules: + if file not in final_name_mapping: final_name_mapping[file] = self.model_name return final_name_mapping @@ -1614,7 +1612,7 @@ def get_class_node_and_dependencies( nodes_to_add = { dep: (relative_dependency_order[dep], modular_mapper.global_nodes[dep]) for dep in all_dependencies_to_add - if dep not in file_to_update.keys() + if dep not in file_to_update } # Add the class node itself to the nodes to add @@ -1710,7 +1708,7 @@ def convert_modular_file(modular_file): def save_modeling_file(modular_file, converted_file): - for file_type in converted_file.keys(): + for file_type in converted_file: file_name_prefix = file_type.split("*")[0] file_name_suffix = file_type.split("*")[-1] if "*" in file_type else "" new_file_name = modular_file.replace("modular_", f"{file_name_prefix}_").replace( diff --git a/utils/notification_service.py b/utils/notification_service.py index 2848c5670c..e751794539 100644 --- a/utils/notification_service.py +++ b/utils/notification_service.py @@ -1199,7 +1199,7 @@ if __name__ == "__main__": unclassified_model_failures = [] - for matrix_name in matrix_job_results.keys(): + for matrix_name in matrix_job_results: for artifact_path_dict in available_artifacts[f"{report_name_prefix}_{matrix_name}_test_reports"].paths: path = artifact_path_dict["path"] artifact_gpu = artifact_path_dict["gpu"] @@ -1323,10 +1323,10 @@ if __name__ == "__main__": "failures": {}, "job_link": {}, } - for key in additional_files.keys() + for key in additional_files } - for key in additional_results.keys(): + for key in additional_results: # If a whole suite of test fails, the artifact isn't available. if additional_files[key] not in available_artifacts: additional_results[key]["error"] = True diff --git a/utils/split_doctest_jobs.py b/utils/split_doctest_jobs.py index 7e670840b7..fd887394de 100644 --- a/utils/split_doctest_jobs.py +++ b/utils/split_doctest_jobs.py @@ -69,7 +69,7 @@ if __name__ == "__main__": raw_test_collection_map[file_dir].append(file) refined_test_collection_map = {} - for file_dir in raw_test_collection_map.keys(): + for file_dir in raw_test_collection_map: if file_dir in ["docs/source/en/model_doc", "docs/source/en/tasks"]: for file in raw_test_collection_map[file_dir]: refined_test_collection_map[file] = file