diff --git a/examples/flax/image-captioning/run_image_captioning_flax.py b/examples/flax/image-captioning/run_image_captioning_flax.py index 66bd729075..ef9c515da4 100644 --- a/examples/flax/image-captioning/run_image_captioning_flax.py +++ b/examples/flax/image-captioning/run_image_captioning_flax.py @@ -892,14 +892,12 @@ def main(): flat_params = traverse_util.flatten_dict(params) # find out all LayerNorm parameters layer_norm_candidates = ["layernorm", "layer_norm", "ln"] - layer_norm_named_params = set( - [ - layer[-2:] - for layer_norm_name in layer_norm_candidates - for layer in flat_params.keys() - if layer_norm_name in "".join(layer).lower() - ] - ) + layer_norm_named_params = { + layer[-2:] + for layer_norm_name in layer_norm_candidates + for layer in flat_params.keys() + if layer_norm_name in "".join(layer).lower() + } flat_mask = {path: (path[-1] != "bias" and path[-2:] not in layer_norm_named_params) for path in flat_params} return traverse_util.unflatten_dict(flat_mask) diff --git a/examples/flax/language-modeling/run_bart_dlm_flax.py b/examples/flax/language-modeling/run_bart_dlm_flax.py index 0a97bffd93..62e4e8a839 100644 --- a/examples/flax/language-modeling/run_bart_dlm_flax.py +++ b/examples/flax/language-modeling/run_bart_dlm_flax.py @@ -756,14 +756,12 @@ def main(): flat_params = traverse_util.flatten_dict(params) # find out all LayerNorm parameters layer_norm_candidates = ["layernorm", "layer_norm", "ln"] - layer_norm_named_params = set( - [ - layer[-2:] - for layer_norm_name in layer_norm_candidates - for layer in flat_params.keys() - if layer_norm_name in "".join(layer).lower() - ] - ) + layer_norm_named_params = { + layer[-2:] + for layer_norm_name in layer_norm_candidates + for layer in flat_params.keys() + if layer_norm_name in "".join(layer).lower() + } flat_mask = {path: (path[-1] != "bias" and path[-2:] not in layer_norm_named_params) for path in flat_params} return traverse_util.unflatten_dict(flat_mask) diff --git a/examples/flax/language-modeling/run_clm_flax.py b/examples/flax/language-modeling/run_clm_flax.py index 607c9bb1ee..952419dc96 100755 --- a/examples/flax/language-modeling/run_clm_flax.py +++ b/examples/flax/language-modeling/run_clm_flax.py @@ -648,14 +648,12 @@ def main(): flat_params = traverse_util.flatten_dict(params) # find out all LayerNorm parameters layer_norm_candidates = ["layernorm", "layer_norm", "ln"] - layer_norm_named_params = set( - [ - layer[-2:] - for layer_norm_name in layer_norm_candidates - for layer in flat_params.keys() - if layer_norm_name in "".join(layer).lower() - ] - ) + layer_norm_named_params = { + layer[-2:] + for layer_norm_name in layer_norm_candidates + for layer in flat_params.keys() + if layer_norm_name in "".join(layer).lower() + } flat_mask = {path: (path[-1] != "bias" and path[-2:] not in layer_norm_named_params) for path in flat_params} return traverse_util.unflatten_dict(flat_mask) diff --git a/examples/flax/language-modeling/run_mlm_flax.py b/examples/flax/language-modeling/run_mlm_flax.py index 6a06533b14..ae289b8470 100755 --- a/examples/flax/language-modeling/run_mlm_flax.py +++ b/examples/flax/language-modeling/run_mlm_flax.py @@ -679,14 +679,12 @@ def main(): flat_params = traverse_util.flatten_dict(params) # find out all LayerNorm parameters layer_norm_candidates = ["layernorm", "layer_norm", "ln"] - layer_norm_named_params = set( - [ - layer[-2:] - for layer_norm_name in layer_norm_candidates - for layer in flat_params.keys() - if layer_norm_name in "".join(layer).lower() - ] - ) + layer_norm_named_params = { + layer[-2:] + for layer_norm_name in layer_norm_candidates + for layer in flat_params.keys() + if layer_norm_name in "".join(layer).lower() + } flat_mask = {path: (path[-1] != "bias" and path[-2:] not in layer_norm_named_params) for path in flat_params} return traverse_util.unflatten_dict(flat_mask) diff --git a/examples/flax/language-modeling/run_t5_mlm_flax.py b/examples/flax/language-modeling/run_t5_mlm_flax.py index 814d68a88e..152760f4bf 100755 --- a/examples/flax/language-modeling/run_t5_mlm_flax.py +++ b/examples/flax/language-modeling/run_t5_mlm_flax.py @@ -791,14 +791,12 @@ def main(): flat_params = traverse_util.flatten_dict(params) # find out all LayerNorm parameters layer_norm_candidates = ["layernorm", "layer_norm", "ln"] - layer_norm_named_params = set( - [ - layer[-2:] - for layer_norm_name in layer_norm_candidates - for layer in flat_params.keys() - if layer_norm_name in "".join(layer).lower() - ] - ) + layer_norm_named_params = { + layer[-2:] + for layer_norm_name in layer_norm_candidates + for layer in flat_params.keys() + if layer_norm_name in "".join(layer).lower() + } flat_mask = {path: (path[-1] != "bias" and path[-2:] not in layer_norm_named_params) for path in flat_params} return traverse_util.unflatten_dict(flat_mask) diff --git a/examples/flax/question-answering/run_qa.py b/examples/flax/question-answering/run_qa.py index 628b9b81b2..7933c3bd3e 100644 --- a/examples/flax/question-answering/run_qa.py +++ b/examples/flax/question-answering/run_qa.py @@ -333,14 +333,12 @@ def create_train_state( flat_params = traverse_util.flatten_dict(params) # find out all LayerNorm parameters layer_norm_candidates = ["layernorm", "layer_norm", "ln"] - layer_norm_named_params = set( - [ - layer[-2:] - for layer_norm_name in layer_norm_candidates - for layer in flat_params.keys() - if layer_norm_name in "".join(layer).lower() - ] - ) + layer_norm_named_params = { + layer[-2:] + for layer_norm_name in layer_norm_candidates + for layer in flat_params.keys() + if layer_norm_name in "".join(layer).lower() + } flat_mask = {path: (path[-1] != "bias" and path[-2:] not in layer_norm_named_params) for path in flat_params} return traverse_util.unflatten_dict(flat_mask) @@ -642,7 +640,7 @@ def main(): return tokenized_examples - processed_raw_datasets = dict() + processed_raw_datasets = {} if training_args.do_train: if "train" not in raw_datasets: raise ValueError("--do_train requires a train dataset") diff --git a/examples/flax/summarization/run_summarization_flax.py b/examples/flax/summarization/run_summarization_flax.py index feda695920..67f164bc0b 100644 --- a/examples/flax/summarization/run_summarization_flax.py +++ b/examples/flax/summarization/run_summarization_flax.py @@ -742,14 +742,12 @@ def main(): flat_params = traverse_util.flatten_dict(params) # find out all LayerNorm parameters layer_norm_candidates = ["layernorm", "layer_norm", "ln"] - layer_norm_named_params = set( - [ - layer[-2:] - for layer_norm_name in layer_norm_candidates - for layer in flat_params.keys() - if layer_norm_name in "".join(layer).lower() - ] - ) + layer_norm_named_params = { + layer[-2:] + for layer_norm_name in layer_norm_candidates + for layer in flat_params.keys() + if layer_norm_name in "".join(layer).lower() + } flat_mask = {path: (path[-1] != "bias" and path[-2:] not in layer_norm_named_params) for path in flat_params} return traverse_util.unflatten_dict(flat_mask) diff --git a/examples/flax/text-classification/run_flax_glue.py b/examples/flax/text-classification/run_flax_glue.py index c47ea90d39..4fd12404d4 100755 --- a/examples/flax/text-classification/run_flax_glue.py +++ b/examples/flax/text-classification/run_flax_glue.py @@ -229,14 +229,12 @@ def create_train_state( flat_params = traverse_util.flatten_dict(params) # find out all LayerNorm parameters layer_norm_candidates = ["layernorm", "layer_norm", "ln"] - layer_norm_named_params = set( - [ - layer[-2:] - for layer_norm_name in layer_norm_candidates - for layer in flat_params.keys() - if layer_norm_name in "".join(layer).lower() - ] - ) + layer_norm_named_params = { + layer[-2:] + for layer_norm_name in layer_norm_candidates + for layer in flat_params.keys() + if layer_norm_name in "".join(layer).lower() + } flat_mask = {path: (path[-1] != "bias" and path[-2:] not in layer_norm_named_params) for path in flat_params} return traverse_util.unflatten_dict(flat_mask) @@ -449,7 +447,7 @@ def main(): ): # Some have all caps in their config, some don't. label_name_to_id = {k.lower(): v for k, v in model.config.label2id.items()} - if list(sorted(label_name_to_id.keys())) == list(sorted(label_list)): + if sorted(label_name_to_id.keys()) == sorted(label_list): logger.info( f"The configuration of the model provided the following label correspondence: {label_name_to_id}. " "Using it!" @@ -458,7 +456,7 @@ def main(): else: logger.warning( "Your model seems to have been trained with labels, but they don't match the dataset: ", - f"model labels: {list(sorted(label_name_to_id.keys()))}, dataset labels: {list(sorted(label_list))}." + f"model labels: {sorted(label_name_to_id.keys())}, dataset labels: {sorted(label_list)}." "\nIgnoring the model labels as a result.", ) elif data_args.task_name is None: diff --git a/examples/flax/token-classification/run_flax_ner.py b/examples/flax/token-classification/run_flax_ner.py index c7509433d9..d176765289 100644 --- a/examples/flax/token-classification/run_flax_ner.py +++ b/examples/flax/token-classification/run_flax_ner.py @@ -290,14 +290,12 @@ def create_train_state( flat_params = traverse_util.flatten_dict(params) # find out all LayerNorm parameters layer_norm_candidates = ["layernorm", "layer_norm", "ln"] - layer_norm_named_params = set( - [ - layer[-2:] - for layer_norm_name in layer_norm_candidates - for layer in flat_params.keys() - if layer_norm_name in "".join(layer).lower() - ] - ) + layer_norm_named_params = { + layer[-2:] + for layer_norm_name in layer_norm_candidates + for layer in flat_params.keys() + if layer_norm_name in "".join(layer).lower() + } flat_mask = {path: (path[-1] != "bias" and path[-2:] not in layer_norm_named_params) for path in flat_params} return traverse_util.unflatten_dict(flat_mask) diff --git a/examples/legacy/pytorch-lightning/run_glue.py b/examples/legacy/pytorch-lightning/run_glue.py index aa2349f280..f96c5bafcd 100644 --- a/examples/legacy/pytorch-lightning/run_glue.py +++ b/examples/legacy/pytorch-lightning/run_glue.py @@ -192,7 +192,7 @@ def main(): # Optionally, predict on dev set and write to output_dir if args.do_predict: - checkpoints = list(sorted(glob.glob(os.path.join(args.output_dir, "checkpoint-epoch=*.ckpt"), recursive=True))) + checkpoints = sorted(glob.glob(os.path.join(args.output_dir, "checkpoint-epoch=*.ckpt"), recursive=True)) model = model.load_from_checkpoint(checkpoints[-1]) return trainer.test(model) diff --git a/examples/legacy/pytorch-lightning/run_ner.py b/examples/legacy/pytorch-lightning/run_ner.py index 3bcbdfee03..473851edef 100644 --- a/examples/legacy/pytorch-lightning/run_ner.py +++ b/examples/legacy/pytorch-lightning/run_ner.py @@ -211,6 +211,6 @@ if __name__ == "__main__": # pl use this default format to create a checkpoint: # https://github.com/PyTorchLightning/pytorch-lightning/blob/master\ # /pytorch_lightning/callbacks/model_checkpoint.py#L322 - checkpoints = list(sorted(glob.glob(os.path.join(args.output_dir, "checkpoint-epoch=*.ckpt"), recursive=True))) + checkpoints = sorted(glob.glob(os.path.join(args.output_dir, "checkpoint-epoch=*.ckpt"), recursive=True)) model = model.load_from_checkpoint(checkpoints[-1]) trainer.test(model) diff --git a/examples/legacy/question-answering/run_squad.py b/examples/legacy/question-answering/run_squad.py index d966b3f02f..fc9411e95d 100644 --- a/examples/legacy/question-answering/run_squad.py +++ b/examples/legacy/question-answering/run_squad.py @@ -810,10 +810,10 @@ def main(): logger.info("Loading checkpoints saved during training for evaluation") checkpoints = [args.output_dir] if args.eval_all_checkpoints: - checkpoints = list( + checkpoints = [ os.path.dirname(c) for c in sorted(glob.glob(args.output_dir + "/**/" + WEIGHTS_NAME, recursive=True)) - ) + ] else: logger.info("Loading checkpoint %s for evaluation", args.model_name_or_path) @@ -830,7 +830,7 @@ def main(): # Evaluate result = evaluate(args, model, tokenizer, prefix=global_step) - result = dict((k + ("_{}".format(global_step) if global_step else ""), v) for k, v in result.items()) + result = {k + ("_{}".format(global_step) if global_step else ""): v for k, v in result.items()} results.update(result) logger.info("Results: {}".format(results)) diff --git a/examples/legacy/run_openai_gpt.py b/examples/legacy/run_openai_gpt.py index 1f02570f8f..03031f2057 100755 --- a/examples/legacy/run_openai_gpt.py +++ b/examples/legacy/run_openai_gpt.py @@ -189,7 +189,7 @@ def main(): return tokenizer.convert_tokens_to_ids(tokenizer.tokenize(obj)) elif isinstance(obj, int): return obj - return list(tokenize_and_encode(o) for o in obj) + return [tokenize_and_encode(o) for o in obj] logger.info("Encoding dataset...") train_dataset = load_rocstories_dataset(args.train_dataset) diff --git a/examples/legacy/run_swag.py b/examples/legacy/run_swag.py index 5cac156724..bde0501687 100755 --- a/examples/legacy/run_swag.py +++ b/examples/legacy/run_swag.py @@ -696,9 +696,9 @@ def main(): checkpoints = [args.model_name_or_path] if args.eval_all_checkpoints: - checkpoints = list( + checkpoints = [ os.path.dirname(c) for c in sorted(glob.glob(args.output_dir + "/**/" + WEIGHTS_NAME, recursive=True)) - ) + ] logger.info("Evaluate the following checkpoints: %s", checkpoints) @@ -712,7 +712,7 @@ def main(): # Evaluate result = evaluate(args, model, tokenizer, prefix=global_step) - result = dict((k + ("_{}".format(global_step) if global_step else ""), v) for k, v in result.items()) + result = {k + ("_{}".format(global_step) if global_step else ""): v for k, v in result.items()} results.update(result) logger.info("Results: {}".format(results)) diff --git a/examples/legacy/seq2seq/run_distributed_eval.py b/examples/legacy/seq2seq/run_distributed_eval.py index 655807ba17..55f3839d73 100755 --- a/examples/legacy/seq2seq/run_distributed_eval.py +++ b/examples/legacy/seq2seq/run_distributed_eval.py @@ -111,7 +111,7 @@ def eval_data_dir( if num_return_sequences > 1: preds = chunks(preds, num_return_sequences) # batch size chunks, each of size num_return_seq for i, pred in enumerate(preds): - results.append(dict(pred=pred, id=ids[i].item())) + results.append({"pred": pred, "id": ids[i].item()}) save_json(results, save_path) return results, sampler.num_replicas @@ -232,7 +232,7 @@ def combine_partial_results(partial_results) -> List: records = [] for partial_result in partial_results: records.extend(partial_result) - records = list(sorted(records, key=lambda x: x["id"])) + records = sorted(records, key=lambda x: x["id"]) preds = [x["pred"] for x in records] return preds diff --git a/examples/legacy/seq2seq/run_eval.py b/examples/legacy/seq2seq/run_eval.py index a8aa8e7ef9..35e11c86a1 100755 --- a/examples/legacy/seq2seq/run_eval.py +++ b/examples/legacy/seq2seq/run_eval.py @@ -76,7 +76,7 @@ def generate_summaries_or_translations( fout.close() runtime = int(time.time() - start_time) # seconds n_obs = len(examples) - return dict(n_obs=n_obs, runtime=runtime, seconds_per_sample=round(runtime / n_obs, 4)) + return {"n_obs": n_obs, "runtime": runtime, "seconds_per_sample": round(runtime / n_obs, 4)} def datetime_now(): diff --git a/examples/legacy/seq2seq/run_eval_search.py b/examples/legacy/seq2seq/run_eval_search.py index c72f038fc5..1ed08c2274 100755 --- a/examples/legacy/seq2seq/run_eval_search.py +++ b/examples/legacy/seq2seq/run_eval_search.py @@ -36,7 +36,7 @@ def parse_search_arg(search): groups = search.split() entries = {k: vs for k, vs in (g.split("=") for g in groups)} entry_names = list(entries.keys()) - sets = [list(f"--{k} {v}" for v in vs.split(":")) for k, vs in entries.items()] + sets = [[f"--{k} {v}" for v in vs.split(":")] for k, vs in entries.items()] matrix = [list(x) for x in itertools.product(*sets)] return matrix, entry_names diff --git a/examples/legacy/seq2seq/utils.py b/examples/legacy/seq2seq/utils.py index 2655165cf1..d7cd84dedb 100644 --- a/examples/legacy/seq2seq/utils.py +++ b/examples/legacy/seq2seq/utils.py @@ -456,7 +456,7 @@ def pickle_save(obj, path): def flatten_list(summary_ids: List[List]): - return [x for x in itertools.chain.from_iterable(summary_ids)] + return list(itertools.chain.from_iterable(summary_ids)) def save_git_info(folder_path: str) -> None: diff --git a/examples/pytorch/audio-classification/run_audio_classification.py b/examples/pytorch/audio-classification/run_audio_classification.py index 20ddec4acb..054a0fd00e 100644 --- a/examples/pytorch/audio-classification/run_audio_classification.py +++ b/examples/pytorch/audio-classification/run_audio_classification.py @@ -293,7 +293,7 @@ def main(): audio["array"], max_length=data_args.max_length_seconds, sample_rate=feature_extractor.sampling_rate ) output_batch["input_values"].append(wav) - output_batch["labels"] = [label for label in batch[data_args.label_column_name]] + output_batch["labels"] = list(batch[data_args.label_column_name]) return output_batch @@ -303,14 +303,14 @@ def main(): for audio in batch[data_args.audio_column_name]: wav = audio["array"] output_batch["input_values"].append(wav) - output_batch["labels"] = [label for label in batch[data_args.label_column_name]] + output_batch["labels"] = list(batch[data_args.label_column_name]) return output_batch # Prepare label mappings. # We'll include these in the model's config to get human readable labels in the Inference API. labels = raw_datasets["train"].features[data_args.label_column_name].names - label2id, id2label = dict(), dict() + label2id, id2label = {}, {} for i, label in enumerate(labels): label2id[label] = str(i) id2label[str(i)] = label diff --git a/examples/pytorch/benchmarking/plot_csv_file.py b/examples/pytorch/benchmarking/plot_csv_file.py index 1a0ae735d8..9a9ad9c670 100644 --- a/examples/pytorch/benchmarking/plot_csv_file.py +++ b/examples/pytorch/benchmarking/plot_csv_file.py @@ -83,7 +83,7 @@ def can_convert_to_float(string): class Plot: def __init__(self, args): self.args = args - self.result_dict = defaultdict(lambda: dict(bsz=[], seq_len=[], result={})) + self.result_dict = defaultdict(lambda: {"bsz": [], "seq_len": [], "result": {}}) with open(self.args.csv_file, newline="") as csv_file: reader = csv.DictReader(csv_file) @@ -116,8 +116,8 @@ class Plot: axis.set_major_formatter(ScalarFormatter()) for model_name_idx, model_name in enumerate(self.result_dict.keys()): - batch_sizes = sorted(list(set(self.result_dict[model_name]["bsz"]))) - sequence_lengths = sorted(list(set(self.result_dict[model_name]["seq_len"]))) + batch_sizes = sorted(set(self.result_dict[model_name]["bsz"])) + sequence_lengths = sorted(set(self.result_dict[model_name]["seq_len"])) results = self.result_dict[model_name]["result"] (x_axis_array, inner_loop_array) = ( diff --git a/examples/pytorch/contrastive-image-text/run_clip.py b/examples/pytorch/contrastive-image-text/run_clip.py index 4669a9b93d..2a6b1dab77 100644 --- a/examples/pytorch/contrastive-image-text/run_clip.py +++ b/examples/pytorch/contrastive-image-text/run_clip.py @@ -397,7 +397,7 @@ def main(): # Preprocessing the datasets. # We need to tokenize input captions and transform the images. def tokenize_captions(examples): - captions = [caption for caption in examples[caption_column]] + captions = list(examples[caption_column]) text_inputs = tokenizer(captions, max_length=data_args.max_seq_length, padding="max_length", truncation=True) examples["input_ids"] = text_inputs.input_ids examples["attention_mask"] = text_inputs.attention_mask diff --git a/examples/pytorch/image-classification/run_image_classification.py b/examples/pytorch/image-classification/run_image_classification.py index 78979e4155..114cf4dd0f 100644 --- a/examples/pytorch/image-classification/run_image_classification.py +++ b/examples/pytorch/image-classification/run_image_classification.py @@ -250,7 +250,7 @@ def main(): # Prepare label mappings. # We'll include these in the model's config to get human readable labels in the Inference API. labels = dataset["train"].features["labels"].names - label2id, id2label = dict(), dict() + label2id, id2label = {}, {} for i, label in enumerate(labels): label2id[label] = str(i) id2label[str(i)] = label diff --git a/examples/pytorch/image-pretraining/run_mae.py b/examples/pytorch/image-pretraining/run_mae.py index f3448a7753..55cde66048 100644 --- a/examples/pytorch/image-pretraining/run_mae.py +++ b/examples/pytorch/image-pretraining/run_mae.py @@ -91,7 +91,7 @@ class DataTrainingArguments: ) def __post_init__(self): - data_files = dict() + data_files = {} if self.train_dir is not None: data_files["train"] = self.train_dir if self.validation_dir is not None: diff --git a/examples/pytorch/image-pretraining/run_mim.py b/examples/pytorch/image-pretraining/run_mim.py index a906088ed5..d57f201f09 100644 --- a/examples/pytorch/image-pretraining/run_mim.py +++ b/examples/pytorch/image-pretraining/run_mim.py @@ -104,7 +104,7 @@ class DataTrainingArguments: ) def __post_init__(self): - data_files = dict() + data_files = {} if self.train_dir is not None: data_files["train"] = self.train_dir if self.validation_dir is not None: diff --git a/examples/pytorch/language-modeling/run_clm.py b/examples/pytorch/language-modeling/run_clm.py index ae01b7614e..23c4abb54b 100755 --- a/examples/pytorch/language-modeling/run_clm.py +++ b/examples/pytorch/language-modeling/run_clm.py @@ -407,7 +407,7 @@ def main(): ) else: model = AutoModelForCausalLM.from_config(config) - n_params = sum(dict((p.data_ptr(), p.numel()) for p in model.parameters()).values()) + n_params = sum({p.data_ptr(): p.numel() for p in model.parameters()}.values()) logger.info(f"Training new model from scratch - Total size={n_params/2**20:.2f}M params") # We resize the embeddings only when necessary to avoid index errors. If you are creating a model from scratch diff --git a/examples/pytorch/multiple-choice/run_swag.py b/examples/pytorch/multiple-choice/run_swag.py index a69171766a..cf1607dccf 100755 --- a/examples/pytorch/multiple-choice/run_swag.py +++ b/examples/pytorch/multiple-choice/run_swag.py @@ -457,14 +457,14 @@ def main(): trainer.log_metrics("eval", metrics) trainer.save_metrics("eval", metrics) - kwargs = dict( - finetuned_from=model_args.model_name_or_path, - tasks="multiple-choice", - dataset_tags="swag", - dataset_args="regular", - dataset="SWAG", - language="en", - ) + kwargs = { + "finetuned_from": model_args.model_name_or_path, + "tasks": "multiple-choice", + "dataset_tags": "swag", + "dataset_args": "regular", + "dataset": "SWAG", + "language": "en", + } if training_args.push_to_hub: trainer.push_to_hub(**kwargs) diff --git a/examples/pytorch/semantic-segmentation/run_semantic_segmentation.py b/examples/pytorch/semantic-segmentation/run_semantic_segmentation.py index b1583aca1f..a1fe0103a0 100644 --- a/examples/pytorch/semantic-segmentation/run_semantic_segmentation.py +++ b/examples/pytorch/semantic-segmentation/run_semantic_segmentation.py @@ -430,7 +430,7 @@ def main(): pixel_values.append(image) labels.append(target) - encoding = dict() + encoding = {} encoding["pixel_values"] = torch.stack(pixel_values) encoding["labels"] = torch.stack(labels) @@ -444,7 +444,7 @@ def main(): pixel_values.append(image) labels.append(target) - encoding = dict() + encoding = {} encoding["pixel_values"] = torch.stack(pixel_values) encoding["labels"] = torch.stack(labels) diff --git a/examples/pytorch/semantic-segmentation/run_semantic_segmentation_no_trainer.py b/examples/pytorch/semantic-segmentation/run_semantic_segmentation_no_trainer.py index 68919e0cc5..702adb0151 100644 --- a/examples/pytorch/semantic-segmentation/run_semantic_segmentation_no_trainer.py +++ b/examples/pytorch/semantic-segmentation/run_semantic_segmentation_no_trainer.py @@ -441,7 +441,7 @@ def main(): pixel_values.append(image) labels.append(target) - encoding = dict() + encoding = {} encoding["pixel_values"] = torch.stack(pixel_values) encoding["labels"] = torch.stack(labels) @@ -455,7 +455,7 @@ def main(): pixel_values.append(image) labels.append(target) - encoding = dict() + encoding = {} encoding["pixel_values"] = torch.stack(pixel_values) encoding["labels"] = torch.stack(labels) diff --git a/examples/pytorch/speech-recognition/run_speech_recognition_ctc.py b/examples/pytorch/speech-recognition/run_speech_recognition_ctc.py index c6cd82b436..f600c03f23 100755 --- a/examples/pytorch/speech-recognition/run_speech_recognition_ctc.py +++ b/examples/pytorch/speech-recognition/run_speech_recognition_ctc.py @@ -349,7 +349,7 @@ def create_vocabulary_from_data( lambda vocab_1, vocab_2: set(vocab_1["vocab"][0]) | set(vocab_2["vocab"][0]), vocabs.values() ) - vocab_dict = {v: k for k, v in enumerate(sorted(list(vocab_set)))} + vocab_dict = {v: k for k, v in enumerate(sorted(vocab_set))} # replace white space with delimiter token if word_delimiter_token is not None: diff --git a/examples/pytorch/text-classification/run_glue.py b/examples/pytorch/text-classification/run_glue.py index 1e7ab53455..fd8ba016ac 100755 --- a/examples/pytorch/text-classification/run_glue.py +++ b/examples/pytorch/text-classification/run_glue.py @@ -406,12 +406,12 @@ def main(): ): # Some have all caps in their config, some don't. label_name_to_id = {k.lower(): v for k, v in model.config.label2id.items()} - if list(sorted(label_name_to_id.keys())) == list(sorted(label_list)): + if sorted(label_name_to_id.keys()) == sorted(label_list): label_to_id = {i: int(label_name_to_id[label_list[i]]) for i in range(num_labels)} else: logger.warning( "Your model seems to have been trained with labels, but they don't match the dataset: ", - f"model labels: {list(sorted(label_name_to_id.keys()))}, dataset labels: {list(sorted(label_list))}." + f"model labels: {sorted(label_name_to_id.keys())}, dataset labels: {sorted(label_list)}." "\nIgnoring the model labels as a result.", ) elif data_args.task_name is None and not is_regression: diff --git a/examples/pytorch/text-classification/run_glue_no_trainer.py b/examples/pytorch/text-classification/run_glue_no_trainer.py index 03de2cf6b5..ee7438071f 100644 --- a/examples/pytorch/text-classification/run_glue_no_trainer.py +++ b/examples/pytorch/text-classification/run_glue_no_trainer.py @@ -339,7 +339,7 @@ def main(): ): # Some have all caps in their config, some don't. label_name_to_id = {k.lower(): v for k, v in model.config.label2id.items()} - if list(sorted(label_name_to_id.keys())) == list(sorted(label_list)): + if sorted(label_name_to_id.keys()) == sorted(label_list): logger.info( f"The configuration of the model provided the following label correspondence: {label_name_to_id}. " "Using it!" @@ -348,7 +348,7 @@ def main(): else: logger.warning( "Your model seems to have been trained with labels, but they don't match the dataset: ", - f"model labels: {list(sorted(label_name_to_id.keys()))}, dataset labels: {list(sorted(label_list))}." + f"model labels: {sorted(label_name_to_id.keys())}, dataset labels: {sorted(label_list)}." "\nIgnoring the model labels as a result.", ) elif args.task_name is None and not is_regression: diff --git a/examples/pytorch/token-classification/run_ner.py b/examples/pytorch/token-classification/run_ner.py index 065880e7e2..e575ed689e 100755 --- a/examples/pytorch/token-classification/run_ner.py +++ b/examples/pytorch/token-classification/run_ner.py @@ -386,7 +386,7 @@ def main(): # Model has labels -> use them. if model.config.label2id != PretrainedConfig(num_labels=num_labels).label2id: - if list(sorted(model.config.label2id.keys())) == list(sorted(label_list)): + if sorted(model.config.label2id.keys()) == sorted(label_list): # Reorganize `label_list` to match the ordering of the model. if labels_are_int: label_to_id = {i: int(model.config.label2id[l]) for i, l in enumerate(label_list)} @@ -397,8 +397,8 @@ def main(): else: logger.warning( "Your model seems to have been trained with labels, but they don't match the dataset: ", - f"model labels: {list(sorted(model.config.label2id.keys()))}, dataset labels:" - f" {list(sorted(label_list))}.\nIgnoring the model labels as a result.", + f"model labels: {sorted(model.config.label2id.keys())}, dataset labels:" + f" {sorted(label_list)}.\nIgnoring the model labels as a result.", ) # Set the correspondences label/ID inside the model config diff --git a/examples/pytorch/token-classification/run_ner_no_trainer.py b/examples/pytorch/token-classification/run_ner_no_trainer.py index ad63047223..0c6fa85b6b 100755 --- a/examples/pytorch/token-classification/run_ner_no_trainer.py +++ b/examples/pytorch/token-classification/run_ner_no_trainer.py @@ -425,7 +425,7 @@ def main(): # Model has labels -> use them. if model.config.label2id != PretrainedConfig(num_labels=num_labels).label2id: - if list(sorted(model.config.label2id.keys())) == list(sorted(label_list)): + if sorted(model.config.label2id.keys()) == sorted(label_list): # Reorganize `label_list` to match the ordering of the model. if labels_are_int: label_to_id = {i: int(model.config.label2id[l]) for i, l in enumerate(label_list)} @@ -436,8 +436,8 @@ def main(): else: logger.warning( "Your model seems to have been trained with labels, but they don't match the dataset: ", - f"model labels: {list(sorted(model.config.label2id.keys()))}, dataset labels:" - f" {list(sorted(label_list))}.\nIgnoring the model labels as a result.", + f"model labels: {sorted(model.config.label2id.keys())}, dataset labels:" + f" {sorted(label_list)}.\nIgnoring the model labels as a result.", ) # Set the correspondences label/ID inside the model config diff --git a/examples/research_projects/bert-loses-patience/run_glue_with_pabee.py b/examples/research_projects/bert-loses-patience/run_glue_with_pabee.py index aad680f201..8a59b46ab5 100755 --- a/examples/research_projects/bert-loses-patience/run_glue_with_pabee.py +++ b/examples/research_projects/bert-loses-patience/run_glue_with_pabee.py @@ -727,9 +727,9 @@ def main(): tokenizer = tokenizer_class.from_pretrained(args.output_dir, do_lower_case=args.do_lower_case) checkpoints = [args.output_dir] if args.eval_all_checkpoints: - checkpoints = list( + checkpoints = [ os.path.dirname(c) for c in sorted(glob.glob(args.output_dir + "/**/" + WEIGHTS_NAME, recursive=True)) - ) + ] logger.info("Evaluate the following checkpoints: %s", checkpoints) @@ -743,7 +743,7 @@ def main(): print(f"Evaluation for checkpoint {prefix}") for patience in patience_list: result = evaluate(args, model, tokenizer, prefix=prefix, patience=patience) - result = dict((k + "_{}".format(global_step), v) for k, v in result.items()) + result = {k + "_{}".format(global_step): v for k, v in result.items()} results.update(result) return results diff --git a/examples/research_projects/bertabs/modeling_bertabs.py b/examples/research_projects/bertabs/modeling_bertabs.py index 33e216f4a0..19e62804ef 100644 --- a/examples/research_projects/bertabs/modeling_bertabs.py +++ b/examples/research_projects/bertabs/modeling_bertabs.py @@ -54,7 +54,7 @@ class BertAbs(BertAbsPreTrainedModel): load_bert_pretrained_extractive = True if bert_extractive_checkpoint else False if load_bert_pretrained_extractive: self.bert.model.load_state_dict( - dict([(n[11:], p) for n, p in bert_extractive_checkpoint.items() if n.startswith("bert.model")]), + {n[11:]: p for n, p in bert_extractive_checkpoint.items() if n.startswith("bert.model")}, strict=True, ) diff --git a/examples/research_projects/bertology/run_bertology.py b/examples/research_projects/bertology/run_bertology.py index 030573d87f..4cb046066c 100644 --- a/examples/research_projects/bertology/run_bertology.py +++ b/examples/research_projects/bertology/run_bertology.py @@ -218,9 +218,9 @@ def prune_heads(args, model, eval_dataloader, head_mask): original_time = datetime.now() - before_time original_num_params = sum(p.numel() for p in model.parameters()) - heads_to_prune = dict( - (layer, (1 - head_mask[layer].long()).nonzero().squeeze().tolist()) for layer in range(len(head_mask)) - ) + heads_to_prune = { + layer: (1 - head_mask[layer].long()).nonzero().squeeze().tolist() for layer in range(len(head_mask)) + } assert sum(len(h) for h in heads_to_prune.values()) == (1 - head_mask.long()).sum().item() model.prune_heads(heads_to_prune) diff --git a/examples/research_projects/bertology/run_prune_gpt.py b/examples/research_projects/bertology/run_prune_gpt.py index 68cece6e99..fa7484a787 100644 --- a/examples/research_projects/bertology/run_prune_gpt.py +++ b/examples/research_projects/bertology/run_prune_gpt.py @@ -194,9 +194,9 @@ def prune_heads(args, model, eval_dataloader, head_mask): original_time = datetime.now() - before_time original_num_params = sum(p.numel() for p in model.parameters()) - heads_to_prune = dict( - (layer, (1 - head_mask[layer].long()).nonzero().squeeze().tolist()) for layer in range(len(head_mask)) - ) + heads_to_prune = { + layer: (1 - head_mask[layer].long()).nonzero().squeeze().tolist() for layer in range(len(head_mask)) + } for k, v in heads_to_prune.items(): if isinstance(v, int): diff --git a/examples/research_projects/codeparrot/scripts/minhash_deduplication.py b/examples/research_projects/codeparrot/scripts/minhash_deduplication.py index 195a9dc809..f198471127 100644 --- a/examples/research_projects/codeparrot/scripts/minhash_deduplication.py +++ b/examples/research_projects/codeparrot/scripts/minhash_deduplication.py @@ -29,7 +29,7 @@ def get_min_hash(tokens: List[str]) -> Optional[MinHash]: def get_tokens(code: str) -> Set[str]: """Tokenize a code snippet.""" - return set([t for t in NON_ALPHA.split(code) if len(t.strip()) > 0]) + return {t for t in NON_ALPHA.split(code) if len(t.strip()) > 0} class DuplicationIndex: @@ -243,7 +243,7 @@ def deduplicate_dataset( >>> ds_dedup, duplicate_clusters = deduplicate_dataset(ds, jaccard_threshold=0.85) """ duplicate_clusters = make_duplicate_clusters(dataset, jaccard_threshold) - duplicate_indices = set(x["base_index"] for cluster in duplicate_clusters for x in cluster) + duplicate_indices = {x["base_index"] for cluster in duplicate_clusters for x in cluster} extreme_dict = {} extremes_clusters = find_extremes(duplicate_clusters, dataset, jaccard_threshold) for extremes in extremes_clusters: diff --git a/examples/research_projects/codeparrot/scripts/preprocessing.py b/examples/research_projects/codeparrot/scripts/preprocessing.py index 07540d0b62..aecc37223f 100644 --- a/examples/research_projects/codeparrot/scripts/preprocessing.py +++ b/examples/research_projects/codeparrot/scripts/preprocessing.py @@ -114,7 +114,7 @@ def char_token_ratio(example): def preprocess(example): """Chain all preprocessing steps into one function to not fill cache.""" - results = dict() + results = {} results.update(get_hash(example)) results.update(line_stats(example)) results.update(alpha_stats(example)) diff --git a/examples/research_projects/codeparrot/scripts/pretokenizing.py b/examples/research_projects/codeparrot/scripts/pretokenizing.py index 5eb793d10d..7cac8f5119 100644 --- a/examples/research_projects/codeparrot/scripts/pretokenizing.py +++ b/examples/research_projects/codeparrot/scripts/pretokenizing.py @@ -8,7 +8,7 @@ from transformers import AutoTokenizer, HfArgumentParser def tokenize(example): - output = dict() + output = {} output["input_ids"] = tokenizer(example["content"], truncation=False)["input_ids"] output["ratio_char_token"] = len(example["content"]) / len(output["input_ids"]) return output diff --git a/examples/research_projects/deebert/run_glue_deebert.py b/examples/research_projects/deebert/run_glue_deebert.py index f86390375f..6f7cfe65d0 100644 --- a/examples/research_projects/deebert/run_glue_deebert.py +++ b/examples/research_projects/deebert/run_glue_deebert.py @@ -685,9 +685,9 @@ def main(): tokenizer = tokenizer_class.from_pretrained(args.output_dir, do_lower_case=args.do_lower_case) checkpoints = [args.output_dir] if args.eval_all_checkpoints: - checkpoints = list( + checkpoints = [ os.path.dirname(c) for c in sorted(glob.glob(args.output_dir + "/**/" + WEIGHTS_NAME, recursive=True)) - ) + ] logger.info("Evaluate the following checkpoints: %s", checkpoints) for checkpoint in checkpoints: @@ -725,7 +725,7 @@ def main(): for i in range(model.num_layers): info_str += " {:.2f}".format(100 * each_layer_results[i]) logger.info(info_str) - result = dict((k + "_{}".format(global_step), v) for k, v in result.items()) + result = {k + "_{}".format(global_step): v for k, v in result.items()} results.update(result) return results diff --git a/examples/research_projects/distillation/grouped_batch_sampler.py b/examples/research_projects/distillation/grouped_batch_sampler.py index 83addc371f..a068f7e09e 100644 --- a/examples/research_projects/distillation/grouped_batch_sampler.py +++ b/examples/research_projects/distillation/grouped_batch_sampler.py @@ -27,7 +27,7 @@ from utils import logger def _quantize(x, bins): bins = copy.deepcopy(bins) bins = sorted(bins) - quantized = list(map(lambda y: bisect.bisect_right(bins, y), x)) + quantized = [bisect.bisect_right(bins, y) for y in x] return quantized diff --git a/examples/research_projects/distillation/run_squad_w_distillation.py b/examples/research_projects/distillation/run_squad_w_distillation.py index aba91995da..4b8b8e542f 100644 --- a/examples/research_projects/distillation/run_squad_w_distillation.py +++ b/examples/research_projects/distillation/run_squad_w_distillation.py @@ -850,9 +850,9 @@ def main(): logger.info("Loading checkpoints saved during training for evaluation") checkpoints = [args.output_dir] if args.eval_all_checkpoints: - checkpoints = list( + checkpoints = [ os.path.dirname(c) for c in sorted(glob.glob(args.output_dir + "/**/" + WEIGHTS_NAME, recursive=True)) - ) + ] logger.info("Evaluate the following checkpoints: %s", checkpoints) @@ -865,7 +865,7 @@ def main(): # Evaluate result = evaluate(args, model, tokenizer, prefix=global_step) - result = dict((k + ("_{}".format(global_step) if global_step else ""), v) for k, v in result.items()) + result = {k + ("_{}".format(global_step) if global_step else ""): v for k, v in result.items()} results.update(result) logger.info("Results: {}".format(results)) diff --git a/examples/research_projects/jax-projects/big_bird/bigbird_flax.py b/examples/research_projects/jax-projects/big_bird/bigbird_flax.py index ac37cbc860..af5e11c83a 100644 --- a/examples/research_projects/jax-projects/big_bird/bigbird_flax.py +++ b/examples/research_projects/jax-projects/big_bird/bigbird_flax.py @@ -247,9 +247,12 @@ class Trainer: lr = self.scheduler_fn(state_step - 1) eval_loss = self.evaluate(state, val_dataset) - logging_dict = dict( - step=state_step.item(), eval_loss=eval_loss.item(), tr_loss=tr_loss, lr=lr.item() - ) + logging_dict = { + "step": state_step.item(), + "eval_loss": eval_loss.item(), + "tr_loss": tr_loss, + "lr": lr.item(), + } tqdm.write(str(logging_dict)) self.logger.log(logging_dict, commit=True) diff --git a/examples/research_projects/jax-projects/big_bird/evaluate.py b/examples/research_projects/jax-projects/big_bird/evaluate.py index 32ca5172a5..04e9e01ca2 100644 --- a/examples/research_projects/jax-projects/big_bird/evaluate.py +++ b/examples/research_projects/jax-projects/big_bird/evaluate.py @@ -144,9 +144,9 @@ def main(): predictions = expand_to_aliases(example["output"]) # some preprocessing to both prediction and answer - answers = set(["".join(a.split()) for a in answers]) - predictions = set(["".join(p.split()) for p in predictions]) - predictions = set([s for s in predictions if s not in ["``", "''", "`", "'"]]) + answers = {"".join(a.split()) for a in answers} + predictions = {"".join(p.split()) for p in predictions} + predictions = {s for s in predictions if s not in ["``", "''", "`", "'"]} # if there is a common element, it's a exact match example["match"] = len(list(answers & predictions)) > 0 diff --git a/examples/research_projects/jax-projects/big_bird/prepare_natural_questions.py b/examples/research_projects/jax-projects/big_bird/prepare_natural_questions.py index 22dc3e4550..6a202ba775 100644 --- a/examples/research_projects/jax-projects/big_bird/prepare_natural_questions.py +++ b/examples/research_projects/jax-projects/big_bird/prepare_natural_questions.py @@ -314,12 +314,12 @@ if __name__ == "__main__": data = data["train" if PROCESS_TRAIN == "true" else "validation"] - fn_kwargs = dict( - tokenizer=tokenizer, - doc_stride=DOC_STRIDE, - max_length=MAX_LENGTH, - assertion=False, - ) + fn_kwargs = { + "tokenizer": tokenizer, + "doc_stride": DOC_STRIDE, + "max_length": MAX_LENGTH, + "assertion": False, + } data = data.map(prepare_inputs, fn_kwargs=fn_kwargs) data = data.remove_columns(["annotations", "document", "id", "question"]) print(data) diff --git a/examples/research_projects/jax-projects/model_parallel/partitions.py b/examples/research_projects/jax-projects/model_parallel/partitions.py index e32ec97e42..86e54ad670 100644 --- a/examples/research_projects/jax-projects/model_parallel/partitions.py +++ b/examples/research_projects/jax-projects/model_parallel/partitions.py @@ -34,7 +34,7 @@ empty_dict = object() def _match(qs, ks): """Return True if regexes in qs match any window of strings in tuple ks.""" # compile regexes and force complete match - qts = tuple(map(lambda x: re.compile(x + "$"), qs)) + qts = tuple((re.compile(x + "$") for x in qs)) for i in range(len(ks) - len(qs) + 1): matches = [x.match(y) for x, y in zip(qts, ks[i:])] if matches and all(matches): diff --git a/examples/research_projects/longform-qa/eli5_utils.py b/examples/research_projects/longform-qa/eli5_utils.py index db4eae6604..d4b235fdba 100644 --- a/examples/research_projects/longform-qa/eli5_utils.py +++ b/examples/research_projects/longform-qa/eli5_utils.py @@ -78,7 +78,7 @@ def query_es_index(question, es_client, index_name="english_wiki_kilt_snippets_1 ) hits = response["hits"]["hits"] support_doc = "

" + "

".join([hit["_source"]["passage_text"] for hit in hits]) - res_list = [dict([(k, hit["_source"][k]) for k in hit["_source"] if k != "passage_text"]) for hit in hits] + res_list = [{k: hit["_source"][k] for k in hit["_source"] if k != "passage_text"} for hit in hits] for r, hit in zip(res_list, hits): r["passage_id"] = hit["_id"] r["score"] = hit["_score"] @@ -601,7 +601,7 @@ def make_qa_dense_index( fp = np.memmap(index_name, dtype=dtype, mode="w+", shape=(passages_dset.num_rows, 128)) n_batches = math.ceil(passages_dset.num_rows / batch_size) for i in range(n_batches): - passages = [p for p in passages_dset[i * batch_size : (i + 1) * batch_size]["passage_text"]] + passages = list(passages_dset[i * batch_size : (i + 1) * batch_size]["passage_text"]) reps = embed_passages_for_retrieval(passages, tokenizer, qa_embedder, max_length, device) fp[i * batch_size : (i + 1) * batch_size] = reps if i % 50 == 0: @@ -634,7 +634,7 @@ def query_qa_dense_index( D, I = wiki_index.search(q_rep, 2 * n_results) res_passages = [wiki_passages[int(i)] for i in I[0]] support_doc = "

" + "

".join([p["passage_text"] for p in res_passages]) - res_list = [dict([(k, p[k]) for k in wiki_passages.column_names]) for p in res_passages] + res_list = [{k: p[k] for k in wiki_passages.column_names} for p in res_passages] res_list = [res for res in res_list if len(res["passage_text"].split()) > min_length][:n_results] for r, sc in zip(res_list, D[0]): r["score"] = float(sc) @@ -650,7 +650,7 @@ def batch_query_qa_dense_index(questions, qa_embedder, tokenizer, wiki_passages, ] all_res_lists = [] for res_passages, dl in zip(res_passages_lst, D): - res_list = [dict([(k, p[k]) for k in wiki_passages.column_names]) for p in res_passages] + res_list = [{k: p[k] for k in wiki_passages.column_names} for p in res_passages] for r, sc in zip(res_list, dl): r["score"] = float(sc) all_res_lists += [res_list[:]] @@ -663,7 +663,7 @@ def query_qa_dense_index_nn(passage, qa_embedder, tokenizer, wiki_passages, wiki D, I = wiki_index.search(a_rep, 2 * n_results) res_passages = [wiki_passages[int(i)] for i in I[0]] support_doc = "

" + "

".join([p["passage_text"] for p in res_passages]) - res_list = [dict([(k, p[k]) for k in wiki_passages.column_names]) for p in res_passages] + res_list = [{k: p[k] for k in wiki_passages.column_names} for p in res_passages] res_list = [res for res in res_list if len(res["passage_text"].split()) > min_length][:n_results] for r, sc, i in zip(res_list, D[0], I[0]): r["passage_id"] = int(i) @@ -680,7 +680,7 @@ def batch_query_qa_dense_index_nn(passages, qa_embedder, tokenizer, wiki_passage ] all_res_lists = [] for res_passages, dl, il in zip(res_passages_lst, D, I): - res_list = [dict([(k, p[k]) for k in wiki_passages.column_names]) for p in res_passages] + res_list = [{k: p[k] for k in wiki_passages.column_names} for p in res_passages] for r, sc, i in zip(res_list, dl, il): r["passage_id"] = int(i) r["score"] = float(sc) diff --git a/examples/research_projects/lxmert/extracting_data.py b/examples/research_projects/lxmert/extracting_data.py index 9c445be336..6b1342c9b1 100644 --- a/examples/research_projects/lxmert/extracting_data.py +++ b/examples/research_projects/lxmert/extracting_data.py @@ -61,7 +61,7 @@ class Extract: assert outputfile is not None and not os.path.isfile(outputfile), f"{outputfile}" if subset_list is not None: with open(os.path.realpath(subset_list)) as f: - self.subset_list = set(map(lambda x: self._vqa_file_split()[0], tryload(f))) + self.subset_list = {self._vqa_file_split()[0] for x in tryload(f)} else: self.subset_list = None diff --git a/examples/research_projects/lxmert/modeling_frcnn.py b/examples/research_projects/lxmert/modeling_frcnn.py index 08758b1d3c..edbd224cbe 100644 --- a/examples/research_projects/lxmert/modeling_frcnn.py +++ b/examples/research_projects/lxmert/modeling_frcnn.py @@ -1095,7 +1095,7 @@ class ROIPooler(nn.Module): Returns: A tensor of shape(N*B, Channels, output_size, output_size) """ - x = [v for v in feature_maps.values()] + x = list(feature_maps.values()) num_level_assignments = len(self.level_poolers) assert len(x) == num_level_assignments and len(boxes) == x[0].size(0) diff --git a/examples/research_projects/mm-imdb/run_mmimdb.py b/examples/research_projects/mm-imdb/run_mmimdb.py index 23b2a65e5c..2cc3bc3a0c 100644 --- a/examples/research_projects/mm-imdb/run_mmimdb.py +++ b/examples/research_projects/mm-imdb/run_mmimdb.py @@ -554,9 +554,9 @@ def main(): if args.do_eval and args.local_rank in [-1, 0]: checkpoints = [args.output_dir] if args.eval_all_checkpoints: - checkpoints = list( + checkpoints = [ os.path.dirname(c) for c in sorted(glob.glob(args.output_dir + "/**/" + WEIGHTS_NAME, recursive=True)) - ) + ] logger.info("Evaluate the following checkpoints: %s", checkpoints) for checkpoint in checkpoints: @@ -566,7 +566,7 @@ def main(): model.load_state_dict(torch.load(checkpoint)) model.to(args.device) result = evaluate(args, model, tokenizer, criterion, prefix=prefix) - result = dict((k + "_{}".format(global_step), v) for k, v in result.items()) + result = {k + "_{}".format(global_step): v for k, v in result.items()} results.update(result) return results diff --git a/examples/research_projects/movement-pruning/masked_run_glue.py b/examples/research_projects/movement-pruning/masked_run_glue.py index 4ce56e524f..a28cdcc583 100644 --- a/examples/research_projects/movement-pruning/masked_run_glue.py +++ b/examples/research_projects/movement-pruning/masked_run_glue.py @@ -941,9 +941,9 @@ def main(): tokenizer = tokenizer_class.from_pretrained(args.output_dir, do_lower_case=args.do_lower_case) checkpoints = [args.output_dir] if args.eval_all_checkpoints: - checkpoints = list( + checkpoints = [ os.path.dirname(c) for c in sorted(glob.glob(args.output_dir + "/**/" + WEIGHTS_NAME, recursive=True)) - ) + ] logger.info("Evaluate the following checkpoints: %s", checkpoints) for checkpoint in checkpoints: @@ -953,7 +953,7 @@ def main(): model = model_class.from_pretrained(checkpoint) model.to(args.device) result = evaluate(args, model, tokenizer, prefix=prefix) - result = dict((k + "_{}".format(global_step), v) for k, v in result.items()) + result = {k + "_{}".format(global_step): v for k, v in result.items()} results.update(result) return results diff --git a/examples/research_projects/movement-pruning/masked_run_squad.py b/examples/research_projects/movement-pruning/masked_run_squad.py index a516bb8d58..189ed5be67 100644 --- a/examples/research_projects/movement-pruning/masked_run_squad.py +++ b/examples/research_projects/movement-pruning/masked_run_squad.py @@ -1109,10 +1109,10 @@ def main(): logger.info("Loading checkpoints saved during training for evaluation") checkpoints = [args.output_dir] if args.eval_all_checkpoints: - checkpoints = list( + checkpoints = [ os.path.dirname(c) for c in sorted(glob.glob(args.output_dir + "/**/" + WEIGHTS_NAME, recursive=True)) - ) + ] else: logger.info("Loading checkpoint %s for evaluation", args.model_name_or_path) @@ -1129,7 +1129,7 @@ def main(): # Evaluate result = evaluate(args, model, tokenizer, prefix=global_step) - result = dict((k + ("_{}".format(global_step) if global_step else ""), v) for k, v in result.items()) + result = {k + ("_{}".format(global_step) if global_step else ""): v for k, v in result.items()} results.update(result) logger.info("Results: {}".format(results)) diff --git a/examples/research_projects/onnx/summarization/bart_onnx/reduce_onnx_size.py b/examples/research_projects/onnx/summarization/bart_onnx/reduce_onnx_size.py index d327cdb284..1df20e4504 100644 --- a/examples/research_projects/onnx/summarization/bart_onnx/reduce_onnx_size.py +++ b/examples/research_projects/onnx/summarization/bart_onnx/reduce_onnx_size.py @@ -42,8 +42,8 @@ def _graph_replace_input_with(graph_proto, name, new_name): def _remove_dup_initializers_from_model(model, model_without_ext, ind_to_replace): - inits_with_data = [i for i in model.graph.initializer] - inits = [i for i in model_without_ext.graph.initializer] + inits_with_data = list(model.graph.initializer) + inits = list(model_without_ext.graph.initializer) for i, ref_i in ind_to_replace: assert inits_with_data[i].name == inits[i].name assert inits_with_data[ref_i].name == inits[ref_i].name @@ -69,7 +69,7 @@ def remove_dup_initializers(onnx_file_path): model = onnx.load(os.path.join(model_file_folder, model_file_name)) - inits = [i for i in model.graph.initializer] + inits = list(model.graph.initializer) dup_set = set() dup_map = {} diff --git a/examples/research_projects/pplm/run_pplm.py b/examples/research_projects/pplm/run_pplm.py index 54784b944c..54008d56c1 100644 --- a/examples/research_projects/pplm/run_pplm.py +++ b/examples/research_projects/pplm/run_pplm.py @@ -127,11 +127,9 @@ def perturb_past( _, _, _, curr_length, _ = past[0].shape if curr_length > window_length and window_length > 0: - ones_key_val_shape = tuple(past[0].shape[:-2]) + tuple([window_length]) + tuple(past[0].shape[-1:]) + ones_key_val_shape = tuple(past[0].shape[:-2]) + (window_length,) + tuple(past[0].shape[-1:]) - zeros_key_val_shape = ( - tuple(past[0].shape[:-2]) + tuple([curr_length - window_length]) + tuple(past[0].shape[-1:]) - ) + zeros_key_val_shape = tuple(past[0].shape[:-2]) + (curr_length - window_length,) + tuple(past[0].shape[-1:]) ones_mask = torch.ones(ones_key_val_shape) ones_mask = decay_mask * ones_mask.permute(0, 1, 2, 4, 3) diff --git a/examples/research_projects/rag-end2end-retriever/finetune_rag.py b/examples/research_projects/rag-end2end-retriever/finetune_rag.py index 8d0ba293b1..194eeb3fa3 100644 --- a/examples/research_projects/rag-end2end-retriever/finetune_rag.py +++ b/examples/research_projects/rag-end2end-retriever/finetune_rag.py @@ -164,11 +164,11 @@ class GenerativeQAModule(BaseTransformer): self.step_count = 0 self.metrics = defaultdict(list) - self.dataset_kwargs: dict = dict( - data_dir=self.hparams.data_dir, - max_source_length=self.hparams.max_source_length, - prefix=prefix or "", - ) + self.dataset_kwargs: dict = { + "data_dir": self.hparams.data_dir, + "max_source_length": self.hparams.max_source_length, + "prefix": prefix or "", + } n_observations_per_split = { "train": self.hparams.n_train, "val": self.hparams.n_val, diff --git a/examples/research_projects/rag-end2end-retriever/utils_rag.py b/examples/research_projects/rag-end2end-retriever/utils_rag.py index 7bf5d7e35e..ec98c1d782 100644 --- a/examples/research_projects/rag-end2end-retriever/utils_rag.py +++ b/examples/research_projects/rag-end2end-retriever/utils_rag.py @@ -137,7 +137,7 @@ logger = getLogger(__name__) def flatten_list(summary_ids: List[List]): - return [x for x in itertools.chain.from_iterable(summary_ids)] + return list(itertools.chain.from_iterable(summary_ids)) def save_git_info(folder_path: str) -> None: diff --git a/examples/research_projects/rag/finetune_rag.py b/examples/research_projects/rag/finetune_rag.py index f5cef614e2..2e058850ec 100644 --- a/examples/research_projects/rag/finetune_rag.py +++ b/examples/research_projects/rag/finetune_rag.py @@ -162,11 +162,11 @@ class GenerativeQAModule(BaseTransformer): self.step_count = 0 self.metrics = defaultdict(list) - self.dataset_kwargs: dict = dict( - data_dir=self.hparams.data_dir, - max_source_length=self.hparams.max_source_length, - prefix=prefix or "", - ) + self.dataset_kwargs: dict = { + "data_dir": self.hparams.data_dir, + "max_source_length": self.hparams.max_source_length, + "prefix": prefix or "", + } n_observations_per_split = { "train": self.hparams.n_train, "val": self.hparams.n_val, diff --git a/examples/research_projects/rag/utils_rag.py b/examples/research_projects/rag/utils_rag.py index 7bf5d7e35e..ec98c1d782 100644 --- a/examples/research_projects/rag/utils_rag.py +++ b/examples/research_projects/rag/utils_rag.py @@ -137,7 +137,7 @@ logger = getLogger(__name__) def flatten_list(summary_ids: List[List]): - return [x for x in itertools.chain.from_iterable(summary_ids)] + return list(itertools.chain.from_iterable(summary_ids)) def save_git_info(folder_path: str) -> None: diff --git a/examples/research_projects/robust-speech-event/run_speech_recognition_ctc_bnb.py b/examples/research_projects/robust-speech-event/run_speech_recognition_ctc_bnb.py index aaacc79ceb..abbe9a9982 100755 --- a/examples/research_projects/robust-speech-event/run_speech_recognition_ctc_bnb.py +++ b/examples/research_projects/robust-speech-event/run_speech_recognition_ctc_bnb.py @@ -344,7 +344,7 @@ def create_vocabulary_from_data( lambda vocab_1, vocab_2: set(vocab_1["vocab"][0]) | set(vocab_2["vocab"][0]), vocabs.values() ) - vocab_dict = {v: k for k, v in enumerate(sorted(list(vocab_set)))} + vocab_dict = {v: k for k, v in enumerate(sorted(vocab_set))} # replace white space with delimiter token if word_delimiter_token is not None: diff --git a/examples/research_projects/seq2seq-distillation/_test_seq2seq_examples.py b/examples/research_projects/seq2seq-distillation/_test_seq2seq_examples.py index b1c84ad9b8..454951ed38 100644 --- a/examples/research_projects/seq2seq-distillation/_test_seq2seq_examples.py +++ b/examples/research_projects/seq2seq-distillation/_test_seq2seq_examples.py @@ -145,18 +145,18 @@ class TestSummarizationDistiller(TestCasePlus): assert not failures, f"The following models could not be loaded through AutoConfig: {failures}" def test_distill_no_teacher(self): - updates = dict(student_encoder_layers=2, student_decoder_layers=1, no_teacher=True) + updates = {"student_encoder_layers": 2, "student_decoder_layers": 1, "no_teacher": True} self._test_distiller_cli(updates) def test_distill_checkpointing_with_teacher(self): - updates = dict( - student_encoder_layers=2, - student_decoder_layers=1, - max_epochs=4, - val_check_interval=0.25, - alpha_hid=2.0, - model_name_or_path="IGNORE_THIS_IT_DOESNT_GET_USED", - ) + updates = { + "student_encoder_layers": 2, + "student_decoder_layers": 1, + "max_epochs": 4, + "val_check_interval": 0.25, + "alpha_hid": 2.0, + "model_name_or_path": "IGNORE_THIS_IT_DOESNT_GET_USED", + } model = self._test_distiller_cli(updates, check_contents=False) ckpts = list(Path(model.output_dir).glob("*.ckpt")) @@ -193,19 +193,19 @@ class TestSummarizationDistiller(TestCasePlus): self.assertEqual(nll_loss, model_computed_loss) def test_distill_mbart(self): - updates = dict( - student_encoder_layers=2, - student_decoder_layers=1, - num_train_epochs=4, - val_check_interval=0.25, - alpha_hid=2.0, - task="translation", - model_name_or_path="IGNORE_THIS_IT_DOESNT_GET_USED", - tokenizer_name=MBART_TINY, - teacher=MBART_TINY, - src_lang="en_XX", - tgt_lang="ro_RO", - ) + updates = { + "student_encoder_layers": 2, + "student_decoder_layers": 1, + "num_train_epochs": 4, + "val_check_interval": 0.25, + "alpha_hid": 2.0, + "task": "translation", + "model_name_or_path": "IGNORE_THIS_IT_DOESNT_GET_USED", + "tokenizer_name": MBART_TINY, + "teacher": MBART_TINY, + "src_lang": "en_XX", + "tgt_lang": "ro_RO", + } model = self._test_distiller_cli(updates, check_contents=False) assert model.model.config.model_type == "mbart" @@ -217,39 +217,39 @@ class TestSummarizationDistiller(TestCasePlus): self.assertEqual(len(transformer_ckpts), 2) def test_distill_t5(self): - updates = dict( - student_encoder_layers=1, - student_decoder_layers=1, - alpha_hid=2.0, - teacher=T5_TINY, - model_name_or_path=T5_TINY, - tokenizer_name=T5_TINY, - ) + updates = { + "student_encoder_layers": 1, + "student_decoder_layers": 1, + "alpha_hid": 2.0, + "teacher": T5_TINY, + "model_name_or_path": T5_TINY, + "tokenizer_name": T5_TINY, + } self._test_distiller_cli(updates) def test_distill_different_base_models(self): - updates = dict( - teacher=T5_TINY, - student=T5_TINIER, - model_name_or_path=T5_TINIER, - tokenizer_name=T5_TINIER, - ) + updates = { + "teacher": T5_TINY, + "student": T5_TINIER, + "model_name_or_path": T5_TINIER, + "tokenizer_name": T5_TINIER, + } self._test_distiller_cli(updates) def _test_distiller_cli(self, updates, check_contents=True): - default_updates = dict( - label_smoothing=0.0, - early_stopping_patience=-1, - train_batch_size=1, - eval_batch_size=2, - max_epochs=2, - alpha_mlm=0.2, - alpha_ce=0.8, - do_predict=True, - model_name_or_path="sshleifer/tinier_bart", - teacher=CHEAP_ARGS["model_name_or_path"], - val_check_interval=0.5, - ) + default_updates = { + "label_smoothing": 0.0, + "early_stopping_patience": -1, + "train_batch_size": 1, + "eval_batch_size": 2, + "max_epochs": 2, + "alpha_mlm": 0.2, + "alpha_ce": 0.8, + "do_predict": True, + "model_name_or_path": "sshleifer/tinier_bart", + "teacher": CHEAP_ARGS["model_name_or_path"], + "val_check_interval": 0.5, + } default_updates.update(updates) args_d: dict = CHEAP_ARGS.copy() tmp_dir = make_test_data_dir(tmp_dir=self.get_auto_remove_tmp_dir()) diff --git a/examples/research_projects/seq2seq-distillation/_test_seq2seq_examples_multi_gpu.py b/examples/research_projects/seq2seq-distillation/_test_seq2seq_examples_multi_gpu.py index bb06ec8e65..9eeb3b30d3 100644 --- a/examples/research_projects/seq2seq-distillation/_test_seq2seq_examples_multi_gpu.py +++ b/examples/research_projects/seq2seq-distillation/_test_seq2seq_examples_multi_gpu.py @@ -98,29 +98,29 @@ class TestSummarizationDistillerMultiGPU(TestCasePlus): @require_torch_multi_gpu def test_multi_gpu(self): - updates = dict( - no_teacher=True, - freeze_encoder=True, - gpus=2, - overwrite_output_dir=True, - sortish_sampler=True, - ) + updates = { + "no_teacher": True, + "freeze_encoder": True, + "gpus": 2, + "overwrite_output_dir": True, + "sortish_sampler": True, + } self._test_distiller_cli_fork(updates, check_contents=False) def _test_distiller_cli_fork(self, updates, check_contents=True): - default_updates = dict( - label_smoothing=0.0, - early_stopping_patience=-1, - train_batch_size=1, - eval_batch_size=2, - max_epochs=2, - alpha_mlm=0.2, - alpha_ce=0.8, - do_predict=True, - model_name_or_path="sshleifer/tinier_bart", - teacher=CHEAP_ARGS["model_name_or_path"], - val_check_interval=0.5, - ) + default_updates = { + "label_smoothing": 0.0, + "early_stopping_patience": -1, + "train_batch_size": 1, + "eval_batch_size": 2, + "max_epochs": 2, + "alpha_mlm": 0.2, + "alpha_ce": 0.8, + "do_predict": True, + "model_name_or_path": "sshleifer/tinier_bart", + "teacher": CHEAP_ARGS["model_name_or_path"], + "val_check_interval": 0.5, + } default_updates.update(updates) args_d: dict = CHEAP_ARGS.copy() tmp_dir = make_test_data_dir(tmp_dir=self.get_auto_remove_tmp_dir()) diff --git a/examples/research_projects/seq2seq-distillation/finetune.py b/examples/research_projects/seq2seq-distillation/finetune.py index 77f02bef13..a13f9b533d 100755 --- a/examples/research_projects/seq2seq-distillation/finetune.py +++ b/examples/research_projects/seq2seq-distillation/finetune.py @@ -74,11 +74,11 @@ class SummarizationModule(BaseTransformer): self.model_type = self.config.model_type self.vocab_size = self.config.tgt_vocab_size if self.model_type == "fsmt" else self.config.vocab_size - self.dataset_kwargs: dict = dict( - data_dir=self.hparams.data_dir, - max_source_length=self.hparams.max_source_length, - prefix=self.model.config.prefix or "", - ) + self.dataset_kwargs: dict = { + "data_dir": self.hparams.data_dir, + "max_source_length": self.hparams.max_source_length, + "prefix": self.model.config.prefix or "", + } n_observations_per_split = { "train": self.hparams.n_train, "val": self.hparams.n_val, @@ -433,7 +433,7 @@ def main(args, model=None) -> SummarizationModule: return model model.hparams.test_checkpoint = "" - checkpoints = list(sorted(glob.glob(os.path.join(args.output_dir, "*.ckpt"), recursive=True))) + checkpoints = sorted(glob.glob(os.path.join(args.output_dir, "*.ckpt"), recursive=True)) if checkpoints: model.hparams.test_checkpoint = checkpoints[-1] trainer.resume_from_checkpoint = checkpoints[-1] diff --git a/examples/research_projects/seq2seq-distillation/make_student.py b/examples/research_projects/seq2seq-distillation/make_student.py index c1efc1b497..83e014bf48 100644 --- a/examples/research_projects/seq2seq-distillation/make_student.py +++ b/examples/research_projects/seq2seq-distillation/make_student.py @@ -171,11 +171,11 @@ def create_student_by_copying_alternating_layers( logger.info( f"Copied encoder layers {e_layers_to_copy} and decoder layers {d_layers_to_copy}. Saving them to {save_path}" ) - student.config.init_metadata = dict( - teacher_type=teacher.config.model_type, - copied_encoder_layers=e_layers_to_copy, - copied_decoder_layers=d_layers_to_copy, - ) + student.config.init_metadata = { + "teacher_type": teacher.config.model_type, + "copied_encoder_layers": e_layers_to_copy, + "copied_decoder_layers": d_layers_to_copy, + } student.save_pretrained(save_path) # Save information about copying for easier reproducibility diff --git a/examples/research_projects/seq2seq-distillation/run_eval.py b/examples/research_projects/seq2seq-distillation/run_eval.py index 3f685884e8..98c9786d2c 100755 --- a/examples/research_projects/seq2seq-distillation/run_eval.py +++ b/examples/research_projects/seq2seq-distillation/run_eval.py @@ -63,7 +63,7 @@ def generate_summaries_or_translations( fout.close() runtime = int(time.time() - start_time) # seconds n_obs = len(examples) - return dict(n_obs=n_obs, runtime=runtime, seconds_per_sample=round(runtime / n_obs, 4)) + return {"n_obs": n_obs, "runtime": runtime, "seconds_per_sample": round(runtime / n_obs, 4)} def datetime_now(): diff --git a/examples/research_projects/seq2seq-distillation/utils.py b/examples/research_projects/seq2seq-distillation/utils.py index f1a8cef850..de666e0c24 100644 --- a/examples/research_projects/seq2seq-distillation/utils.py +++ b/examples/research_projects/seq2seq-distillation/utils.py @@ -437,7 +437,7 @@ def pickle_save(obj, path): def flatten_list(summary_ids: List[List]): - return [x for x in itertools.chain.from_iterable(summary_ids)] + return list(itertools.chain.from_iterable(summary_ids)) def save_git_info(folder_path: str) -> None: diff --git a/examples/research_projects/tapex/wikisql_utils.py b/examples/research_projects/tapex/wikisql_utils.py index 3028e81ad4..110b14e02f 100644 --- a/examples/research_projects/tapex/wikisql_utils.py +++ b/examples/research_projects/tapex/wikisql_utils.py @@ -30,7 +30,7 @@ EMPTY_ANSWER_AGG = "none" def _split_thousands(delimiter, value): split = value.split(delimiter) - return len(split) > 1 and any(map(lambda x: len(x) == 3, split)) + return len(split) > 1 and any((len(x) == 3 for x in split)) def convert_to_float(value): @@ -123,7 +123,7 @@ _TOKENIZER = re.compile(r"\w+|[^\w\s]+", re.UNICODE | re.MULTILINE | re.DOTALL) def _normalize_for_match(x): - return [t for t in _TOKENIZER.findall(x.lower())] + return list(_TOKENIZER.findall(x.lower())) def _compare(operator, src, tgt): diff --git a/examples/research_projects/visual_bert/extracting_data.py b/examples/research_projects/visual_bert/extracting_data.py index 9c445be336..6b1342c9b1 100644 --- a/examples/research_projects/visual_bert/extracting_data.py +++ b/examples/research_projects/visual_bert/extracting_data.py @@ -61,7 +61,7 @@ class Extract: assert outputfile is not None and not os.path.isfile(outputfile), f"{outputfile}" if subset_list is not None: with open(os.path.realpath(subset_list)) as f: - self.subset_list = set(map(lambda x: self._vqa_file_split()[0], tryload(f))) + self.subset_list = {self._vqa_file_split()[0] for x in tryload(f)} else: self.subset_list = None diff --git a/examples/research_projects/visual_bert/modeling_frcnn.py b/examples/research_projects/visual_bert/modeling_frcnn.py index 08758b1d3c..edbd224cbe 100644 --- a/examples/research_projects/visual_bert/modeling_frcnn.py +++ b/examples/research_projects/visual_bert/modeling_frcnn.py @@ -1095,7 +1095,7 @@ class ROIPooler(nn.Module): Returns: A tensor of shape(N*B, Channels, output_size, output_size) """ - x = [v for v in feature_maps.values()] + x = list(feature_maps.values()) num_level_assignments = len(self.level_poolers) assert len(x) == num_level_assignments and len(boxes) == x[0].size(0) diff --git a/examples/research_projects/vqgan-clip/VQGAN_CLIP.py b/examples/research_projects/vqgan-clip/VQGAN_CLIP.py index b5a23c15b2..1bfbc4cd5c 100644 --- a/examples/research_projects/vqgan-clip/VQGAN_CLIP.py +++ b/examples/research_projects/vqgan-clip/VQGAN_CLIP.py @@ -99,7 +99,7 @@ class VQGAN_CLIP(nn.Module): output_path = "./animation.gif" if input_path is None: input_path = self.save_path - paths = list(sorted(glob(input_path + "/*"))) + paths = sorted(glob(input_path + "/*")) if not len(paths): raise ValueError( "No images found in save path, aborting (did you pass save_intermediate=True to the generate" @@ -178,7 +178,7 @@ class VQGAN_CLIP(nn.Module): wandb.init(reinit=True, project="face-editor") wandb.config.update({"Positive Prompts": positive_prompts}) wandb.config.update({"Negative Prompts": negative_prompts}) - wandb.config.update(dict(lr=self.lr, iterations=self.iterations)) + wandb.config.update({"lr": self.lr, "iterations": self.iterations}) if image_path: image = Image.open(image_path) image = image.resize((256, 256)) diff --git a/examples/research_projects/vqgan-clip/loaders.py b/examples/research_projects/vqgan-clip/loaders.py index e8650f7212..88513bcb69 100644 --- a/examples/research_projects/vqgan-clip/loaders.py +++ b/examples/research_projects/vqgan-clip/loaders.py @@ -47,7 +47,7 @@ def get_obj_from_str(string, reload=False): def instantiate_from_config(config): if "target" not in config: raise KeyError("Expected key `target` to instantiate.") - return get_obj_from_str(config["target"])(**config.get("params", dict())) + return get_obj_from_str(config["target"])(**config.get("params", {})) def load_model_from_config(config, sd, gpu=True, eval_mode=True): diff --git a/examples/research_projects/wav2vec2/test_wav2vec2_deepspeed.py b/examples/research_projects/wav2vec2/test_wav2vec2_deepspeed.py index 8f181409d6..0f3e239df6 100644 --- a/examples/research_projects/wav2vec2/test_wav2vec2_deepspeed.py +++ b/examples/research_projects/wav2vec2/test_wav2vec2_deepspeed.py @@ -51,7 +51,7 @@ from transformers.trainer_utils import set_seed # noqa set_seed(42) -models = dict(base="patrickvonplaten/wav2vec2_tiny_random", robust="patrickvonplaten/wav2vec2_tiny_random_robust") +models = {"base": "patrickvonplaten/wav2vec2_tiny_random", "robust": "patrickvonplaten/wav2vec2_tiny_random_robust"} ZERO2 = "zero2" ZERO3 = "zero3" diff --git a/examples/research_projects/xtreme-s/run_xtreme_s.py b/examples/research_projects/xtreme-s/run_xtreme_s.py index 38ed3376ec..6c5b4bde89 100644 --- a/examples/research_projects/xtreme-s/run_xtreme_s.py +++ b/examples/research_projects/xtreme-s/run_xtreme_s.py @@ -400,7 +400,7 @@ def create_vocabulary_from_data( | (set(vocabs["predict"]["vocab"][0]) if "predict" in vocabs else set()) ) - vocab_dict = {v: k for k, v in enumerate(sorted(list(vocab_set)))} + vocab_dict = {v: k for k, v in enumerate(sorted(vocab_set))} # replace white space with delimiter token if word_delimiter_token is not None: diff --git a/examples/tensorflow/benchmarking/plot_csv_file.py b/examples/tensorflow/benchmarking/plot_csv_file.py index 1a0ae735d8..9a9ad9c670 100644 --- a/examples/tensorflow/benchmarking/plot_csv_file.py +++ b/examples/tensorflow/benchmarking/plot_csv_file.py @@ -83,7 +83,7 @@ def can_convert_to_float(string): class Plot: def __init__(self, args): self.args = args - self.result_dict = defaultdict(lambda: dict(bsz=[], seq_len=[], result={})) + self.result_dict = defaultdict(lambda: {"bsz": [], "seq_len": [], "result": {}}) with open(self.args.csv_file, newline="") as csv_file: reader = csv.DictReader(csv_file) @@ -116,8 +116,8 @@ class Plot: axis.set_major_formatter(ScalarFormatter()) for model_name_idx, model_name in enumerate(self.result_dict.keys()): - batch_sizes = sorted(list(set(self.result_dict[model_name]["bsz"]))) - sequence_lengths = sorted(list(set(self.result_dict[model_name]["seq_len"]))) + batch_sizes = sorted(set(self.result_dict[model_name]["bsz"])) + sequence_lengths = sorted(set(self.result_dict[model_name]["seq_len"])) results = self.result_dict[model_name]["result"] (x_axis_array, inner_loop_array) = ( diff --git a/examples/tensorflow/image-classification/run_image_classification.py b/examples/tensorflow/image-classification/run_image_classification.py index d9fcc8daaf..b115906064 100644 --- a/examples/tensorflow/image-classification/run_image_classification.py +++ b/examples/tensorflow/image-classification/run_image_classification.py @@ -300,7 +300,7 @@ def main(): # Prepare label mappings. # We'll include these in the model's config to get human readable labels in the Inference API. labels = dataset["train"].features["labels"].names - label2id, id2label = dict(), dict() + label2id, id2label = {}, {} for i, label in enumerate(labels): label2id[label] = str(i) id2label[str(i)] = label diff --git a/examples/tensorflow/language-modeling/run_clm.py b/examples/tensorflow/language-modeling/run_clm.py index 51087123b5..861929afb5 100755 --- a/examples/tensorflow/language-modeling/run_clm.py +++ b/examples/tensorflow/language-modeling/run_clm.py @@ -600,7 +600,7 @@ def main(): if training_args.output_dir is not None: output_eval_file = os.path.join(training_args.output_dir, "all_results.json") - results_dict = dict() + results_dict = {} results_dict["train_loss"] = train_loss results_dict["train_perplexity"] = train_perplexity results_dict["eval_loss"] = validation_loss diff --git a/examples/tensorflow/language-modeling/run_mlm.py b/examples/tensorflow/language-modeling/run_mlm.py index f7812b611b..5db7130df5 100755 --- a/examples/tensorflow/language-modeling/run_mlm.py +++ b/examples/tensorflow/language-modeling/run_mlm.py @@ -623,7 +623,7 @@ def main(): if training_args.output_dir is not None: output_eval_file = os.path.join(training_args.output_dir, "all_results.json") - results_dict = dict() + results_dict = {} results_dict["train_loss"] = train_loss results_dict["train_perplexity"] = train_perplexity results_dict["eval_loss"] = validation_loss diff --git a/examples/tensorflow/question-answering/run_qa.py b/examples/tensorflow/question-answering/run_qa.py index 1c3acd34ae..d6a816525e 100755 --- a/examples/tensorflow/question-answering/run_qa.py +++ b/examples/tensorflow/question-answering/run_qa.py @@ -464,7 +464,7 @@ def main(): return tokenized_examples - processed_datasets = dict() + processed_datasets = {} if training_args.do_train: if "train" not in datasets: raise ValueError("--do_train requires a train dataset") diff --git a/examples/tensorflow/text-classification/run_glue.py b/examples/tensorflow/text-classification/run_glue.py index bf03901011..428565bb24 100644 --- a/examples/tensorflow/text-classification/run_glue.py +++ b/examples/tensorflow/text-classification/run_glue.py @@ -310,12 +310,12 @@ def main(): if config.label2id != PretrainedConfig(num_labels=num_labels).label2id and not is_regression: # Some have all caps in their config, some don't. label_name_to_id = {k.lower(): v for k, v in config.label2id.items()} - if list(sorted(label_name_to_id.keys())) == list(sorted(label_list)): + if sorted(label_name_to_id.keys()) == sorted(label_list): label_to_id = {i: int(label_name_to_id[label_list[i]]) for i in range(num_labels)} else: logger.warning( "Your model seems to have been trained with labels, but they don't match the dataset: ", - f"model labels: {list(sorted(label_name_to_id.keys()))}, dataset labels: {list(sorted(label_list))}." + f"model labels: {sorted(label_name_to_id.keys())}, dataset labels: {sorted(label_list)}." "\nIgnoring the model labels as a result.", ) label_to_id = {label: i for i, label in enumerate(label_list)} @@ -383,7 +383,7 @@ def main(): dataset_options.experimental_distribute.auto_shard_policy = tf.data.experimental.AutoShardPolicy.OFF num_replicas = training_args.strategy.num_replicas_in_sync - tf_data = dict() + tf_data = {} max_samples = { "train": data_args.max_train_samples, "validation": data_args.max_eval_samples, diff --git a/examples/tensorflow/text-classification/run_text_classification.py b/examples/tensorflow/text-classification/run_text_classification.py index 0cf1972e93..f46d11c61c 100644 --- a/examples/tensorflow/text-classification/run_text_classification.py +++ b/examples/tensorflow/text-classification/run_text_classification.py @@ -343,13 +343,13 @@ def main(): if "train" in datasets: if not is_regression and config.label2id != PretrainedConfig(num_labels=num_labels).label2id: label_name_to_id = config.label2id - if list(sorted(label_name_to_id.keys())) == list(sorted(label_list)): + if sorted(label_name_to_id.keys()) == sorted(label_list): label_to_id = label_name_to_id # Use the model's labels else: logger.warning( "Your model seems to have been trained with labels, but they don't match the dataset: ", - f"model labels: {list(sorted(label_name_to_id.keys()))}, dataset labels:" - f" {list(sorted(label_list))}.\nIgnoring the model labels as a result.", + f"model labels: {sorted(label_name_to_id.keys())}, dataset labels:" + f" {sorted(label_list)}.\nIgnoring the model labels as a result.", ) label_to_id = {v: i for i, v in enumerate(label_list)} elif not is_regression: @@ -411,7 +411,7 @@ def main(): dataset_options.experimental_distribute.auto_shard_policy = tf.data.experimental.AutoShardPolicy.OFF num_replicas = training_args.strategy.num_replicas_in_sync - tf_data = dict() + tf_data = {} max_samples = { "train": data_args.max_train_samples, "validation": data_args.max_val_samples, diff --git a/pyproject.toml b/pyproject.toml index 26fa9e0bb0..1a488dbba9 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -5,7 +5,7 @@ target-version = ['py37'] [tool.ruff] # Never enforce `E501` (line length violations). ignore = ["E501", "E741", "W605"] -select = ["E", "F", "I", "W"] +select = ["C", "E", "F", "I", "W"] line-length = 119 # Ignore import violations in all `__init__.py` files. diff --git a/src/transformers/benchmark/benchmark_utils.py b/src/transformers/benchmark/benchmark_utils.py index a6c6353c19..bde10f6712 100644 --- a/src/transformers/benchmark/benchmark_utils.py +++ b/src/transformers/benchmark/benchmark_utils.py @@ -557,9 +557,9 @@ def stop_memory_tracing( cumulative_memory_dict[frame][2] += cpu_gpu_mem_inc cumulative_memory = sorted( - list(cumulative_memory_dict.items()), key=lambda x: x[1][2], reverse=True + cumulative_memory_dict.items(), key=lambda x: x[1][2], reverse=True ) # order by the total CPU + GPU memory increase - cumulative_memory = list( + cumulative_memory = [ MemoryState( frame=frame, cpu=Memory(cpu_mem_inc), @@ -567,7 +567,7 @@ def stop_memory_tracing( cpu_gpu=Memory(cpu_gpu_mem_inc), ) for frame, (cpu_mem_inc, gpu_mem_inc, cpu_gpu_mem_inc) in cumulative_memory - ) + ] memory_curr_trace = sorted(memory_curr_trace, key=lambda x: x.cpu_gpu.bytes, reverse=True) diff --git a/src/transformers/configuration_utils.py b/src/transformers/configuration_utils.py index e3b4148b39..37268ea34b 100755 --- a/src/transformers/configuration_utils.py +++ b/src/transformers/configuration_utils.py @@ -324,7 +324,7 @@ class PretrainedConfig(PushToHubMixin): f"You passed along `num_labels={num_labels}` with an incompatible id to label map: " f"{self.id2label}. The number of labels wil be overwritten to {self.num_labels}." ) - self.id2label = dict((int(key), value) for key, value in self.id2label.items()) + self.id2label = {int(key): value for key, value in self.id2label.items()} # Keys are always strings in JSON so convert ids to int here. else: self.num_labels = kwargs.pop("num_labels", 2) @@ -696,7 +696,7 @@ class PretrainedConfig(PushToHubMixin): config = cls(**config_dict) if hasattr(config, "pruned_heads"): - config.pruned_heads = dict((int(key), value) for key, value in config.pruned_heads.items()) + config.pruned_heads = {int(key): value for key, value in config.pruned_heads.items()} # Update config with kwargs if needed if "num_labels" in kwargs and "id2label" in kwargs: diff --git a/src/transformers/deepspeed.py b/src/transformers/deepspeed.py index 5a76cdf8e1..9dcd7be7f4 100644 --- a/src/transformers/deepspeed.py +++ b/src/transformers/deepspeed.py @@ -367,13 +367,13 @@ def deepspeed_init(trainer, num_training_steps, resume_from_checkpoint=None, inf # keep for quick debug: # from pprint import pprint; pprint(config) - kwargs = dict( - model=model, - model_parameters=model_parameters, - config_params=config, - optimizer=optimizer, - lr_scheduler=lr_scheduler, - ) + kwargs = { + "model": model, + "model_parameters": model_parameters, + "config_params": config, + "optimizer": optimizer, + "lr_scheduler": lr_scheduler, + } deepspeed_engine, optimizer, _, lr_scheduler = deepspeed.initialize(**kwargs) diff --git a/src/transformers/feature_extraction_sequence_utils.py b/src/transformers/feature_extraction_sequence_utils.py index 831d30e390..2121261be0 100644 --- a/src/transformers/feature_extraction_sequence_utils.py +++ b/src/transformers/feature_extraction_sequence_utils.py @@ -188,7 +188,7 @@ class SequenceFeatureExtractor(FeatureExtractionMixin): truncated_inputs = [] for i in range(batch_size): - inputs = dict((k, v[i]) for k, v in processed_features.items()) + inputs = {k: v[i] for k, v in processed_features.items()} # truncation inputs_slice = self._truncate( inputs, diff --git a/src/transformers/generation/beam_constraints.py b/src/transformers/generation/beam_constraints.py index baf7e3b71e..2563ac23cd 100644 --- a/src/transformers/generation/beam_constraints.py +++ b/src/transformers/generation/beam_constraints.py @@ -208,12 +208,12 @@ class DisjunctiveTrie: """ self.max_height = max([len(one) for one in nested_token_ids]) - root = dict() + root = {} for token_ids in nested_token_ids: level = root for tidx, token_id in enumerate(token_ids): if token_id not in level: - level[token_id] = dict() + level[token_id] = {} level = level[token_id] diff --git a/src/transformers/generation/logits_process.py b/src/transformers/generation/logits_process.py index 0bd6095f44..ba777f1e8e 100644 --- a/src/transformers/generation/logits_process.py +++ b/src/transformers/generation/logits_process.py @@ -951,7 +951,7 @@ class WhisperTimeStampLogitsProcessor(LogitsProcessor): # timestamps have to appear in pairs, except directly before eos_token; mask logits accordingly for k in range(input_ids.shape[0]): - seq = [t for t in input_ids[k, self.begin_index :].tolist()] + seq = list(input_ids[k, self.begin_index :].tolist()) last_was_timestamp = len(seq) >= 1 and seq[-1] >= self.timestamp_begin penultimate_was_timestamp = len(seq) < 2 or seq[-2] >= self.timestamp_begin diff --git a/src/transformers/image_utils.py b/src/transformers/image_utils.py index b8db1115af..08ec05fa09 100644 --- a/src/transformers/image_utils.py +++ b/src/transformers/image_utils.py @@ -115,7 +115,7 @@ def make_list_of_images(images, expected_ndims: int = 3) -> List[ImageInput]: if is_valid_image(images): if images.ndim == expected_ndims + 1: # Batch of images - images = [image for image in images] + images = list(images) elif images.ndim == expected_ndims: # Single image images = [images] diff --git a/src/transformers/integrations.py b/src/transformers/integrations.py index 38e23ea5b0..a2effeac63 100644 --- a/src/transformers/integrations.py +++ b/src/transformers/integrations.py @@ -365,7 +365,7 @@ def run_hp_search_sigopt(trainer, n_trials: int, direction: str, **kwargs) -> Be name="huggingface-tune", type="offline", parameters=trainer.hp_space(None), - metrics=[dict(name="objective", objective=direction, strategy="optimize")], + metrics=[{"name": "objective", "objective": direction, "strategy": "optimize"}], parallel_bandwidth=1, budget=n_trials, ) @@ -402,7 +402,7 @@ def run_hp_search_sigopt(trainer, n_trials: int, direction: str, **kwargs) -> Be experiment = conn.experiments().create( name="huggingface-tune", parameters=trainer.hp_space(None), - metrics=[dict(name="objective", objective=direction, strategy="optimize")], + metrics=[{"name": "objective", "objective": direction, "strategy": "optimize"}], parallel_bandwidth=1, observation_budget=n_trials, project="huggingface", @@ -425,7 +425,7 @@ def run_hp_search_sigopt(trainer, n_trials: int, direction: str, **kwargs) -> Be metrics = trainer.evaluate() trainer.objective = trainer.compute_objective(metrics) - values = [dict(name="objective", value=trainer.objective)] + values = [{"name": "objective", "value": trainer.objective}] obs = conn.experiments(experiment.id).observations().create(suggestion=suggestion.id, values=values) logger.info(f"[suggestion_id, observation_id]: [{suggestion.id}, {obs.id}]") experiment = conn.experiments(experiment.id).fetch() diff --git a/src/transformers/keras_callbacks.py b/src/transformers/keras_callbacks.py index 4fd2da18a6..c553b0c1e3 100644 --- a/src/transformers/keras_callbacks.py +++ b/src/transformers/keras_callbacks.py @@ -162,7 +162,7 @@ class KerasMetricCallback(Callback): def _postprocess_predictions_or_labels(self, inputs): if isinstance(inputs[0], dict): - outputs = dict() + outputs = {} for key in inputs[0].keys(): outputs[key] = self._concatenate_batches([batch[key] for batch in inputs]) # If it's a dict with only one key, just return the array diff --git a/src/transformers/modelcard.py b/src/transformers/modelcard.py index 4c93b810ec..ac954272cd 100644 --- a/src/transformers/modelcard.py +++ b/src/transformers/modelcard.py @@ -677,7 +677,7 @@ class TrainingSummary: _, eval_lines, eval_results = parse_keras_history(keras_history) else: eval_lines = [] - eval_results = dict() + eval_results = {} hyperparameters = extract_hyperparameters_from_keras(model) return cls( @@ -706,7 +706,7 @@ def parse_keras_history(logs): # This looks like a `History` object if not hasattr(logs, "epoch"): # This history looks empty, return empty results - return None, [], dict() + return None, [], {} logs.history["epoch"] = logs.epoch logs = logs.history else: @@ -716,7 +716,7 @@ def parse_keras_history(logs): lines = [] for i in range(len(logs["epoch"])): epoch_dict = {log_key: log_value_list[i] for log_key, log_value_list in logs.items()} - values = dict() + values = {} for k, v in epoch_dict.items(): if k.startswith("val_"): k = "validation_" + k[4:] @@ -797,7 +797,7 @@ def parse_log_history(log_history): def extract_hyperparameters_from_keras(model): import tensorflow as tf - hyperparameters = dict() + hyperparameters = {} if hasattr(model, "optimizer") and model.optimizer is not None: hyperparameters["optimizer"] = model.optimizer.get_config() else: diff --git a/src/transformers/modeling_flax_pytorch_utils.py b/src/transformers/modeling_flax_pytorch_utils.py index e013e74eef..c78b1b44cd 100644 --- a/src/transformers/modeling_flax_pytorch_utils.py +++ b/src/transformers/modeling_flax_pytorch_utils.py @@ -76,7 +76,7 @@ def rename_key_and_reshape_tensor( def is_key_or_prefix_key_in_dict(key: Tuple[str]) -> bool: """Checks if `key` of `(prefix,) + key` is in random_flax_state_dict""" - return len(set(random_flax_state_dict) & set([key, (model_prefix,) + key])) > 0 + return len(set(random_flax_state_dict) & {key, (model_prefix,) + key}) > 0 # layer norm renamed_pt_tuple_key = pt_tuple_key[:-1] + ("scale",) @@ -122,10 +122,10 @@ def convert_pytorch_state_dict_to_flax(pt_state_dict, flax_model): flax_state_dict = {} load_model_with_head_into_base_model = (model_prefix not in flax_model.params) and ( - model_prefix in set([k.split(".")[0] for k in pt_state_dict.keys()]) + model_prefix in {k.split(".")[0] for k in pt_state_dict.keys()} ) load_base_model_into_model_with_head = (model_prefix in flax_model.params) and ( - model_prefix not in set([k.split(".")[0] for k in pt_state_dict.keys()]) + model_prefix not in {k.split(".")[0] for k in pt_state_dict.keys()} ) # Need to change some parameters name to match Flax names @@ -179,10 +179,10 @@ def convert_pytorch_sharded_state_dict_to_flax(shard_filenames, flax_model): random_flax_state_dict = flatten_dict(flax_model.params) load_model_with_head_into_base_model = (model_prefix not in flax_model.params) and ( - model_prefix in set([k.split(".")[0] for k in pt_state_dict.keys()]) + model_prefix in {k.split(".")[0] for k in pt_state_dict.keys()} ) load_base_model_into_model_with_head = (model_prefix in flax_model.params) and ( - model_prefix not in set([k.split(".")[0] for k in pt_state_dict.keys()]) + model_prefix not in {k.split(".")[0] for k in pt_state_dict.keys()} ) # Need to change some parameters name to match Flax names for pt_key, pt_tensor in pt_state_dict.items(): @@ -267,10 +267,10 @@ def load_flax_weights_in_pytorch_model(pt_model, flax_state): pt_model_dict = pt_model.state_dict() load_model_with_head_into_base_model = (pt_model.base_model_prefix in flax_state) and ( - pt_model.base_model_prefix not in set([k.split(".")[0] for k in pt_model_dict.keys()]) + pt_model.base_model_prefix not in {k.split(".")[0] for k in pt_model_dict.keys()} ) load_base_model_into_model_with_head = (pt_model.base_model_prefix not in flax_state) and ( - pt_model.base_model_prefix in set([k.split(".")[0] for k in pt_model_dict.keys()]) + pt_model.base_model_prefix in {k.split(".")[0] for k in pt_model_dict.keys()} ) # keep track of unexpected & missing keys diff --git a/src/transformers/modeling_flax_utils.py b/src/transformers/modeling_flax_utils.py index a635c7b62b..466f324ce8 100644 --- a/src/transformers/modeling_flax_utils.py +++ b/src/transformers/modeling_flax_utils.py @@ -440,7 +440,7 @@ class FlaxPreTrainedModel(PushToHubMixin, FlaxGenerationMixin): """ # Load the index - state_sharded_dict = dict() + state_sharded_dict = {} for shard_file in shard_files: # load using msgpack utils @@ -708,19 +708,19 @@ class FlaxPreTrainedModel(PushToHubMixin, FlaxGenerationMixin): filename = WEIGHTS_NAME if from_pt else FLAX_WEIGHTS_NAME try: # Load from URL or cache if already cached - cached_file_kwargs = dict( - cache_dir=cache_dir, - force_download=force_download, - proxies=proxies, - resume_download=resume_download, - local_files_only=local_files_only, - use_auth_token=use_auth_token, - user_agent=user_agent, - revision=revision, - subfolder=subfolder, - _raise_exceptions_for_missing_entries=False, - _commit_hash=commit_hash, - ) + cached_file_kwargs = { + "cache_dir": cache_dir, + "force_download": force_download, + "proxies": proxies, + "resume_download": resume_download, + "local_files_only": local_files_only, + "use_auth_token": use_auth_token, + "user_agent": user_agent, + "revision": revision, + "subfolder": subfolder, + "_raise_exceptions_for_missing_entries": False, + "_commit_hash": commit_hash, + } resolved_archive_file = cached_file(pretrained_model_name_or_path, filename, **cached_file_kwargs) # Since we set _raise_exceptions_for_missing_entries=False, we don't get an expection but a None diff --git a/src/transformers/modeling_tf_pytorch_utils.py b/src/transformers/modeling_tf_pytorch_utils.py index 9db0f582e2..5465da7427 100644 --- a/src/transformers/modeling_tf_pytorch_utils.py +++ b/src/transformers/modeling_tf_pytorch_utils.py @@ -258,7 +258,7 @@ def load_pytorch_state_dict_in_tf2_model( symbolic_weights = tf_model.trainable_weights + tf_model.non_trainable_weights tf_loaded_numel = 0 weight_value_tuples = [] - all_pytorch_weights = set(list(pt_state_dict.keys())) + all_pytorch_weights = set(pt_state_dict.keys()) missing_keys = [] for symbolic_weight in symbolic_weights: sw_name = symbolic_weight.name @@ -425,7 +425,7 @@ def load_tf2_state_dict_in_pytorch_model(pt_model, tf_state_dict, allow_missing_ ) tf_weights_map[pt_name] = (tf_weight, transpose) - all_tf_weights = set(list(tf_weights_map.keys())) + all_tf_weights = set(tf_weights_map.keys()) loaded_pt_weights_data_ptr = {} missing_keys_pt = [] for pt_weight_name, pt_weight in current_pt_params_dict.items(): diff --git a/src/transformers/modeling_tf_utils.py b/src/transformers/modeling_tf_utils.py index 1a313ec959..c469c13ff0 100644 --- a/src/transformers/modeling_tf_utils.py +++ b/src/transformers/modeling_tf_utils.py @@ -584,7 +584,7 @@ def input_processing(func, config, **kwargs): if "kwargs" in output: del output["kwargs"] - cast_output = dict() + cast_output = {} for key, val in output.items(): if isinstance(val, tf.Tensor) and val.dtype == tf.int64: cast_output[key] = tf.cast(val, tf.int32) @@ -737,7 +737,7 @@ def load_tf_sharded_weights(model, shard_files, ignore_mismatched_sizes=False, s # Since TF adds the name of the class to its weights, and uses the index and not the name of the layer to load # the weight, we have to get rid of the first prefix of the name of the layer. model_keys = set() - model_layer_map = dict() + model_layer_map = {} for i, k in enumerate(model.weights): if "model." in k.name or len(k.name.split("/")) == 1: layer_name = k.name @@ -901,10 +901,10 @@ def load_tf_weights_from_h5(model, resolved_archive_file, ignore_mismatched_size ) # Find the missing layers from the high level list of layers - missing_layers = list(set([layer.name for layer in model.layers]) - saved_h5_model_layers_name) + missing_layers = list({layer.name for layer in model.layers} - saved_h5_model_layers_name) # Find the unexpected layers from the high level list of layers - unexpected_layers = list(saved_h5_model_layers_name - set([layer.name for layer in model.layers])) + unexpected_layers = list(saved_h5_model_layers_name - {layer.name for layer in model.layers}) saved_weight_names_set = set() symbolic_weights_names = set() weight_value_tuples = [] @@ -1349,7 +1349,7 @@ class TFPreTrainedModel(tf.keras.Model, TFModelUtilsMixin, TFGenerationMixin, Pu else: collate_fn = DataCollatorWithPadding(tokenizer=tokenizer, return_tensors="np") if collate_fn_args is None: - collate_fn_args = dict() + collate_fn_args = {} if not isinstance(dataset, datasets.Dataset): raise TypeError("Dataset argument should be a datasets.Dataset!") @@ -1471,7 +1471,7 @@ class TFPreTrainedModel(tf.keras.Model, TFModelUtilsMixin, TFGenerationMixin, Pu elif "mc_labels" in arg_names: return {"labels": "logits", "mc_labels": "mc_logits"} else: - return dict() + return {} def train_step(self, data): """ @@ -2613,19 +2613,19 @@ class TFPreTrainedModel(tf.keras.Model, TFModelUtilsMixin, TFGenerationMixin, Pu try: # Load from URL or cache if already cached - cached_file_kwargs = dict( - cache_dir=cache_dir, - force_download=force_download, - proxies=proxies, - resume_download=resume_download, - local_files_only=local_files_only, - use_auth_token=use_auth_token, - user_agent=user_agent, - revision=revision, - subfolder=subfolder, - _raise_exceptions_for_missing_entries=False, - _commit_hash=commit_hash, - ) + cached_file_kwargs = { + "cache_dir": cache_dir, + "force_download": force_download, + "proxies": proxies, + "resume_download": resume_download, + "local_files_only": local_files_only, + "use_auth_token": use_auth_token, + "user_agent": user_agent, + "revision": revision, + "subfolder": subfolder, + "_raise_exceptions_for_missing_entries": False, + "_commit_hash": commit_hash, + } resolved_archive_file = cached_file(pretrained_model_name_or_path, filename, **cached_file_kwargs) # Since we set _raise_exceptions_for_missing_entries=False, we don't get an exception but a None diff --git a/src/transformers/modeling_utils.py b/src/transformers/modeling_utils.py index bc12cbc668..73e6cf00ef 100644 --- a/src/transformers/modeling_utils.py +++ b/src/transformers/modeling_utils.py @@ -1271,7 +1271,7 @@ class PreTrainedModel(nn.Module, ModuleUtilsMixin, GenerationMixin, PushToHubMix len(encoder_modules) > 0 ), f"Encoder module {encoder_pointer} does not match decoder module {decoder_pointer}" - all_encoder_weights = set([module_name + "/" + sub_name for sub_name in encoder_modules.keys()]) + all_encoder_weights = {module_name + "/" + sub_name for sub_name in encoder_modules.keys()} encoder_layer_pos = 0 for name, module in decoder_modules.items(): if name.isdigit(): @@ -2304,19 +2304,19 @@ class PreTrainedModel(nn.Module, ModuleUtilsMixin, GenerationMixin, PushToHubMix try: # Load from URL or cache if already cached - cached_file_kwargs = dict( - cache_dir=cache_dir, - force_download=force_download, - proxies=proxies, - resume_download=resume_download, - local_files_only=local_files_only, - use_auth_token=use_auth_token, - user_agent=user_agent, - revision=revision, - subfolder=subfolder, - _raise_exceptions_for_missing_entries=False, - _commit_hash=commit_hash, - ) + cached_file_kwargs = { + "cache_dir": cache_dir, + "force_download": force_download, + "proxies": proxies, + "resume_download": resume_download, + "local_files_only": local_files_only, + "use_auth_token": use_auth_token, + "user_agent": user_agent, + "revision": revision, + "subfolder": subfolder, + "_raise_exceptions_for_missing_entries": False, + "_commit_hash": commit_hash, + } resolved_archive_file = cached_file(pretrained_model_name_or_path, filename, **cached_file_kwargs) # Since we set _raise_exceptions_for_missing_entries=False, we don't get an exception but a None @@ -2474,7 +2474,7 @@ class PreTrainedModel(nn.Module, ModuleUtilsMixin, GenerationMixin, PushToHubMix if is_sharded: loaded_state_dict_keys = sharded_metadata["all_checkpoint_keys"] else: - loaded_state_dict_keys = [k for k in state_dict.keys()] + loaded_state_dict_keys = list(state_dict.keys()) if low_cpu_mem_usage or use_keep_in_fp32_modules: state_dict = None @@ -3046,12 +3046,12 @@ class PreTrainedModel(nn.Module, ModuleUtilsMixin, GenerationMixin, PushToHubMix return model, missing_keys, unexpected_keys, mismatched_keys, offload_index, error_msgs def retrieve_modules_from_names(self, names, add_prefix=False, remove_prefix=False): - module_keys = set([".".join(key.split(".")[:-1]) for key in names]) + module_keys = {".".join(key.split(".")[:-1]) for key in names} # torch.nn.ParameterList is a special case where two parameter keywords # are appended to the module name, *e.g.* bert.special_embeddings.0 module_keys = module_keys.union( - set([".".join(key.split(".")[:-2]) for key in names if len(key) > 0 and key[-1].isdigit()]) + {".".join(key.split(".")[:-2]) for key in names if len(key) > 0 and key[-1].isdigit()} ) retrieved_modules = [] diff --git a/src/transformers/models/beit/modeling_flax_beit.py b/src/transformers/models/beit/modeling_flax_beit.py index 02fb2e5e33..328f759901 100644 --- a/src/transformers/models/beit/modeling_flax_beit.py +++ b/src/transformers/models/beit/modeling_flax_beit.py @@ -555,7 +555,7 @@ class FlaxBeitEncoder(nn.Module): ) # stochastic depth decay rule - drop_path_rates = [x for x in np.linspace(0, self.config.drop_path_rate, self.config.num_hidden_layers)] + drop_path_rates = list(np.linspace(0, self.config.drop_path_rate, self.config.num_hidden_layers)) self.layer = FlaxBeitLayerCollection( self.config, window_size=self.window_size, diff --git a/src/transformers/models/bertweet/tokenization_bertweet.py b/src/transformers/models/bertweet/tokenization_bertweet.py index 837fea1367..129806ebd3 100644 --- a/src/transformers/models/bertweet/tokenization_bertweet.py +++ b/src/transformers/models/bertweet/tokenization_bertweet.py @@ -318,7 +318,7 @@ class BertweetTokenizer(PreTrainedTokenizer): split_tokens = [] words = re.findall(r"\S+\n?", text) for token in words: - split_tokens.extend([t for t in self.bpe(token).split(" ")]) + split_tokens.extend(list(self.bpe(token).split(" "))) return split_tokens def normalizeTweet(self, tweet): @@ -726,7 +726,7 @@ class TweetTokenizer: words = WORD_RE.findall(safe_text) # Possibly alter the case, but avoid changing emoticons like :D into :d: if not self.preserve_case: - words = list(map((lambda x: x if EMOTICON_RE.search(x) else x.lower()), words)) + words = [x if EMOTICON_RE.search(x) else x.lower() for x in words] return words diff --git a/src/transformers/models/big_bird/tokenization_big_bird_fast.py b/src/transformers/models/big_bird/tokenization_big_bird_fast.py index 11c3386794..c41c257d53 100644 --- a/src/transformers/models/big_bird/tokenization_big_bird_fast.py +++ b/src/transformers/models/big_bird/tokenization_big_bird_fast.py @@ -202,7 +202,7 @@ class BigBirdTokenizerFast(PreTrainedTokenizerFast): "You should not supply a second sequence if the provided sequence of " "ids is already formatted with special tokens for the model." ) - return list(map(lambda x: 1 if x in [self.sep_token_id, self.cls_token_id] else 0, token_ids_0)) + return [1 if x in [self.sep_token_id, self.cls_token_id] else 0 for x in token_ids_0] if token_ids_1 is None: return [1] + ([0] * len(token_ids_0)) + [1] diff --git a/src/transformers/models/biogpt/tokenization_biogpt.py b/src/transformers/models/biogpt/tokenization_biogpt.py index 55f337f2ec..d050fa699c 100644 --- a/src/transformers/models/biogpt/tokenization_biogpt.py +++ b/src/transformers/models/biogpt/tokenization_biogpt.py @@ -132,8 +132,8 @@ class BioGptTokenizer(PreTrainedTokenizer): self.lang = "en" self.sm = sacremoses # cache of sm.MosesTokenizer instance - self.cache_moses_tokenizer = dict() - self.cache_moses_detokenizer = dict() + self.cache_moses_tokenizer = {} + self.cache_moses_detokenizer = {} """ Initialisation""" with open(vocab_file, encoding="utf-8") as vocab_handle: @@ -221,7 +221,7 @@ class BioGptTokenizer(PreTrainedTokenizer): split_tokens = [] for token in text: if token: - split_tokens.extend([t for t in self.bpe(token).split(" ")]) + split_tokens.extend(list(self.bpe(token).split(" "))) return split_tokens diff --git a/src/transformers/models/blenderbot_small/tokenization_blenderbot_small.py b/src/transformers/models/blenderbot_small/tokenization_blenderbot_small.py index a0b45bff1d..e26cdfbd98 100644 --- a/src/transformers/models/blenderbot_small/tokenization_blenderbot_small.py +++ b/src/transformers/models/blenderbot_small/tokenization_blenderbot_small.py @@ -191,7 +191,7 @@ class BlenderbotSmallTokenizer(PreTrainedTokenizer): words = re.findall(r"\S+\n?", text) for token in words: - split_tokens.extend([t for t in self.bpe(token).split(" ")]) + split_tokens.extend(list(self.bpe(token).split(" "))) return split_tokens def _convert_token_to_id(self, token: str) -> int: diff --git a/src/transformers/models/bloom/convert_bloom_original_checkpoint_to_pytorch.py b/src/transformers/models/bloom/convert_bloom_original_checkpoint_to_pytorch.py index c8a069784d..3942de2358 100644 --- a/src/transformers/models/bloom/convert_bloom_original_checkpoint_to_pytorch.py +++ b/src/transformers/models/bloom/convert_bloom_original_checkpoint_to_pytorch.py @@ -89,7 +89,7 @@ def convert_bloom_checkpoint_to_pytorch( if shard_model: file_names = os.listdir(bloom_checkpoint_path) - file_names = list(sorted(filter(lambda s: s.startswith("layer") and "model_00" in s, file_names))) + file_names = sorted(filter(lambda s: s.startswith("layer") and "model_00" in s, file_names)) index_dict = {"weight_map": {}, "metadata": {}} total_size = 0 @@ -157,7 +157,7 @@ def convert_bloom_checkpoint_to_pytorch( model = BloomModel(config) file_names = os.listdir(bloom_checkpoint_path) - file_names = list(sorted(filter(lambda s: s.startswith("layer") and "model_00" in s, file_names))) + file_names = sorted(filter(lambda s: s.startswith("layer") and "model_00" in s, file_names)) missing_keys = None for i, file in enumerate(file_names): diff --git a/src/transformers/models/codegen/modeling_codegen.py b/src/transformers/models/codegen/modeling_codegen.py index fb7716a00e..b564dcdb68 100644 --- a/src/transformers/models/codegen/modeling_codegen.py +++ b/src/transformers/models/codegen/modeling_codegen.py @@ -85,7 +85,7 @@ def duplicate_interleave(m): # Copied from transformers.models.gptj.modeling_gptj.apply_rotary_pos_emb def apply_rotary_pos_emb(x, sincos, offset=0): - sin, cos = map(lambda t: duplicate_interleave(t)[None, offset : x.shape[1] + offset, None, :], sincos) + sin, cos = (duplicate_interleave(t)[None, offset : x.shape[1] + offset, None, :] for t in sincos) # einsum notation for lambda t: repeat(t[offset:x.shape[1]+offset,:], "n d -> () n () (d j)", j=2) return (x * cos) + (rotate_every_two(x) * sin) diff --git a/src/transformers/models/conditional_detr/image_processing_conditional_detr.py b/src/transformers/models/conditional_detr/image_processing_conditional_detr.py index d4e2f9dd5f..0d7e9aa0da 100644 --- a/src/transformers/models/conditional_detr/image_processing_conditional_detr.py +++ b/src/transformers/models/conditional_detr/image_processing_conditional_detr.py @@ -604,7 +604,7 @@ def binary_mask_to_rle(mask): pixels = np.concatenate([[0], pixels, [0]]) runs = np.where(pixels[1:] != pixels[:-1])[0] + 1 runs[1::2] -= runs[::2] - return [x for x in runs] + return list(runs) # Copied from transformers.models.detr.image_processing_detr.convert_segmentation_to_rle diff --git a/src/transformers/models/convnext/modeling_convnext.py b/src/transformers/models/convnext/modeling_convnext.py index 5e60ddfe6d..3ba8062b77 100755 --- a/src/transformers/models/convnext/modeling_convnext.py +++ b/src/transformers/models/convnext/modeling_convnext.py @@ -495,7 +495,7 @@ class ConvNextBackbone(ConvNextPreTrainedModel, BackboneMixin): self.out_feature_channels = out_feature_channels # Add layer norms to hidden states of out_features - hidden_states_norms = dict() + hidden_states_norms = {} for stage, num_channels in zip(self.out_features, self.channels): hidden_states_norms[stage] = ConvNextLayerNorm(num_channels, data_format="channels_first") self.hidden_states_norms = nn.ModuleDict(hidden_states_norms) diff --git a/src/transformers/models/ctrl/tokenization_ctrl.py b/src/transformers/models/ctrl/tokenization_ctrl.py index f8524bdf1f..7a81bf8572 100644 --- a/src/transformers/models/ctrl/tokenization_ctrl.py +++ b/src/transformers/models/ctrl/tokenization_ctrl.py @@ -208,7 +208,7 @@ class CTRLTokenizer(PreTrainedTokenizer): words = re.findall(r"\S+\n?", text) for token in words: - split_tokens.extend([t for t in self.bpe(token).split(" ")]) + split_tokens.extend(list(self.bpe(token).split(" "))) return split_tokens def _convert_token_to_id(self, token): diff --git a/src/transformers/models/data2vec/modeling_tf_data2vec_vision.py b/src/transformers/models/data2vec/modeling_tf_data2vec_vision.py index c837670b1a..8bf8a88550 100644 --- a/src/transformers/models/data2vec/modeling_tf_data2vec_vision.py +++ b/src/transformers/models/data2vec/modeling_tf_data2vec_vision.py @@ -596,7 +596,7 @@ class TFData2VecVisionEncoder(tf.keras.layers.Layer): self.relative_position_bias = None # stochastic depth decay rule - dpr = [x for x in tf.linspace(0.0, config.drop_path_rate, config.num_hidden_layers)] + dpr = list(tf.linspace(0.0, config.drop_path_rate, config.num_hidden_layers)) self.layer = [ TFData2VecVisionLayer( config, diff --git a/src/transformers/models/deformable_detr/image_processing_deformable_detr.py b/src/transformers/models/deformable_detr/image_processing_deformable_detr.py index 3601a2aad1..5b6d9839e9 100644 --- a/src/transformers/models/deformable_detr/image_processing_deformable_detr.py +++ b/src/transformers/models/deformable_detr/image_processing_deformable_detr.py @@ -602,7 +602,7 @@ def binary_mask_to_rle(mask): pixels = np.concatenate([[0], pixels, [0]]) runs = np.where(pixels[1:] != pixels[:-1])[0] + 1 runs[1::2] -= runs[::2] - return [x for x in runs] + return list(runs) # Copied from transformers.models.detr.image_processing_detr.convert_segmentation_to_rle diff --git a/src/transformers/models/detr/image_processing_detr.py b/src/transformers/models/detr/image_processing_detr.py index 433853efef..75132b9a2f 100644 --- a/src/transformers/models/detr/image_processing_detr.py +++ b/src/transformers/models/detr/image_processing_detr.py @@ -590,7 +590,7 @@ def binary_mask_to_rle(mask): pixels = np.concatenate([[0], pixels, [0]]) runs = np.where(pixels[1:] != pixels[:-1])[0] + 1 runs[1::2] -= runs[::2] - return [x for x in runs] + return list(runs) # TODO - (Amy) make compatible with other frameworks diff --git a/src/transformers/models/dinat/modeling_dinat.py b/src/transformers/models/dinat/modeling_dinat.py index ef19005834..95191d52b5 100644 --- a/src/transformers/models/dinat/modeling_dinat.py +++ b/src/transformers/models/dinat/modeling_dinat.py @@ -899,7 +899,7 @@ class DinatBackbone(DinatPreTrainedModel, BackboneMixin): self.out_feature_channels[stage] = num_features[i] # Add layer norms to hidden states of out_features - hidden_states_norms = dict() + hidden_states_norms = {} for stage, num_channels in zip(self.out_features, self.channels): hidden_states_norms[stage] = nn.LayerNorm(num_channels) self.hidden_states_norms = nn.ModuleDict(hidden_states_norms) diff --git a/src/transformers/models/donut/processing_donut.py b/src/transformers/models/donut/processing_donut.py index 87f2dd34f9..5693fe110d 100644 --- a/src/transformers/models/donut/processing_donut.py +++ b/src/transformers/models/donut/processing_donut.py @@ -130,7 +130,7 @@ class DonutProcessor(ProcessorMixin): if added_vocab is None: added_vocab = self.tokenizer.get_added_vocab() - output = dict() + output = {} while tokens: start_token = re.search(r"", tokens, re.IGNORECASE) diff --git a/src/transformers/models/ernie_m/tokenization_ernie_m.py b/src/transformers/models/ernie_m/tokenization_ernie_m.py index e56451dd20..1acc113dca 100644 --- a/src/transformers/models/ernie_m/tokenization_ernie_m.py +++ b/src/transformers/models/ernie_m/tokenization_ernie_m.py @@ -133,8 +133,8 @@ class ErnieMTokenizer(PreTrainedTokenizer): if vocab_file is not None: self.vocab = self.load_vocab(filepath=vocab_file) else: - self.vocab = dict((self.sp_model.id_to_piece(id), id) for id in range(self.sp_model.get_piece_size())) - self.reverse_vocab = dict((v, k) for k, v in self.vocab.items()) + self.vocab = {self.sp_model.id_to_piece(id): id for id in range(self.sp_model.get_piece_size())} + self.reverse_vocab = {v: k for k, v in self.vocab.items()} def get_offset_mapping(self, text): if text is None: @@ -325,7 +325,7 @@ class ErnieMTokenizer(PreTrainedTokenizer): "You should not supply a second sequence if the provided sequence of " "ids is already formatted with special tokens for the model." ) - return list(map(lambda x: 1 if x in [self.sep_token_id, self.cls_token_id] else 0, token_ids_0)) + return [1 if x in [self.sep_token_id, self.cls_token_id] else 0 for x in token_ids_0] if token_ids_1 is not None: return [1] + ([0] * len(token_ids_0)) + [1, 1] + ([0] * len(token_ids_1)) + [1] diff --git a/src/transformers/models/esm/modeling_esmfold.py b/src/transformers/models/esm/modeling_esmfold.py index d37891df35..05c165f586 100644 --- a/src/transformers/models/esm/modeling_esmfold.py +++ b/src/transformers/models/esm/modeling_esmfold.py @@ -201,9 +201,9 @@ def collate_dense_tensors(samples: List[torch.Tensor], pad_v: float = 0) -> torc """ if len(samples) == 0: return torch.Tensor() - if len(set(x.dim() for x in samples)) != 1: + if len({x.dim() for x in samples}) != 1: raise RuntimeError(f"Samples has varying dimensions: {[x.dim() for x in samples]}") - (device,) = tuple(set(x.device for x in samples)) # assumes all on same device + (device,) = tuple({x.device for x in samples}) # assumes all on same device max_shape = [max(lst) for lst in zip(*[x.shape for x in samples])] result = torch.empty(len(samples), *max_shape, dtype=samples[0].dtype, device=device) result.fill_(pad_v) diff --git a/src/transformers/models/esm/openfold_utils/chunk_utils.py b/src/transformers/models/esm/openfold_utils/chunk_utils.py index 4b60373438..301721d135 100644 --- a/src/transformers/models/esm/openfold_utils/chunk_utils.py +++ b/src/transformers/models/esm/openfold_utils/chunk_utils.py @@ -83,7 +83,7 @@ def _get_minimal_slice_set( # Base cases. Either start/end are empty and we're done, or the final, # one-dimensional tensor can be simply sliced if len(start) == 0: - return [tuple()] + return [()] elif len(start) == 1: return [(slice(start[0], end[0] + 1),)] diff --git a/src/transformers/models/flaubert/tokenization_flaubert.py b/src/transformers/models/flaubert/tokenization_flaubert.py index 26f68e75d7..ea3f1c8bfd 100644 --- a/src/transformers/models/flaubert/tokenization_flaubert.py +++ b/src/transformers/models/flaubert/tokenization_flaubert.py @@ -282,10 +282,10 @@ class FlaubertTokenizer(PreTrainedTokenizer): self.sm = sacremoses # cache of sm.MosesPunctNormalizer instance - self.cache_moses_punct_normalizer = dict() + self.cache_moses_punct_normalizer = {} # cache of sm.MosesTokenizer instance - self.cache_moses_tokenizer = dict() - self.lang_with_custom_tokenizer = set(["zh", "th", "ja"]) + self.cache_moses_tokenizer = {} + self.lang_with_custom_tokenizer = {"zh", "th", "ja"} self.lang2id = lang2id self.id2lang = id2lang if lang2id is not None and id2lang is not None: @@ -452,7 +452,7 @@ class FlaubertTokenizer(PreTrainedTokenizer): split_tokens = [] for token in text: if token: - split_tokens.extend([t for t in self.bpe(token).split(" ")]) + split_tokens.extend(list(self.bpe(token).split(" "))) return split_tokens diff --git a/src/transformers/models/fsmt/tokenization_fsmt.py b/src/transformers/models/fsmt/tokenization_fsmt.py index 1c401c1faa..523f2ed588 100644 --- a/src/transformers/models/fsmt/tokenization_fsmt.py +++ b/src/transformers/models/fsmt/tokenization_fsmt.py @@ -226,10 +226,10 @@ class FSMTTokenizer(PreTrainedTokenizer): self.do_lower_case = do_lower_case # cache of sm.MosesPunctNormalizer instance - self.cache_moses_punct_normalizer = dict() + self.cache_moses_punct_normalizer = {} # cache of sm.MosesTokenizer instance - self.cache_moses_tokenizer = dict() - self.cache_moses_detokenizer = dict() + self.cache_moses_tokenizer = {} + self.cache_moses_detokenizer = {} if langs and len(langs) == 2: self.src_lang, self.tgt_lang = langs @@ -379,7 +379,7 @@ class FSMTTokenizer(PreTrainedTokenizer): split_tokens = [] for token in text: if token: - split_tokens.extend([t for t in self.bpe(token).split(" ")]) + split_tokens.extend(list(self.bpe(token).split(" "))) return split_tokens diff --git a/src/transformers/models/gptj/modeling_gptj.py b/src/transformers/models/gptj/modeling_gptj.py index b7070fa0ac..f9c49db52d 100755 --- a/src/transformers/models/gptj/modeling_gptj.py +++ b/src/transformers/models/gptj/modeling_gptj.py @@ -78,7 +78,7 @@ def duplicate_interleave(m): def apply_rotary_pos_emb(x, sincos, offset=0): - sin, cos = map(lambda t: duplicate_interleave(t)[None, offset : x.shape[1] + offset, None, :], sincos) + sin, cos = (duplicate_interleave(t)[None, offset : x.shape[1] + offset, None, :] for t in sincos) # einsum notation for lambda t: repeat(t[offset:x.shape[1]+offset,:], "n d -> () n () (d j)", j=2) return (x * cos) + (rotate_every_two(x) * sin) diff --git a/src/transformers/models/herbert/tokenization_herbert.py b/src/transformers/models/herbert/tokenization_herbert.py index 80c6cb6d63..3d07e68e18 100644 --- a/src/transformers/models/herbert/tokenization_herbert.py +++ b/src/transformers/models/herbert/tokenization_herbert.py @@ -348,10 +348,10 @@ class HerbertTokenizer(PreTrainedTokenizer): self.sm = sacremoses # cache of sm.MosesPunctNormalizer instance - self.cache_moses_punct_normalizer = dict() + self.cache_moses_punct_normalizer = {} # cache of sm.MosesTokenizer instance - self.cache_moses_tokenizer = dict() - self.lang_with_custom_tokenizer = set(["zh", "th", "ja"]) + self.cache_moses_tokenizer = {} + self.lang_with_custom_tokenizer = {"zh", "th", "ja"} # True for current supported model (v1.2.0), False for XLM-17 & 100 self.do_lowercase_and_remove_accent = do_lowercase_and_remove_accent self.lang2id = lang2id @@ -490,7 +490,7 @@ class HerbertTokenizer(PreTrainedTokenizer): split_tokens = [] for token in pre_tokens: if token: - split_tokens.extend([t for t in self.bpe(token).split(" ")]) + split_tokens.extend(list(self.bpe(token).split(" "))) return split_tokens diff --git a/src/transformers/models/jukebox/modeling_jukebox.py b/src/transformers/models/jukebox/modeling_jukebox.py index 2528f1aa22..cac9300539 100755 --- a/src/transformers/models/jukebox/modeling_jukebox.py +++ b/src/transformers/models/jukebox/modeling_jukebox.py @@ -138,7 +138,7 @@ def get_alignment(music_tokens, labels, prior, config): hop_length = int(config.hop_fraction[-level - 1] * prior.n_ctx) alignment_head, alignment_layer = config.prior_alignment_head[0], config.prior_alignment_layer[0] - attn_layers = set([alignment_layer]) + attn_layers = {alignment_layer} alignment_hops = {} indices_hops = {} for start in tqdm(get_starts(total_length, n_ctx, hop_length), desc="Computing lyric to music alignment "): @@ -436,7 +436,7 @@ class JukeboxBottleneckBlock(nn.Module): used_curr = (_codebook_elem >= self.threshold).sum() usage = torch.sum(usage) dk = torch.norm(self.codebook - old_codebook) / np.sqrt(np.prod(old_codebook.shape)) - return dict(entropy=entropy, used_curr=used_curr, usage=usage, dk=dk) + return {"entropy": entropy, "used_curr": used_curr, "usage": usage, "dk": dk} def preprocess(self, hidden_states): hidden_states = hidden_states.permute(0, 2, 1).contiguous() @@ -2213,11 +2213,11 @@ class JukeboxPrior(PreTrainedModel): loss = self.encoder_loss_fraction * encoder_loss * self.nb_relevant_lyric_tokens / self.total_loss_dims loss += next_token_prediction_loss * self.next_token_prediction_loss_dims / self.total_loss_dims - metrics = dict( - bpd=next_token_prediction_loss.clone().detach(), - encoder_loss=encoder_loss.clone().detach(), - next_token_prediction_loss=next_token_prediction_loss.clone().detach(), - ) + metrics = { + "bpd": next_token_prediction_loss.clone().detach(), + "encoder_loss": encoder_loss.clone().detach(), + "next_token_prediction_loss": next_token_prediction_loss.clone().detach(), + } if get_preds: metrics["preds"] = preds.clone().detach() if get_attn_weights: @@ -2533,11 +2533,11 @@ class JukeboxModel(JukeboxPreTrainedModel): # total length of the signal, might be bit different from the actual generated length self.total_length = total_length for level in sample_levels: - sampling_kwargs = dict( - temp=0.99 if level == len(self.priors) - 1 else sampling_temperature, - chunk_size=chunk_size, - sample_tokens=sample_tokens, - ) + sampling_kwargs = { + "temp": 0.99 if level == len(self.priors) - 1 else sampling_temperature, + "chunk_size": chunk_size, + "sample_tokens": sample_tokens, + } # Set correct total_length, hop_length, labels and sampling_kwargs for level total_token_to_sample = total_length // self.priors[level].raw_to_tokens diff --git a/src/transformers/models/jukebox/tokenization_jukebox.py b/src/transformers/models/jukebox/tokenization_jukebox.py index 85835c6cdf..bd4d6721da 100644 --- a/src/transformers/models/jukebox/tokenization_jukebox.py +++ b/src/transformers/models/jukebox/tokenization_jukebox.py @@ -187,7 +187,7 @@ class JukeboxTokenizer(PreTrainedTokenizer): Do NOT take care of added tokens. Only the lyrics are split into character for the character-based vocabulary. """ # only lyrics are not tokenized, but character based is easily handled - return [character for character in lyrics] + return list(lyrics) def tokenize(self, artist, genre, lyrics, **kwargs): """ diff --git a/src/transformers/models/luke/convert_luke_original_pytorch_checkpoint_to_pytorch.py b/src/transformers/models/luke/convert_luke_original_pytorch_checkpoint_to_pytorch.py index d2b2323b28..c86fa6e308 100644 --- a/src/transformers/models/luke/convert_luke_original_pytorch_checkpoint_to_pytorch.py +++ b/src/transformers/models/luke/convert_luke_original_pytorch_checkpoint_to_pytorch.py @@ -42,7 +42,7 @@ def convert_luke_checkpoint(checkpoint_path, metadata_path, entity_vocab_path, p # Add special tokens to the token vocabulary for downstream tasks entity_token_1 = AddedToken("", lstrip=False, rstrip=False) entity_token_2 = AddedToken("", lstrip=False, rstrip=False) - tokenizer.add_special_tokens(dict(additional_special_tokens=[entity_token_1, entity_token_2])) + tokenizer.add_special_tokens({"additional_special_tokens": [entity_token_1, entity_token_2]}) config.vocab_size += 2 print(f"Saving tokenizer to {pytorch_dump_folder_path}") diff --git a/src/transformers/models/luke/tokenization_luke.py b/src/transformers/models/luke/tokenization_luke.py index ff177a4444..89fb9b63e8 100644 --- a/src/transformers/models/luke/tokenization_luke.py +++ b/src/transformers/models/luke/tokenization_luke.py @@ -1529,7 +1529,7 @@ class LukeTokenizer(PreTrainedTokenizer): batch_outputs = {} for i in range(batch_size): - inputs = dict((k, v[i]) for k, v in encoded_inputs.items()) + inputs = {k: v[i] for k, v in encoded_inputs.items()} outputs = self._pad( inputs, max_length=max_length, diff --git a/src/transformers/models/marian/convert_marian_to_pytorch.py b/src/transformers/models/marian/convert_marian_to_pytorch.py index 1662ffb358..0eb17063c2 100644 --- a/src/transformers/models/marian/convert_marian_to_pytorch.py +++ b/src/transformers/models/marian/convert_marian_to_pytorch.py @@ -185,12 +185,12 @@ def convert_hf_name_to_opus_name(hf_model_name): def get_system_metadata(repo_root): import git - return dict( - helsinki_git_sha=git.Repo(path=repo_root, search_parent_directories=True).head.object.hexsha, - transformers_git_sha=git.Repo(path=".", search_parent_directories=True).head.object.hexsha, - port_machine=socket.gethostname(), - port_time=time.strftime("%Y-%m-%d-%H:%M"), - ) + return { + "helsinki_git_sha": git.Repo(path=repo_root, search_parent_directories=True).head.object.hexsha, + "transformers_git_sha": git.Repo(path=".", search_parent_directories=True).head.object.hexsha, + "port_machine": socket.gethostname(), + "port_time": time.strftime("%Y-%m-%d-%H:%M"), + } # docstyle-ignore @@ -366,7 +366,7 @@ def _parse_readme(lns): def save_tokenizer_config(dest_dir: Path, separate_vocabs=False): dname = dest_dir.name.split("-") - dct = dict(target_lang=dname[-1], source_lang="-".join(dname[:-1]), separate_vocabs=separate_vocabs) + dct = {"target_lang": dname[-1], "source_lang": "-".join(dname[:-1]), "separate_vocabs": separate_vocabs} save_json(dct, dest_dir / "tokenizer_config.json") diff --git a/src/transformers/models/mask2former/convert_mask2former_original_pytorch_checkpoint_to_pytorch.py b/src/transformers/models/mask2former/convert_mask2former_original_pytorch_checkpoint_to_pytorch.py index ea3e530ded..20ff7e780d 100644 --- a/src/transformers/models/mask2former/convert_mask2former_original_pytorch_checkpoint_to_pytorch.py +++ b/src/transformers/models/mask2former/convert_mask2former_original_pytorch_checkpoint_to_pytorch.py @@ -76,7 +76,7 @@ class TrackedStateDict: Returns: List[str]: List of keys not yet updated """ - return set(list(self.to_track.keys())) - self._seen + return set(self.to_track.keys()) - self._seen def copy(self) -> Dict: # proxy the call to the internal dictionary diff --git a/src/transformers/models/mask2former/image_processing_mask2former.py b/src/transformers/models/mask2former/image_processing_mask2former.py index eb93391fb3..501c4ccce7 100644 --- a/src/transformers/models/mask2former/image_processing_mask2former.py +++ b/src/transformers/models/mask2former/image_processing_mask2former.py @@ -119,7 +119,7 @@ def binary_mask_to_rle(mask): pixels = np.concatenate([[0], pixels, [0]]) runs = np.where(pixels[1:] != pixels[:-1])[0] + 1 runs[1::2] -= runs[::2] - return [x for x in runs] + return list(runs) # Copied from transformers.models.detr.image_processing_detr.convert_segmentation_to_rle diff --git a/src/transformers/models/maskformer/convert_maskformer_original_pytorch_checkpoint_to_pytorch.py b/src/transformers/models/maskformer/convert_maskformer_original_pytorch_checkpoint_to_pytorch.py index d56777d452..1942f03666 100644 --- a/src/transformers/models/maskformer/convert_maskformer_original_pytorch_checkpoint_to_pytorch.py +++ b/src/transformers/models/maskformer/convert_maskformer_original_pytorch_checkpoint_to_pytorch.py @@ -72,7 +72,7 @@ class TrackedStateDict: Returns: List[str]: List of keys not yet updated """ - return set(list(self.to_track.keys())) - self._seen + return set(self.to_track.keys()) - self._seen def copy(self) -> Dict: # proxy the call to the internal dictionary @@ -120,43 +120,43 @@ class OriginalMaskFormerConfigToOursConverter: num_labels=model.SEM_SEG_HEAD.NUM_CLASSES, no_object_weight=mask_former.NO_OBJECT_WEIGHT, num_queries=mask_former.NUM_OBJECT_QUERIES, - backbone_config=dict( - pretrain_img_size=swin.PRETRAIN_IMG_SIZE, - image_size=swin.PRETRAIN_IMG_SIZE, - in_channels=3, - patch_size=swin.PATCH_SIZE, - embed_dim=swin.EMBED_DIM, - depths=swin.DEPTHS, - num_heads=swin.NUM_HEADS, - window_size=swin.WINDOW_SIZE, - drop_path_rate=swin.DROP_PATH_RATE, - model_type="swin", - ), + backbone_config={ + "pretrain_img_size": swin.PRETRAIN_IMG_SIZE, + "image_size": swin.PRETRAIN_IMG_SIZE, + "in_channels": 3, + "patch_size": swin.PATCH_SIZE, + "embed_dim": swin.EMBED_DIM, + "depths": swin.DEPTHS, + "num_heads": swin.NUM_HEADS, + "window_size": swin.WINDOW_SIZE, + "drop_path_rate": swin.DROP_PATH_RATE, + "model_type": "swin", + }, dice_weight=mask_former.DICE_WEIGHT, ce_weight=1.0, mask_weight=mask_former.MASK_WEIGHT, - decoder_config=dict( - model_type="detr", - max_position_embeddings=1024, - encoder_layers=6, - encoder_ffn_dim=2048, - encoder_attention_heads=8, - decoder_layers=mask_former.DEC_LAYERS, - decoder_ffn_dim=mask_former.DIM_FEEDFORWARD, - decoder_attention_heads=mask_former.NHEADS, - encoder_layerdrop=0.0, - decoder_layerdrop=0.0, - d_model=mask_former.HIDDEN_DIM, - dropout=mask_former.DROPOUT, - attention_dropout=0.0, - activation_dropout=0.0, - init_std=0.02, - init_xavier_std=1.0, - scale_embedding=False, - auxiliary_loss=False, - dilation=False, + decoder_config={ + "model_type": "detr", + "max_position_embeddings": 1024, + "encoder_layers": 6, + "encoder_ffn_dim": 2048, + "encoder_attention_heads": 8, + "decoder_layers": mask_former.DEC_LAYERS, + "decoder_ffn_dim": mask_former.DIM_FEEDFORWARD, + "decoder_attention_heads": mask_former.NHEADS, + "encoder_layerdrop": 0.0, + "decoder_layerdrop": 0.0, + "d_model": mask_former.HIDDEN_DIM, + "dropout": mask_former.DROPOUT, + "attention_dropout": 0.0, + "activation_dropout": 0.0, + "init_std": 0.02, + "init_xavier_std": 1.0, + "scale_embedding": False, + "auxiliary_loss": False, + "dilation": False, # default pretrained config values - ), + }, id2label=id2label, label2id=label2id, ) diff --git a/src/transformers/models/maskformer/image_processing_maskformer.py b/src/transformers/models/maskformer/image_processing_maskformer.py index 6c3119fd30..7457d1eacd 100644 --- a/src/transformers/models/maskformer/image_processing_maskformer.py +++ b/src/transformers/models/maskformer/image_processing_maskformer.py @@ -123,7 +123,7 @@ def binary_mask_to_rle(mask): pixels = np.concatenate([[0], pixels, [0]]) runs = np.where(pixels[1:] != pixels[:-1])[0] + 1 runs[1::2] -= runs[::2] - return [x for x in runs] + return list(runs) # Copied from transformers.models.detr.image_processing_detr.convert_segmentation_to_rle diff --git a/src/transformers/models/mluke/convert_mluke_original_pytorch_checkpoint_to_pytorch.py b/src/transformers/models/mluke/convert_mluke_original_pytorch_checkpoint_to_pytorch.py index 9d61c3bc8e..f361082fb3 100644 --- a/src/transformers/models/mluke/convert_mluke_original_pytorch_checkpoint_to_pytorch.py +++ b/src/transformers/models/mluke/convert_mluke_original_pytorch_checkpoint_to_pytorch.py @@ -46,7 +46,7 @@ def convert_luke_checkpoint(checkpoint_path, metadata_path, entity_vocab_path, p # Add special tokens to the token vocabulary for downstream tasks entity_token_1 = AddedToken("", lstrip=False, rstrip=False) entity_token_2 = AddedToken("", lstrip=False, rstrip=False) - tokenizer.add_special_tokens(dict(additional_special_tokens=[entity_token_1, entity_token_2])) + tokenizer.add_special_tokens({"additional_special_tokens": [entity_token_1, entity_token_2]}) config.vocab_size += 2 print(f"Saving tokenizer to {pytorch_dump_folder_path}") diff --git a/src/transformers/models/mluke/tokenization_mluke.py b/src/transformers/models/mluke/tokenization_mluke.py index 58cc9f11ab..c95bd69848 100644 --- a/src/transformers/models/mluke/tokenization_mluke.py +++ b/src/transformers/models/mluke/tokenization_mluke.py @@ -1328,7 +1328,7 @@ class MLukeTokenizer(PreTrainedTokenizer): batch_outputs = {} for i in range(batch_size): - inputs = dict((k, v[i]) for k, v in encoded_inputs.items()) + inputs = {k: v[i] for k, v in encoded_inputs.items()} outputs = self._pad( inputs, max_length=max_length, diff --git a/src/transformers/models/nat/modeling_nat.py b/src/transformers/models/nat/modeling_nat.py index d455d9e5ee..4b34fe730c 100644 --- a/src/transformers/models/nat/modeling_nat.py +++ b/src/transformers/models/nat/modeling_nat.py @@ -877,7 +877,7 @@ class NatBackbone(NatPreTrainedModel, BackboneMixin): self.out_feature_channels[stage] = num_features[i] # Add layer norms to hidden states of out_features - hidden_states_norms = dict() + hidden_states_norms = {} for stage, num_channels in zip(self.out_features, self.channels): hidden_states_norms[stage] = nn.LayerNorm(num_channels) self.hidden_states_norms = nn.ModuleDict(hidden_states_norms) diff --git a/src/transformers/models/oneformer/convert_to_hf_oneformer.py b/src/transformers/models/oneformer/convert_to_hf_oneformer.py index bfe2aee5e2..9dbd32f9d3 100644 --- a/src/transformers/models/oneformer/convert_to_hf_oneformer.py +++ b/src/transformers/models/oneformer/convert_to_hf_oneformer.py @@ -82,7 +82,7 @@ class TrackedStateDict: Returns: List[str]: List of keys not yet updated """ - return set(list(self.to_track.keys())) - self._seen + return set(self.to_track.keys()) - self._seen def copy(self) -> Dict: # proxy the call to the internal dictionary diff --git a/src/transformers/models/oneformer/image_processing_oneformer.py b/src/transformers/models/oneformer/image_processing_oneformer.py index b1e93c9e39..2573844995 100644 --- a/src/transformers/models/oneformer/image_processing_oneformer.py +++ b/src/transformers/models/oneformer/image_processing_oneformer.py @@ -120,7 +120,7 @@ def binary_mask_to_rle(mask): pixels = np.concatenate([[0], pixels, [0]]) runs = np.where(pixels[1:] != pixels[:-1])[0] + 1 runs[1::2] -= runs[::2] - return [x for x in runs] + return list(runs) # Copied from transformers.models.detr.image_processing_detr.convert_segmentation_to_rle diff --git a/src/transformers/models/openai/tokenization_openai.py b/src/transformers/models/openai/tokenization_openai.py index 96fd492dbb..36035eafec 100644 --- a/src/transformers/models/openai/tokenization_openai.py +++ b/src/transformers/models/openai/tokenization_openai.py @@ -342,12 +342,12 @@ class OpenAIGPTTokenizer(PreTrainedTokenizer): # Using BERT's BasicTokenizer text = self.nlp.tokenize(text) for token in text: - split_tokens.extend([t for t in self.bpe(token).split(" ")]) + split_tokens.extend(list(self.bpe(token).split(" "))) else: # Using SpaCy & ftfy (original tokenization process of OpenAI GPT) text = self.nlp(text_standardize(self.fix_text(text))) for token in text: - split_tokens.extend([t for t in self.bpe(token.text.lower()).split(" ")]) + split_tokens.extend(list(self.bpe(token.text.lower()).split(" "))) return split_tokens def _convert_token_to_id(self, token): diff --git a/src/transformers/models/owlvit/convert_owlvit_original_flax_to_hf.py b/src/transformers/models/owlvit/convert_owlvit_original_flax_to_hf.py index d2ea6b0a6c..934c23b4d3 100644 --- a/src/transformers/models/owlvit/convert_owlvit_original_flax_to_hf.py +++ b/src/transformers/models/owlvit/convert_owlvit_original_flax_to_hf.py @@ -37,42 +37,42 @@ from transformers import ( CONFIGS = { - "vit_b32": dict( - embed_dim=512, - image_resolution=768, - context_length=16, - vocab_size=49408, - vision_layers=12, - vision_width=768, - vision_patch_size=32, - transformer_width=512, - transformer_heads=8, - transformer_layers=12, - ), - "vit_b16": dict( - embed_dim=512, - image_resolution=768, - context_length=16, - vocab_size=49408, - vision_layers=12, - vision_width=768, - vision_patch_size=16, - transformer_width=512, - transformer_heads=8, - transformer_layers=12, - ), - "vit_l14": dict( - embed_dim=768, - image_resolution=840, - context_length=16, - vocab_size=49408, - vision_layers=24, - vision_width=1024, - vision_patch_size=14, - transformer_width=768, - transformer_heads=12, - transformer_layers=12, - ), + "vit_b32": { + "embed_dim": 512, + "image_resolution": 768, + "context_length": 16, + "vocab_size": 49408, + "vision_layers": 12, + "vision_width": 768, + "vision_patch_size": 32, + "transformer_width": 512, + "transformer_heads": 8, + "transformer_layers": 12, + }, + "vit_b16": { + "embed_dim": 512, + "image_resolution": 768, + "context_length": 16, + "vocab_size": 49408, + "vision_layers": 12, + "vision_width": 768, + "vision_patch_size": 16, + "transformer_width": 512, + "transformer_heads": 8, + "transformer_layers": 12, + }, + "vit_l14": { + "embed_dim": 768, + "image_resolution": 840, + "context_length": 16, + "vocab_size": 49408, + "vision_layers": 24, + "vision_width": 1024, + "vision_patch_size": 14, + "transformer_width": 768, + "transformer_heads": 12, + "transformer_layers": 12, + }, } diff --git a/src/transformers/models/perceiver/convert_perceiver_haiku_to_pytorch.py b/src/transformers/models/perceiver/convert_perceiver_haiku_to_pytorch.py index 9c925313a3..9b9b3cb454 100644 --- a/src/transformers/models/perceiver/convert_perceiver_haiku_to_pytorch.py +++ b/src/transformers/models/perceiver/convert_perceiver_haiku_to_pytorch.py @@ -283,7 +283,7 @@ def convert_perceiver_checkpoint(pickle_file, pytorch_dump_folder_path, architec params = checkpoint # turn into initial state dict - state_dict = dict() + state_dict = {} for scope_name, parameters in hk.data_structures.to_mutable_dict(params).items(): for param_name, param in parameters.items(): state_dict[scope_name + "/" + param_name] = param @@ -398,7 +398,7 @@ def convert_perceiver_checkpoint(pickle_file, pytorch_dump_folder_path, architec elif architecture == "multimodal_autoencoding": images = torch.randn((1, 16, 3, 224, 224)) audio = torch.randn((1, 30720, 1)) - inputs = dict(image=images, audio=audio, label=torch.zeros((images.shape[0], 700))) + inputs = {"image": images, "audio": audio, "label": torch.zeros((images.shape[0], 700))} # forward pass if architecture == "multimodal_autoencoding": diff --git a/src/transformers/models/perceiver/modeling_perceiver.py b/src/transformers/models/perceiver/modeling_perceiver.py index c9b06fcded..7008b04ec8 100755 --- a/src/transformers/models/perceiver/modeling_perceiver.py +++ b/src/transformers/models/perceiver/modeling_perceiver.py @@ -957,9 +957,10 @@ class PerceiverForMaskedLM(PerceiverPreTrainedModel): text_preprocessor = PerceiverTextPreprocessor(config) - trainable_position_encoding_kwargs_decoder = dict( - num_channels=text_preprocessor.num_channels, index_dims=config.max_position_embeddings - ) + trainable_position_encoding_kwargs_decoder = { + "num_channels": text_preprocessor.num_channels, + "index_dims": config.max_position_embeddings, + } self.perceiver = PerceiverModel( config, @@ -1089,7 +1090,7 @@ class PerceiverForSequenceClassification(PerceiverPreTrainedModel): def __init__(self, config): super().__init__(config) - trainable_position_encoding_kwargs_decoder = dict(num_channels=config.d_latents, index_dims=1) + trainable_position_encoding_kwargs_decoder = {"num_channels": config.d_latents, "index_dims": 1} self.num_labels = config.num_labels self.perceiver = PerceiverModel( @@ -1214,8 +1215,8 @@ class PerceiverForImageClassificationLearned(PerceiverPreTrainedModel): def __init__(self, config): super().__init__(config) - trainable_position_encoding_kwargs_preprocessor = dict(num_channels=256, index_dims=config.image_size**2) - trainable_position_encoding_kwargs_decoder = dict(num_channels=config.d_latents, index_dims=1) + trainable_position_encoding_kwargs_preprocessor = {"num_channels": 256, "index_dims": config.image_size**2} + trainable_position_encoding_kwargs_decoder = {"num_channels": config.d_latents, "index_dims": 1} self.num_labels = config.num_labels self.perceiver = PerceiverModel( @@ -1357,10 +1358,13 @@ class PerceiverForImageClassificationFourier(PerceiverPreTrainedModel): def __init__(self, config): super().__init__(config) - fourier_position_encoding_kwargs_preprocessor = dict( - concat_pos=True, max_resolution=(224, 224), num_bands=64, sine_only=False - ) - trainable_position_encoding_kwargs_decoder = dict(num_channels=config.d_latents, index_dims=1) + fourier_position_encoding_kwargs_preprocessor = { + "concat_pos": True, + "max_resolution": (224, 224), + "num_bands": 64, + "sine_only": False, + } + trainable_position_encoding_kwargs_decoder = {"num_channels": config.d_latents, "index_dims": 1} self.num_labels = config.num_labels self.perceiver = PerceiverModel( @@ -1497,10 +1501,13 @@ class PerceiverForImageClassificationConvProcessing(PerceiverPreTrainedModel): def __init__(self, config): super().__init__(config) - fourier_position_encoding_kwargs_preprocessor = dict( - concat_pos=True, max_resolution=(56, 56), num_bands=64, sine_only=False - ) - trainable_position_encoding_kwargs_decoder = dict(num_channels=config.d_latents, index_dims=1) + fourier_position_encoding_kwargs_preprocessor = { + "concat_pos": True, + "max_resolution": (56, 56), + "num_bands": 64, + "sine_only": False, + } + trainable_position_encoding_kwargs_decoder = {"num_channels": config.d_latents, "index_dims": 1} self.num_labels = config.num_labels self.perceiver = PerceiverModel( @@ -1638,15 +1645,18 @@ class PerceiverForOpticalFlow(PerceiverPreTrainedModel): def __init__(self, config): super().__init__(config) - fourier_position_encoding_kwargs_preprocessor = dict( - num_bands=64, - max_resolution=config.train_size, - sine_only=False, - concat_pos=True, - ) - fourier_position_encoding_kwargs_decoder = dict( - concat_pos=True, max_resolution=config.train_size, num_bands=64, sine_only=False - ) + fourier_position_encoding_kwargs_preprocessor = { + "num_bands": 64, + "max_resolution": config.train_size, + "sine_only": False, + "concat_pos": True, + } + fourier_position_encoding_kwargs_decoder = { + "concat_pos": True, + "max_resolution": config.train_size, + "num_bands": 64, + "sine_only": False, + } image_preprocessor = PerceiverImagePreprocessor( config, @@ -1788,24 +1798,24 @@ class PerceiverForMultimodalAutoencoding(PerceiverPreTrainedModel): "audio": PerceiverAudioPreprocessor( config, position_encoding_type="fourier", - fourier_position_encoding_kwargs=dict( - num_bands=192, - max_resolution=(n_audio_samples,), - sine_only=False, - concat_pos=True, - ), + fourier_position_encoding_kwargs={ + "num_bands": 192, + "max_resolution": (n_audio_samples,), + "sine_only": False, + "concat_pos": True, + }, prep_type="patches", samples_per_patch=config.samples_per_patch, ), "image": PerceiverImagePreprocessor( config, position_encoding_type="fourier", - fourier_position_encoding_kwargs=dict( - num_bands=32, - max_resolution=(config.num_frames, config.image_size, config.image_size), - sine_only=False, - concat_pos=True, - ), + fourier_position_encoding_kwargs={ + "num_bands": 32, + "max_resolution": (config.num_frames, config.image_size, config.image_size), + "sine_only": False, + "concat_pos": True, + }, prep_type="patches", spatial_downsample=4, temporal_downsample=1, @@ -1824,12 +1834,12 @@ class PerceiverForMultimodalAutoencoding(PerceiverPreTrainedModel): use_query_residual=False, position_encoding_only=True, position_encoding_type="fourier", - fourier_position_encoding_kwargs=dict( - num_bands=32, - max_resolution=(config.num_frames, config.image_size, config.image_size), - sine_only=False, - concat_pos=True, - ), + fourier_position_encoding_kwargs={ + "num_bands": 32, + "max_resolution": (config.num_frames, config.image_size, config.image_size), + "sine_only": False, + "concat_pos": True, + }, ) decoder = PerceiverMultimodalDecoder( @@ -1848,12 +1858,12 @@ class PerceiverForMultimodalAutoencoding(PerceiverPreTrainedModel): use_query_residual=False, position_encoding_only=True, position_encoding_type="fourier", - fourier_position_encoding_kwargs=dict( - num_bands=192, - max_resolution=(n_audio_samples,), - sine_only=False, - concat_pos=True, - ), + fourier_position_encoding_kwargs={ + "num_bands": 192, + "max_resolution": (n_audio_samples,), + "sine_only": False, + "concat_pos": True, + }, ), "image": image_decoder, "label": PerceiverClassificationDecoder( @@ -1863,10 +1873,10 @@ class PerceiverForMultimodalAutoencoding(PerceiverPreTrainedModel): use_query_residual=False, position_encoding_only=True, position_encoding_type="trainable", - trainable_position_encoding_kwargs=dict( - num_channels=1024, - index_dims=1, - ), + trainable_position_encoding_kwargs={ + "num_channels": 1024, + "index_dims": 1, + }, ), }, num_outputs=None, @@ -2180,9 +2190,7 @@ class PerceiverBasicDecoder(PerceiverAbstractDecoder): # to get the indices for the unflattened array # unravel_index returns a tuple (x_idx, y_idx, ...) # stack to get the [n, d] tensor of coordinates - indices = list( - torch.from_numpy(x) for x in np.unravel_index(subsampled_points.cpu(), self.output_index_dims) - ) + indices = [torch.from_numpy(x) for x in np.unravel_index(subsampled_points.cpu(), self.output_index_dims)] pos = torch.stack(indices, dim=1) batch_size = inputs.shape[0] # Map these coordinates to [-1, 1] @@ -2476,9 +2484,9 @@ class PerceiverMultimodalDecoder(PerceiverAbstractDecoder): inputs = restructure(modality_sizes, inputs) # Obtain modality-specific decoders' queries - subsampled_points = subsampled_points or dict() + subsampled_points = subsampled_points or {} - decoder_queries = dict() + decoder_queries = {} for modality, decoder in self.modalities.items(): # Get input_without_pos for this modality if it exists. input_without_pos = None @@ -3363,7 +3371,7 @@ class PerceiverMultimodalPreprocessor(AbstractPreprocessor): super().__init__() self.modalities = nn.ModuleDict(modalities) self.min_padding_size = min_padding_size - self.mask_probs = mask_probs if mask_probs is not None else dict() + self.mask_probs = mask_probs if mask_probs is not None else {} self.padding = nn.ParameterDict( { modality: nn.Parameter(torch.randn(1, self.num_channels - preprocessor.num_channels)) diff --git a/src/transformers/models/phobert/tokenization_phobert.py b/src/transformers/models/phobert/tokenization_phobert.py index dd294ac43a..4011ea8b56 100644 --- a/src/transformers/models/phobert/tokenization_phobert.py +++ b/src/transformers/models/phobert/tokenization_phobert.py @@ -297,7 +297,7 @@ class PhobertTokenizer(PreTrainedTokenizer): words = re.findall(r"\S+\n?", text) for token in words: - split_tokens.extend([t for t in self.bpe(token).split(" ")]) + split_tokens.extend(list(self.bpe(token).split(" "))) return split_tokens def _convert_token_to_id(self, token): diff --git a/src/transformers/models/realm/tokenization_realm.py b/src/transformers/models/realm/tokenization_realm.py index de067b0594..a6c09f1b97 100644 --- a/src/transformers/models/realm/tokenization_realm.py +++ b/src/transformers/models/realm/tokenization_realm.py @@ -294,7 +294,7 @@ class RealmTokenizer(PreTrainedTokenizer): if encoded_token_type_ids is not None: output_data["token_type_ids"].append(encoded_token_type_ids) - output_data = dict((key, item) for key, item in output_data.items() if len(item) != 0) + output_data = {key: item for key, item in output_data.items() if len(item) != 0} return BatchEncoding(output_data, tensor_type=return_tensors) diff --git a/src/transformers/models/realm/tokenization_realm_fast.py b/src/transformers/models/realm/tokenization_realm_fast.py index 4db8b165b9..1cc1a99665 100644 --- a/src/transformers/models/realm/tokenization_realm_fast.py +++ b/src/transformers/models/realm/tokenization_realm_fast.py @@ -259,7 +259,7 @@ class RealmTokenizerFast(PreTrainedTokenizerFast): if encoded_token_type_ids is not None: output_data["token_type_ids"].append(encoded_token_type_ids) - output_data = dict((key, item) for key, item in output_data.items() if len(item) != 0) + output_data = {key: item for key, item in output_data.items() if len(item) != 0} return BatchEncoding(output_data, tensor_type=return_tensors) diff --git a/src/transformers/models/reformer/modeling_reformer.py b/src/transformers/models/reformer/modeling_reformer.py index 9b24b342bf..ff90b9ac9a 100755 --- a/src/transformers/models/reformer/modeling_reformer.py +++ b/src/transformers/models/reformer/modeling_reformer.py @@ -87,7 +87,7 @@ def _get_least_common_mult_chunk_len(config): return config.lsh_attn_chunk_length elif len(attn_types_set) == 1 and attn_types[0] == "local": return config.local_attn_chunk_length - elif len(attn_types_set) == 2 and attn_types_set == set(["lsh", "local"]): + elif len(attn_types_set) == 2 and attn_types_set == {"lsh", "local"}: return np.lcm(config.lsh_attn_chunk_length, config.local_attn_chunk_length) else: raise NotImplementedError( @@ -103,7 +103,7 @@ def _get_min_chunk_len(config): return config.lsh_attn_chunk_length elif len(attn_types_set) == 1 and attn_types[0] == "local": return config.local_attn_chunk_length - elif len(attn_types_set) == 2 and attn_types_set == set(["lsh", "local"]): + elif len(attn_types_set) == 2 and attn_types_set == {"lsh", "local"}: return min(config.lsh_attn_chunk_length, config.local_attn_chunk_length) else: raise NotImplementedError( @@ -1277,7 +1277,7 @@ class ReformerAttention(nn.Module): self.self_attention = LSHSelfAttention(config) elif len(set(self.attn_layers)) == 1 and self.attn_layers[0] == "local": self.self_attention = LocalSelfAttention(config) - elif len(set(self.attn_layers)) == 2 and set(self.attn_layers) == set(["lsh", "local"]): + elif len(set(self.attn_layers)) == 2 and set(self.attn_layers) == {"lsh", "local"}: # get correct attn layers if self.attn_layers[self.layer_id] == "lsh": self.self_attention = LSHSelfAttention(config) diff --git a/src/transformers/models/regnet/convert_regnet_seer_10b_to_pytorch.py b/src/transformers/models/regnet/convert_regnet_seer_10b_to_pytorch.py index 22a8a99ca2..f379b40d2a 100644 --- a/src/transformers/models/regnet/convert_regnet_seer_10b_to_pytorch.py +++ b/src/transformers/models/regnet/convert_regnet_seer_10b_to_pytorch.py @@ -60,7 +60,7 @@ class Tracker: for name, m in self.module.named_modules(): self.handles.append(m.register_forward_hook(partial(self._forward_hook, name=name))) self.module(x) - list(map(lambda x: x.remove(), self.handles)) + [x.remove() for x in self.handles] return self @property diff --git a/src/transformers/models/regnet/convert_regnet_to_pytorch.py b/src/transformers/models/regnet/convert_regnet_to_pytorch.py index 6b34c6aa19..1228e65c46 100644 --- a/src/transformers/models/regnet/convert_regnet_to_pytorch.py +++ b/src/transformers/models/regnet/convert_regnet_to_pytorch.py @@ -53,7 +53,7 @@ class Tracker: for m in self.module.modules(): self.handles.append(m.register_forward_hook(self._forward_hook)) self.module(x) - list(map(lambda x: x.remove(), self.handles)) + [x.remove() for x in self.handles] return self @property diff --git a/src/transformers/models/regnet/modeling_tf_regnet.py b/src/transformers/models/regnet/modeling_tf_regnet.py index b1759d71b0..2c3a1ac42e 100644 --- a/src/transformers/models/regnet/modeling_tf_regnet.py +++ b/src/transformers/models/regnet/modeling_tf_regnet.py @@ -247,7 +247,7 @@ class TFRegNetStage(tf.keras.layers.Layer): class TFRegNetEncoder(tf.keras.layers.Layer): def __init__(self, config: RegNetConfig, **kwargs): super().__init__(**kwargs) - self.stages = list() + self.stages = [] # based on `downsample_in_first_stage`, the first layer of the first stage may or may not downsample the input self.stages.append( TFRegNetStage( diff --git a/src/transformers/models/rembert/tokenization_rembert.py b/src/transformers/models/rembert/tokenization_rembert.py index cff101451b..2a3c6e4faf 100644 --- a/src/transformers/models/rembert/tokenization_rembert.py +++ b/src/transformers/models/rembert/tokenization_rembert.py @@ -219,7 +219,7 @@ class RemBertTokenizer(PreTrainedTokenizer): "You should not supply a second sequence if the provided sequence of " "ids is already formatted with special tokens for the model." ) - return list(map(lambda x: 1 if x in [self.sep_token_id, self.cls_token_id] else 0, token_ids_0)) + return [1 if x in [self.sep_token_id, self.cls_token_id] else 0 for x in token_ids_0] if token_ids_1 is not None: return [1] + ([0] * len(token_ids_0)) + [1] + ([0] * len(token_ids_1)) + [1] diff --git a/src/transformers/models/rembert/tokenization_rembert_fast.py b/src/transformers/models/rembert/tokenization_rembert_fast.py index 5d5032f411..bc9593c0b5 100644 --- a/src/transformers/models/rembert/tokenization_rembert_fast.py +++ b/src/transformers/models/rembert/tokenization_rembert_fast.py @@ -191,7 +191,7 @@ class RemBertTokenizerFast(PreTrainedTokenizerFast): "You should not supply a second sequence if the provided sequence of " "ids is already formatted with special tokens for the model." ) - return list(map(lambda x: 1 if x in [self.sep_token_id, self.cls_token_id] else 0, token_ids_0)) + return [1 if x in [self.sep_token_id, self.cls_token_id] else 0 for x in token_ids_0] if token_ids_1 is not None: return [1] + ([0] * len(token_ids_0)) + [1] + ([0] * len(token_ids_1)) + [1] diff --git a/src/transformers/models/resnet/convert_resnet_to_pytorch.py b/src/transformers/models/resnet/convert_resnet_to_pytorch.py index 5f836c9d2a..f32887c964 100644 --- a/src/transformers/models/resnet/convert_resnet_to_pytorch.py +++ b/src/transformers/models/resnet/convert_resnet_to_pytorch.py @@ -51,7 +51,7 @@ class Tracker: for m in self.module.modules(): self.handles.append(m.register_forward_hook(self._forward_hook)) self.module(x) - list(map(lambda x: x.remove(), self.handles)) + [x.remove() for x in self.handles] return self @property diff --git a/src/transformers/models/roc_bert/modeling_roc_bert.py b/src/transformers/models/roc_bert/modeling_roc_bert.py index c8c85ff142..af7ac57410 100644 --- a/src/transformers/models/roc_bert/modeling_roc_bert.py +++ b/src/transformers/models/roc_bert/modeling_roc_bert.py @@ -1240,7 +1240,7 @@ class RoCBertForPreTraining(RoCBertPreTrainedModel): sim_matrix = torch.matmul(pooled_output_norm, attack_pooled_output_norm.T) # batch_size * hidden_dim sim_matrix_target = torch.matmul(labels_pooled_output_norm, attack_pooled_output_norm.T) - batch_labels = torch.tensor([i for i in range(batch_size)], device=device) + batch_labels = torch.tensor(list(range(batch_size)), device=device) contrastive_loss = ( loss_fct(100 * sim_matrix.view(batch_size, -1), batch_labels.view(-1)) + loss_fct(100 * sim_matrix_target.view(batch_size, -1), batch_labels.view(-1)) diff --git a/src/transformers/models/speech_to_text/convert_s2t_fairseq_to_tfms.py b/src/transformers/models/speech_to_text/convert_s2t_fairseq_to_tfms.py index 6c1cd993fe..eb4d852624 100644 --- a/src/transformers/models/speech_to_text/convert_s2t_fairseq_to_tfms.py +++ b/src/transformers/models/speech_to_text/convert_s2t_fairseq_to_tfms.py @@ -95,12 +95,10 @@ def convert_fairseq_s2t_checkpoint_to_tfms(checkpoint_path, pytorch_dump_folder_ model = Speech2TextForConditionalGeneration(config) missing, unexpected = model.model.load_state_dict(state_dict, strict=False) - if len(missing) > 0 and not set(missing) <= set( - [ - "encoder.embed_positions.weights", - "decoder.embed_positions.weights", - ] - ): + if len(missing) > 0 and not set(missing) <= { + "encoder.embed_positions.weights", + "decoder.embed_positions.weights", + }: raise ValueError( "Only `encoder.embed_positions.weights` and `decoder.embed_positions.weights` are allowed to be missing," f" but all the following weights are missing {missing}" diff --git a/src/transformers/models/speech_to_text_2/tokenization_speech_to_text_2.py b/src/transformers/models/speech_to_text_2/tokenization_speech_to_text_2.py index 4c90ba05ba..c021619cd0 100644 --- a/src/transformers/models/speech_to_text_2/tokenization_speech_to_text_2.py +++ b/src/transformers/models/speech_to_text_2/tokenization_speech_to_text_2.py @@ -213,7 +213,7 @@ class Speech2Text2Tokenizer(PreTrainedTokenizer): split_tokens = [] for token in text: if token: - split_tokens.extend([t for t in self.bpe(token).split(" ")]) + split_tokens.extend(list(self.bpe(token).split(" "))) return split_tokens diff --git a/src/transformers/models/swin/modeling_swin.py b/src/transformers/models/swin/modeling_swin.py index abf47cf831..5f572c23a8 100644 --- a/src/transformers/models/swin/modeling_swin.py +++ b/src/transformers/models/swin/modeling_swin.py @@ -1259,7 +1259,7 @@ class SwinBackbone(SwinPreTrainedModel, BackboneMixin): self.out_feature_channels[stage] = num_features[i] # Add layer norms to hidden states of out_features - hidden_states_norms = dict() + hidden_states_norms = {} for stage, num_channels in zip(self.out_features, self.channels): hidden_states_norms[stage] = nn.LayerNorm(num_channels) self.hidden_states_norms = nn.ModuleDict(hidden_states_norms) diff --git a/src/transformers/models/tapas/tokenization_tapas.py b/src/transformers/models/tapas/tokenization_tapas.py index 395ec876c9..0bd558aee8 100644 --- a/src/transformers/models/tapas/tokenization_tapas.py +++ b/src/transformers/models/tapas/tokenization_tapas.py @@ -1688,7 +1688,7 @@ class TapasTokenizer(PreTrainedTokenizer): for col_index in range(num_columns): for row_index in range(num_rows): - indices = [index for index in self._get_cell_token_indexes(column_ids, row_ids, col_index, row_index)] + indices = list(self._get_cell_token_indexes(column_ids, row_ids, col_index, row_index)) num_indices = len(indices) if num_indices > 1: for index in indices: diff --git a/src/transformers/models/tapex/tokenization_tapex.py b/src/transformers/models/tapex/tokenization_tapex.py index c41c6cbe47..e2543a3378 100644 --- a/src/transformers/models/tapex/tokenization_tapex.py +++ b/src/transformers/models/tapex/tokenization_tapex.py @@ -1453,16 +1453,16 @@ class TapexTokenizer(PreTrainedTokenizer): truncated_unrelated_indices = [] related_indices = [] if answer is None or len(answer) == 0: - answer_set = set([]) + answer_set = set() else: - answer_set = set([ans_ex.lower() for ans_ex in answer]) + answer_set = {ans_ex.lower() for ans_ex in answer} # add question key words into answer set if question is not None: answer_set.update(question.split()) question_set = set(question.strip("?!.,").split(" ")) row_max_len = len(table_content["rows"]) for _row_idx, row in enumerate(table_content["rows"]): - lower_row = set([str(cell).lower() for cell in row]) + lower_row = {str(cell).lower() for cell in row} if len(lower_row & answer_set) == 0 and len(lower_row & question_set) == 0: truncated_unrelated_indices.append(_row_idx) else: diff --git a/src/transformers/models/van/convert_van_to_pytorch.py b/src/transformers/models/van/convert_van_to_pytorch.py index a8086e6d1b..0cb51e59e6 100644 --- a/src/transformers/models/van/convert_van_to_pytorch.py +++ b/src/transformers/models/van/convert_van_to_pytorch.py @@ -55,7 +55,7 @@ class Tracker: for m in self.module.modules(): self.handles.append(m.register_forward_hook(self._forward_hook)) self.module(x) - list(map(lambda x: x.remove(), self.handles)) + [x.remove() for x in self.handles] return self @property diff --git a/src/transformers/models/vilt/modeling_vilt.py b/src/transformers/models/vilt/modeling_vilt.py index 61cc69b694..6704fe42b1 100755 --- a/src/transformers/models/vilt/modeling_vilt.py +++ b/src/transformers/models/vilt/modeling_vilt.py @@ -171,7 +171,7 @@ class ViltEmbeddings(nn.Module): non_valid_nums = [v.size(0) for v in non_valid_row_idx] pad_nums = [max_image_length - v for v in valid_nums] - select = list() + select = [] for i, (v, nv, p) in enumerate(zip(valid_nums, non_valid_nums, pad_nums)): if p <= 0: valid_choice = torch.multinomial(torch.ones(v).float(), max_image_length) diff --git a/src/transformers/models/wav2vec2/tokenization_wav2vec2.py b/src/transformers/models/wav2vec2/tokenization_wav2vec2.py index 42fd1131cf..54888aea2c 100644 --- a/src/transformers/models/wav2vec2/tokenization_wav2vec2.py +++ b/src/transformers/models/wav2vec2/tokenization_wav2vec2.py @@ -648,7 +648,7 @@ class Wav2Vec2CTCTokenizer(PreTrainedTokenizer): if self.verbose: logger.info(f"Adding {token} to the vocabulary") - added_tok_encoder = dict((tok, len(self) + i) for i, tok in enumerate(tokens_to_add)) + added_tok_encoder = {tok: len(self) + i for i, tok in enumerate(tokens_to_add)} added_tok_decoder = {v: k for k, v in added_tok_encoder.items()} self.added_tokens_encoder.update(added_tok_encoder) self.added_tokens_decoder.update(added_tok_decoder) diff --git a/src/transformers/models/wav2vec2_phoneme/tokenization_wav2vec2_phoneme.py b/src/transformers/models/wav2vec2_phoneme/tokenization_wav2vec2_phoneme.py index 74e2d3525b..f3ad23a1cd 100644 --- a/src/transformers/models/wav2vec2_phoneme/tokenization_wav2vec2_phoneme.py +++ b/src/transformers/models/wav2vec2_phoneme/tokenization_wav2vec2_phoneme.py @@ -615,7 +615,7 @@ class Wav2Vec2PhonemeCTCTokenizer(PreTrainedTokenizer): if self.verbose: logger.info(f"Adding {token} to the vocabulary") - added_tok_encoder = dict((tok, len(self) + i) for i, tok in enumerate(tokens_to_add)) + added_tok_encoder = {tok: len(self) + i for i, tok in enumerate(tokens_to_add)} added_tok_decoder = {v: k for k, v in added_tok_encoder.items()} self.added_tokens_encoder.update(added_tok_encoder) self.added_tokens_decoder.update(added_tok_decoder) diff --git a/src/transformers/models/whisper/convert_openai_to_hf.py b/src/transformers/models/whisper/convert_openai_to_hf.py index 7c2e0c40a0..3e7d42634b 100644 --- a/src/transformers/models/whisper/convert_openai_to_hf.py +++ b/src/transformers/models/whisper/convert_openai_to_hf.py @@ -157,12 +157,10 @@ def convert_openai_whisper_to_tfms(checkpoint_path, pytorch_dump_folder_path): model = WhisperForConditionalGeneration(config) missing, unexpected = model.model.load_state_dict(state_dict, strict=False) - if len(missing) > 0 and not set(missing) <= set( - [ - "encoder.embed_positions.weights", - "decoder.embed_positions.weights", - ] - ): + if len(missing) > 0 and not set(missing) <= { + "encoder.embed_positions.weights", + "decoder.embed_positions.weights", + }: raise ValueError( "Only `encoder.embed_positions.weights` and `decoder.embed_positions.weights` are allowed to be missing," f" but all the following weights are missing {missing}" diff --git a/src/transformers/models/whisper/english_normalizer.py b/src/transformers/models/whisper/english_normalizer.py index e72d2e89b2..7f6aab4ad2 100644 --- a/src/transformers/models/whisper/english_normalizer.py +++ b/src/transformers/models/whisper/english_normalizer.py @@ -189,25 +189,23 @@ class EnglishNumberNormalizer: } self.specials = {"and", "double", "triple", "point"} - self.words = set( - [ - key - for mapping in [ - self.zeros, - self.ones, - self.ones_suffixed, - self.tens, - self.tens_suffixed, - self.multipliers, - self.multipliers_suffixed, - self.preceding_prefixers, - self.following_prefixers, - self.suffixers, - self.specials, - ] - for key in mapping + self.words = { + key + for mapping in [ + self.zeros, + self.ones, + self.ones_suffixed, + self.tens, + self.tens_suffixed, + self.multipliers, + self.multipliers_suffixed, + self.preceding_prefixers, + self.following_prefixers, + self.suffixers, + self.specials, ] - ) + for key in mapping + } self.literal_words = {"one", "ones"} def process_words(self, words: List[str]) -> Iterator[str]: diff --git a/src/transformers/models/xlm/convert_xlm_original_pytorch_checkpoint_to_pytorch.py b/src/transformers/models/xlm/convert_xlm_original_pytorch_checkpoint_to_pytorch.py index 4221cdfc90..6f3cdf920a 100755 --- a/src/transformers/models/xlm/convert_xlm_original_pytorch_checkpoint_to_pytorch.py +++ b/src/transformers/models/xlm/convert_xlm_original_pytorch_checkpoint_to_pytorch.py @@ -43,10 +43,10 @@ def convert_xlm_checkpoint_to_pytorch(xlm_checkpoint_path, pytorch_dump_folder_p two_levels_state_dict["transformer." + k] = v config = chkpt["params"] - config = dict((n, v) for n, v in config.items() if not isinstance(v, (torch.FloatTensor, numpy.ndarray))) + config = {n: v for n, v in config.items() if not isinstance(v, (torch.FloatTensor, numpy.ndarray))} vocab = chkpt["dico_word2id"] - vocab = dict((s + "" if s.find("@@") == -1 and i > 13 else s.replace("@@", ""), i) for s, i in vocab.items()) + vocab = {s + "" if s.find("@@") == -1 and i > 13 else s.replace("@@", ""): i for s, i in vocab.items()} # Save pytorch-model pytorch_weights_dump_path = pytorch_dump_folder_path + "/" + WEIGHTS_NAME diff --git a/src/transformers/models/xlm/tokenization_xlm.py b/src/transformers/models/xlm/tokenization_xlm.py index cbfb2b48ff..5cab4fc996 100644 --- a/src/transformers/models/xlm/tokenization_xlm.py +++ b/src/transformers/models/xlm/tokenization_xlm.py @@ -638,10 +638,10 @@ class XLMTokenizer(PreTrainedTokenizer): self.sm = sacremoses # cache of sm.MosesPunctNormalizer instance - self.cache_moses_punct_normalizer = dict() + self.cache_moses_punct_normalizer = {} # cache of sm.MosesTokenizer instance - self.cache_moses_tokenizer = dict() - self.lang_with_custom_tokenizer = set(["zh", "th", "ja"]) + self.cache_moses_tokenizer = {} + self.lang_with_custom_tokenizer = {"zh", "th", "ja"} # True for current supported model (v1.2.0), False for XLM-17 & 100 self.do_lowercase_and_remove_accent = do_lowercase_and_remove_accent self.lang2id = lang2id @@ -851,7 +851,7 @@ class XLMTokenizer(PreTrainedTokenizer): split_tokens = [] for token in text: if token: - split_tokens.extend([t for t in self.bpe(token).split(" ")]) + split_tokens.extend(list(self.bpe(token).split(" "))) return split_tokens diff --git a/src/transformers/models/xmod/convert_xmod_original_pytorch_checkpoint_to_pytorch.py b/src/transformers/models/xmod/convert_xmod_original_pytorch_checkpoint_to_pytorch.py index 151606d196..6352b71300 100644 --- a/src/transformers/models/xmod/convert_xmod_original_pytorch_checkpoint_to_pytorch.py +++ b/src/transformers/models/xmod/convert_xmod_original_pytorch_checkpoint_to_pytorch.py @@ -142,7 +142,7 @@ def convert_xmod_checkpoint_to_pytorch( bert_output.adapter_layer_norm.weight = xmod_layer.adapter_layer_norm.weight bert_output.adapter_layer_norm.bias = xmod_layer.adapter_layer_norm.bias - if list(sorted(bert_output.adapter_modules.keys())) != list(sorted(xmod_layer.adapter_modules.keys())): + if sorted(bert_output.adapter_modules.keys()) != sorted(xmod_layer.adapter_modules.keys()): raise AssertionError("Lists of language adapters do not match.") for lang_code, adapter in xmod_layer.adapter_modules.items(): to_adapter = bert_output.adapter_modules[lang_code] diff --git a/src/transformers/models/xmod/modeling_xmod.py b/src/transformers/models/xmod/modeling_xmod.py index 354d04bac6..c19b8fabaa 100644 --- a/src/transformers/models/xmod/modeling_xmod.py +++ b/src/transformers/models/xmod/modeling_xmod.py @@ -395,7 +395,7 @@ class XmodOutput(nn.Module): else: self.adapter_layer_norm = None self.adapter_reuse_layer_norm = config.adapter_reuse_layer_norm - self.adapter_modules = nn.ModuleDict(dict()) + self.adapter_modules = nn.ModuleDict({}) for language in config.languages: self.adapter_modules[str(language)] = XmodAdapter(config) diff --git a/src/transformers/models/yolos/image_processing_yolos.py b/src/transformers/models/yolos/image_processing_yolos.py index f49d5d14fd..a8fb00aee5 100644 --- a/src/transformers/models/yolos/image_processing_yolos.py +++ b/src/transformers/models/yolos/image_processing_yolos.py @@ -515,7 +515,7 @@ def binary_mask_to_rle(mask): pixels = np.concatenate([[0], pixels, [0]]) runs = np.where(pixels[1:] != pixels[:-1])[0] + 1 runs[1::2] -= runs[::2] - return [x for x in runs] + return list(runs) # Copied from transformers.models.detr.image_processing_detr.convert_segmentation_to_rle diff --git a/src/transformers/onnx/convert.py b/src/transformers/onnx/convert.py index ee9c498e73..918134d311 100644 --- a/src/transformers/onnx/convert.py +++ b/src/transformers/onnx/convert.py @@ -145,7 +145,7 @@ def export_pytorch( device = torch.device(device) if device.type == "cuda" and torch.cuda.is_available(): model.to(device) - model_inputs_device = dict() + model_inputs_device = {} for k, v in model_inputs.items(): if isinstance(v, Tuple): model_inputs_device[k] = tuple( diff --git a/src/transformers/optimization.py b/src/transformers/optimization.py index 47201b0924..659b92a59b 100644 --- a/src/transformers/optimization.py +++ b/src/transformers/optimization.py @@ -358,7 +358,7 @@ class AdamW(Optimizer): raise ValueError(f"Invalid beta parameter: {betas[1]} - should be in [0.0, 1.0)") if not 0.0 <= eps: raise ValueError(f"Invalid epsilon value: {eps} - should be >= 0.0") - defaults = dict(lr=lr, betas=betas, eps=eps, weight_decay=weight_decay, correct_bias=correct_bias) + defaults = {"lr": lr, "betas": betas, "eps": eps, "weight_decay": weight_decay, "correct_bias": correct_bias} super().__init__(params, defaults) def step(self, closure: Callable = None): @@ -527,17 +527,17 @@ class Adafactor(Optimizer): if warmup_init and not relative_step: raise ValueError("`warmup_init=True` requires `relative_step=True`") - defaults = dict( - lr=lr, - eps=eps, - clip_threshold=clip_threshold, - decay_rate=decay_rate, - beta1=beta1, - weight_decay=weight_decay, - scale_parameter=scale_parameter, - relative_step=relative_step, - warmup_init=warmup_init, - ) + defaults = { + "lr": lr, + "eps": eps, + "clip_threshold": clip_threshold, + "decay_rate": decay_rate, + "beta1": beta1, + "weight_decay": weight_decay, + "scale_parameter": scale_parameter, + "relative_step": relative_step, + "warmup_init": warmup_init, + } super().__init__(params, defaults) @staticmethod diff --git a/src/transformers/optimization_tf.py b/src/transformers/optimization_tf.py index db7238d7f4..b42e04041b 100644 --- a/src/transformers/optimization_tf.py +++ b/src/transformers/optimization_tf.py @@ -262,7 +262,7 @@ class AdamWeightDecay(Adam): coefficients = self._fallback_apply_state(var_device, var_dtype) apply_state[(var_device, var_dtype)] = coefficients - return coefficients["lr_t"], dict(apply_state=apply_state) + return coefficients["lr_t"], {"apply_state": apply_state} def _resource_apply_dense(self, grad, var, apply_state=None): lr_t, kwargs = self._get_lr(var.device, var.dtype.base_dtype, apply_state) @@ -333,7 +333,7 @@ class GradientAccumulator(object): """The accumulated gradients on the current replica.""" if not self._gradients: raise ValueError("The accumulator should be called first to initialize the gradients") - return list(gradient.value() if gradient is not None else gradient for gradient in self._gradients) + return [gradient.value() if gradient is not None else gradient for gradient in self._gradients] def __call__(self, gradients): """Accumulates `gradients` on the current replica.""" diff --git a/src/transformers/pipelines/base.py b/src/transformers/pipelines/base.py index 528e83d8f1..054c7e57a7 100644 --- a/src/transformers/pipelines/base.py +++ b/src/transformers/pipelines/base.py @@ -1083,7 +1083,7 @@ class Pipeline(_ScikitCompat): final_iterator = self.get_iterator( inputs, num_workers, batch_size, preprocess_params, forward_params, postprocess_params ) - outputs = [output for output in final_iterator] + outputs = list(final_iterator) return outputs else: return self.run_multi(inputs, preprocess_params, forward_params, postprocess_params) diff --git a/src/transformers/pipelines/question_answering.py b/src/transformers/pipelines/question_answering.py index 746d3c1eae..884cee78ca 100644 --- a/src/transformers/pipelines/question_answering.py +++ b/src/transformers/pipelines/question_answering.py @@ -210,7 +210,7 @@ class QuestionAnsweringArgumentHandler(ArgumentHandler): inputs = [inputs] elif isinstance(inputs, Iterable): # Copy to avoid overriding arguments - inputs = [i for i in inputs] + inputs = list(inputs) else: raise ValueError(f"Invalid arguments {kwargs}") diff --git a/src/transformers/tokenization_utils.py b/src/transformers/tokenization_utils.py index 4dbbee4144..3398ee3091 100644 --- a/src/transformers/tokenization_utils.py +++ b/src/transformers/tokenization_utils.py @@ -425,7 +425,7 @@ class PreTrainedTokenizer(PreTrainedTokenizerBase): if self.verbose: logger.info(f"Adding {token} to the vocabulary") - added_tok_encoder = dict((tok, len(self) + i) for i, tok in enumerate(tokens_to_add)) + added_tok_encoder = {tok: len(self) + i for i, tok in enumerate(tokens_to_add)} added_tok_decoder = {v: k for k, v in added_tok_encoder.items()} self.added_tokens_encoder.update(added_tok_encoder) self.added_tokens_decoder.update(added_tok_decoder) @@ -495,9 +495,9 @@ class PreTrainedTokenizer(PreTrainedTokenizerBase): `List[str]`: The list of tokens. """ # Simple mapping string => AddedToken for special tokens with specific tokenization behaviors - all_special_tokens_extended = dict( - (str(t), t) for t in self.all_special_tokens_extended if isinstance(t, AddedToken) - ) + all_special_tokens_extended = { + str(t): t for t in self.all_special_tokens_extended if isinstance(t, AddedToken) + } text, kwargs = self.prepare_for_tokenization(text, **kwargs) diff --git a/src/transformers/tokenization_utils_base.py b/src/transformers/tokenization_utils_base.py index c11000111b..eb52ef0adb 100644 --- a/src/transformers/tokenization_utils_base.py +++ b/src/transformers/tokenization_utils_base.py @@ -1918,7 +1918,7 @@ class PreTrainedTokenizerBase(SpecialTokensMixin, PushToHubMixin): obj.pop("__type") return AddedToken(**obj) elif isinstance(obj, (list, tuple)): - return list(convert_added_tokens(o) for o in obj) + return [convert_added_tokens(o) for o in obj] elif isinstance(obj, dict): return {k: convert_added_tokens(v) for k, v in obj.items()} return obj @@ -1992,7 +1992,7 @@ class PreTrainedTokenizerBase(SpecialTokensMixin, PushToHubMixin): added_tok_encoder = json.load(added_tokens_handle) # Sort added tokens by index - added_tok_encoder_sorted = list(sorted(added_tok_encoder.items(), key=lambda x: x[1])) + added_tok_encoder_sorted = sorted(added_tok_encoder.items(), key=lambda x: x[1]) # Accumulate added tokens into batches of special/non-special tokens, because calling add_tokens() for # individual tokens would repeatedly rebuild a trie, which can be slow. @@ -2129,7 +2129,7 @@ class PreTrainedTokenizerBase(SpecialTokensMixin, PushToHubMixin): out["__type"] = "AddedToken" return out elif isinstance(obj, (list, tuple)): - return list(convert_added_tokens(o, add_type_field=add_type_field) for o in obj) + return [convert_added_tokens(o, add_type_field=add_type_field) for o in obj] elif isinstance(obj, dict): return {k: convert_added_tokens(v, add_type_field=add_type_field) for k, v in obj.items()} return obj @@ -2502,23 +2502,23 @@ class PreTrainedTokenizerBase(SpecialTokensMixin, PushToHubMixin): you must set `is_split_into_words=True` (to lift the ambiguity with a batch of sequences). """ # To avoid duplicating - all_kwargs = dict( - add_special_tokens=add_special_tokens, - padding=padding, - truncation=truncation, - max_length=max_length, - stride=stride, - is_split_into_words=is_split_into_words, - pad_to_multiple_of=pad_to_multiple_of, - return_tensors=return_tensors, - return_token_type_ids=return_token_type_ids, - return_attention_mask=return_attention_mask, - return_overflowing_tokens=return_overflowing_tokens, - return_special_tokens_mask=return_special_tokens_mask, - return_offsets_mapping=return_offsets_mapping, - return_length=return_length, - verbose=verbose, - ) + all_kwargs = { + "add_special_tokens": add_special_tokens, + "padding": padding, + "truncation": truncation, + "max_length": max_length, + "stride": stride, + "is_split_into_words": is_split_into_words, + "pad_to_multiple_of": pad_to_multiple_of, + "return_tensors": return_tensors, + "return_token_type_ids": return_token_type_ids, + "return_attention_mask": return_attention_mask, + "return_overflowing_tokens": return_overflowing_tokens, + "return_special_tokens_mask": return_special_tokens_mask, + "return_offsets_mapping": return_offsets_mapping, + "return_length": return_length, + "verbose": verbose, + } all_kwargs.update(kwargs) if text is None and text_target is None: raise ValueError("You need to specify either `text` or `text_target`.") @@ -3010,7 +3010,7 @@ class PreTrainedTokenizerBase(SpecialTokensMixin, PushToHubMixin): batch_outputs = {} for i in range(batch_size): - inputs = dict((k, v[i]) for k, v in encoded_inputs.items()) + inputs = {k: v[i] for k, v in encoded_inputs.items()} outputs = self._pad( inputs, max_length=max_length, diff --git a/src/transformers/tokenization_utils_fast.py b/src/transformers/tokenization_utils_fast.py index bcdbd8325b..b484464f68 100644 --- a/src/transformers/tokenization_utils_fast.py +++ b/src/transformers/tokenization_utils_fast.py @@ -162,7 +162,7 @@ class PreTrainedTokenizerFast(PreTrainedTokenizerBase): """ base_vocab = self._tokenizer.get_vocab(with_added_tokens=False) full_vocab = self._tokenizer.get_vocab(with_added_tokens=True) - added_vocab = dict((tok, index) for tok, index in full_vocab.items() if tok not in base_vocab) + added_vocab = {tok: index for tok, index in full_vocab.items() if tok not in base_vocab} return added_vocab def __len__(self) -> int: diff --git a/src/transformers/trainer.py b/src/transformers/trainer.py index e4aa3f40a3..1f7df7e9f3 100755 --- a/src/transformers/trainer.py +++ b/src/transformers/trainer.py @@ -1081,7 +1081,7 @@ class Trainer: skipped = 0 for module in opt_model.modules(): if isinstance(module, nn.Embedding): - skipped += sum(dict((p.data_ptr(), p.numel()) for p in module.parameters()).values()) + skipped += sum({p.data_ptr(): p.numel() for p in module.parameters()}.values()) print(f"skipped {module}: {skipped/2**20}M params") manager.register_module_override(module, "weight", {"optim_bits": 32}) logger.debug(f"bitsandbytes: will optimize {module} in fp32") @@ -2564,12 +2564,12 @@ class Trainer: elif isinstance(data, (tuple, list)): return type(data)(self._prepare_input(v) for v in data) elif isinstance(data, torch.Tensor): - kwargs = dict(device=self.args.device) + kwargs = {"device": self.args.device} if self.deepspeed and data.dtype != torch.int64: # NLP models inputs are int64 and those get adjusted to the right dtype of the # embedding. Other models such as wav2vec2's inputs are already float and thus # may need special handling to match the dtypes of the model - kwargs.update(dict(dtype=self.args.hf_deepspeed_config.dtype())) + kwargs.update({"dtype": self.args.hf_deepspeed_config.dtype()}) return data.to(**kwargs) return data diff --git a/src/transformers/trainer_pt_utils.py b/src/transformers/trainer_pt_utils.py index e6e5cca950..eefbb52683 100644 --- a/src/transformers/trainer_pt_utils.py +++ b/src/transformers/trainer_pt_utils.py @@ -534,7 +534,7 @@ def get_length_grouped_indices(lengths, batch_size, mega_batch_mult=None, genera indices = torch.randperm(len(lengths), generator=generator) megabatch_size = mega_batch_mult * batch_size megabatches = [indices[i : i + megabatch_size].tolist() for i in range(0, len(lengths), megabatch_size)] - megabatches = [list(sorted(megabatch, key=lambda i: lengths[i], reverse=True)) for megabatch in megabatches] + megabatches = [sorted(megabatch, key=lambda i: lengths[i], reverse=True) for megabatch in megabatches] # The rest is to get the biggest batch first. # Since each megabatch is sorted by descending length, the longest element is the first diff --git a/src/transformers/trainer_utils.py b/src/transformers/trainer_utils.py index af63761d82..9f273ab1ed 100644 --- a/src/transformers/trainer_utils.py +++ b/src/transformers/trainer_utils.py @@ -505,21 +505,21 @@ class TrainerMemoryTracker: if self.torch is not None: self.gpu_mem_used_now = self.torch.cuda.memory_allocated() self.gpu_mem_used_peak = self.torch.cuda.max_memory_allocated() - self.gpu[self.cur_stage] = dict( - begin=self.gpu_mem_used_at_start, - end=self.gpu_mem_used_now, - alloc=(self.gpu_mem_used_now - self.gpu_mem_used_at_start), - peaked=max(0, self.gpu_mem_used_peak - self.gpu_mem_used_now), - ) + self.gpu[self.cur_stage] = { + "begin": self.gpu_mem_used_at_start, + "end": self.gpu_mem_used_now, + "alloc": (self.gpu_mem_used_now - self.gpu_mem_used_at_start), + "peaked": max(0, self.gpu_mem_used_peak - self.gpu_mem_used_now), + } # cpu self.cpu_mem_used_now = self.cpu_mem_used() - self.cpu[self.cur_stage] = dict( - begin=self.cpu_mem_used_at_start, - end=self.cpu_mem_used_now, - alloc=(self.cpu_mem_used_now - self.cpu_mem_used_at_start), - peaked=max(0, self.cpu_mem_used_peak - self.cpu_mem_used_now), - ) + self.cpu[self.cur_stage] = { + "begin": self.cpu_mem_used_at_start, + "end": self.cpu_mem_used_now, + "alloc": (self.cpu_mem_used_now - self.cpu_mem_used_at_start), + "peaked": max(0, self.cpu_mem_used_peak - self.cpu_mem_used_now), + } # reset - cycle finished self.cur_stage = None diff --git a/src/transformers/training_args.py b/src/transformers/training_args.py index 28ba71f6af..dc3c0c4244 100644 --- a/src/transformers/training_args.py +++ b/src/transformers/training_args.py @@ -1874,7 +1874,7 @@ class TrainingArguments: the token values by removing their value. """ # filter out fields that are defined as field(init=False) - d = dict((field.name, getattr(self, field.name)) for field in fields(self) if field.init) + d = {field.name: getattr(self, field.name) for field in fields(self) if field.init} for k, v in d.items(): if isinstance(v, Enum): diff --git a/src/transformers/utils/doc.py b/src/transformers/utils/doc.py index 2e6264c508..f5eea7ae4e 100644 --- a/src/transformers/utils/doc.py +++ b/src/transformers/utils/doc.py @@ -1085,19 +1085,19 @@ def add_code_sample_docstrings( # putting all kwargs for docstrings in a dict to be used # with the `.format(**doc_kwargs)`. Note that string might # be formatted with non-existing keys, which is fine. - doc_kwargs = dict( - model_class=model_class, - processor_class=processor_class, - checkpoint=checkpoint, - mask=mask, - qa_target_start_index=qa_target_start_index, - qa_target_end_index=qa_target_end_index, - expected_output=expected_output, - expected_loss=expected_loss, - real_checkpoint=real_checkpoint, - fake_checkpoint=checkpoint, - true="{true}", # For syntax that conflicts with formatting. - ) + doc_kwargs = { + "model_class": model_class, + "processor_class": processor_class, + "checkpoint": checkpoint, + "mask": mask, + "qa_target_start_index": qa_target_start_index, + "qa_target_end_index": qa_target_end_index, + "expected_output": expected_output, + "expected_loss": expected_loss, + "real_checkpoint": real_checkpoint, + "fake_checkpoint": checkpoint, + "true": "{true}", # For syntax that conflicts with formatting. + } if ("SequenceClassification" in model_class or "AudioClassification" in model_class) and modality == "audio": code_sample = sample_docstrings["AudioClassification"] diff --git a/src/transformers/utils/hp_naming.py b/src/transformers/utils/hp_naming.py index bc806e8222..f7c5cb5259 100644 --- a/src/transformers/utils/hp_naming.py +++ b/src/transformers/utils/hp_naming.py @@ -96,12 +96,12 @@ class TrialShortNamer: if cls.NAMING_INFO is not None: return - info = dict( - short_word={}, - reverse_short_word={}, - short_param={}, - reverse_short_param={}, - ) + info = { + "short_word": {}, + "reverse_short_word": {}, + "short_param": {}, + "reverse_short_param": {}, + } field_keys = list(cls.DEFAULTS.keys()) diff --git a/src/transformers/utils/hub.py b/src/transformers/utils/hub.py index bb3575edf2..2bee24324c 100644 --- a/src/transformers/utils/hub.py +++ b/src/transformers/utils/hub.py @@ -902,7 +902,7 @@ def get_checkpoint_shard_files( with open(index_filename, "r") as f: index = json.loads(f.read()) - shard_filenames = sorted(list(set(index["weight_map"].values()))) + shard_filenames = sorted(set(index["weight_map"].values())) sharded_metadata = index["metadata"] sharded_metadata["all_checkpoint_keys"] = list(index["weight_map"].keys()) sharded_metadata["weight_map"] = index["weight_map"].copy() diff --git a/src/transformers/utils/model_parallel_utils.py b/src/transformers/utils/model_parallel_utils.py index b5d23417ce..7ec79a5e23 100644 --- a/src/transformers/utils/model_parallel_utils.py +++ b/src/transformers/utils/model_parallel_utils.py @@ -51,6 +51,6 @@ def get_device_map(n_layers, devices): """Returns a dictionary of layers distributed evenly across all devices.""" layers = list(range(n_layers)) n_blocks = int(ceil(n_layers / len(devices))) - layers_list = list(layers[i : i + n_blocks] for i in range(0, n_layers, n_blocks)) + layers_list = [layers[i : i + n_blocks] for i in range(0, n_layers, n_blocks)] return dict(zip(devices, layers_list)) diff --git a/tests/deepspeed/test_deepspeed.py b/tests/deepspeed/test_deepspeed.py index 80dc017eea..60cec456c3 100644 --- a/tests/deepspeed/test_deepspeed.py +++ b/tests/deepspeed/test_deepspeed.py @@ -157,9 +157,13 @@ class CoreIntegrationDeepSpeed(TestCasePlus, TrainerIntegrationCommon): super().setUp() master_port = get_master_port(real_launcher=False) - self.dist_env_1_gpu = dict( - MASTER_ADDR="localhost", MASTER_PORT=master_port, RANK="0", LOCAL_RANK="0", WORLD_SIZE="1" - ) + self.dist_env_1_gpu = { + "MASTER_ADDR": "localhost", + "MASTER_PORT": master_port, + "RANK": "0", + "LOCAL_RANK": "0", + "WORLD_SIZE": "1", + } def tearDown(self): super().tearDown() @@ -212,14 +216,18 @@ class TrainerIntegrationDeepSpeedWithCustomConfig(TestCasePlus): self.batch_size = args.train_batch_size master_port = get_master_port(real_launcher=False) - self.dist_env_1_gpu = dict( - MASTER_ADDR="localhost", MASTER_PORT=master_port, RANK="0", LOCAL_RANK="0", WORLD_SIZE="1" - ) + self.dist_env_1_gpu = { + "MASTER_ADDR": "localhost", + "MASTER_PORT": master_port, + "RANK": "0", + "LOCAL_RANK": "0", + "WORLD_SIZE": "1", + } - self.ds_config_file = dict( - zero2=f"{self.test_file_dir_str}/ds_config_zero2.json", - zero3=f"{self.test_file_dir_str}/ds_config_zero3.json", - ) + self.ds_config_file = { + "zero2": f"{self.test_file_dir_str}/ds_config_zero2.json", + "zero3": f"{self.test_file_dir_str}/ds_config_zero3.json", + } # use self.get_config_dict(stage) to use these to ensure the original is not modified with io.open(self.ds_config_file[ZERO2], "r", encoding="utf-8") as f: @@ -230,10 +238,10 @@ class TrainerIntegrationDeepSpeedWithCustomConfig(TestCasePlus): # It's in the file as a demo for users since we want everything to work out of the box even if slower. config_zero3["zero_optimization"]["stage3_gather_16bit_weights_on_model_save"] = False - self.ds_config_dict = dict( - zero2=config_zero2, - zero3=config_zero3, - ) + self.ds_config_dict = { + "zero2": config_zero2, + "zero3": config_zero3, + } def tearDown(self): super().tearDown() @@ -370,7 +378,7 @@ class TrainerIntegrationDeepSpeed(TrainerIntegrationDeepSpeedWithCustomConfig, T # this actually doesn't have to be on NVMe, any storage will do since this test only # runs a simple check that we can use some directory as if it were NVMe nvme_path = self.get_auto_remove_tmp_dir() - nvme_config = dict(device="nvme", nvme_path=nvme_path) + nvme_config = {"device": "nvme", "nvme_path": nvme_path} ds_config_zero3_dict = self.get_config_dict(ZERO3) ds_config_zero3_dict["zero_optimization"]["offload_optimizer"] = nvme_config ds_config_zero3_dict["zero_optimization"]["offload_param"] = nvme_config @@ -415,7 +423,7 @@ class TrainerIntegrationDeepSpeed(TrainerIntegrationDeepSpeedWithCustomConfig, T # force cpu offload ds_config_dict["zero_optimization"]["offload_optimizer"]["device"] = "cpu" with mockenv_context(**self.dist_env_1_gpu): - kwargs = dict(local_rank=0, deepspeed=ds_config_dict) + kwargs = {"local_rank": 0, "deepspeed": ds_config_dict} kwargs[dtype] = True trainer = get_regression_trainer(**kwargs) with CaptureLogger(deepspeed_logger) as cl: @@ -431,7 +439,7 @@ class TrainerIntegrationDeepSpeed(TrainerIntegrationDeepSpeedWithCustomConfig, T # it's run not as a first test as `sys.stdout` will no longer be the same. So we either have # to reset `deepspeed_logger.handlers[0].setStream(sys.stdout)` or directly capture from the deepspeed_logger. with mockenv_context(**self.dist_env_1_gpu): - kwargs = dict(local_rank=0, deepspeed=self.get_config_dict(stage)) + kwargs = {"local_rank": 0, "deepspeed": self.get_config_dict(stage)} kwargs[dtype] = True trainer = get_regression_trainer(**kwargs) @@ -449,15 +457,15 @@ class TrainerIntegrationDeepSpeed(TrainerIntegrationDeepSpeedWithCustomConfig, T # `self.lr_scheduler.get_last_lr()` and originally it'd fail on the very first step. with mockenv_context(**self.dist_env_1_gpu): a = b = 0.0 - kwargs = dict( - a=a, - b=b, - local_rank=0, - train_len=8, - deepspeed=self.get_config_dict(stage), - per_device_train_batch_size=8, - logging_steps=1, - ) + kwargs = { + "a": a, + "b": b, + "local_rank": 0, + "train_len": 8, + "deepspeed": self.get_config_dict(stage), + "per_device_train_batch_size": 8, + "logging_steps": 1, + } kwargs[dtype] = True trainer = get_regression_trainer(**kwargs) @@ -494,13 +502,13 @@ class TrainerIntegrationDeepSpeed(TrainerIntegrationDeepSpeedWithCustomConfig, T train_len = 64 a = b = 0.0 - kwargs = dict( - a=a, - b=b, - local_rank=0, - train_len=train_len, - deepspeed=self.get_config_dict(stage), - ) + kwargs = { + "a": a, + "b": b, + "local_rank": 0, + "train_len": train_len, + "deepspeed": self.get_config_dict(stage), + } kwargs[dtype] = True with mockenv_context(**self.dist_env_1_gpu): @@ -583,11 +591,11 @@ class TrainerIntegrationDeepSpeed(TrainerIntegrationDeepSpeedWithCustomConfig, T # save checkpoints with mockenv_context(**self.dist_env_1_gpu): - kwargs = dict( - output_dir=output_dir, - save_steps=freq, - deepspeed=ds_config_dict, - ) + kwargs = { + "output_dir": output_dir, + "save_steps": freq, + "deepspeed": ds_config_dict, + } kwargs[dtype] = True trainer = get_regression_trainer(**kwargs) trainer.train() @@ -600,7 +608,7 @@ class TrainerIntegrationDeepSpeed(TrainerIntegrationDeepSpeedWithCustomConfig, T with mockenv_context(**self.dist_env_1_gpu): ds_config_dict = self.get_config_dict(stage) output_dir = self.get_auto_remove_tmp_dir() - kwargs = dict(output_dir=output_dir, deepspeed=ds_config_dict) + kwargs = {"output_dir": output_dir, "deepspeed": ds_config_dict} kwargs[dtype] = True trainer = get_regression_trainer(**kwargs) @@ -632,7 +640,13 @@ class TrainerIntegrationDeepSpeed(TrainerIntegrationDeepSpeedWithCustomConfig, T if stage == ZERO3: ds_config_dict["zero_optimization"]["stage3_gather_16bit_weights_on_model_save"] = True - kwargs = dict(output_dir=output_dir, train_len=128, save_steps=5, learning_rate=0.1, deepspeed=ds_config_dict) + kwargs = { + "output_dir": output_dir, + "train_len": 128, + "save_steps": 5, + "learning_rate": 0.1, + "deepspeed": ds_config_dict, + } kwargs[dtype] = True with mockenv_context(**self.dist_env_1_gpu): @@ -679,16 +693,16 @@ class TrainerIntegrationDeepSpeed(TrainerIntegrationDeepSpeedWithCustomConfig, T ds_config_dict = self.get_config_dict(stage) - kwargs = dict( - output_dir=output_dir, - train_len=4, - per_device_train_batch_size=4, - num_train_epochs=1, - save_strategy="steps", - save_steps=1, - learning_rate=0.1, - deepspeed=ds_config_dict, - ) + kwargs = { + "output_dir": output_dir, + "train_len": 4, + "per_device_train_batch_size": 4, + "num_train_epochs": 1, + "save_strategy": "steps", + "save_steps": 1, + "learning_rate": 0.1, + "deepspeed": ds_config_dict, + } kwargs[dtype] = True with mockenv_context(**self.dist_env_1_gpu): @@ -710,7 +724,7 @@ class TrainerIntegrationDeepSpeed(TrainerIntegrationDeepSpeedWithCustomConfig, T # test that we can switch from zero2 to zero3 in the same process for example # test is_zero, etc. output_dir = self.get_auto_remove_tmp_dir() - kwargs = dict(output_dir=output_dir, train_len=8, fp16=True) + kwargs = {"output_dir": output_dir, "train_len": 8, "fp16": True} ds_config_zero3_dict = self.get_config_dict(ZERO3) ds_config_zero2_dict = self.get_config_dict(ZERO2) @@ -808,7 +822,7 @@ class TrainerIntegrationDeepSpeed(TrainerIntegrationDeepSpeedWithCustomConfig, T def get_dataset(): data_file = str(self.tests_dir / "fixtures/tests_samples/SQUAD/sample.json") - data_files = dict(train=data_file, validation=data_file) + data_files = {"train": data_file, "validation": data_file} raw_datasets = datasets.load_dataset("json", data_files=data_files, field="data") train_dataset = raw_datasets["train"].map(_add_eos_to_examples).map(_convert_to_features, batched=True) valid_dataset = deepcopy(train_dataset) @@ -903,7 +917,14 @@ class TestDeepSpeedWithLauncher(TestCasePlus): do_train = True do_eval = False - kwargs = dict(stage=stage, dtype=dtype, eval_steps=1, distributed=True, do_train=do_train, do_eval=do_eval) + kwargs = { + "stage": stage, + "dtype": dtype, + "eval_steps": 1, + "distributed": True, + "do_train": do_train, + "do_eval": do_eval, + } # 1. normal training output_dir = self.run_and_check(**kwargs) diff --git a/tests/deepspeed/test_model_zoo.py b/tests/deepspeed/test_model_zoo.py index 984c7e7565..e51fe1e7cf 100644 --- a/tests/deepspeed/test_model_zoo.py +++ b/tests/deepspeed/test_model_zoo.py @@ -166,8 +166,8 @@ def make_task_cmds(): # but need a tiny model for each # # should have "{model_type.upper()}_TINY" corresponding vars defined, e.g., T5_TINY, etc. - tasks2models = dict( - trans=[ + tasks2models = { + "trans": [ "bart", "fsmt", "m2m_100", @@ -177,10 +177,10 @@ def make_task_cmds(): "t5_v1", # "mt5", missing model files ], - sum=[ + "sum": [ "pegasus", ], - clm=[ + "clm": [ "big_bird", "bigbird_pegasus", "blenderbot", @@ -192,7 +192,7 @@ def make_task_cmds(): "prophetnet", # "camembert", missing model files ], - mlm=[ + "mlm": [ "albert", "deberta", "deberta-v2", @@ -203,7 +203,7 @@ def make_task_cmds(): "layoutlm", # "reformer", # multiple issues with either mlm/qa/clas ], - qa=[ + "qa": [ "led", "longformer", "mobilebert", @@ -213,7 +213,7 @@ def make_task_cmds(): # "convbert", # missing tokenizer files # "layoutlmv2", missing model files ], - clas=[ + "clas": [ "bert", "xlnet", # "hubert", # missing tokenizer files @@ -223,54 +223,54 @@ def make_task_cmds(): # "openai-gpt", missing model files # "tapas", multiple issues ], - img_clas=[ + "img_clas": [ "vit", ], - ) + } scripts_dir = f"{ROOT_DIRECTORY}/examples/pytorch" - tasks = dict( - trans=f""" + tasks = { + "trans": f""" {scripts_dir}/translation/run_translation.py --train_file {data_dir_wmt}/train.json --source_lang en --target_lang ro """, - sum=f""" + "sum": f""" {scripts_dir}/summarization/run_summarization.py --train_file {data_dir_xsum}/sample.json --max_source_length 12 --max_target_length 12 --lang en """, - clm=f""" + "clm": f""" {scripts_dir}/language-modeling/run_clm.py --train_file {FIXTURE_DIRECTORY}/sample_text.txt --block_size 8 """, - mlm=f""" + "mlm": f""" {scripts_dir}/language-modeling/run_mlm.py --train_file {FIXTURE_DIRECTORY}/sample_text.txt """, - qa=f""" + "qa": f""" {scripts_dir}/question-answering/run_qa.py --train_file {data_dir_samples}/SQUAD/sample.json """, - clas=f""" + "clas": f""" {scripts_dir}/text-classification/run_glue.py --train_file {data_dir_samples}/MRPC/train.csv --max_seq_length 12 --task_name MRPC """, - img_clas=f""" + "img_clas": f""" {scripts_dir}/image-classification/run_image_classification.py --dataset_name hf-internal-testing/cats_vs_dogs_sample --remove_unused_columns False --max_steps 10 --image_processor_name {DS_TESTS_DIRECTORY}/vit_feature_extractor.json """, - ) + } launcher = get_launcher(distributed=True) diff --git a/tests/extended/test_trainer_ext.py b/tests/extended/test_trainer_ext.py index d86fb337af..8953adaa24 100644 --- a/tests/extended/test_trainer_ext.py +++ b/tests/extended/test_trainer_ext.py @@ -155,21 +155,21 @@ class TestTrainerExt(TestCasePlus): @require_torch_multi_gpu def test_trainer_log_level_replica(self, experiment_id): # as each sub-test is slow-ish split into multiple sub-tests to avoid CI timeout - experiments = dict( + experiments = { # test with the default log_level - should be info and thus log info once - base=dict(extra_args_str="", n_matches=1), + "base": {"extra_args_str": "", "n_matches": 1}, # test with low log_level and log_level_replica - should be noisy on all processes # now the info string should appear twice on 2 processes - low=dict(extra_args_str="--log_level debug --log_level_replica debug", n_matches=2), + "low": {"extra_args_str": "--log_level debug --log_level_replica debug", "n_matches": 2}, # test with high log_level and low log_level_replica # now the info string should appear once only on the replica - high=dict(extra_args_str="--log_level error --log_level_replica debug", n_matches=1), + "high": {"extra_args_str": "--log_level error --log_level_replica debug", "n_matches": 1}, # test with high log_level and log_level_replica - should be quiet on all processes - mixed=dict(extra_args_str="--log_level error --log_level_replica error", n_matches=0), - ) + "mixed": {"extra_args_str": "--log_level error --log_level_replica error", "n_matches": 0}, + } data = experiments[experiment_id] - kwargs = dict(distributed=True, predict_with_generate=False, do_eval=False, do_predict=False) + kwargs = {"distributed": True, "predict_with_generate": False, "do_eval": False, "do_predict": False} log_info_string = "Running training" with CaptureStderr() as cl: self.run_seq2seq_quick(**kwargs, extra_args_str=data["extra_args_str"]) diff --git a/tests/generation/test_utils.py b/tests/generation/test_utils.py index 1287e4a876..b0d23b6fff 100644 --- a/tests/generation/test_utils.py +++ b/tests/generation/test_utils.py @@ -1480,7 +1480,7 @@ class GenerationTesterMixin: signature = inspect.signature(model.forward) # We want to test only models where encoder/decoder head masking is implemented - if not set(head_masking.keys()) < set([*signature.parameters.keys()]): + if not set(head_masking.keys()) < {*signature.parameters.keys()}: continue for attn_name, (name, mask) in zip(attention_names, head_masking.items()): diff --git a/tests/models/bart/test_modeling_bart.py b/tests/models/bart/test_modeling_bart.py index b8f045442d..e1e525be3d 100644 --- a/tests/models/bart/test_modeling_bart.py +++ b/tests/models/bart/test_modeling_bart.py @@ -939,7 +939,7 @@ class BartModelIntegrationTests(unittest.TestCase): def test_xsum_config_generation_params(self): config = BartConfig.from_pretrained("facebook/bart-large-xsum") - expected_params = dict(num_beams=6, do_sample=False, early_stopping=True, length_penalty=1.0) + expected_params = {"num_beams": 6, "do_sample": False, "early_stopping": True, "length_penalty": 1.0} config_params = {k: getattr(config, k, "MISSING") for k, v in expected_params.items()} self.assertDictEqual(expected_params, config_params) diff --git a/tests/models/blenderbot/test_modeling_blenderbot.py b/tests/models/blenderbot/test_modeling_blenderbot.py index 671541328d..1cc5377cf2 100644 --- a/tests/models/blenderbot/test_modeling_blenderbot.py +++ b/tests/models/blenderbot/test_modeling_blenderbot.py @@ -299,8 +299,8 @@ class Blenderbot3BIntegrationTests(unittest.TestCase): @slow def test_generation_from_short_input_same_as_parlai_3B(self): - FASTER_GEN_KWARGS = dict(num_beams=1, early_stopping=True, min_length=15, max_length=25) - TOK_DECODE_KW = dict(skip_special_tokens=True, clean_up_tokenization_spaces=True) + FASTER_GEN_KWARGS = {"num_beams": 1, "early_stopping": True, "min_length": 15, "max_length": 25} + TOK_DECODE_KW = {"skip_special_tokens": True, "clean_up_tokenization_spaces": True} torch.cuda.empty_cache() model = BlenderbotForConditionalGeneration.from_pretrained(self.ckpt).half().to(torch_device) diff --git a/tests/models/blenderbot/test_modeling_flax_blenderbot.py b/tests/models/blenderbot/test_modeling_flax_blenderbot.py index 771a388d4a..ffcc9a7d04 100644 --- a/tests/models/blenderbot/test_modeling_flax_blenderbot.py +++ b/tests/models/blenderbot/test_modeling_flax_blenderbot.py @@ -402,8 +402,8 @@ class FlaxBlenderbotModelTest(FlaxModelTesterMixin, unittest.TestCase, FlaxGener @unittest.skipUnless(jax_device != "cpu", "3B test too slow on CPU.") @slow def test_generation_from_short_input_same_as_parlai_3B(self): - FASTER_GEN_KWARGS = dict(num_beams=1, early_stopping=True, min_length=15, max_length=25) - TOK_DECODE_KW = dict(skip_special_tokens=True, clean_up_tokenization_spaces=True) + FASTER_GEN_KWARGS = {"num_beams": 1, "early_stopping": True, "min_length": 15, "max_length": 25} + TOK_DECODE_KW = {"skip_special_tokens": True, "clean_up_tokenization_spaces": True} model = FlaxBlenderbotForConditionalGeneration.from_pretrained("facebook/blenderbot-3B", from_pt=True) tokenizer = BlenderbotTokenizer.from_pretrained("facebook/blenderbot-3B") diff --git a/tests/models/bloom/test_tokenization_bloom.py b/tests/models/bloom/test_tokenization_bloom.py index 88ead384e0..4857e2ab5f 100644 --- a/tests/models/bloom/test_tokenization_bloom.py +++ b/tests/models/bloom/test_tokenization_bloom.py @@ -124,7 +124,7 @@ class BloomTokenizationTest(TokenizerTesterMixin, unittest.TestCase): input_text = list(sample_data.values()) output_tokens = list(map(tokenizer.encode, input_text)) - predicted_text = list(map(lambda x: tokenizer.decode(x, clean_up_tokenization_spaces=False), output_tokens)) + predicted_text = [tokenizer.decode(x, clean_up_tokenization_spaces=False) for x in output_tokens] self.assertListEqual(predicted_text, input_text) def test_pretrained_model_lists(self): diff --git a/tests/models/clip/test_modeling_tf_clip.py b/tests/models/clip/test_modeling_tf_clip.py index 88ad5be374..cee1205db9 100644 --- a/tests/models/clip/test_modeling_tf_clip.py +++ b/tests/models/clip/test_modeling_tf_clip.py @@ -551,7 +551,7 @@ class TFCLIPModelTest(TFModelTesterMixin, unittest.TestCase): if self.__class__.__name__ == "TFCLIPModelTest": inputs_dict.pop("return_loss", None) - tf_main_layer_classes = set( + tf_main_layer_classes = { module_member for model_class in self.all_model_classes for module in (import_module(model_class.__module__),) @@ -563,7 +563,7 @@ class TFCLIPModelTest(TFModelTesterMixin, unittest.TestCase): if isinstance(module_member, type) and tf.keras.layers.Layer in module_member.__bases__ and getattr(module_member, "_keras_serializable", False) - ) + } for main_layer_class in tf_main_layer_classes: # T5MainLayer needs an embed_tokens parameter when called without the inputs_embeds parameter if "T5" in main_layer_class.__name__: diff --git a/tests/models/data2vec/test_modeling_tf_data2vec_vision.py b/tests/models/data2vec/test_modeling_tf_data2vec_vision.py index eb085af0d8..0fa14e526a 100644 --- a/tests/models/data2vec/test_modeling_tf_data2vec_vision.py +++ b/tests/models/data2vec/test_modeling_tf_data2vec_vision.py @@ -398,7 +398,7 @@ class TFData2VecVisionModelTest(TFModelTesterMixin, unittest.TestCase): # The number of elements in the loss should be the same as the number of elements in the label _, prepared_for_class = self.model_tester.prepare_config_and_inputs_for_keras_fit() added_label = prepared_for_class[ - sorted(list(prepared_for_class.keys() - inputs_dict.keys()), reverse=True)[0] + sorted(prepared_for_class.keys() - inputs_dict.keys(), reverse=True)[0] ] loss_size = tf.size(added_label) diff --git a/tests/models/groupvit/test_modeling_tf_groupvit.py b/tests/models/groupvit/test_modeling_tf_groupvit.py index 6283ab8988..24a493445c 100644 --- a/tests/models/groupvit/test_modeling_tf_groupvit.py +++ b/tests/models/groupvit/test_modeling_tf_groupvit.py @@ -628,7 +628,7 @@ class TFGroupViTModelTest(TFModelTesterMixin, unittest.TestCase): if self.__class__.__name__ == "TFGroupViTModelTest": inputs_dict.pop("return_loss", None) - tf_main_layer_classes = set( + tf_main_layer_classes = { module_member for model_class in self.all_model_classes for module in (import_module(model_class.__module__),) @@ -640,7 +640,7 @@ class TFGroupViTModelTest(TFModelTesterMixin, unittest.TestCase): if isinstance(module_member, type) and tf.keras.layers.Layer in module_member.__bases__ and getattr(module_member, "_keras_serializable", False) - ) + } for main_layer_class in tf_main_layer_classes: # T5MainLayer needs an embed_tokens parameter when called without the inputs_embeds parameter if "T5" in main_layer_class.__name__: diff --git a/tests/models/jukebox/test_modeling_jukebox.py b/tests/models/jukebox/test_modeling_jukebox.py index e77c8cb2eb..5f073bbd49 100644 --- a/tests/models/jukebox/test_modeling_jukebox.py +++ b/tests/models/jukebox/test_modeling_jukebox.py @@ -30,10 +30,10 @@ if is_torch_available(): class Jukebox1bModelTester(unittest.TestCase): all_model_classes = (JukeboxModel,) if is_torch_available() else () model_id = "openai/jukebox-1b-lyrics" - metas = dict( - artist="Zac Brown Band", - genres="Country", - lyrics="""I met a traveller from an antique land, + metas = { + "artist": "Zac Brown Band", + "genres": "Country", + "lyrics": """I met a traveller from an antique land, Who said "Two vast and trunkless legs of stone Stand in the desert. . . . Near them, on the sand, Half sunk a shattered visage lies, whose frown, @@ -48,7 +48,7 @@ class Jukebox1bModelTester(unittest.TestCase): Of that colossal Wreck, boundless and bare The lone and level sands stretch far away """, - ) + } # fmt: off EXPECTED_OUTPUT_2 = [ 1864, 1536, 1213, 1870, 1357, 1536, 519, 880, 1323, 789, 1082, 534, @@ -180,7 +180,7 @@ class Jukebox1bModelTester(unittest.TestCase): model = JukeboxModel.from_pretrained(self.model_id, min_duration=0).eval() set_seed(0) waveform = torch.rand((1, 5120, 1)) - tokens = [i for i in self.prepare_inputs()] + tokens = list(self.prepare_inputs()) zs = [model.vqvae.encode(waveform, start_level=2, bs_chunks=waveform.shape[0])[0], None, None] zs = model._sample( @@ -220,10 +220,10 @@ class Jukebox1bModelTester(unittest.TestCase): class Jukebox5bModelTester(unittest.TestCase): all_model_classes = (JukeboxModel,) if is_torch_available() else () model_id = "openai/jukebox-5b-lyrics" - metas = dict( - artist="Zac Brown Band", - genres="Country", - lyrics="""I met a traveller from an antique land, + metas = { + "artist": "Zac Brown Band", + "genres": "Country", + "lyrics": """I met a traveller from an antique land, Who said "Two vast and trunkless legs of stone Stand in the desert. . . . Near them, on the sand, Half sunk a shattered visage lies, whose frown, @@ -238,7 +238,7 @@ class Jukebox5bModelTester(unittest.TestCase): Of that colossal Wreck, boundless and bare The lone and level sands stretch far away """, - ) + } # fmt: off EXPECTED_OUTPUT_2 = [ diff --git a/tests/models/jukebox/test_tokenization_jukebox.py b/tests/models/jukebox/test_tokenization_jukebox.py index 7ce2585bdd..c434cf6aa1 100644 --- a/tests/models/jukebox/test_tokenization_jukebox.py +++ b/tests/models/jukebox/test_tokenization_jukebox.py @@ -21,10 +21,10 @@ from transformers.testing_utils import require_torch class JukeboxTokenizationTest(unittest.TestCase): tokenizer_class = JukeboxTokenizer - metas = dict( - artist="Zac Brown Band", - genres="Country", - lyrics="""I met a traveller from an antique land, + metas = { + "artist": "Zac Brown Band", + "genres": "Country", + "lyrics": """I met a traveller from an antique land, Who said "Two vast and trunkless legs of stone Stand in the desert. . . . Near them, on the sand, Half sunk a shattered visage lies, whose frown, @@ -39,7 +39,7 @@ class JukeboxTokenizationTest(unittest.TestCase): Of that colossal Wreck, boundless and bare The lone and level sands stretch far away """, - ) + } @require_torch def test_1b_lyrics_tokenizer(self): diff --git a/tests/models/layoutlmv2/test_processor_layoutlmv2.py b/tests/models/layoutlmv2/test_processor_layoutlmv2.py index 18f4f8d5ac..91a8da9caf 100644 --- a/tests/models/layoutlmv2/test_processor_layoutlmv2.py +++ b/tests/models/layoutlmv2/test_processor_layoutlmv2.py @@ -233,7 +233,7 @@ class LayoutLMv2ProcessorIntegrationTests(unittest.TestCase): # verify keys expected_keys = ["attention_mask", "bbox", "image", "input_ids", "token_type_ids"] - actual_keys = sorted(list(input_processor.keys())) + actual_keys = sorted(input_processor.keys()) self.assertListEqual(actual_keys, expected_keys) # verify image @@ -253,7 +253,7 @@ class LayoutLMv2ProcessorIntegrationTests(unittest.TestCase): # verify keys expected_keys = ["attention_mask", "bbox", "image", "input_ids", "token_type_ids"] - actual_keys = sorted(list(input_processor.keys())) + actual_keys = sorted(input_processor.keys()) self.assertListEqual(actual_keys, expected_keys) # verify images @@ -301,7 +301,7 @@ class LayoutLMv2ProcessorIntegrationTests(unittest.TestCase): # verify keys expected_keys = ["attention_mask", "bbox", "image", "input_ids", "token_type_ids"] - actual_keys = sorted(list(input_processor.keys())) + actual_keys = sorted(input_processor.keys()) self.assertListEqual(actual_keys, expected_keys) # verify input_ids @@ -340,7 +340,7 @@ class LayoutLMv2ProcessorIntegrationTests(unittest.TestCase): # verify keys expected_keys = ["attention_mask", "bbox", "image", "input_ids", "labels", "token_type_ids"] - actual_keys = sorted(list(input_processor.keys())) + actual_keys = sorted(input_processor.keys()) self.assertListEqual(actual_keys, expected_keys) # verify input_ids @@ -362,7 +362,7 @@ class LayoutLMv2ProcessorIntegrationTests(unittest.TestCase): # verify keys expected_keys = ["attention_mask", "bbox", "image", "input_ids", "labels", "token_type_ids"] - actual_keys = sorted(list(input_processor.keys())) + actual_keys = sorted(input_processor.keys()) self.assertListEqual(actual_keys, expected_keys) # verify input_ids @@ -403,7 +403,7 @@ class LayoutLMv2ProcessorIntegrationTests(unittest.TestCase): # verify keys expected_keys = ["attention_mask", "bbox", "image", "input_ids", "token_type_ids"] - actual_keys = sorted(list(input_processor.keys())) + actual_keys = sorted(input_processor.keys()) self.assertListEqual(actual_keys, expected_keys) # verify input_ids @@ -422,7 +422,7 @@ class LayoutLMv2ProcessorIntegrationTests(unittest.TestCase): # verify keys expected_keys = ["attention_mask", "bbox", "image", "input_ids", "token_type_ids"] - actual_keys = sorted(list(input_processor.keys())) + actual_keys = sorted(input_processor.keys()) self.assertListEqual(actual_keys, expected_keys) # verify input_ids @@ -456,7 +456,7 @@ class LayoutLMv2ProcessorIntegrationTests(unittest.TestCase): # verify keys expected_keys = ["attention_mask", "bbox", "image", "input_ids", "token_type_ids"] - actual_keys = sorted(list(input_processor.keys())) + actual_keys = sorted(input_processor.keys()) self.assertListEqual(actual_keys, expected_keys) # verify input_ids @@ -472,7 +472,7 @@ class LayoutLMv2ProcessorIntegrationTests(unittest.TestCase): # verify keys expected_keys = ["attention_mask", "bbox", "image", "input_ids", "token_type_ids"] - actual_keys = sorted(list(input_processor.keys())) + actual_keys = sorted(input_processor.keys()) self.assertListEqual(actual_keys, expected_keys) # verify input_ids diff --git a/tests/models/layoutlmv3/test_modeling_tf_layoutlmv3.py b/tests/models/layoutlmv3/test_modeling_tf_layoutlmv3.py index 39de55efad..f6b51c6d71 100644 --- a/tests/models/layoutlmv3/test_modeling_tf_layoutlmv3.py +++ b/tests/models/layoutlmv3/test_modeling_tf_layoutlmv3.py @@ -320,7 +320,7 @@ class TFLayoutLMv3ModelTest(TFModelTesterMixin, unittest.TestCase): # The number of elements in the loss should be the same as the number of elements in the label prepared_for_class = self._prepare_for_class(inputs_dict.copy(), model_class, return_labels=True) added_label = prepared_for_class[ - sorted(list(prepared_for_class.keys() - inputs_dict.keys()), reverse=True)[0] + sorted(prepared_for_class.keys() - inputs_dict.keys(), reverse=True)[0] ] expected_loss_size = added_label.shape.as_list()[:1] diff --git a/tests/models/layoutlmv3/test_processor_layoutlmv3.py b/tests/models/layoutlmv3/test_processor_layoutlmv3.py index 56f7925846..f649e0c275 100644 --- a/tests/models/layoutlmv3/test_processor_layoutlmv3.py +++ b/tests/models/layoutlmv3/test_processor_layoutlmv3.py @@ -213,7 +213,7 @@ class LayoutLMv3ProcessorIntegrationTests(unittest.TestCase): # verify keys expected_keys = ["attention_mask", "bbox", "input_ids", "pixel_values"] - actual_keys = sorted(list(input_processor.keys())) + actual_keys = sorted(input_processor.keys()) self.assertListEqual(actual_keys, expected_keys) # verify image @@ -235,7 +235,7 @@ class LayoutLMv3ProcessorIntegrationTests(unittest.TestCase): # verify keys expected_keys = ["attention_mask", "bbox", "input_ids", "pixel_values"] - actual_keys = sorted(list(input_processor.keys())) + actual_keys = sorted(input_processor.keys()) self.assertListEqual(actual_keys, expected_keys) # verify images @@ -285,7 +285,7 @@ class LayoutLMv3ProcessorIntegrationTests(unittest.TestCase): # verify keys expected_keys = ["attention_mask", "bbox", "input_ids", "pixel_values"] - actual_keys = sorted(list(input_processor.keys())) + actual_keys = sorted(input_processor.keys()) self.assertListEqual(actual_keys, expected_keys) # verify input_ids @@ -324,7 +324,7 @@ class LayoutLMv3ProcessorIntegrationTests(unittest.TestCase): # verify keys expected_keys = ["attention_mask", "bbox", "input_ids", "labels", "pixel_values"] - actual_keys = sorted(list(input_processor.keys())) + actual_keys = sorted(input_processor.keys()) self.assertListEqual(actual_keys, expected_keys) # verify input_ids @@ -346,7 +346,7 @@ class LayoutLMv3ProcessorIntegrationTests(unittest.TestCase): # verify keys expected_keys = ["attention_mask", "bbox", "input_ids", "labels", "pixel_values"] - actual_keys = sorted(list(input_processor.keys())) + actual_keys = sorted(input_processor.keys()) self.assertListEqual(actual_keys, expected_keys) # verify input_ids @@ -387,7 +387,7 @@ class LayoutLMv3ProcessorIntegrationTests(unittest.TestCase): # verify keys expected_keys = ["attention_mask", "bbox", "input_ids", "pixel_values"] - actual_keys = sorted(list(input_processor.keys())) + actual_keys = sorted(input_processor.keys()) self.assertListEqual(actual_keys, expected_keys) # verify input_ids @@ -406,7 +406,7 @@ class LayoutLMv3ProcessorIntegrationTests(unittest.TestCase): # verify keys expected_keys = ["attention_mask", "bbox", "input_ids", "pixel_values"] - actual_keys = sorted(list(input_processor.keys())) + actual_keys = sorted(input_processor.keys()) self.assertListEqual(actual_keys, expected_keys) # verify input_ids @@ -440,7 +440,7 @@ class LayoutLMv3ProcessorIntegrationTests(unittest.TestCase): # verify keys expected_keys = ["attention_mask", "bbox", "input_ids", "pixel_values"] - actual_keys = sorted(list(input_processor.keys())) + actual_keys = sorted(input_processor.keys()) self.assertListEqual(actual_keys, expected_keys) # verify input_ids @@ -456,7 +456,7 @@ class LayoutLMv3ProcessorIntegrationTests(unittest.TestCase): # verify keys expected_keys = ["attention_mask", "bbox", "input_ids", "pixel_values"] - actual_keys = sorted(list(input_processor.keys())) + actual_keys = sorted(input_processor.keys()) self.assertListEqual(actual_keys, expected_keys) # verify input_ids diff --git a/tests/models/layoutxlm/test_processor_layoutxlm.py b/tests/models/layoutxlm/test_processor_layoutxlm.py index 2843528bae..5d74bacfa0 100644 --- a/tests/models/layoutxlm/test_processor_layoutxlm.py +++ b/tests/models/layoutxlm/test_processor_layoutxlm.py @@ -228,7 +228,7 @@ class LayoutXLMProcessorIntegrationTests(unittest.TestCase): # verify keys expected_keys = ["attention_mask", "bbox", "image", "input_ids"] - actual_keys = sorted(list(input_processor.keys())) + actual_keys = sorted(input_processor.keys()) self.assertListEqual(actual_keys, expected_keys) # verify image @@ -250,7 +250,7 @@ class LayoutXLMProcessorIntegrationTests(unittest.TestCase): # verify keys expected_keys = ["attention_mask", "bbox", "image", "input_ids"] - actual_keys = sorted(list(input_processor.keys())) + actual_keys = sorted(input_processor.keys()) self.assertListEqual(actual_keys, expected_keys) # verify images @@ -300,7 +300,7 @@ class LayoutXLMProcessorIntegrationTests(unittest.TestCase): # verify keys expected_keys = ["attention_mask", "bbox", "image", "input_ids"] - actual_keys = sorted(list(input_processor.keys())) + actual_keys = sorted(input_processor.keys()) self.assertListEqual(actual_keys, expected_keys) # verify input_ids @@ -339,7 +339,7 @@ class LayoutXLMProcessorIntegrationTests(unittest.TestCase): # verify keys expected_keys = ["attention_mask", "bbox", "image", "input_ids", "labels"] - actual_keys = sorted(list(input_processor.keys())) + actual_keys = sorted(input_processor.keys()) self.assertListEqual(actual_keys, expected_keys) # verify input_ids @@ -361,7 +361,7 @@ class LayoutXLMProcessorIntegrationTests(unittest.TestCase): # verify keys expected_keys = ["attention_mask", "bbox", "image", "input_ids", "labels"] - actual_keys = sorted(list(input_processor.keys())) + actual_keys = sorted(input_processor.keys()) self.assertListEqual(actual_keys, expected_keys) # verify input_ids @@ -402,7 +402,7 @@ class LayoutXLMProcessorIntegrationTests(unittest.TestCase): # verify keys expected_keys = ["attention_mask", "bbox", "image", "input_ids"] - actual_keys = sorted(list(input_processor.keys())) + actual_keys = sorted(input_processor.keys()) self.assertListEqual(actual_keys, expected_keys) # verify input_ids @@ -421,7 +421,7 @@ class LayoutXLMProcessorIntegrationTests(unittest.TestCase): # verify keys expected_keys = ["attention_mask", "bbox", "image", "input_ids"] - actual_keys = sorted(list(input_processor.keys())) + actual_keys = sorted(input_processor.keys()) self.assertListEqual(actual_keys, expected_keys) # verify input_ids @@ -455,7 +455,7 @@ class LayoutXLMProcessorIntegrationTests(unittest.TestCase): # verify keys expected_keys = ["attention_mask", "bbox", "image", "input_ids"] - actual_keys = sorted(list(input_processor.keys())) + actual_keys = sorted(input_processor.keys()) self.assertListEqual(actual_keys, expected_keys) # verify input_ids @@ -471,7 +471,7 @@ class LayoutXLMProcessorIntegrationTests(unittest.TestCase): # verify keys expected_keys = ["attention_mask", "bbox", "image", "input_ids"] - actual_keys = sorted(list(input_processor.keys())) + actual_keys = sorted(input_processor.keys()) self.assertListEqual(actual_keys, expected_keys) # verify input_ids diff --git a/tests/models/markuplm/test_processor_markuplm.py b/tests/models/markuplm/test_processor_markuplm.py index 141d7bae18..eb09701593 100644 --- a/tests/models/markuplm/test_processor_markuplm.py +++ b/tests/models/markuplm/test_processor_markuplm.py @@ -204,7 +204,7 @@ class MarkupLMProcessorIntegrationTests(unittest.TestCase): # verify keys expected_keys = ["attention_mask", "input_ids", "token_type_ids", "xpath_subs_seq", "xpath_tags_seq"] - actual_keys = sorted(list(inputs.keys())) + actual_keys = sorted(inputs.keys()) self.assertListEqual(actual_keys, expected_keys) # verify input_ids @@ -216,7 +216,7 @@ class MarkupLMProcessorIntegrationTests(unittest.TestCase): # verify keys expected_keys = ["attention_mask", "input_ids", "token_type_ids", "xpath_subs_seq", "xpath_tags_seq"] - actual_keys = sorted(list(inputs.keys())) + actual_keys = sorted(inputs.keys()) self.assertListEqual(actual_keys, expected_keys) # verify input_ids @@ -260,7 +260,7 @@ class MarkupLMProcessorIntegrationTests(unittest.TestCase): # verify keys expected_keys = ["attention_mask", "input_ids", "token_type_ids", "xpath_subs_seq", "xpath_tags_seq"] - actual_keys = sorted(list(inputs.keys())) + actual_keys = sorted(inputs.keys()) self.assertListEqual(actual_keys, expected_keys) # verify input_ids @@ -294,7 +294,7 @@ class MarkupLMProcessorIntegrationTests(unittest.TestCase): "xpath_subs_seq", "xpath_tags_seq", ] - actual_keys = sorted(list(inputs.keys())) + actual_keys = sorted(inputs.keys()) self.assertListEqual(actual_keys, expected_keys) # verify input_ids @@ -331,7 +331,7 @@ class MarkupLMProcessorIntegrationTests(unittest.TestCase): "xpath_subs_seq", "xpath_tags_seq", ] - actual_keys = sorted(list(inputs.keys())) + actual_keys = sorted(inputs.keys()) self.assertListEqual(actual_keys, expected_keys) # verify input_ids @@ -367,7 +367,7 @@ class MarkupLMProcessorIntegrationTests(unittest.TestCase): # verify keys expected_keys = ["attention_mask", "input_ids", "token_type_ids", "xpath_subs_seq", "xpath_tags_seq"] - actual_keys = sorted(list(inputs.keys())) + actual_keys = sorted(inputs.keys()) self.assertListEqual(actual_keys, expected_keys) # verify input_ids @@ -390,7 +390,7 @@ class MarkupLMProcessorIntegrationTests(unittest.TestCase): # verify keys expected_keys = ["attention_mask", "input_ids", "token_type_ids", "xpath_subs_seq", "xpath_tags_seq"] - actual_keys = sorted(list(inputs.keys())) + actual_keys = sorted(inputs.keys()) self.assertListEqual(actual_keys, expected_keys) # verify input_ids @@ -425,7 +425,7 @@ class MarkupLMProcessorIntegrationTests(unittest.TestCase): # verify keys expected_keys = ["attention_mask", "input_ids", "token_type_ids", "xpath_subs_seq", "xpath_tags_seq"] - actual_keys = sorted(list(inputs.keys())) + actual_keys = sorted(inputs.keys()) self.assertListEqual(actual_keys, expected_keys) # verify input_ids @@ -444,7 +444,7 @@ class MarkupLMProcessorIntegrationTests(unittest.TestCase): # verify keys expected_keys = ["attention_mask", "input_ids", "token_type_ids", "xpath_subs_seq", "xpath_tags_seq"] - actual_keys = sorted(list(inputs.keys())) + actual_keys = sorted(inputs.keys()) self.assertListEqual(actual_keys, expected_keys) # verify input_ids diff --git a/tests/models/mobilevit/test_modeling_tf_mobilevit.py b/tests/models/mobilevit/test_modeling_tf_mobilevit.py index eea07f9413..9bb3872274 100644 --- a/tests/models/mobilevit/test_modeling_tf_mobilevit.py +++ b/tests/models/mobilevit/test_modeling_tf_mobilevit.py @@ -295,7 +295,7 @@ class MobileViTModelTest(TFModelTesterMixin, unittest.TestCase): # The number of elements in the loss should be the same as the number of elements in the label prepared_for_class = self._prepare_for_class(inputs_dict.copy(), model_class, return_labels=True) added_label = prepared_for_class[ - sorted(list(prepared_for_class.keys() - inputs_dict.keys()), reverse=True)[0] + sorted(prepared_for_class.keys() - inputs_dict.keys(), reverse=True)[0] ] expected_loss_size = added_label.shape.as_list()[:1] diff --git a/tests/models/perceiver/test_modeling_perceiver.py b/tests/models/perceiver/test_modeling_perceiver.py index f07b874676..872aed47e2 100644 --- a/tests/models/perceiver/test_modeling_perceiver.py +++ b/tests/models/perceiver/test_modeling_perceiver.py @@ -166,9 +166,11 @@ class PerceiverModelTester: audio = torch.randn( (self.batch_size, self.num_frames * self.audio_samples_per_frame, 1), device=torch_device ) - inputs = dict( - image=images, audio=audio, label=torch.zeros((self.batch_size, self.num_labels), device=torch_device) - ) + inputs = { + "image": images, + "audio": audio, + "label": torch.zeros((self.batch_size, self.num_labels), device=torch_device), + } else: raise ValueError(f"Model class {model_class} not supported") @@ -734,7 +736,7 @@ class PerceiverModelTest(ModelTesterMixin, unittest.TestCase): continue config, inputs, input_mask, _, _ = self.model_tester.prepare_config_and_inputs(model_class=model_class) - inputs_dict = dict(inputs=inputs, attention_mask=input_mask) + inputs_dict = {"inputs": inputs, "attention_mask": input_mask} for problem_type in problem_types: with self.subTest(msg=f"Testing {model_class} with {problem_type['title']}"): diff --git a/tests/models/roc_bert/test_tokenization_roc_bert.py b/tests/models/roc_bert/test_tokenization_roc_bert.py index 334a347a1e..0f8fe08efd 100644 --- a/tests/models/roc_bert/test_tokenization_roc_bert.py +++ b/tests/models/roc_bert/test_tokenization_roc_bert.py @@ -44,8 +44,8 @@ class BertTokenizationTest(TokenizerTesterMixin, unittest.TestCase): super().setUp() vocab_tokens = ["[UNK]", "[CLS]", "[SEP]", "[PAD]", "[MASK]", "你", "好", "是", "谁", "a", "b", "c", "d"] - word_shape = dict() - word_pronunciation = dict() + word_shape = {} + word_pronunciation = {} for i, value in enumerate(vocab_tokens): word_shape[value] = i word_pronunciation[value] = i diff --git a/tests/models/segformer/test_modeling_tf_segformer.py b/tests/models/segformer/test_modeling_tf_segformer.py index bfcc580bb4..4bb423bfca 100644 --- a/tests/models/segformer/test_modeling_tf_segformer.py +++ b/tests/models/segformer/test_modeling_tf_segformer.py @@ -362,9 +362,7 @@ class TFSegformerModelTest(TFModelTesterMixin, unittest.TestCase): _, prepared_for_class = self.model_tester.prepare_config_and_inputs_for_keras_fit( for_segmentation=for_segmentation ) - added_label = prepared_for_class[ - sorted(list(prepared_for_class.keys() - inputs_dict.keys()), reverse=True)[0] - ] + added_label = prepared_for_class[sorted(prepared_for_class.keys() - inputs_dict.keys(), reverse=True)[0]] loss_size = tf.size(added_label) # Test that model correctly compute the loss with kwargs diff --git a/tests/models/speecht5/test_feature_extraction_speecht5.py b/tests/models/speecht5/test_feature_extraction_speecht5.py index 34cf071bd1..390b769b8d 100644 --- a/tests/models/speecht5/test_feature_extraction_speecht5.py +++ b/tests/models/speecht5/test_feature_extraction_speecht5.py @@ -372,7 +372,7 @@ class SpeechT5FeatureExtractionTest(SequenceFeatureExtractionTestMixin, unittest ) self.assertIn("attention_mask", processed_pad) self.assertListEqual( - list(processed_pad.attention_mask.shape), list((processed_pad[input_name].shape[0], max_length)) + list(processed_pad.attention_mask.shape), [processed_pad[input_name].shape[0], max_length] ) self.assertListEqual( processed_pad.attention_mask[:, :max_length].sum(-1).tolist(), [max_length for x in speech_inputs] diff --git a/tests/models/t5/test_tokenization_t5.py b/tests/models/t5/test_tokenization_t5.py index 8dbef67297..16ff9f04de 100644 --- a/tests/models/t5/test_tokenization_t5.py +++ b/tests/models/t5/test_tokenization_t5.py @@ -387,7 +387,7 @@ class T5TokenizationTest(TokenizerTesterMixin, unittest.TestCase): def test_get_sentinel_token_ids(self): tokenizer = T5Tokenizer(SAMPLE_VOCAB, extra_ids=10) - self.assertListEqual(sorted(tokenizer.get_sentinel_token_ids()), sorted([i for i in range(1000, 1010)])) + self.assertListEqual(sorted(tokenizer.get_sentinel_token_ids()), sorted(range(1000, 1010))) def test_get_sentinel_tokens_for_fasttokenizer(self): tokenizer = T5TokenizerFast(SAMPLE_VOCAB, extra_ids=10) @@ -398,4 +398,4 @@ class T5TokenizationTest(TokenizerTesterMixin, unittest.TestCase): def test_get_sentinel_token_ids_for_fasttokenizer(self): tokenizer = T5TokenizerFast(SAMPLE_VOCAB, extra_ids=10) - self.assertListEqual(sorted(tokenizer.get_sentinel_token_ids()), sorted([i for i in range(1000, 1010)])) + self.assertListEqual(sorted(tokenizer.get_sentinel_token_ids()), sorted(range(1000, 1010))) diff --git a/tests/models/transfo_xl/test_modeling_transfo_xl.py b/tests/models/transfo_xl/test_modeling_transfo_xl.py index 7375475a95..89ac1d3b09 100644 --- a/tests/models/transfo_xl/test_modeling_transfo_xl.py +++ b/tests/models/transfo_xl/test_modeling_transfo_xl.py @@ -347,7 +347,7 @@ class TransfoXLModelTest(ModelTesterMixin, GenerationTesterMixin, unittest.TestC # Retrieve the cutoffs and copy them copied_cutoffs = copy.copy(model_embed.cutoffs) - test_layers = [x for x in range(config.div_val)] + test_layers = list(range(config.div_val)) for layer in test_layers: # Check that resizing the token embeddings with a larger vocab size increases the model's vocab size model_embed = model.resize_token_embeddings(model_vocab_size + 10, layer) diff --git a/tests/models/tvlt/test_modeling_tvlt.py b/tests/models/tvlt/test_modeling_tvlt.py index 0f3d5ab68a..bb6d2df0d9 100644 --- a/tests/models/tvlt/test_modeling_tvlt.py +++ b/tests/models/tvlt/test_modeling_tvlt.py @@ -581,7 +581,7 @@ class TvltModelIntegrationTest(unittest.TestCase): audio = prepare_audio() video_inputs = image_processor(video, return_tensors="pt").to(torch_device) audio_inputs = audio_feature_extractor(audio, return_tensors="pt").to(torch_device) - inputs = dict() + inputs = {} inputs.update(video_inputs) inputs.update(audio_inputs) @@ -606,7 +606,7 @@ class TvltModelIntegrationTest(unittest.TestCase): video_mixed_inputs = image_processor(video_mixed, is_mixed=True, return_tensors="pt").to(torch_device) audio_inputs = audio_feature_extractor(audio, return_tensors="pt", mask_audio=True).to(torch_device) labels = torch.tensor([[0.0]], device=torch_device) - inputs = dict() + inputs = {} inputs.update(video_inputs) inputs.update(video_mixed_inputs) inputs.update(audio_inputs) diff --git a/tests/models/vit_mae/test_modeling_tf_vit_mae.py b/tests/models/vit_mae/test_modeling_tf_vit_mae.py index 8c19c01491..48bda3aec7 100644 --- a/tests/models/vit_mae/test_modeling_tf_vit_mae.py +++ b/tests/models/vit_mae/test_modeling_tf_vit_mae.py @@ -333,7 +333,7 @@ class TFViTMAEModelTest(TFModelTesterMixin, unittest.TestCase): config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common() - tf_main_layer_classes = set( + tf_main_layer_classes = { module_member for model_class in self.all_model_classes for module in (import_module(model_class.__module__),) @@ -345,7 +345,7 @@ class TFViTMAEModelTest(TFModelTesterMixin, unittest.TestCase): if isinstance(module_member, type) and tf.keras.layers.Layer in module_member.__bases__ and getattr(module_member, "_keras_serializable", False) - ) + } num_patches = int((config.image_size // config.patch_size) ** 2) noise = np.random.uniform(size=(self.model_tester.batch_size, num_patches)) diff --git a/tests/models/wav2vec2/test_tokenization_wav2vec2.py b/tests/models/wav2vec2/test_tokenization_wav2vec2.py index 4027e0cefc..cf5dc100c2 100644 --- a/tests/models/wav2vec2/test_tokenization_wav2vec2.py +++ b/tests/models/wav2vec2/test_tokenization_wav2vec2.py @@ -231,7 +231,7 @@ class Wav2Vec2TokenizerTest(unittest.TestCase): tokenizer_files = tokenizer.save_pretrained(tmpdirname2) self.assertSequenceEqual( sorted(tuple(VOCAB_FILES_NAMES.values()) + ("special_tokens_map.json", "added_tokens.json")), - sorted(tuple(x.split(os.path.sep)[-1] for x in tokenizer_files)), + sorted(x.split(os.path.sep)[-1] for x in tokenizer_files), ) # Checks everything loads correctly in the same way @@ -456,7 +456,7 @@ class Wav2Vec2CTCTokenizerTest(TokenizerTesterMixin, unittest.TestCase): def test_special_characters_in_vocab(self): sent = "ʈʰ æ æ̃ ˧ kʰ" - vocab_dict = {k: v for v, k in enumerate({phoneme for phoneme in sent.split()})} + vocab_dict = {k: v for v, k in enumerate(set(sent.split()))} vocab_file = os.path.join(self.tmpdirname, "vocab_special.json") with open(vocab_file, "w") as f: diff --git a/tests/models/wav2vec2_with_lm/test_processor_wav2vec2_with_lm.py b/tests/models/wav2vec2_with_lm/test_processor_wav2vec2_with_lm.py index df5db0a3e2..a98ea55d0b 100644 --- a/tests/models/wav2vec2_with_lm/test_processor_wav2vec2_with_lm.py +++ b/tests/models/wav2vec2_with_lm/test_processor_wav2vec2_with_lm.py @@ -215,7 +215,7 @@ class Wav2Vec2ProcessorWithLMTest(unittest.TestCase): with get_context(pool_context).Pool() as pool: decoded_processor = processor.batch_decode(logits, pool) - logits_list = [array for array in logits] + logits_list = list(logits) with get_context("fork").Pool() as p: decoded_beams = decoder.decode_beams_batch(p, logits_list) @@ -252,7 +252,7 @@ class Wav2Vec2ProcessorWithLMTest(unittest.TestCase): ) decoded_processor = decoded_processor_out.text - logits_list = [array for array in logits] + logits_list = list(logits) with get_context("fork").Pool() as pool: decoded_decoder_out = decoder.decode_beams_batch( @@ -299,7 +299,7 @@ class Wav2Vec2ProcessorWithLMTest(unittest.TestCase): ) decoded_processor = decoded_processor_out.text - logits_list = [array for array in logits] + logits_list = list(logits) decoder.reset_params( alpha=alpha, beta=beta, diff --git a/tests/models/xlnet/test_modeling_tf_xlnet.py b/tests/models/xlnet/test_modeling_tf_xlnet.py index a8686d4a2b..230ef7a28e 100644 --- a/tests/models/xlnet/test_modeling_tf_xlnet.py +++ b/tests/models/xlnet/test_modeling_tf_xlnet.py @@ -400,7 +400,7 @@ class TFXLNetModelTest(TFModelTesterMixin, unittest.TestCase): # The number of elements in the loss should be the same as the number of elements in the label prepared_for_class = self._prepare_for_class(inputs_dict.copy(), model_class, return_labels=True) added_label = prepared_for_class[ - sorted(list(prepared_for_class.keys() - inputs_dict.keys()), reverse=True)[0] + sorted(prepared_for_class.keys() - inputs_dict.keys(), reverse=True)[0] ] expected_loss_size = added_label.shape.as_list()[:1] diff --git a/tests/pipelines/test_pipelines_common.py b/tests/pipelines/test_pipelines_common.py index 6c61909527..4070966437 100644 --- a/tests/pipelines/test_pipelines_common.py +++ b/tests/pipelines/test_pipelines_common.py @@ -606,7 +606,7 @@ class PipelineUtilsTest(unittest.TestCase): dataset = PipelineIterator(dummy_dataset, add, {"extra": 2}) self.assertEqual(len(dataset), 4) - outputs = [item for item in dataset] + outputs = list(dataset) self.assertEqual(outputs, [2, 3, 4, 5]) @require_torch @@ -624,7 +624,7 @@ class PipelineUtilsTest(unittest.TestCase): with self.assertRaises(TypeError): len(dataset) - outputs = [item for item in dataset] + outputs = list(dataset) self.assertEqual(outputs, [2, 3, 4, 5]) @require_torch @@ -638,7 +638,7 @@ class PipelineUtilsTest(unittest.TestCase): dataset = PipelineIterator(dummy_dataset, add, {"extra": 2}, loader_batch_size=3) - outputs = [item for item in dataset] + outputs = list(dataset) self.assertEqual(outputs, [{"id": 2}, {"id": 3}, {"id": 4}, {"id": 5}]) @require_torch @@ -654,7 +654,7 @@ class PipelineUtilsTest(unittest.TestCase): dataset = PipelineIterator(dummy_dataset, add, {"extra": 2}, loader_batch_size=3) - outputs = [item for item in dataset] + outputs = list(dataset) self.assertEqual( nested_simplify(outputs), [{"id": [[12, 22]]}, {"id": [[2, 3]]}, {"id": [[2, 4]]}, {"id": [[5]]}] ) @@ -671,7 +671,7 @@ class PipelineUtilsTest(unittest.TestCase): dataset = PipelineChunkIterator(dataset, preprocess_chunk, {}, loader_batch_size=3) - outputs = [item for item in dataset] + outputs = list(dataset) self.assertEqual(outputs, [0, 1, 0, 1, 2]) @@ -692,7 +692,7 @@ class PipelineUtilsTest(unittest.TestCase): dataset = PipelinePackIterator(dataset, pack, {}) - outputs = [item for item in dataset] + outputs = list(dataset) self.assertEqual( outputs, [ @@ -719,7 +719,7 @@ class PipelineUtilsTest(unittest.TestCase): dataset = PipelinePackIterator(dummy_dataset, add, {"extra": 2}, loader_batch_size=3) - outputs = [item for item in dataset] + outputs = list(dataset) self.assertEqual(outputs, [[{"id": 2}, {"id": 3}], [{"id": 4}, {"id": 5}]]) # is_false Across batch @@ -730,7 +730,7 @@ class PipelineUtilsTest(unittest.TestCase): dataset = PipelinePackIterator(dummy_dataset, add, {"extra": 2}, loader_batch_size=3) - outputs = [item for item in dataset] + outputs = list(dataset) self.assertEqual(outputs, [[{"id": 2}, {"id": 3}, {"id": 4}, {"id": 5}]]) @slow diff --git a/tests/pipelines/test_pipelines_fill_mask.py b/tests/pipelines/test_pipelines_fill_mask.py index 43825ae0f5..b5260488fb 100644 --- a/tests/pipelines/test_pipelines_fill_mask.py +++ b/tests/pipelines/test_pipelines_fill_mask.py @@ -281,7 +281,7 @@ class FillMaskPipelineTests(unittest.TestCase, metaclass=PipelineTestCaseMeta): def run_test_targets(self, model, tokenizer): vocab = tokenizer.get_vocab() - targets = list(sorted(vocab.keys()))[:2] + targets = sorted(vocab.keys())[:2] # Pipeline argument fill_masker = FillMaskPipeline(model=model, tokenizer=tokenizer, targets=targets) outputs = fill_masker(f"This is a {tokenizer.mask_token}") @@ -293,8 +293,8 @@ class FillMaskPipelineTests(unittest.TestCase, metaclass=PipelineTestCaseMeta): ], ) target_ids = {vocab[el] for el in targets} - self.assertEqual(set(el["token"] for el in outputs), target_ids) - self.assertEqual(set(el["token_str"] for el in outputs), set(targets)) + self.assertEqual({el["token"] for el in outputs}, target_ids) + self.assertEqual({el["token_str"] for el in outputs}, set(targets)) # Call argument fill_masker = FillMaskPipeline(model=model, tokenizer=tokenizer) @@ -307,8 +307,8 @@ class FillMaskPipelineTests(unittest.TestCase, metaclass=PipelineTestCaseMeta): ], ) target_ids = {vocab[el] for el in targets} - self.assertEqual(set(el["token"] for el in outputs), target_ids) - self.assertEqual(set(el["token_str"] for el in outputs), set(targets)) + self.assertEqual({el["token"] for el in outputs}, target_ids) + self.assertEqual({el["token_str"] for el in outputs}, set(targets)) # Score equivalence outputs = fill_masker(f"This is a {tokenizer.mask_token}", targets=targets) @@ -354,7 +354,7 @@ class FillMaskPipelineTests(unittest.TestCase, metaclass=PipelineTestCaseMeta): fill_masker = FillMaskPipeline(model=model, tokenizer=tokenizer) # top_k=2, ntargets=3 - targets = list(sorted(vocab.keys()))[:3] + targets = sorted(vocab.keys())[:3] outputs = fill_masker(f"This is a {tokenizer.mask_token}", top_k=2, targets=targets) # If we use the most probably targets, and filter differently, we should still @@ -369,7 +369,7 @@ class FillMaskPipelineTests(unittest.TestCase, metaclass=PipelineTestCaseMeta): fill_masker = FillMaskPipeline(model=model, tokenizer=tokenizer) vocab = tokenizer.get_vocab() # String duplicates + id duplicates - targets = list(sorted(vocab.keys()))[:3] + targets = sorted(vocab.keys())[:3] targets = [targets[0], targets[1], targets[0], targets[2], targets[1]] outputs = fill_masker(f"My name is {tokenizer.mask_token}", targets=targets, top_k=10) diff --git a/tests/pipelines/test_pipelines_video_classification.py b/tests/pipelines/test_pipelines_video_classification.py index 9074196183..8390d21fc5 100644 --- a/tests/pipelines/test_pipelines_video_classification.py +++ b/tests/pipelines/test_pipelines_video_classification.py @@ -63,7 +63,7 @@ class VideoClassificationPipelineTests(unittest.TestCase, metaclass=PipelineTest def test_small_model_pt(self): small_model = "hf-internal-testing/tiny-random-VideoMAEForVideoClassification" small_feature_extractor = VideoMAEFeatureExtractor( - size=dict(shortest_edge=10), crop_size=dict(height=10, width=10) + size={"shortest_edge": 10}, crop_size={"height": 10, "width": 10} ) video_classifier = pipeline( "video-classification", model=small_model, feature_extractor=small_feature_extractor, frame_sampling_rate=4 diff --git a/tests/repo_utils/test_tests_fetcher.py b/tests/repo_utils/test_tests_fetcher.py index 0541b72d95..cd0109b535 100644 --- a/tests/repo_utils/test_tests_fetcher.py +++ b/tests/repo_utils/test_tests_fetcher.py @@ -56,9 +56,9 @@ class CheckDummiesTester(unittest.TestCase): "pytorch_utils.py", "models/bert/configuration_bert.py", ] - expected_deps = set(os.path.join(transformers_path, f) for f in expected_deps) + expected_deps = {os.path.join(transformers_path, f) for f in expected_deps} repo = Repo(git_repo_path) with checkout_commit(repo, GIT_TEST_SHA): deps = get_module_dependencies(bert_module) - deps = set(os.path.expanduser(f) for f in deps) + deps = {os.path.expanduser(f) for f in deps} self.assertEqual(deps, expected_deps) diff --git a/tests/sagemaker/scripts/pytorch/run_glue_model_parallelism.py b/tests/sagemaker/scripts/pytorch/run_glue_model_parallelism.py index 01185fdaba..ecbe714a16 100644 --- a/tests/sagemaker/scripts/pytorch/run_glue_model_parallelism.py +++ b/tests/sagemaker/scripts/pytorch/run_glue_model_parallelism.py @@ -362,12 +362,12 @@ def main(): ): # Some have all caps in their config, some don't. label_name_to_id = {k.lower(): v for k, v in model.config.label2id.items()} - if list(sorted(label_name_to_id.keys())) == list(sorted(label_list)): + if sorted(label_name_to_id.keys()) == sorted(label_list): label_to_id = {i: int(label_name_to_id[label_list[i]]) for i in range(num_labels)} else: logger.warning( "Your model seems to have been trained with labels, but they don't match the dataset: ", - f"model labels: {list(sorted(label_name_to_id.keys()))}, dataset labels: {list(sorted(label_list))}." + f"model labels: {sorted(label_name_to_id.keys())}, dataset labels: {sorted(label_list)}." "\nIgnoring the model labels as a result.", ) elif data_args.task_name is None and not is_regression: diff --git a/tests/test_modeling_common.py b/tests/test_modeling_common.py index 152ea7d6cd..eddf503334 100755 --- a/tests/test_modeling_common.py +++ b/tests/test_modeling_common.py @@ -1643,7 +1643,7 @@ class ModelTesterMixin: params = dict(model_reloaded.named_parameters()) params.update(dict(model_reloaded.named_buffers())) # param_names = set(k[len(prefix) :] if k.startswith(prefix) else k for k in params.keys()) - param_names = set(k[len(prefix) :] if k.startswith(prefix) else k for k in params.keys()) + param_names = {k[len(prefix) :] if k.startswith(prefix) else k for k in params.keys()} missing_keys = set(infos["missing_keys"]) @@ -1770,8 +1770,8 @@ class ModelTesterMixin: def _postprocessing_to_ignore_test_cases(self, tf_outputs, pt_outputs, model_class): """For temporarily ignoring some failed test cases (issues to be fixed)""" - tf_keys = set([k for k, v in tf_outputs.items() if v is not None]) - pt_keys = set([k for k, v in pt_outputs.items() if v is not None]) + tf_keys = {k for k, v in tf_outputs.items() if v is not None} + pt_keys = {k for k, v in pt_outputs.items() if v is not None} key_differences = tf_keys.symmetric_difference(pt_keys) @@ -2995,7 +2995,7 @@ class ModelUtilsTest(TestCasePlus): index = json.loads(f.read()) all_shards = set(index["weight_map"].values()) - shards_found = set(f for f in os.listdir(tmp_dir) if f.endswith(".bin")) + shards_found = {f for f in os.listdir(tmp_dir) if f.endswith(".bin")} self.assertSetEqual(all_shards, shards_found) # Finally, check the model can be reloaded diff --git a/tests/test_modeling_flax_common.py b/tests/test_modeling_flax_common.py index f6737d8649..f93228e9b8 100644 --- a/tests/test_modeling_flax_common.py +++ b/tests/test_modeling_flax_common.py @@ -1099,7 +1099,7 @@ class FlaxModelTesterMixin: index = json.loads(f.read()) all_shards = set(index["weight_map"].values()) - shards_found = set(f for f in os.listdir(tmp_dir) if f.endswith(".msgpack")) + shards_found = {f for f in os.listdir(tmp_dir) if f.endswith(".msgpack")} self.assertSetEqual(all_shards, shards_found) # Finally, check the model can be reloaded diff --git a/tests/test_modeling_tf_common.py b/tests/test_modeling_tf_common.py index ced3c0f86a..afd74411be 100644 --- a/tests/test_modeling_tf_common.py +++ b/tests/test_modeling_tf_common.py @@ -398,7 +398,7 @@ class TFModelTesterMixin: def test_keras_save_load(self): config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common() - tf_main_layer_classes = set( + tf_main_layer_classes = { module_member for model_class in self.all_model_classes for module in (import_module(model_class.__module__),) @@ -410,7 +410,7 @@ class TFModelTesterMixin: if isinstance(module_member, type) and tf.keras.layers.Layer in module_member.__bases__ and getattr(module_member, "_keras_serializable", False) - ) + } for main_layer_class in tf_main_layer_classes: # T5MainLayer needs an embed_tokens parameter when called without the inputs_embeds parameter if "T5" in main_layer_class.__name__: @@ -498,8 +498,8 @@ class TFModelTesterMixin: def _postprocessing_to_ignore_test_cases(self, tf_outputs, pt_outputs, model_class): """For temporarily ignoring some failed test cases (issues to be fixed)""" - tf_keys = set([k for k, v in tf_outputs.items() if v is not None]) - pt_keys = set([k for k, v in pt_outputs.items() if v is not None]) + tf_keys = {k for k, v in tf_outputs.items() if v is not None} + pt_keys = {k for k, v in pt_outputs.items() if v is not None} key_differences = tf_keys.symmetric_difference(pt_keys) @@ -1455,7 +1455,7 @@ class TFModelTesterMixin: continue # The number of elements in the loss should be the same as the number of elements in the label prepared_for_class = self._prepare_for_class(inputs_dict.copy(), model_class, return_labels=True) - added_label_names = sorted(list(prepared_for_class.keys() - inputs_dict.keys()), reverse=True) + added_label_names = sorted(prepared_for_class.keys() - inputs_dict.keys(), reverse=True) if not added_label_names: continue # This test is only for models with easily-separable labels added_label = prepared_for_class[added_label_names[0]] @@ -1713,7 +1713,7 @@ class TFModelTesterMixin: } signature = inspect.signature(model.call) - if set(head_masking.keys()) < set([*signature.parameters.keys()]): + if set(head_masking.keys()) < {*signature.parameters.keys()}: continue for attn_name, (name, mask) in zip(attention_names, head_masking.items()): @@ -2274,7 +2274,7 @@ class UtilsFunctionsTest(unittest.TestCase): index = json.loads(f.read()) all_shards = set(index["weight_map"].values()) - shards_found = set(f for f in os.listdir(tmp_dir) if f.endswith(".h5")) + shards_found = {f for f in os.listdir(tmp_dir) if f.endswith(".h5")} self.assertSetEqual(all_shards, shards_found) # Finally, check the model can be reloaded diff --git a/tests/test_sequence_feature_extraction_common.py b/tests/test_sequence_feature_extraction_common.py index 710ad01250..4c09c1c262 100644 --- a/tests/test_sequence_feature_extraction_common.py +++ b/tests/test_sequence_feature_extraction_common.py @@ -417,7 +417,7 @@ class SequenceFeatureExtractionTestMixin(FeatureExtractionSavingTestMixin): ) self.assertIn("attention_mask", processed_pad) self.assertListEqual( - list(processed_pad.attention_mask.shape), list((processed_pad[input_name].shape[0], max_length)) + list(processed_pad.attention_mask.shape), [processed_pad[input_name].shape[0], max_length] ) self.assertListEqual( processed_pad.attention_mask[:, :max_length].sum(-1).tolist(), [max_length for x in speech_inputs] diff --git a/tests/trainer/test_trainer.py b/tests/trainer/test_trainer.py index 2c26deeffe..d167b646c0 100644 --- a/tests/trainer/test_trainer.py +++ b/tests/trainer/test_trainer.py @@ -1148,7 +1148,13 @@ class TrainerIntegrationTest(TestCasePlus, TrainerIntegrationCommon): # won't be the same since the training dataloader is shuffled). with tempfile.TemporaryDirectory() as tmpdir: - kwargs = dict(output_dir=tmpdir, train_len=128, save_steps=5, learning_rate=0.1, logging_steps=5) + kwargs = { + "output_dir": tmpdir, + "train_len": 128, + "save_steps": 5, + "learning_rate": 0.1, + "logging_steps": 5, + } trainer = get_regression_trainer(**kwargs) trainer.train() (a, b) = trainer.model.a.item(), trainer.model.b.item() @@ -1181,7 +1187,13 @@ class TrainerIntegrationTest(TestCasePlus, TrainerIntegrationCommon): # With a regular model that is not a PreTrainedModel with tempfile.TemporaryDirectory() as tmpdir: - kwargs = dict(output_dir=tmpdir, train_len=128, save_steps=5, learning_rate=0.1, pretrained=False) + kwargs = { + "output_dir": tmpdir, + "train_len": 128, + "save_steps": 5, + "learning_rate": 0.1, + "pretrained": False, + } trainer = get_regression_trainer(**kwargs) trainer.train() diff --git a/tests/trainer/test_trainer_callback.py b/tests/trainer/test_trainer_callback.py index a88ca1cb0d..8e851132c2 100644 --- a/tests/trainer/test_trainer_callback.py +++ b/tests/trainer/test_trainer_callback.py @@ -108,8 +108,8 @@ class TrainerCallbackTest(unittest.TestCase): self.assertEqual(len(cbs1), len(cbs2)) # Order doesn't matter - cbs1 = list(sorted(cbs1, key=lambda cb: cb.__name__ if isinstance(cb, type) else cb.__class__.__name__)) - cbs2 = list(sorted(cbs2, key=lambda cb: cb.__name__ if isinstance(cb, type) else cb.__class__.__name__)) + cbs1 = sorted(cbs1, key=lambda cb: cb.__name__ if isinstance(cb, type) else cb.__class__.__name__) + cbs2 = sorted(cbs2, key=lambda cb: cb.__name__ if isinstance(cb, type) else cb.__class__.__name__) for cb1, cb2 in zip(cbs1, cbs2): if isinstance(cb1, type) and isinstance(cb2, type): diff --git a/tests/trainer/test_trainer_utils.py b/tests/trainer/test_trainer_utils.py index 869d19b0a1..ccf162677e 100644 --- a/tests/trainer/test_trainer_utils.py +++ b/tests/trainer/test_trainer_utils.py @@ -189,7 +189,7 @@ class TrainerUtilsTest(unittest.TestCase): # The biggest element should be first self.assertEqual(lengths[indices[0]], 50) # The indices should be a permutation of range(100) - self.assertEqual(list(sorted(indices)), list(range(100))) + self.assertEqual(sorted(indices), list(range(100))) def test_group_by_length_with_dict(self): # Get some inputs of random lengths @@ -204,7 +204,7 @@ class TrainerUtilsTest(unittest.TestCase): # The biggest element should be first self.assertEqual(len(data[indices[0]]["input_ids"]), 105) # The indices should be a permutation of range(6) - self.assertEqual(list(sorted(indices)), list(range(6))) + self.assertEqual(sorted(indices), list(range(6))) def test_group_by_length_with_batch_encoding(self): # Get some inputs of random lengths @@ -219,7 +219,7 @@ class TrainerUtilsTest(unittest.TestCase): # The biggest element should be first self.assertEqual(len(data[indices[0]]["input_ids"]), 105) # The indices should be a permutation of range(6) - self.assertEqual(list(sorted(indices)), list(range(6))) + self.assertEqual(sorted(indices), list(range(6))) def test_distributed_length_grouped(self): # Get some inputs of random lengths @@ -232,7 +232,7 @@ class TrainerUtilsTest(unittest.TestCase): # The biggest element should be first self.assertEqual(lengths[indices_process_0[0]], 50) # The indices should be a permutation of range(100) - self.assertEqual(list(sorted(indices_process_0 + indices_process_1)), list(range(100))) + self.assertEqual(sorted(indices_process_0 + indices_process_1), list(range(100))) def test_get_parameter_names(self): model = nn.Sequential(TstLayer(128), nn.ModuleList([TstLayer(128), TstLayer(128)])) diff --git a/tests/utils/test_modeling_tf_core.py b/tests/utils/test_modeling_tf_core.py index 7795833507..f144a7b8d9 100644 --- a/tests/utils/test_modeling_tf_core.py +++ b/tests/utils/test_modeling_tf_core.py @@ -285,7 +285,7 @@ class TFCoreModelTesterMixin: del inputs_dict["decoder_head_mask"] if "cross_attn_head_mask" in inputs_dict: del inputs_dict["cross_attn_head_mask"] - tf_main_layer_classes = set( + tf_main_layer_classes = { module_member for model_class in self.all_model_classes for module in (import_module(model_class.__module__),) @@ -295,7 +295,7 @@ class TFCoreModelTesterMixin: if isinstance(module_member, type) and tf.keras.layers.Layer in module_member.__bases__ and getattr(module_member, "_keras_serializable", False) - ) + } for main_layer_class in tf_main_layer_classes: # T5MainLayer needs an embed_tokens parameter when called without the inputs_embeds parameter diff --git a/utils/check_copies.py b/utils/check_copies.py index d32df3b870..4fd2017e60 100644 --- a/utils/check_copies.py +++ b/utils/check_copies.py @@ -385,7 +385,7 @@ def convert_to_localized_md(model_list, localized_model_list, format_str): sorted_index = sorted(localized_model_index.items(), key=lambda x: x[0].lower()) - return readmes_match, "\n".join(map(lambda x: x[1], sorted_index)) + "\n" + return readmes_match, "\n".join((x[1] for x in sorted_index)) + "\n" def convert_readme_to_index(model_list): diff --git a/utils/check_doc_toc.py b/utils/check_doc_toc.py index 67ec2f9466..a01804284c 100644 --- a/utils/check_doc_toc.py +++ b/utils/check_doc_toc.py @@ -33,7 +33,7 @@ def clean_model_doc_toc(model_doc): new_doc = [] for duplicate_key in duplicates: - titles = list(set(doc["title"] for doc in model_doc if doc["local"] == duplicate_key)) + titles = list({doc["title"] for doc in model_doc if doc["local"] == duplicate_key}) if len(titles) > 1: raise ValueError( f"{duplicate_key} is present several times in the documentation table of content at " diff --git a/utils/check_repo.py b/utils/check_repo.py index 53717645cf..f7582f35ca 100644 --- a/utils/check_repo.py +++ b/utils/check_repo.py @@ -335,7 +335,7 @@ def check_model_list(): # Get the models from the directory structure of `src/transformers/models/` models = [model for model in dir(transformers.models) if not model.startswith("__")] - missing_models = sorted(list(set(_models).difference(models))) + missing_models = sorted(set(_models).difference(models)) if missing_models: raise Exception( f"The following models should be included in {models_dir}/__init__.py: {','.join(missing_models)}." @@ -547,7 +547,7 @@ def get_all_auto_configured_models(): for attr_name in dir(transformers.models.auto.modeling_flax_auto): if attr_name.startswith("FLAX_MODEL_") and attr_name.endswith("MAPPING_NAMES"): result = result | set(get_values(getattr(transformers.models.auto.modeling_flax_auto, attr_name))) - return [cls for cls in result] + return list(result) def ignore_unautoclassed(model_name): diff --git a/utils/create_dummy_models.py b/utils/create_dummy_models.py index 47c150d6e8..162a310c65 100644 --- a/utils/create_dummy_models.py +++ b/utils/create_dummy_models.py @@ -413,10 +413,10 @@ def convert_processors(processors, tiny_config, output_folder, result): feature_extractors.append(processor.feature_extractor) # check the built processors have the unique type - num_types = len(set([x.__class__.__name__ for x in feature_extractors])) + num_types = len({x.__class__.__name__ for x in feature_extractors}) if num_types >= 2: raise ValueError(f"`feature_extractors` should contain at most 1 type, but it contains {num_types} types!") - num_types = len(set([x.__class__.__name__.replace("Fast", "") for x in tokenizers])) + num_types = len({x.__class__.__name__.replace("Fast", "") for x in tokenizers}) if num_types >= 2: raise ValueError(f"`tokenizers` should contain at most 1 tokenizer type, but it contains {num_types} types!") @@ -712,7 +712,7 @@ def build_composite_models(config_class, output_dir): shutil.copytree(decoder_processor_path, model_path, dirs_exist_ok=True) # fill `result` - result["processor"] = tuple(set([x.__name__ for x in encoder_processor + decoder_processor])) + result["processor"] = tuple({x.__name__ for x in encoder_processor + decoder_processor}) result["pytorch"] = {model_class.__name__: {"model": model_class.__name__, "checkpoint": model_path}} diff --git a/utils/extract_warnings.py b/utils/extract_warnings.py index cb609e8615..bc26e79366 100644 --- a/utils/extract_warnings.py +++ b/utils/extract_warnings.py @@ -134,6 +134,6 @@ if __name__ == "__main__": # extract warnings from artifacts selected_warnings = extract_warnings(args.output_dir, args.targets) - selected_warnings = sorted(list(selected_warnings)) + selected_warnings = sorted(selected_warnings) with open(os.path.join(args.output_dir, "selected_warnings.json"), "w", encoding="UTF-8") as fp: json.dump(selected_warnings, fp, ensure_ascii=False, indent=4) diff --git a/utils/get_ci_error_statistics.py b/utils/get_ci_error_statistics.py index b6642dce9c..5e2846ee39 100644 --- a/utils/get_ci_error_statistics.py +++ b/utils/get_ci_error_statistics.py @@ -166,7 +166,7 @@ def reduce_by_model(logs, error_filter=None): logs = [(x[0], x[1], get_model(x[2])) for x in logs] logs = [x for x in logs if x[2] is not None] - tests = set([x[2] for x in logs]) + tests = {x[2] for x in logs} r = {} for test in tests: diff --git a/utils/tests_fetcher.py b/utils/tests_fetcher.py index 84dd062a19..1d1df9e817 100644 --- a/utils/tests_fetcher.py +++ b/utils/tests_fetcher.py @@ -78,13 +78,11 @@ def get_all_tests(): # test folders/files directly under `tests` folder tests = os.listdir(test_root_dir) - tests = sorted( - list(filter(lambda x: os.path.isdir(x) or x.startswith("tests/test_"), [f"tests/{x}" for x in tests])) - ) + tests = sorted(filter(lambda x: os.path.isdir(x) or x.startswith("tests/test_"), [f"tests/{x}" for x in tests])) # model specific test folders model_tests_folders = os.listdir(os.path.join(test_root_dir, "models")) - model_test_folders = sorted(list(filter(os.path.isdir, [f"tests/models/{x}" for x in model_tests_folders]))) + model_test_folders = sorted(filter(os.path.isdir, [f"tests/models/{x}" for x in model_tests_folders])) tests.remove("tests/models") tests = model_test_folders + tests @@ -265,7 +263,7 @@ def get_tree_starting_at(module, edges): tree = [module] while len(new_edges) > 0: tree.append(new_edges) - final_vertices = list(set(edge[1] for edge in new_edges)) + final_vertices = list({edge[1] for edge in new_edges}) vertices_seen.extend(final_vertices) new_edges = [edge for edge in edges if edge[0] in final_vertices and edge[1] not in vertices_seen] @@ -285,10 +283,10 @@ def print_tree_deps_of(module, all_edges=None): lines = [(tree[0], tree[0])] for index in range(1, len(tree)): edges = tree[index] - start_edges = set([edge[0] for edge in edges]) + start_edges = {edge[0] for edge in edges} for start in start_edges: - end_edges = set([edge[1] for edge in edges if edge[0] == start]) + end_edges = {edge[1] for edge in edges if edge[0] == start} # We will insert all those edges just after the line showing start. pos = 0 while lines[pos][1] != start: @@ -547,7 +545,7 @@ def infer_tests_to_run(output_file, diff_with_last_commit=False, filters=None, j impacted_files.extend(impacted_modules_map[f]) # Remove duplicates - impacted_files = sorted(list(set(impacted_files))) + impacted_files = sorted(set(impacted_files)) print(f"\n### IMPACTED FILES ###\n{_print_list(impacted_files)}") # Grab the corresponding test files: @@ -578,7 +576,7 @@ def infer_tests_to_run(output_file, diff_with_last_commit=False, filters=None, j test_files_to_run.extend(new_tests) # Remove duplicates - test_files_to_run = sorted(list(set(test_files_to_run))) + test_files_to_run = sorted(set(test_files_to_run)) # Make sure we did not end up with a test file that was removed test_files_to_run = [f for f in test_files_to_run if os.path.isfile(f) or os.path.isdir(f)] if filters is not None: diff --git a/utils/update_metadata.py b/utils/update_metadata.py index 6aeb767375..f95a4575d1 100644 --- a/utils/update_metadata.py +++ b/utils/update_metadata.py @@ -223,7 +223,7 @@ def update_metadata(token, commit_sha): table = update_pipeline_and_auto_class_table(table) # Sort the model classes to avoid some nondeterministic updates to create false update commits. - model_classes = sorted(list(table.keys())) + model_classes = sorted(table.keys()) tags_table = pd.DataFrame( { "model_class": model_classes,