diff --git a/examples/flax/image-captioning/run_image_captioning_flax.py b/examples/flax/image-captioning/run_image_captioning_flax.py
index 66bd729075..ef9c515da4 100644
--- a/examples/flax/image-captioning/run_image_captioning_flax.py
+++ b/examples/flax/image-captioning/run_image_captioning_flax.py
@@ -892,14 +892,12 @@ def main():
flat_params = traverse_util.flatten_dict(params)
# find out all LayerNorm parameters
layer_norm_candidates = ["layernorm", "layer_norm", "ln"]
- layer_norm_named_params = set(
- [
- layer[-2:]
- for layer_norm_name in layer_norm_candidates
- for layer in flat_params.keys()
- if layer_norm_name in "".join(layer).lower()
- ]
- )
+ layer_norm_named_params = {
+ layer[-2:]
+ for layer_norm_name in layer_norm_candidates
+ for layer in flat_params.keys()
+ if layer_norm_name in "".join(layer).lower()
+ }
flat_mask = {path: (path[-1] != "bias" and path[-2:] not in layer_norm_named_params) for path in flat_params}
return traverse_util.unflatten_dict(flat_mask)
diff --git a/examples/flax/language-modeling/run_bart_dlm_flax.py b/examples/flax/language-modeling/run_bart_dlm_flax.py
index 0a97bffd93..62e4e8a839 100644
--- a/examples/flax/language-modeling/run_bart_dlm_flax.py
+++ b/examples/flax/language-modeling/run_bart_dlm_flax.py
@@ -756,14 +756,12 @@ def main():
flat_params = traverse_util.flatten_dict(params)
# find out all LayerNorm parameters
layer_norm_candidates = ["layernorm", "layer_norm", "ln"]
- layer_norm_named_params = set(
- [
- layer[-2:]
- for layer_norm_name in layer_norm_candidates
- for layer in flat_params.keys()
- if layer_norm_name in "".join(layer).lower()
- ]
- )
+ layer_norm_named_params = {
+ layer[-2:]
+ for layer_norm_name in layer_norm_candidates
+ for layer in flat_params.keys()
+ if layer_norm_name in "".join(layer).lower()
+ }
flat_mask = {path: (path[-1] != "bias" and path[-2:] not in layer_norm_named_params) for path in flat_params}
return traverse_util.unflatten_dict(flat_mask)
diff --git a/examples/flax/language-modeling/run_clm_flax.py b/examples/flax/language-modeling/run_clm_flax.py
index 607c9bb1ee..952419dc96 100755
--- a/examples/flax/language-modeling/run_clm_flax.py
+++ b/examples/flax/language-modeling/run_clm_flax.py
@@ -648,14 +648,12 @@ def main():
flat_params = traverse_util.flatten_dict(params)
# find out all LayerNorm parameters
layer_norm_candidates = ["layernorm", "layer_norm", "ln"]
- layer_norm_named_params = set(
- [
- layer[-2:]
- for layer_norm_name in layer_norm_candidates
- for layer in flat_params.keys()
- if layer_norm_name in "".join(layer).lower()
- ]
- )
+ layer_norm_named_params = {
+ layer[-2:]
+ for layer_norm_name in layer_norm_candidates
+ for layer in flat_params.keys()
+ if layer_norm_name in "".join(layer).lower()
+ }
flat_mask = {path: (path[-1] != "bias" and path[-2:] not in layer_norm_named_params) for path in flat_params}
return traverse_util.unflatten_dict(flat_mask)
diff --git a/examples/flax/language-modeling/run_mlm_flax.py b/examples/flax/language-modeling/run_mlm_flax.py
index 6a06533b14..ae289b8470 100755
--- a/examples/flax/language-modeling/run_mlm_flax.py
+++ b/examples/flax/language-modeling/run_mlm_flax.py
@@ -679,14 +679,12 @@ def main():
flat_params = traverse_util.flatten_dict(params)
# find out all LayerNorm parameters
layer_norm_candidates = ["layernorm", "layer_norm", "ln"]
- layer_norm_named_params = set(
- [
- layer[-2:]
- for layer_norm_name in layer_norm_candidates
- for layer in flat_params.keys()
- if layer_norm_name in "".join(layer).lower()
- ]
- )
+ layer_norm_named_params = {
+ layer[-2:]
+ for layer_norm_name in layer_norm_candidates
+ for layer in flat_params.keys()
+ if layer_norm_name in "".join(layer).lower()
+ }
flat_mask = {path: (path[-1] != "bias" and path[-2:] not in layer_norm_named_params) for path in flat_params}
return traverse_util.unflatten_dict(flat_mask)
diff --git a/examples/flax/language-modeling/run_t5_mlm_flax.py b/examples/flax/language-modeling/run_t5_mlm_flax.py
index 814d68a88e..152760f4bf 100755
--- a/examples/flax/language-modeling/run_t5_mlm_flax.py
+++ b/examples/flax/language-modeling/run_t5_mlm_flax.py
@@ -791,14 +791,12 @@ def main():
flat_params = traverse_util.flatten_dict(params)
# find out all LayerNorm parameters
layer_norm_candidates = ["layernorm", "layer_norm", "ln"]
- layer_norm_named_params = set(
- [
- layer[-2:]
- for layer_norm_name in layer_norm_candidates
- for layer in flat_params.keys()
- if layer_norm_name in "".join(layer).lower()
- ]
- )
+ layer_norm_named_params = {
+ layer[-2:]
+ for layer_norm_name in layer_norm_candidates
+ for layer in flat_params.keys()
+ if layer_norm_name in "".join(layer).lower()
+ }
flat_mask = {path: (path[-1] != "bias" and path[-2:] not in layer_norm_named_params) for path in flat_params}
return traverse_util.unflatten_dict(flat_mask)
diff --git a/examples/flax/question-answering/run_qa.py b/examples/flax/question-answering/run_qa.py
index 628b9b81b2..7933c3bd3e 100644
--- a/examples/flax/question-answering/run_qa.py
+++ b/examples/flax/question-answering/run_qa.py
@@ -333,14 +333,12 @@ def create_train_state(
flat_params = traverse_util.flatten_dict(params)
# find out all LayerNorm parameters
layer_norm_candidates = ["layernorm", "layer_norm", "ln"]
- layer_norm_named_params = set(
- [
- layer[-2:]
- for layer_norm_name in layer_norm_candidates
- for layer in flat_params.keys()
- if layer_norm_name in "".join(layer).lower()
- ]
- )
+ layer_norm_named_params = {
+ layer[-2:]
+ for layer_norm_name in layer_norm_candidates
+ for layer in flat_params.keys()
+ if layer_norm_name in "".join(layer).lower()
+ }
flat_mask = {path: (path[-1] != "bias" and path[-2:] not in layer_norm_named_params) for path in flat_params}
return traverse_util.unflatten_dict(flat_mask)
@@ -642,7 +640,7 @@ def main():
return tokenized_examples
- processed_raw_datasets = dict()
+ processed_raw_datasets = {}
if training_args.do_train:
if "train" not in raw_datasets:
raise ValueError("--do_train requires a train dataset")
diff --git a/examples/flax/summarization/run_summarization_flax.py b/examples/flax/summarization/run_summarization_flax.py
index feda695920..67f164bc0b 100644
--- a/examples/flax/summarization/run_summarization_flax.py
+++ b/examples/flax/summarization/run_summarization_flax.py
@@ -742,14 +742,12 @@ def main():
flat_params = traverse_util.flatten_dict(params)
# find out all LayerNorm parameters
layer_norm_candidates = ["layernorm", "layer_norm", "ln"]
- layer_norm_named_params = set(
- [
- layer[-2:]
- for layer_norm_name in layer_norm_candidates
- for layer in flat_params.keys()
- if layer_norm_name in "".join(layer).lower()
- ]
- )
+ layer_norm_named_params = {
+ layer[-2:]
+ for layer_norm_name in layer_norm_candidates
+ for layer in flat_params.keys()
+ if layer_norm_name in "".join(layer).lower()
+ }
flat_mask = {path: (path[-1] != "bias" and path[-2:] not in layer_norm_named_params) for path in flat_params}
return traverse_util.unflatten_dict(flat_mask)
diff --git a/examples/flax/text-classification/run_flax_glue.py b/examples/flax/text-classification/run_flax_glue.py
index c47ea90d39..4fd12404d4 100755
--- a/examples/flax/text-classification/run_flax_glue.py
+++ b/examples/flax/text-classification/run_flax_glue.py
@@ -229,14 +229,12 @@ def create_train_state(
flat_params = traverse_util.flatten_dict(params)
# find out all LayerNorm parameters
layer_norm_candidates = ["layernorm", "layer_norm", "ln"]
- layer_norm_named_params = set(
- [
- layer[-2:]
- for layer_norm_name in layer_norm_candidates
- for layer in flat_params.keys()
- if layer_norm_name in "".join(layer).lower()
- ]
- )
+ layer_norm_named_params = {
+ layer[-2:]
+ for layer_norm_name in layer_norm_candidates
+ for layer in flat_params.keys()
+ if layer_norm_name in "".join(layer).lower()
+ }
flat_mask = {path: (path[-1] != "bias" and path[-2:] not in layer_norm_named_params) for path in flat_params}
return traverse_util.unflatten_dict(flat_mask)
@@ -449,7 +447,7 @@ def main():
):
# Some have all caps in their config, some don't.
label_name_to_id = {k.lower(): v for k, v in model.config.label2id.items()}
- if list(sorted(label_name_to_id.keys())) == list(sorted(label_list)):
+ if sorted(label_name_to_id.keys()) == sorted(label_list):
logger.info(
f"The configuration of the model provided the following label correspondence: {label_name_to_id}. "
"Using it!"
@@ -458,7 +456,7 @@ def main():
else:
logger.warning(
"Your model seems to have been trained with labels, but they don't match the dataset: ",
- f"model labels: {list(sorted(label_name_to_id.keys()))}, dataset labels: {list(sorted(label_list))}."
+ f"model labels: {sorted(label_name_to_id.keys())}, dataset labels: {sorted(label_list)}."
"\nIgnoring the model labels as a result.",
)
elif data_args.task_name is None:
diff --git a/examples/flax/token-classification/run_flax_ner.py b/examples/flax/token-classification/run_flax_ner.py
index c7509433d9..d176765289 100644
--- a/examples/flax/token-classification/run_flax_ner.py
+++ b/examples/flax/token-classification/run_flax_ner.py
@@ -290,14 +290,12 @@ def create_train_state(
flat_params = traverse_util.flatten_dict(params)
# find out all LayerNorm parameters
layer_norm_candidates = ["layernorm", "layer_norm", "ln"]
- layer_norm_named_params = set(
- [
- layer[-2:]
- for layer_norm_name in layer_norm_candidates
- for layer in flat_params.keys()
- if layer_norm_name in "".join(layer).lower()
- ]
- )
+ layer_norm_named_params = {
+ layer[-2:]
+ for layer_norm_name in layer_norm_candidates
+ for layer in flat_params.keys()
+ if layer_norm_name in "".join(layer).lower()
+ }
flat_mask = {path: (path[-1] != "bias" and path[-2:] not in layer_norm_named_params) for path in flat_params}
return traverse_util.unflatten_dict(flat_mask)
diff --git a/examples/legacy/pytorch-lightning/run_glue.py b/examples/legacy/pytorch-lightning/run_glue.py
index aa2349f280..f96c5bafcd 100644
--- a/examples/legacy/pytorch-lightning/run_glue.py
+++ b/examples/legacy/pytorch-lightning/run_glue.py
@@ -192,7 +192,7 @@ def main():
# Optionally, predict on dev set and write to output_dir
if args.do_predict:
- checkpoints = list(sorted(glob.glob(os.path.join(args.output_dir, "checkpoint-epoch=*.ckpt"), recursive=True)))
+ checkpoints = sorted(glob.glob(os.path.join(args.output_dir, "checkpoint-epoch=*.ckpt"), recursive=True))
model = model.load_from_checkpoint(checkpoints[-1])
return trainer.test(model)
diff --git a/examples/legacy/pytorch-lightning/run_ner.py b/examples/legacy/pytorch-lightning/run_ner.py
index 3bcbdfee03..473851edef 100644
--- a/examples/legacy/pytorch-lightning/run_ner.py
+++ b/examples/legacy/pytorch-lightning/run_ner.py
@@ -211,6 +211,6 @@ if __name__ == "__main__":
# pl use this default format to create a checkpoint:
# https://github.com/PyTorchLightning/pytorch-lightning/blob/master\
# /pytorch_lightning/callbacks/model_checkpoint.py#L322
- checkpoints = list(sorted(glob.glob(os.path.join(args.output_dir, "checkpoint-epoch=*.ckpt"), recursive=True)))
+ checkpoints = sorted(glob.glob(os.path.join(args.output_dir, "checkpoint-epoch=*.ckpt"), recursive=True))
model = model.load_from_checkpoint(checkpoints[-1])
trainer.test(model)
diff --git a/examples/legacy/question-answering/run_squad.py b/examples/legacy/question-answering/run_squad.py
index d966b3f02f..fc9411e95d 100644
--- a/examples/legacy/question-answering/run_squad.py
+++ b/examples/legacy/question-answering/run_squad.py
@@ -810,10 +810,10 @@ def main():
logger.info("Loading checkpoints saved during training for evaluation")
checkpoints = [args.output_dir]
if args.eval_all_checkpoints:
- checkpoints = list(
+ checkpoints = [
os.path.dirname(c)
for c in sorted(glob.glob(args.output_dir + "/**/" + WEIGHTS_NAME, recursive=True))
- )
+ ]
else:
logger.info("Loading checkpoint %s for evaluation", args.model_name_or_path)
@@ -830,7 +830,7 @@ def main():
# Evaluate
result = evaluate(args, model, tokenizer, prefix=global_step)
- result = dict((k + ("_{}".format(global_step) if global_step else ""), v) for k, v in result.items())
+ result = {k + ("_{}".format(global_step) if global_step else ""): v for k, v in result.items()}
results.update(result)
logger.info("Results: {}".format(results))
diff --git a/examples/legacy/run_openai_gpt.py b/examples/legacy/run_openai_gpt.py
index 1f02570f8f..03031f2057 100755
--- a/examples/legacy/run_openai_gpt.py
+++ b/examples/legacy/run_openai_gpt.py
@@ -189,7 +189,7 @@ def main():
return tokenizer.convert_tokens_to_ids(tokenizer.tokenize(obj))
elif isinstance(obj, int):
return obj
- return list(tokenize_and_encode(o) for o in obj)
+ return [tokenize_and_encode(o) for o in obj]
logger.info("Encoding dataset...")
train_dataset = load_rocstories_dataset(args.train_dataset)
diff --git a/examples/legacy/run_swag.py b/examples/legacy/run_swag.py
index 5cac156724..bde0501687 100755
--- a/examples/legacy/run_swag.py
+++ b/examples/legacy/run_swag.py
@@ -696,9 +696,9 @@ def main():
checkpoints = [args.model_name_or_path]
if args.eval_all_checkpoints:
- checkpoints = list(
+ checkpoints = [
os.path.dirname(c) for c in sorted(glob.glob(args.output_dir + "/**/" + WEIGHTS_NAME, recursive=True))
- )
+ ]
logger.info("Evaluate the following checkpoints: %s", checkpoints)
@@ -712,7 +712,7 @@ def main():
# Evaluate
result = evaluate(args, model, tokenizer, prefix=global_step)
- result = dict((k + ("_{}".format(global_step) if global_step else ""), v) for k, v in result.items())
+ result = {k + ("_{}".format(global_step) if global_step else ""): v for k, v in result.items()}
results.update(result)
logger.info("Results: {}".format(results))
diff --git a/examples/legacy/seq2seq/run_distributed_eval.py b/examples/legacy/seq2seq/run_distributed_eval.py
index 655807ba17..55f3839d73 100755
--- a/examples/legacy/seq2seq/run_distributed_eval.py
+++ b/examples/legacy/seq2seq/run_distributed_eval.py
@@ -111,7 +111,7 @@ def eval_data_dir(
if num_return_sequences > 1:
preds = chunks(preds, num_return_sequences) # batch size chunks, each of size num_return_seq
for i, pred in enumerate(preds):
- results.append(dict(pred=pred, id=ids[i].item()))
+ results.append({"pred": pred, "id": ids[i].item()})
save_json(results, save_path)
return results, sampler.num_replicas
@@ -232,7 +232,7 @@ def combine_partial_results(partial_results) -> List:
records = []
for partial_result in partial_results:
records.extend(partial_result)
- records = list(sorted(records, key=lambda x: x["id"]))
+ records = sorted(records, key=lambda x: x["id"])
preds = [x["pred"] for x in records]
return preds
diff --git a/examples/legacy/seq2seq/run_eval.py b/examples/legacy/seq2seq/run_eval.py
index a8aa8e7ef9..35e11c86a1 100755
--- a/examples/legacy/seq2seq/run_eval.py
+++ b/examples/legacy/seq2seq/run_eval.py
@@ -76,7 +76,7 @@ def generate_summaries_or_translations(
fout.close()
runtime = int(time.time() - start_time) # seconds
n_obs = len(examples)
- return dict(n_obs=n_obs, runtime=runtime, seconds_per_sample=round(runtime / n_obs, 4))
+ return {"n_obs": n_obs, "runtime": runtime, "seconds_per_sample": round(runtime / n_obs, 4)}
def datetime_now():
diff --git a/examples/legacy/seq2seq/run_eval_search.py b/examples/legacy/seq2seq/run_eval_search.py
index c72f038fc5..1ed08c2274 100755
--- a/examples/legacy/seq2seq/run_eval_search.py
+++ b/examples/legacy/seq2seq/run_eval_search.py
@@ -36,7 +36,7 @@ def parse_search_arg(search):
groups = search.split()
entries = {k: vs for k, vs in (g.split("=") for g in groups)}
entry_names = list(entries.keys())
- sets = [list(f"--{k} {v}" for v in vs.split(":")) for k, vs in entries.items()]
+ sets = [[f"--{k} {v}" for v in vs.split(":")] for k, vs in entries.items()]
matrix = [list(x) for x in itertools.product(*sets)]
return matrix, entry_names
diff --git a/examples/legacy/seq2seq/utils.py b/examples/legacy/seq2seq/utils.py
index 2655165cf1..d7cd84dedb 100644
--- a/examples/legacy/seq2seq/utils.py
+++ b/examples/legacy/seq2seq/utils.py
@@ -456,7 +456,7 @@ def pickle_save(obj, path):
def flatten_list(summary_ids: List[List]):
- return [x for x in itertools.chain.from_iterable(summary_ids)]
+ return list(itertools.chain.from_iterable(summary_ids))
def save_git_info(folder_path: str) -> None:
diff --git a/examples/pytorch/audio-classification/run_audio_classification.py b/examples/pytorch/audio-classification/run_audio_classification.py
index 20ddec4acb..054a0fd00e 100644
--- a/examples/pytorch/audio-classification/run_audio_classification.py
+++ b/examples/pytorch/audio-classification/run_audio_classification.py
@@ -293,7 +293,7 @@ def main():
audio["array"], max_length=data_args.max_length_seconds, sample_rate=feature_extractor.sampling_rate
)
output_batch["input_values"].append(wav)
- output_batch["labels"] = [label for label in batch[data_args.label_column_name]]
+ output_batch["labels"] = list(batch[data_args.label_column_name])
return output_batch
@@ -303,14 +303,14 @@ def main():
for audio in batch[data_args.audio_column_name]:
wav = audio["array"]
output_batch["input_values"].append(wav)
- output_batch["labels"] = [label for label in batch[data_args.label_column_name]]
+ output_batch["labels"] = list(batch[data_args.label_column_name])
return output_batch
# Prepare label mappings.
# We'll include these in the model's config to get human readable labels in the Inference API.
labels = raw_datasets["train"].features[data_args.label_column_name].names
- label2id, id2label = dict(), dict()
+ label2id, id2label = {}, {}
for i, label in enumerate(labels):
label2id[label] = str(i)
id2label[str(i)] = label
diff --git a/examples/pytorch/benchmarking/plot_csv_file.py b/examples/pytorch/benchmarking/plot_csv_file.py
index 1a0ae735d8..9a9ad9c670 100644
--- a/examples/pytorch/benchmarking/plot_csv_file.py
+++ b/examples/pytorch/benchmarking/plot_csv_file.py
@@ -83,7 +83,7 @@ def can_convert_to_float(string):
class Plot:
def __init__(self, args):
self.args = args
- self.result_dict = defaultdict(lambda: dict(bsz=[], seq_len=[], result={}))
+ self.result_dict = defaultdict(lambda: {"bsz": [], "seq_len": [], "result": {}})
with open(self.args.csv_file, newline="") as csv_file:
reader = csv.DictReader(csv_file)
@@ -116,8 +116,8 @@ class Plot:
axis.set_major_formatter(ScalarFormatter())
for model_name_idx, model_name in enumerate(self.result_dict.keys()):
- batch_sizes = sorted(list(set(self.result_dict[model_name]["bsz"])))
- sequence_lengths = sorted(list(set(self.result_dict[model_name]["seq_len"])))
+ batch_sizes = sorted(set(self.result_dict[model_name]["bsz"]))
+ sequence_lengths = sorted(set(self.result_dict[model_name]["seq_len"]))
results = self.result_dict[model_name]["result"]
(x_axis_array, inner_loop_array) = (
diff --git a/examples/pytorch/contrastive-image-text/run_clip.py b/examples/pytorch/contrastive-image-text/run_clip.py
index 4669a9b93d..2a6b1dab77 100644
--- a/examples/pytorch/contrastive-image-text/run_clip.py
+++ b/examples/pytorch/contrastive-image-text/run_clip.py
@@ -397,7 +397,7 @@ def main():
# Preprocessing the datasets.
# We need to tokenize input captions and transform the images.
def tokenize_captions(examples):
- captions = [caption for caption in examples[caption_column]]
+ captions = list(examples[caption_column])
text_inputs = tokenizer(captions, max_length=data_args.max_seq_length, padding="max_length", truncation=True)
examples["input_ids"] = text_inputs.input_ids
examples["attention_mask"] = text_inputs.attention_mask
diff --git a/examples/pytorch/image-classification/run_image_classification.py b/examples/pytorch/image-classification/run_image_classification.py
index 78979e4155..114cf4dd0f 100644
--- a/examples/pytorch/image-classification/run_image_classification.py
+++ b/examples/pytorch/image-classification/run_image_classification.py
@@ -250,7 +250,7 @@ def main():
# Prepare label mappings.
# We'll include these in the model's config to get human readable labels in the Inference API.
labels = dataset["train"].features["labels"].names
- label2id, id2label = dict(), dict()
+ label2id, id2label = {}, {}
for i, label in enumerate(labels):
label2id[label] = str(i)
id2label[str(i)] = label
diff --git a/examples/pytorch/image-pretraining/run_mae.py b/examples/pytorch/image-pretraining/run_mae.py
index f3448a7753..55cde66048 100644
--- a/examples/pytorch/image-pretraining/run_mae.py
+++ b/examples/pytorch/image-pretraining/run_mae.py
@@ -91,7 +91,7 @@ class DataTrainingArguments:
)
def __post_init__(self):
- data_files = dict()
+ data_files = {}
if self.train_dir is not None:
data_files["train"] = self.train_dir
if self.validation_dir is not None:
diff --git a/examples/pytorch/image-pretraining/run_mim.py b/examples/pytorch/image-pretraining/run_mim.py
index a906088ed5..d57f201f09 100644
--- a/examples/pytorch/image-pretraining/run_mim.py
+++ b/examples/pytorch/image-pretraining/run_mim.py
@@ -104,7 +104,7 @@ class DataTrainingArguments:
)
def __post_init__(self):
- data_files = dict()
+ data_files = {}
if self.train_dir is not None:
data_files["train"] = self.train_dir
if self.validation_dir is not None:
diff --git a/examples/pytorch/language-modeling/run_clm.py b/examples/pytorch/language-modeling/run_clm.py
index ae01b7614e..23c4abb54b 100755
--- a/examples/pytorch/language-modeling/run_clm.py
+++ b/examples/pytorch/language-modeling/run_clm.py
@@ -407,7 +407,7 @@ def main():
)
else:
model = AutoModelForCausalLM.from_config(config)
- n_params = sum(dict((p.data_ptr(), p.numel()) for p in model.parameters()).values())
+ n_params = sum({p.data_ptr(): p.numel() for p in model.parameters()}.values())
logger.info(f"Training new model from scratch - Total size={n_params/2**20:.2f}M params")
# We resize the embeddings only when necessary to avoid index errors. If you are creating a model from scratch
diff --git a/examples/pytorch/multiple-choice/run_swag.py b/examples/pytorch/multiple-choice/run_swag.py
index a69171766a..cf1607dccf 100755
--- a/examples/pytorch/multiple-choice/run_swag.py
+++ b/examples/pytorch/multiple-choice/run_swag.py
@@ -457,14 +457,14 @@ def main():
trainer.log_metrics("eval", metrics)
trainer.save_metrics("eval", metrics)
- kwargs = dict(
- finetuned_from=model_args.model_name_or_path,
- tasks="multiple-choice",
- dataset_tags="swag",
- dataset_args="regular",
- dataset="SWAG",
- language="en",
- )
+ kwargs = {
+ "finetuned_from": model_args.model_name_or_path,
+ "tasks": "multiple-choice",
+ "dataset_tags": "swag",
+ "dataset_args": "regular",
+ "dataset": "SWAG",
+ "language": "en",
+ }
if training_args.push_to_hub:
trainer.push_to_hub(**kwargs)
diff --git a/examples/pytorch/semantic-segmentation/run_semantic_segmentation.py b/examples/pytorch/semantic-segmentation/run_semantic_segmentation.py
index b1583aca1f..a1fe0103a0 100644
--- a/examples/pytorch/semantic-segmentation/run_semantic_segmentation.py
+++ b/examples/pytorch/semantic-segmentation/run_semantic_segmentation.py
@@ -430,7 +430,7 @@ def main():
pixel_values.append(image)
labels.append(target)
- encoding = dict()
+ encoding = {}
encoding["pixel_values"] = torch.stack(pixel_values)
encoding["labels"] = torch.stack(labels)
@@ -444,7 +444,7 @@ def main():
pixel_values.append(image)
labels.append(target)
- encoding = dict()
+ encoding = {}
encoding["pixel_values"] = torch.stack(pixel_values)
encoding["labels"] = torch.stack(labels)
diff --git a/examples/pytorch/semantic-segmentation/run_semantic_segmentation_no_trainer.py b/examples/pytorch/semantic-segmentation/run_semantic_segmentation_no_trainer.py
index 68919e0cc5..702adb0151 100644
--- a/examples/pytorch/semantic-segmentation/run_semantic_segmentation_no_trainer.py
+++ b/examples/pytorch/semantic-segmentation/run_semantic_segmentation_no_trainer.py
@@ -441,7 +441,7 @@ def main():
pixel_values.append(image)
labels.append(target)
- encoding = dict()
+ encoding = {}
encoding["pixel_values"] = torch.stack(pixel_values)
encoding["labels"] = torch.stack(labels)
@@ -455,7 +455,7 @@ def main():
pixel_values.append(image)
labels.append(target)
- encoding = dict()
+ encoding = {}
encoding["pixel_values"] = torch.stack(pixel_values)
encoding["labels"] = torch.stack(labels)
diff --git a/examples/pytorch/speech-recognition/run_speech_recognition_ctc.py b/examples/pytorch/speech-recognition/run_speech_recognition_ctc.py
index c6cd82b436..f600c03f23 100755
--- a/examples/pytorch/speech-recognition/run_speech_recognition_ctc.py
+++ b/examples/pytorch/speech-recognition/run_speech_recognition_ctc.py
@@ -349,7 +349,7 @@ def create_vocabulary_from_data(
lambda vocab_1, vocab_2: set(vocab_1["vocab"][0]) | set(vocab_2["vocab"][0]), vocabs.values()
)
- vocab_dict = {v: k for k, v in enumerate(sorted(list(vocab_set)))}
+ vocab_dict = {v: k for k, v in enumerate(sorted(vocab_set))}
# replace white space with delimiter token
if word_delimiter_token is not None:
diff --git a/examples/pytorch/text-classification/run_glue.py b/examples/pytorch/text-classification/run_glue.py
index 1e7ab53455..fd8ba016ac 100755
--- a/examples/pytorch/text-classification/run_glue.py
+++ b/examples/pytorch/text-classification/run_glue.py
@@ -406,12 +406,12 @@ def main():
):
# Some have all caps in their config, some don't.
label_name_to_id = {k.lower(): v for k, v in model.config.label2id.items()}
- if list(sorted(label_name_to_id.keys())) == list(sorted(label_list)):
+ if sorted(label_name_to_id.keys()) == sorted(label_list):
label_to_id = {i: int(label_name_to_id[label_list[i]]) for i in range(num_labels)}
else:
logger.warning(
"Your model seems to have been trained with labels, but they don't match the dataset: ",
- f"model labels: {list(sorted(label_name_to_id.keys()))}, dataset labels: {list(sorted(label_list))}."
+ f"model labels: {sorted(label_name_to_id.keys())}, dataset labels: {sorted(label_list)}."
"\nIgnoring the model labels as a result.",
)
elif data_args.task_name is None and not is_regression:
diff --git a/examples/pytorch/text-classification/run_glue_no_trainer.py b/examples/pytorch/text-classification/run_glue_no_trainer.py
index 03de2cf6b5..ee7438071f 100644
--- a/examples/pytorch/text-classification/run_glue_no_trainer.py
+++ b/examples/pytorch/text-classification/run_glue_no_trainer.py
@@ -339,7 +339,7 @@ def main():
):
# Some have all caps in their config, some don't.
label_name_to_id = {k.lower(): v for k, v in model.config.label2id.items()}
- if list(sorted(label_name_to_id.keys())) == list(sorted(label_list)):
+ if sorted(label_name_to_id.keys()) == sorted(label_list):
logger.info(
f"The configuration of the model provided the following label correspondence: {label_name_to_id}. "
"Using it!"
@@ -348,7 +348,7 @@ def main():
else:
logger.warning(
"Your model seems to have been trained with labels, but they don't match the dataset: ",
- f"model labels: {list(sorted(label_name_to_id.keys()))}, dataset labels: {list(sorted(label_list))}."
+ f"model labels: {sorted(label_name_to_id.keys())}, dataset labels: {sorted(label_list)}."
"\nIgnoring the model labels as a result.",
)
elif args.task_name is None and not is_regression:
diff --git a/examples/pytorch/token-classification/run_ner.py b/examples/pytorch/token-classification/run_ner.py
index 065880e7e2..e575ed689e 100755
--- a/examples/pytorch/token-classification/run_ner.py
+++ b/examples/pytorch/token-classification/run_ner.py
@@ -386,7 +386,7 @@ def main():
# Model has labels -> use them.
if model.config.label2id != PretrainedConfig(num_labels=num_labels).label2id:
- if list(sorted(model.config.label2id.keys())) == list(sorted(label_list)):
+ if sorted(model.config.label2id.keys()) == sorted(label_list):
# Reorganize `label_list` to match the ordering of the model.
if labels_are_int:
label_to_id = {i: int(model.config.label2id[l]) for i, l in enumerate(label_list)}
@@ -397,8 +397,8 @@ def main():
else:
logger.warning(
"Your model seems to have been trained with labels, but they don't match the dataset: ",
- f"model labels: {list(sorted(model.config.label2id.keys()))}, dataset labels:"
- f" {list(sorted(label_list))}.\nIgnoring the model labels as a result.",
+ f"model labels: {sorted(model.config.label2id.keys())}, dataset labels:"
+ f" {sorted(label_list)}.\nIgnoring the model labels as a result.",
)
# Set the correspondences label/ID inside the model config
diff --git a/examples/pytorch/token-classification/run_ner_no_trainer.py b/examples/pytorch/token-classification/run_ner_no_trainer.py
index ad63047223..0c6fa85b6b 100755
--- a/examples/pytorch/token-classification/run_ner_no_trainer.py
+++ b/examples/pytorch/token-classification/run_ner_no_trainer.py
@@ -425,7 +425,7 @@ def main():
# Model has labels -> use them.
if model.config.label2id != PretrainedConfig(num_labels=num_labels).label2id:
- if list(sorted(model.config.label2id.keys())) == list(sorted(label_list)):
+ if sorted(model.config.label2id.keys()) == sorted(label_list):
# Reorganize `label_list` to match the ordering of the model.
if labels_are_int:
label_to_id = {i: int(model.config.label2id[l]) for i, l in enumerate(label_list)}
@@ -436,8 +436,8 @@ def main():
else:
logger.warning(
"Your model seems to have been trained with labels, but they don't match the dataset: ",
- f"model labels: {list(sorted(model.config.label2id.keys()))}, dataset labels:"
- f" {list(sorted(label_list))}.\nIgnoring the model labels as a result.",
+ f"model labels: {sorted(model.config.label2id.keys())}, dataset labels:"
+ f" {sorted(label_list)}.\nIgnoring the model labels as a result.",
)
# Set the correspondences label/ID inside the model config
diff --git a/examples/research_projects/bert-loses-patience/run_glue_with_pabee.py b/examples/research_projects/bert-loses-patience/run_glue_with_pabee.py
index aad680f201..8a59b46ab5 100755
--- a/examples/research_projects/bert-loses-patience/run_glue_with_pabee.py
+++ b/examples/research_projects/bert-loses-patience/run_glue_with_pabee.py
@@ -727,9 +727,9 @@ def main():
tokenizer = tokenizer_class.from_pretrained(args.output_dir, do_lower_case=args.do_lower_case)
checkpoints = [args.output_dir]
if args.eval_all_checkpoints:
- checkpoints = list(
+ checkpoints = [
os.path.dirname(c) for c in sorted(glob.glob(args.output_dir + "/**/" + WEIGHTS_NAME, recursive=True))
- )
+ ]
logger.info("Evaluate the following checkpoints: %s", checkpoints)
@@ -743,7 +743,7 @@ def main():
print(f"Evaluation for checkpoint {prefix}")
for patience in patience_list:
result = evaluate(args, model, tokenizer, prefix=prefix, patience=patience)
- result = dict((k + "_{}".format(global_step), v) for k, v in result.items())
+ result = {k + "_{}".format(global_step): v for k, v in result.items()}
results.update(result)
return results
diff --git a/examples/research_projects/bertabs/modeling_bertabs.py b/examples/research_projects/bertabs/modeling_bertabs.py
index 33e216f4a0..19e62804ef 100644
--- a/examples/research_projects/bertabs/modeling_bertabs.py
+++ b/examples/research_projects/bertabs/modeling_bertabs.py
@@ -54,7 +54,7 @@ class BertAbs(BertAbsPreTrainedModel):
load_bert_pretrained_extractive = True if bert_extractive_checkpoint else False
if load_bert_pretrained_extractive:
self.bert.model.load_state_dict(
- dict([(n[11:], p) for n, p in bert_extractive_checkpoint.items() if n.startswith("bert.model")]),
+ {n[11:]: p for n, p in bert_extractive_checkpoint.items() if n.startswith("bert.model")},
strict=True,
)
diff --git a/examples/research_projects/bertology/run_bertology.py b/examples/research_projects/bertology/run_bertology.py
index 030573d87f..4cb046066c 100644
--- a/examples/research_projects/bertology/run_bertology.py
+++ b/examples/research_projects/bertology/run_bertology.py
@@ -218,9 +218,9 @@ def prune_heads(args, model, eval_dataloader, head_mask):
original_time = datetime.now() - before_time
original_num_params = sum(p.numel() for p in model.parameters())
- heads_to_prune = dict(
- (layer, (1 - head_mask[layer].long()).nonzero().squeeze().tolist()) for layer in range(len(head_mask))
- )
+ heads_to_prune = {
+ layer: (1 - head_mask[layer].long()).nonzero().squeeze().tolist() for layer in range(len(head_mask))
+ }
assert sum(len(h) for h in heads_to_prune.values()) == (1 - head_mask.long()).sum().item()
model.prune_heads(heads_to_prune)
diff --git a/examples/research_projects/bertology/run_prune_gpt.py b/examples/research_projects/bertology/run_prune_gpt.py
index 68cece6e99..fa7484a787 100644
--- a/examples/research_projects/bertology/run_prune_gpt.py
+++ b/examples/research_projects/bertology/run_prune_gpt.py
@@ -194,9 +194,9 @@ def prune_heads(args, model, eval_dataloader, head_mask):
original_time = datetime.now() - before_time
original_num_params = sum(p.numel() for p in model.parameters())
- heads_to_prune = dict(
- (layer, (1 - head_mask[layer].long()).nonzero().squeeze().tolist()) for layer in range(len(head_mask))
- )
+ heads_to_prune = {
+ layer: (1 - head_mask[layer].long()).nonzero().squeeze().tolist() for layer in range(len(head_mask))
+ }
for k, v in heads_to_prune.items():
if isinstance(v, int):
diff --git a/examples/research_projects/codeparrot/scripts/minhash_deduplication.py b/examples/research_projects/codeparrot/scripts/minhash_deduplication.py
index 195a9dc809..f198471127 100644
--- a/examples/research_projects/codeparrot/scripts/minhash_deduplication.py
+++ b/examples/research_projects/codeparrot/scripts/minhash_deduplication.py
@@ -29,7 +29,7 @@ def get_min_hash(tokens: List[str]) -> Optional[MinHash]:
def get_tokens(code: str) -> Set[str]:
"""Tokenize a code snippet."""
- return set([t for t in NON_ALPHA.split(code) if len(t.strip()) > 0])
+ return {t for t in NON_ALPHA.split(code) if len(t.strip()) > 0}
class DuplicationIndex:
@@ -243,7 +243,7 @@ def deduplicate_dataset(
>>> ds_dedup, duplicate_clusters = deduplicate_dataset(ds, jaccard_threshold=0.85)
"""
duplicate_clusters = make_duplicate_clusters(dataset, jaccard_threshold)
- duplicate_indices = set(x["base_index"] for cluster in duplicate_clusters for x in cluster)
+ duplicate_indices = {x["base_index"] for cluster in duplicate_clusters for x in cluster}
extreme_dict = {}
extremes_clusters = find_extremes(duplicate_clusters, dataset, jaccard_threshold)
for extremes in extremes_clusters:
diff --git a/examples/research_projects/codeparrot/scripts/preprocessing.py b/examples/research_projects/codeparrot/scripts/preprocessing.py
index 07540d0b62..aecc37223f 100644
--- a/examples/research_projects/codeparrot/scripts/preprocessing.py
+++ b/examples/research_projects/codeparrot/scripts/preprocessing.py
@@ -114,7 +114,7 @@ def char_token_ratio(example):
def preprocess(example):
"""Chain all preprocessing steps into one function to not fill cache."""
- results = dict()
+ results = {}
results.update(get_hash(example))
results.update(line_stats(example))
results.update(alpha_stats(example))
diff --git a/examples/research_projects/codeparrot/scripts/pretokenizing.py b/examples/research_projects/codeparrot/scripts/pretokenizing.py
index 5eb793d10d..7cac8f5119 100644
--- a/examples/research_projects/codeparrot/scripts/pretokenizing.py
+++ b/examples/research_projects/codeparrot/scripts/pretokenizing.py
@@ -8,7 +8,7 @@ from transformers import AutoTokenizer, HfArgumentParser
def tokenize(example):
- output = dict()
+ output = {}
output["input_ids"] = tokenizer(example["content"], truncation=False)["input_ids"]
output["ratio_char_token"] = len(example["content"]) / len(output["input_ids"])
return output
diff --git a/examples/research_projects/deebert/run_glue_deebert.py b/examples/research_projects/deebert/run_glue_deebert.py
index f86390375f..6f7cfe65d0 100644
--- a/examples/research_projects/deebert/run_glue_deebert.py
+++ b/examples/research_projects/deebert/run_glue_deebert.py
@@ -685,9 +685,9 @@ def main():
tokenizer = tokenizer_class.from_pretrained(args.output_dir, do_lower_case=args.do_lower_case)
checkpoints = [args.output_dir]
if args.eval_all_checkpoints:
- checkpoints = list(
+ checkpoints = [
os.path.dirname(c) for c in sorted(glob.glob(args.output_dir + "/**/" + WEIGHTS_NAME, recursive=True))
- )
+ ]
logger.info("Evaluate the following checkpoints: %s", checkpoints)
for checkpoint in checkpoints:
@@ -725,7 +725,7 @@ def main():
for i in range(model.num_layers):
info_str += " {:.2f}".format(100 * each_layer_results[i])
logger.info(info_str)
- result = dict((k + "_{}".format(global_step), v) for k, v in result.items())
+ result = {k + "_{}".format(global_step): v for k, v in result.items()}
results.update(result)
return results
diff --git a/examples/research_projects/distillation/grouped_batch_sampler.py b/examples/research_projects/distillation/grouped_batch_sampler.py
index 83addc371f..a068f7e09e 100644
--- a/examples/research_projects/distillation/grouped_batch_sampler.py
+++ b/examples/research_projects/distillation/grouped_batch_sampler.py
@@ -27,7 +27,7 @@ from utils import logger
def _quantize(x, bins):
bins = copy.deepcopy(bins)
bins = sorted(bins)
- quantized = list(map(lambda y: bisect.bisect_right(bins, y), x))
+ quantized = [bisect.bisect_right(bins, y) for y in x]
return quantized
diff --git a/examples/research_projects/distillation/run_squad_w_distillation.py b/examples/research_projects/distillation/run_squad_w_distillation.py
index aba91995da..4b8b8e542f 100644
--- a/examples/research_projects/distillation/run_squad_w_distillation.py
+++ b/examples/research_projects/distillation/run_squad_w_distillation.py
@@ -850,9 +850,9 @@ def main():
logger.info("Loading checkpoints saved during training for evaluation")
checkpoints = [args.output_dir]
if args.eval_all_checkpoints:
- checkpoints = list(
+ checkpoints = [
os.path.dirname(c) for c in sorted(glob.glob(args.output_dir + "/**/" + WEIGHTS_NAME, recursive=True))
- )
+ ]
logger.info("Evaluate the following checkpoints: %s", checkpoints)
@@ -865,7 +865,7 @@ def main():
# Evaluate
result = evaluate(args, model, tokenizer, prefix=global_step)
- result = dict((k + ("_{}".format(global_step) if global_step else ""), v) for k, v in result.items())
+ result = {k + ("_{}".format(global_step) if global_step else ""): v for k, v in result.items()}
results.update(result)
logger.info("Results: {}".format(results))
diff --git a/examples/research_projects/jax-projects/big_bird/bigbird_flax.py b/examples/research_projects/jax-projects/big_bird/bigbird_flax.py
index ac37cbc860..af5e11c83a 100644
--- a/examples/research_projects/jax-projects/big_bird/bigbird_flax.py
+++ b/examples/research_projects/jax-projects/big_bird/bigbird_flax.py
@@ -247,9 +247,12 @@ class Trainer:
lr = self.scheduler_fn(state_step - 1)
eval_loss = self.evaluate(state, val_dataset)
- logging_dict = dict(
- step=state_step.item(), eval_loss=eval_loss.item(), tr_loss=tr_loss, lr=lr.item()
- )
+ logging_dict = {
+ "step": state_step.item(),
+ "eval_loss": eval_loss.item(),
+ "tr_loss": tr_loss,
+ "lr": lr.item(),
+ }
tqdm.write(str(logging_dict))
self.logger.log(logging_dict, commit=True)
diff --git a/examples/research_projects/jax-projects/big_bird/evaluate.py b/examples/research_projects/jax-projects/big_bird/evaluate.py
index 32ca5172a5..04e9e01ca2 100644
--- a/examples/research_projects/jax-projects/big_bird/evaluate.py
+++ b/examples/research_projects/jax-projects/big_bird/evaluate.py
@@ -144,9 +144,9 @@ def main():
predictions = expand_to_aliases(example["output"])
# some preprocessing to both prediction and answer
- answers = set(["".join(a.split()) for a in answers])
- predictions = set(["".join(p.split()) for p in predictions])
- predictions = set([s for s in predictions if s not in ["``", "''", "`", "'"]])
+ answers = {"".join(a.split()) for a in answers}
+ predictions = {"".join(p.split()) for p in predictions}
+ predictions = {s for s in predictions if s not in ["``", "''", "`", "'"]}
# if there is a common element, it's a exact match
example["match"] = len(list(answers & predictions)) > 0
diff --git a/examples/research_projects/jax-projects/big_bird/prepare_natural_questions.py b/examples/research_projects/jax-projects/big_bird/prepare_natural_questions.py
index 22dc3e4550..6a202ba775 100644
--- a/examples/research_projects/jax-projects/big_bird/prepare_natural_questions.py
+++ b/examples/research_projects/jax-projects/big_bird/prepare_natural_questions.py
@@ -314,12 +314,12 @@ if __name__ == "__main__":
data = data["train" if PROCESS_TRAIN == "true" else "validation"]
- fn_kwargs = dict(
- tokenizer=tokenizer,
- doc_stride=DOC_STRIDE,
- max_length=MAX_LENGTH,
- assertion=False,
- )
+ fn_kwargs = {
+ "tokenizer": tokenizer,
+ "doc_stride": DOC_STRIDE,
+ "max_length": MAX_LENGTH,
+ "assertion": False,
+ }
data = data.map(prepare_inputs, fn_kwargs=fn_kwargs)
data = data.remove_columns(["annotations", "document", "id", "question"])
print(data)
diff --git a/examples/research_projects/jax-projects/model_parallel/partitions.py b/examples/research_projects/jax-projects/model_parallel/partitions.py
index e32ec97e42..86e54ad670 100644
--- a/examples/research_projects/jax-projects/model_parallel/partitions.py
+++ b/examples/research_projects/jax-projects/model_parallel/partitions.py
@@ -34,7 +34,7 @@ empty_dict = object()
def _match(qs, ks):
"""Return True if regexes in qs match any window of strings in tuple ks."""
# compile regexes and force complete match
- qts = tuple(map(lambda x: re.compile(x + "$"), qs))
+ qts = tuple(re.compile(x + "$") for x in qs)
for i in range(len(ks) - len(qs) + 1):
matches = [x.match(y) for x, y in zip(qts, ks[i:])]
if matches and all(matches):
diff --git a/examples/research_projects/longform-qa/eli5_utils.py b/examples/research_projects/longform-qa/eli5_utils.py
index db4eae6604..d4b235fdba 100644
--- a/examples/research_projects/longform-qa/eli5_utils.py
+++ b/examples/research_projects/longform-qa/eli5_utils.py
@@ -78,7 +78,7 @@ def query_es_index(question, es_client, index_name="english_wiki_kilt_snippets_1
)
hits = response["hits"]["hits"]
support_doc = "<P> " + " <P> ".join([hit["_source"]["passage_text"] for hit in hits])
- res_list = [dict([(k, hit["_source"][k]) for k in hit["_source"] if k != "passage_text"]) for hit in hits]
+ res_list = [{k: hit["_source"][k] for k in hit["_source"] if k != "passage_text"} for hit in hits]
for r, hit in zip(res_list, hits):
r["passage_id"] = hit["_id"]
r["score"] = hit["_score"]
@@ -601,7 +601,7 @@ def make_qa_dense_index(
fp = np.memmap(index_name, dtype=dtype, mode="w+", shape=(passages_dset.num_rows, 128))
n_batches = math.ceil(passages_dset.num_rows / batch_size)
for i in range(n_batches):
- passages = [p for p in passages_dset[i * batch_size : (i + 1) * batch_size]["passage_text"]]
+ passages = list(passages_dset[i * batch_size : (i + 1) * batch_size]["passage_text"])
reps = embed_passages_for_retrieval(passages, tokenizer, qa_embedder, max_length, device)
fp[i * batch_size : (i + 1) * batch_size] = reps
if i % 50 == 0:
@@ -634,7 +634,7 @@ def query_qa_dense_index(
D, I = wiki_index.search(q_rep, 2 * n_results)
res_passages = [wiki_passages[int(i)] for i in I[0]]
support_doc = "<P> " + " <P> ".join([p["passage_text"] for p in res_passages])
- res_list = [dict([(k, p[k]) for k in wiki_passages.column_names]) for p in res_passages]
+ res_list = [{k: p[k] for k in wiki_passages.column_names} for p in res_passages]
res_list = [res for res in res_list if len(res["passage_text"].split()) > min_length][:n_results]
for r, sc in zip(res_list, D[0]):
r["score"] = float(sc)
@@ -650,7 +650,7 @@ def batch_query_qa_dense_index(questions, qa_embedder, tokenizer, wiki_passages,
]
all_res_lists = []
for res_passages, dl in zip(res_passages_lst, D):
- res_list = [dict([(k, p[k]) for k in wiki_passages.column_names]) for p in res_passages]
+ res_list = [{k: p[k] for k in wiki_passages.column_names} for p in res_passages]
for r, sc in zip(res_list, dl):
r["score"] = float(sc)
all_res_lists += [res_list[:]]
@@ -663,7 +663,7 @@ def query_qa_dense_index_nn(passage, qa_embedder, tokenizer, wiki_passages, wiki
D, I = wiki_index.search(a_rep, 2 * n_results)
res_passages = [wiki_passages[int(i)] for i in I[0]]
support_doc = "<P> " + " <P> ".join([p["passage_text"] for p in res_passages])
- res_list = [dict([(k, p[k]) for k in wiki_passages.column_names]) for p in res_passages]
+ res_list = [{k: p[k] for k in wiki_passages.column_names} for p in res_passages]
res_list = [res for res in res_list if len(res["passage_text"].split()) > min_length][:n_results]
for r, sc, i in zip(res_list, D[0], I[0]):
r["passage_id"] = int(i)
@@ -680,7 +680,7 @@ def batch_query_qa_dense_index_nn(passages, qa_embedder, tokenizer, wiki_passage
]
all_res_lists = []
for res_passages, dl, il in zip(res_passages_lst, D, I):
- res_list = [dict([(k, p[k]) for k in wiki_passages.column_names]) for p in res_passages]
+ res_list = [{k: p[k] for k in wiki_passages.column_names} for p in res_passages]
for r, sc, i in zip(res_list, dl, il):
r["passage_id"] = int(i)
r["score"] = float(sc)
diff --git a/examples/research_projects/lxmert/extracting_data.py b/examples/research_projects/lxmert/extracting_data.py
index 9c445be336..6b1342c9b1 100644
--- a/examples/research_projects/lxmert/extracting_data.py
+++ b/examples/research_projects/lxmert/extracting_data.py
@@ -61,7 +61,7 @@ class Extract:
assert outputfile is not None and not os.path.isfile(outputfile), f"{outputfile}"
if subset_list is not None:
with open(os.path.realpath(subset_list)) as f:
- self.subset_list = set(map(lambda x: self._vqa_file_split()[0], tryload(f)))
+ self.subset_list = {self._vqa_file_split(x)[0] for x in tryload(f)}
else:
self.subset_list = None
diff --git a/examples/research_projects/lxmert/modeling_frcnn.py b/examples/research_projects/lxmert/modeling_frcnn.py
index 08758b1d3c..edbd224cbe 100644
--- a/examples/research_projects/lxmert/modeling_frcnn.py
+++ b/examples/research_projects/lxmert/modeling_frcnn.py
@@ -1095,7 +1095,7 @@ class ROIPooler(nn.Module):
Returns:
A tensor of shape(N*B, Channels, output_size, output_size)
"""
- x = [v for v in feature_maps.values()]
+ x = list(feature_maps.values())
num_level_assignments = len(self.level_poolers)
assert len(x) == num_level_assignments and len(boxes) == x[0].size(0)
diff --git a/examples/research_projects/mm-imdb/run_mmimdb.py b/examples/research_projects/mm-imdb/run_mmimdb.py
index 23b2a65e5c..2cc3bc3a0c 100644
--- a/examples/research_projects/mm-imdb/run_mmimdb.py
+++ b/examples/research_projects/mm-imdb/run_mmimdb.py
@@ -554,9 +554,9 @@ def main():
if args.do_eval and args.local_rank in [-1, 0]:
checkpoints = [args.output_dir]
if args.eval_all_checkpoints:
- checkpoints = list(
+ checkpoints = [
os.path.dirname(c) for c in sorted(glob.glob(args.output_dir + "/**/" + WEIGHTS_NAME, recursive=True))
- )
+ ]
logger.info("Evaluate the following checkpoints: %s", checkpoints)
for checkpoint in checkpoints:
@@ -566,7 +566,7 @@ def main():
model.load_state_dict(torch.load(checkpoint))
model.to(args.device)
result = evaluate(args, model, tokenizer, criterion, prefix=prefix)
- result = dict((k + "_{}".format(global_step), v) for k, v in result.items())
+ result = {k + "_{}".format(global_step): v for k, v in result.items()}
results.update(result)
return results
diff --git a/examples/research_projects/movement-pruning/masked_run_glue.py b/examples/research_projects/movement-pruning/masked_run_glue.py
index 4ce56e524f..a28cdcc583 100644
--- a/examples/research_projects/movement-pruning/masked_run_glue.py
+++ b/examples/research_projects/movement-pruning/masked_run_glue.py
@@ -941,9 +941,9 @@ def main():
tokenizer = tokenizer_class.from_pretrained(args.output_dir, do_lower_case=args.do_lower_case)
checkpoints = [args.output_dir]
if args.eval_all_checkpoints:
- checkpoints = list(
+ checkpoints = [
os.path.dirname(c) for c in sorted(glob.glob(args.output_dir + "/**/" + WEIGHTS_NAME, recursive=True))
- )
+ ]
logger.info("Evaluate the following checkpoints: %s", checkpoints)
for checkpoint in checkpoints:
@@ -953,7 +953,7 @@ def main():
model = model_class.from_pretrained(checkpoint)
model.to(args.device)
result = evaluate(args, model, tokenizer, prefix=prefix)
- result = dict((k + "_{}".format(global_step), v) for k, v in result.items())
+ result = {k + "_{}".format(global_step): v for k, v in result.items()}
results.update(result)
return results
diff --git a/examples/research_projects/movement-pruning/masked_run_squad.py b/examples/research_projects/movement-pruning/masked_run_squad.py
index a516bb8d58..189ed5be67 100644
--- a/examples/research_projects/movement-pruning/masked_run_squad.py
+++ b/examples/research_projects/movement-pruning/masked_run_squad.py
@@ -1109,10 +1109,10 @@ def main():
logger.info("Loading checkpoints saved during training for evaluation")
checkpoints = [args.output_dir]
if args.eval_all_checkpoints:
- checkpoints = list(
+ checkpoints = [
os.path.dirname(c)
for c in sorted(glob.glob(args.output_dir + "/**/" + WEIGHTS_NAME, recursive=True))
- )
+ ]
else:
logger.info("Loading checkpoint %s for evaluation", args.model_name_or_path)
@@ -1129,7 +1129,7 @@ def main():
# Evaluate
result = evaluate(args, model, tokenizer, prefix=global_step)
- result = dict((k + ("_{}".format(global_step) if global_step else ""), v) for k, v in result.items())
+ result = {k + ("_{}".format(global_step) if global_step else ""): v for k, v in result.items()}
results.update(result)
logger.info("Results: {}".format(results))
diff --git a/examples/research_projects/onnx/summarization/bart_onnx/reduce_onnx_size.py b/examples/research_projects/onnx/summarization/bart_onnx/reduce_onnx_size.py
index d327cdb284..1df20e4504 100644
--- a/examples/research_projects/onnx/summarization/bart_onnx/reduce_onnx_size.py
+++ b/examples/research_projects/onnx/summarization/bart_onnx/reduce_onnx_size.py
@@ -42,8 +42,8 @@ def _graph_replace_input_with(graph_proto, name, new_name):
def _remove_dup_initializers_from_model(model, model_without_ext, ind_to_replace):
- inits_with_data = [i for i in model.graph.initializer]
- inits = [i for i in model_without_ext.graph.initializer]
+ inits_with_data = list(model.graph.initializer)
+ inits = list(model_without_ext.graph.initializer)
for i, ref_i in ind_to_replace:
assert inits_with_data[i].name == inits[i].name
assert inits_with_data[ref_i].name == inits[ref_i].name
@@ -69,7 +69,7 @@ def remove_dup_initializers(onnx_file_path):
model = onnx.load(os.path.join(model_file_folder, model_file_name))
- inits = [i for i in model.graph.initializer]
+ inits = list(model.graph.initializer)
dup_set = set()
dup_map = {}
diff --git a/examples/research_projects/pplm/run_pplm.py b/examples/research_projects/pplm/run_pplm.py
index 54784b944c..54008d56c1 100644
--- a/examples/research_projects/pplm/run_pplm.py
+++ b/examples/research_projects/pplm/run_pplm.py
@@ -127,11 +127,9 @@ def perturb_past(
_, _, _, curr_length, _ = past[0].shape
if curr_length > window_length and window_length > 0:
- ones_key_val_shape = tuple(past[0].shape[:-2]) + tuple([window_length]) + tuple(past[0].shape[-1:])
+ ones_key_val_shape = tuple(past[0].shape[:-2]) + (window_length,) + tuple(past[0].shape[-1:])
- zeros_key_val_shape = (
- tuple(past[0].shape[:-2]) + tuple([curr_length - window_length]) + tuple(past[0].shape[-1:])
- )
+ zeros_key_val_shape = tuple(past[0].shape[:-2]) + (curr_length - window_length,) + tuple(past[0].shape[-1:])
ones_mask = torch.ones(ones_key_val_shape)
ones_mask = decay_mask * ones_mask.permute(0, 1, 2, 4, 3)
diff --git a/examples/research_projects/rag-end2end-retriever/finetune_rag.py b/examples/research_projects/rag-end2end-retriever/finetune_rag.py
index 8d0ba293b1..194eeb3fa3 100644
--- a/examples/research_projects/rag-end2end-retriever/finetune_rag.py
+++ b/examples/research_projects/rag-end2end-retriever/finetune_rag.py
@@ -164,11 +164,11 @@ class GenerativeQAModule(BaseTransformer):
self.step_count = 0
self.metrics = defaultdict(list)
- self.dataset_kwargs: dict = dict(
- data_dir=self.hparams.data_dir,
- max_source_length=self.hparams.max_source_length,
- prefix=prefix or "",
- )
+ self.dataset_kwargs: dict = {
+ "data_dir": self.hparams.data_dir,
+ "max_source_length": self.hparams.max_source_length,
+ "prefix": prefix or "",
+ }
n_observations_per_split = {
"train": self.hparams.n_train,
"val": self.hparams.n_val,
diff --git a/examples/research_projects/rag-end2end-retriever/utils_rag.py b/examples/research_projects/rag-end2end-retriever/utils_rag.py
index 7bf5d7e35e..ec98c1d782 100644
--- a/examples/research_projects/rag-end2end-retriever/utils_rag.py
+++ b/examples/research_projects/rag-end2end-retriever/utils_rag.py
@@ -137,7 +137,7 @@ logger = getLogger(__name__)
def flatten_list(summary_ids: List[List]):
- return [x for x in itertools.chain.from_iterable(summary_ids)]
+ return list(itertools.chain.from_iterable(summary_ids))
def save_git_info(folder_path: str) -> None:
diff --git a/examples/research_projects/rag/finetune_rag.py b/examples/research_projects/rag/finetune_rag.py
index f5cef614e2..2e058850ec 100644
--- a/examples/research_projects/rag/finetune_rag.py
+++ b/examples/research_projects/rag/finetune_rag.py
@@ -162,11 +162,11 @@ class GenerativeQAModule(BaseTransformer):
self.step_count = 0
self.metrics = defaultdict(list)
- self.dataset_kwargs: dict = dict(
- data_dir=self.hparams.data_dir,
- max_source_length=self.hparams.max_source_length,
- prefix=prefix or "",
- )
+ self.dataset_kwargs: dict = {
+ "data_dir": self.hparams.data_dir,
+ "max_source_length": self.hparams.max_source_length,
+ "prefix": prefix or "",
+ }
n_observations_per_split = {
"train": self.hparams.n_train,
"val": self.hparams.n_val,
diff --git a/examples/research_projects/rag/utils_rag.py b/examples/research_projects/rag/utils_rag.py
index 7bf5d7e35e..ec98c1d782 100644
--- a/examples/research_projects/rag/utils_rag.py
+++ b/examples/research_projects/rag/utils_rag.py
@@ -137,7 +137,7 @@ logger = getLogger(__name__)
def flatten_list(summary_ids: List[List]):
- return [x for x in itertools.chain.from_iterable(summary_ids)]
+ return list(itertools.chain.from_iterable(summary_ids))
def save_git_info(folder_path: str) -> None:
diff --git a/examples/research_projects/robust-speech-event/run_speech_recognition_ctc_bnb.py b/examples/research_projects/robust-speech-event/run_speech_recognition_ctc_bnb.py
index aaacc79ceb..abbe9a9982 100755
--- a/examples/research_projects/robust-speech-event/run_speech_recognition_ctc_bnb.py
+++ b/examples/research_projects/robust-speech-event/run_speech_recognition_ctc_bnb.py
@@ -344,7 +344,7 @@ def create_vocabulary_from_data(
lambda vocab_1, vocab_2: set(vocab_1["vocab"][0]) | set(vocab_2["vocab"][0]), vocabs.values()
)
- vocab_dict = {v: k for k, v in enumerate(sorted(list(vocab_set)))}
+ vocab_dict = {v: k for k, v in enumerate(sorted(vocab_set))}
# replace white space with delimiter token
if word_delimiter_token is not None:
diff --git a/examples/research_projects/seq2seq-distillation/_test_seq2seq_examples.py b/examples/research_projects/seq2seq-distillation/_test_seq2seq_examples.py
index b1c84ad9b8..454951ed38 100644
--- a/examples/research_projects/seq2seq-distillation/_test_seq2seq_examples.py
+++ b/examples/research_projects/seq2seq-distillation/_test_seq2seq_examples.py
@@ -145,18 +145,18 @@ class TestSummarizationDistiller(TestCasePlus):
assert not failures, f"The following models could not be loaded through AutoConfig: {failures}"
def test_distill_no_teacher(self):
- updates = dict(student_encoder_layers=2, student_decoder_layers=1, no_teacher=True)
+ updates = {"student_encoder_layers": 2, "student_decoder_layers": 1, "no_teacher": True}
self._test_distiller_cli(updates)
def test_distill_checkpointing_with_teacher(self):
- updates = dict(
- student_encoder_layers=2,
- student_decoder_layers=1,
- max_epochs=4,
- val_check_interval=0.25,
- alpha_hid=2.0,
- model_name_or_path="IGNORE_THIS_IT_DOESNT_GET_USED",
- )
+ updates = {
+ "student_encoder_layers": 2,
+ "student_decoder_layers": 1,
+ "max_epochs": 4,
+ "val_check_interval": 0.25,
+ "alpha_hid": 2.0,
+ "model_name_or_path": "IGNORE_THIS_IT_DOESNT_GET_USED",
+ }
model = self._test_distiller_cli(updates, check_contents=False)
ckpts = list(Path(model.output_dir).glob("*.ckpt"))
@@ -193,19 +193,19 @@ class TestSummarizationDistiller(TestCasePlus):
self.assertEqual(nll_loss, model_computed_loss)
def test_distill_mbart(self):
- updates = dict(
- student_encoder_layers=2,
- student_decoder_layers=1,
- num_train_epochs=4,
- val_check_interval=0.25,
- alpha_hid=2.0,
- task="translation",
- model_name_or_path="IGNORE_THIS_IT_DOESNT_GET_USED",
- tokenizer_name=MBART_TINY,
- teacher=MBART_TINY,
- src_lang="en_XX",
- tgt_lang="ro_RO",
- )
+ updates = {
+ "student_encoder_layers": 2,
+ "student_decoder_layers": 1,
+ "num_train_epochs": 4,
+ "val_check_interval": 0.25,
+ "alpha_hid": 2.0,
+ "task": "translation",
+ "model_name_or_path": "IGNORE_THIS_IT_DOESNT_GET_USED",
+ "tokenizer_name": MBART_TINY,
+ "teacher": MBART_TINY,
+ "src_lang": "en_XX",
+ "tgt_lang": "ro_RO",
+ }
model = self._test_distiller_cli(updates, check_contents=False)
assert model.model.config.model_type == "mbart"
@@ -217,39 +217,39 @@ class TestSummarizationDistiller(TestCasePlus):
self.assertEqual(len(transformer_ckpts), 2)
def test_distill_t5(self):
- updates = dict(
- student_encoder_layers=1,
- student_decoder_layers=1,
- alpha_hid=2.0,
- teacher=T5_TINY,
- model_name_or_path=T5_TINY,
- tokenizer_name=T5_TINY,
- )
+ updates = {
+ "student_encoder_layers": 1,
+ "student_decoder_layers": 1,
+ "alpha_hid": 2.0,
+ "teacher": T5_TINY,
+ "model_name_or_path": T5_TINY,
+ "tokenizer_name": T5_TINY,
+ }
self._test_distiller_cli(updates)
def test_distill_different_base_models(self):
- updates = dict(
- teacher=T5_TINY,
- student=T5_TINIER,
- model_name_or_path=T5_TINIER,
- tokenizer_name=T5_TINIER,
- )
+ updates = {
+ "teacher": T5_TINY,
+ "student": T5_TINIER,
+ "model_name_or_path": T5_TINIER,
+ "tokenizer_name": T5_TINIER,
+ }
self._test_distiller_cli(updates)
def _test_distiller_cli(self, updates, check_contents=True):
- default_updates = dict(
- label_smoothing=0.0,
- early_stopping_patience=-1,
- train_batch_size=1,
- eval_batch_size=2,
- max_epochs=2,
- alpha_mlm=0.2,
- alpha_ce=0.8,
- do_predict=True,
- model_name_or_path="sshleifer/tinier_bart",
- teacher=CHEAP_ARGS["model_name_or_path"],
- val_check_interval=0.5,
- )
+ default_updates = {
+ "label_smoothing": 0.0,
+ "early_stopping_patience": -1,
+ "train_batch_size": 1,
+ "eval_batch_size": 2,
+ "max_epochs": 2,
+ "alpha_mlm": 0.2,
+ "alpha_ce": 0.8,
+ "do_predict": True,
+ "model_name_or_path": "sshleifer/tinier_bart",
+ "teacher": CHEAP_ARGS["model_name_or_path"],
+ "val_check_interval": 0.5,
+ }
default_updates.update(updates)
args_d: dict = CHEAP_ARGS.copy()
tmp_dir = make_test_data_dir(tmp_dir=self.get_auto_remove_tmp_dir())
diff --git a/examples/research_projects/seq2seq-distillation/_test_seq2seq_examples_multi_gpu.py b/examples/research_projects/seq2seq-distillation/_test_seq2seq_examples_multi_gpu.py
index bb06ec8e65..9eeb3b30d3 100644
--- a/examples/research_projects/seq2seq-distillation/_test_seq2seq_examples_multi_gpu.py
+++ b/examples/research_projects/seq2seq-distillation/_test_seq2seq_examples_multi_gpu.py
@@ -98,29 +98,29 @@ class TestSummarizationDistillerMultiGPU(TestCasePlus):
@require_torch_multi_gpu
def test_multi_gpu(self):
- updates = dict(
- no_teacher=True,
- freeze_encoder=True,
- gpus=2,
- overwrite_output_dir=True,
- sortish_sampler=True,
- )
+ updates = {
+ "no_teacher": True,
+ "freeze_encoder": True,
+ "gpus": 2,
+ "overwrite_output_dir": True,
+ "sortish_sampler": True,
+ }
self._test_distiller_cli_fork(updates, check_contents=False)
def _test_distiller_cli_fork(self, updates, check_contents=True):
- default_updates = dict(
- label_smoothing=0.0,
- early_stopping_patience=-1,
- train_batch_size=1,
- eval_batch_size=2,
- max_epochs=2,
- alpha_mlm=0.2,
- alpha_ce=0.8,
- do_predict=True,
- model_name_or_path="sshleifer/tinier_bart",
- teacher=CHEAP_ARGS["model_name_or_path"],
- val_check_interval=0.5,
- )
+ default_updates = {
+ "label_smoothing": 0.0,
+ "early_stopping_patience": -1,
+ "train_batch_size": 1,
+ "eval_batch_size": 2,
+ "max_epochs": 2,
+ "alpha_mlm": 0.2,
+ "alpha_ce": 0.8,
+ "do_predict": True,
+ "model_name_or_path": "sshleifer/tinier_bart",
+ "teacher": CHEAP_ARGS["model_name_or_path"],
+ "val_check_interval": 0.5,
+ }
default_updates.update(updates)
args_d: dict = CHEAP_ARGS.copy()
tmp_dir = make_test_data_dir(tmp_dir=self.get_auto_remove_tmp_dir())
diff --git a/examples/research_projects/seq2seq-distillation/finetune.py b/examples/research_projects/seq2seq-distillation/finetune.py
index 77f02bef13..a13f9b533d 100755
--- a/examples/research_projects/seq2seq-distillation/finetune.py
+++ b/examples/research_projects/seq2seq-distillation/finetune.py
@@ -74,11 +74,11 @@ class SummarizationModule(BaseTransformer):
self.model_type = self.config.model_type
self.vocab_size = self.config.tgt_vocab_size if self.model_type == "fsmt" else self.config.vocab_size
- self.dataset_kwargs: dict = dict(
- data_dir=self.hparams.data_dir,
- max_source_length=self.hparams.max_source_length,
- prefix=self.model.config.prefix or "",
- )
+ self.dataset_kwargs: dict = {
+ "data_dir": self.hparams.data_dir,
+ "max_source_length": self.hparams.max_source_length,
+ "prefix": self.model.config.prefix or "",
+ }
n_observations_per_split = {
"train": self.hparams.n_train,
"val": self.hparams.n_val,
@@ -433,7 +433,7 @@ def main(args, model=None) -> SummarizationModule:
return model
model.hparams.test_checkpoint = ""
- checkpoints = list(sorted(glob.glob(os.path.join(args.output_dir, "*.ckpt"), recursive=True)))
+ checkpoints = sorted(glob.glob(os.path.join(args.output_dir, "*.ckpt"), recursive=True))
if checkpoints:
model.hparams.test_checkpoint = checkpoints[-1]
trainer.resume_from_checkpoint = checkpoints[-1]
diff --git a/examples/research_projects/seq2seq-distillation/make_student.py b/examples/research_projects/seq2seq-distillation/make_student.py
index c1efc1b497..83e014bf48 100644
--- a/examples/research_projects/seq2seq-distillation/make_student.py
+++ b/examples/research_projects/seq2seq-distillation/make_student.py
@@ -171,11 +171,11 @@ def create_student_by_copying_alternating_layers(
logger.info(
f"Copied encoder layers {e_layers_to_copy} and decoder layers {d_layers_to_copy}. Saving them to {save_path}"
)
- student.config.init_metadata = dict(
- teacher_type=teacher.config.model_type,
- copied_encoder_layers=e_layers_to_copy,
- copied_decoder_layers=d_layers_to_copy,
- )
+ student.config.init_metadata = {
+ "teacher_type": teacher.config.model_type,
+ "copied_encoder_layers": e_layers_to_copy,
+ "copied_decoder_layers": d_layers_to_copy,
+ }
student.save_pretrained(save_path)
# Save information about copying for easier reproducibility
diff --git a/examples/research_projects/seq2seq-distillation/run_eval.py b/examples/research_projects/seq2seq-distillation/run_eval.py
index 3f685884e8..98c9786d2c 100755
--- a/examples/research_projects/seq2seq-distillation/run_eval.py
+++ b/examples/research_projects/seq2seq-distillation/run_eval.py
@@ -63,7 +63,7 @@ def generate_summaries_or_translations(
fout.close()
runtime = int(time.time() - start_time) # seconds
n_obs = len(examples)
- return dict(n_obs=n_obs, runtime=runtime, seconds_per_sample=round(runtime / n_obs, 4))
+ return {"n_obs": n_obs, "runtime": runtime, "seconds_per_sample": round(runtime / n_obs, 4)}
def datetime_now():
diff --git a/examples/research_projects/seq2seq-distillation/utils.py b/examples/research_projects/seq2seq-distillation/utils.py
index f1a8cef850..de666e0c24 100644
--- a/examples/research_projects/seq2seq-distillation/utils.py
+++ b/examples/research_projects/seq2seq-distillation/utils.py
@@ -437,7 +437,7 @@ def pickle_save(obj, path):
def flatten_list(summary_ids: List[List]):
- return [x for x in itertools.chain.from_iterable(summary_ids)]
+ return list(itertools.chain.from_iterable(summary_ids))
def save_git_info(folder_path: str) -> None:
diff --git a/examples/research_projects/tapex/wikisql_utils.py b/examples/research_projects/tapex/wikisql_utils.py
index 3028e81ad4..110b14e02f 100644
--- a/examples/research_projects/tapex/wikisql_utils.py
+++ b/examples/research_projects/tapex/wikisql_utils.py
@@ -30,7 +30,7 @@ EMPTY_ANSWER_AGG = "none"
def _split_thousands(delimiter, value):
split = value.split(delimiter)
- return len(split) > 1 and any(map(lambda x: len(x) == 3, split))
+ return len(split) > 1 and any(len(x) == 3 for x in split)
def convert_to_float(value):
@@ -123,7 +123,7 @@ _TOKENIZER = re.compile(r"\w+|[^\w\s]+", re.UNICODE | re.MULTILINE | re.DOTALL)
def _normalize_for_match(x):
- return [t for t in _TOKENIZER.findall(x.lower())]
+ return list(_TOKENIZER.findall(x.lower()))
def _compare(operator, src, tgt):
diff --git a/examples/research_projects/visual_bert/extracting_data.py b/examples/research_projects/visual_bert/extracting_data.py
index 9c445be336..6b1342c9b1 100644
--- a/examples/research_projects/visual_bert/extracting_data.py
+++ b/examples/research_projects/visual_bert/extracting_data.py
@@ -61,7 +61,7 @@ class Extract:
assert outputfile is not None and not os.path.isfile(outputfile), f"{outputfile}"
if subset_list is not None:
with open(os.path.realpath(subset_list)) as f:
- self.subset_list = set(map(lambda x: self._vqa_file_split()[0], tryload(f)))
+ self.subset_list = {self._vqa_file_split()[0] for x in tryload(f)}
else:
self.subset_list = None
diff --git a/examples/research_projects/visual_bert/modeling_frcnn.py b/examples/research_projects/visual_bert/modeling_frcnn.py
index 08758b1d3c..edbd224cbe 100644
--- a/examples/research_projects/visual_bert/modeling_frcnn.py
+++ b/examples/research_projects/visual_bert/modeling_frcnn.py
@@ -1095,7 +1095,7 @@ class ROIPooler(nn.Module):
Returns:
A tensor of shape(N*B, Channels, output_size, output_size)
"""
- x = [v for v in feature_maps.values()]
+ x = list(feature_maps.values())
num_level_assignments = len(self.level_poolers)
assert len(x) == num_level_assignments and len(boxes) == x[0].size(0)
diff --git a/examples/research_projects/vqgan-clip/VQGAN_CLIP.py b/examples/research_projects/vqgan-clip/VQGAN_CLIP.py
index b5a23c15b2..1bfbc4cd5c 100644
--- a/examples/research_projects/vqgan-clip/VQGAN_CLIP.py
+++ b/examples/research_projects/vqgan-clip/VQGAN_CLIP.py
@@ -99,7 +99,7 @@ class VQGAN_CLIP(nn.Module):
output_path = "./animation.gif"
if input_path is None:
input_path = self.save_path
- paths = list(sorted(glob(input_path + "/*")))
+ paths = sorted(glob(input_path + "/*"))
if not len(paths):
raise ValueError(
"No images found in save path, aborting (did you pass save_intermediate=True to the generate"
@@ -178,7 +178,7 @@ class VQGAN_CLIP(nn.Module):
wandb.init(reinit=True, project="face-editor")
wandb.config.update({"Positive Prompts": positive_prompts})
wandb.config.update({"Negative Prompts": negative_prompts})
- wandb.config.update(dict(lr=self.lr, iterations=self.iterations))
+ wandb.config.update({"lr": self.lr, "iterations": self.iterations})
if image_path:
image = Image.open(image_path)
image = image.resize((256, 256))
diff --git a/examples/research_projects/vqgan-clip/loaders.py b/examples/research_projects/vqgan-clip/loaders.py
index e8650f7212..88513bcb69 100644
--- a/examples/research_projects/vqgan-clip/loaders.py
+++ b/examples/research_projects/vqgan-clip/loaders.py
@@ -47,7 +47,7 @@ def get_obj_from_str(string, reload=False):
def instantiate_from_config(config):
if "target" not in config:
raise KeyError("Expected key `target` to instantiate.")
- return get_obj_from_str(config["target"])(**config.get("params", dict()))
+ return get_obj_from_str(config["target"])(**config.get("params", {}))
def load_model_from_config(config, sd, gpu=True, eval_mode=True):
diff --git a/examples/research_projects/wav2vec2/test_wav2vec2_deepspeed.py b/examples/research_projects/wav2vec2/test_wav2vec2_deepspeed.py
index 8f181409d6..0f3e239df6 100644
--- a/examples/research_projects/wav2vec2/test_wav2vec2_deepspeed.py
+++ b/examples/research_projects/wav2vec2/test_wav2vec2_deepspeed.py
@@ -51,7 +51,7 @@ from transformers.trainer_utils import set_seed # noqa
set_seed(42)
-models = dict(base="patrickvonplaten/wav2vec2_tiny_random", robust="patrickvonplaten/wav2vec2_tiny_random_robust")
+models = {"base": "patrickvonplaten/wav2vec2_tiny_random", "robust": "patrickvonplaten/wav2vec2_tiny_random_robust"}
ZERO2 = "zero2"
ZERO3 = "zero3"
diff --git a/examples/research_projects/xtreme-s/run_xtreme_s.py b/examples/research_projects/xtreme-s/run_xtreme_s.py
index 38ed3376ec..6c5b4bde89 100644
--- a/examples/research_projects/xtreme-s/run_xtreme_s.py
+++ b/examples/research_projects/xtreme-s/run_xtreme_s.py
@@ -400,7 +400,7 @@ def create_vocabulary_from_data(
| (set(vocabs["predict"]["vocab"][0]) if "predict" in vocabs else set())
)
- vocab_dict = {v: k for k, v in enumerate(sorted(list(vocab_set)))}
+ vocab_dict = {v: k for k, v in enumerate(sorted(vocab_set))}
# replace white space with delimiter token
if word_delimiter_token is not None:
diff --git a/examples/tensorflow/benchmarking/plot_csv_file.py b/examples/tensorflow/benchmarking/plot_csv_file.py
index 1a0ae735d8..9a9ad9c670 100644
--- a/examples/tensorflow/benchmarking/plot_csv_file.py
+++ b/examples/tensorflow/benchmarking/plot_csv_file.py
@@ -83,7 +83,7 @@ def can_convert_to_float(string):
class Plot:
def __init__(self, args):
self.args = args
- self.result_dict = defaultdict(lambda: dict(bsz=[], seq_len=[], result={}))
+ self.result_dict = defaultdict(lambda: {"bsz": [], "seq_len": [], "result": {}})
with open(self.args.csv_file, newline="") as csv_file:
reader = csv.DictReader(csv_file)
@@ -116,8 +116,8 @@ class Plot:
axis.set_major_formatter(ScalarFormatter())
for model_name_idx, model_name in enumerate(self.result_dict.keys()):
- batch_sizes = sorted(list(set(self.result_dict[model_name]["bsz"])))
- sequence_lengths = sorted(list(set(self.result_dict[model_name]["seq_len"])))
+ batch_sizes = sorted(set(self.result_dict[model_name]["bsz"]))
+ sequence_lengths = sorted(set(self.result_dict[model_name]["seq_len"]))
results = self.result_dict[model_name]["result"]
(x_axis_array, inner_loop_array) = (
diff --git a/examples/tensorflow/image-classification/run_image_classification.py b/examples/tensorflow/image-classification/run_image_classification.py
index d9fcc8daaf..b115906064 100644
--- a/examples/tensorflow/image-classification/run_image_classification.py
+++ b/examples/tensorflow/image-classification/run_image_classification.py
@@ -300,7 +300,7 @@ def main():
# Prepare label mappings.
# We'll include these in the model's config to get human readable labels in the Inference API.
labels = dataset["train"].features["labels"].names
- label2id, id2label = dict(), dict()
+ label2id, id2label = {}, {}
for i, label in enumerate(labels):
label2id[label] = str(i)
id2label[str(i)] = label
diff --git a/examples/tensorflow/language-modeling/run_clm.py b/examples/tensorflow/language-modeling/run_clm.py
index 51087123b5..861929afb5 100755
--- a/examples/tensorflow/language-modeling/run_clm.py
+++ b/examples/tensorflow/language-modeling/run_clm.py
@@ -600,7 +600,7 @@ def main():
if training_args.output_dir is not None:
output_eval_file = os.path.join(training_args.output_dir, "all_results.json")
- results_dict = dict()
+ results_dict = {}
results_dict["train_loss"] = train_loss
results_dict["train_perplexity"] = train_perplexity
results_dict["eval_loss"] = validation_loss
diff --git a/examples/tensorflow/language-modeling/run_mlm.py b/examples/tensorflow/language-modeling/run_mlm.py
index f7812b611b..5db7130df5 100755
--- a/examples/tensorflow/language-modeling/run_mlm.py
+++ b/examples/tensorflow/language-modeling/run_mlm.py
@@ -623,7 +623,7 @@ def main():
if training_args.output_dir is not None:
output_eval_file = os.path.join(training_args.output_dir, "all_results.json")
- results_dict = dict()
+ results_dict = {}
results_dict["train_loss"] = train_loss
results_dict["train_perplexity"] = train_perplexity
results_dict["eval_loss"] = validation_loss
diff --git a/examples/tensorflow/question-answering/run_qa.py b/examples/tensorflow/question-answering/run_qa.py
index 1c3acd34ae..d6a816525e 100755
--- a/examples/tensorflow/question-answering/run_qa.py
+++ b/examples/tensorflow/question-answering/run_qa.py
@@ -464,7 +464,7 @@ def main():
return tokenized_examples
- processed_datasets = dict()
+ processed_datasets = {}
if training_args.do_train:
if "train" not in datasets:
raise ValueError("--do_train requires a train dataset")
diff --git a/examples/tensorflow/text-classification/run_glue.py b/examples/tensorflow/text-classification/run_glue.py
index bf03901011..428565bb24 100644
--- a/examples/tensorflow/text-classification/run_glue.py
+++ b/examples/tensorflow/text-classification/run_glue.py
@@ -310,12 +310,12 @@ def main():
if config.label2id != PretrainedConfig(num_labels=num_labels).label2id and not is_regression:
# Some have all caps in their config, some don't.
label_name_to_id = {k.lower(): v for k, v in config.label2id.items()}
- if list(sorted(label_name_to_id.keys())) == list(sorted(label_list)):
+ if sorted(label_name_to_id.keys()) == sorted(label_list):
label_to_id = {i: int(label_name_to_id[label_list[i]]) for i in range(num_labels)}
else:
logger.warning(
"Your model seems to have been trained with labels, but they don't match the dataset: ",
- f"model labels: {list(sorted(label_name_to_id.keys()))}, dataset labels: {list(sorted(label_list))}."
+ f"model labels: {sorted(label_name_to_id.keys())}, dataset labels: {sorted(label_list)}."
"\nIgnoring the model labels as a result.",
)
label_to_id = {label: i for i, label in enumerate(label_list)}
@@ -383,7 +383,7 @@ def main():
dataset_options.experimental_distribute.auto_shard_policy = tf.data.experimental.AutoShardPolicy.OFF
num_replicas = training_args.strategy.num_replicas_in_sync
- tf_data = dict()
+ tf_data = {}
max_samples = {
"train": data_args.max_train_samples,
"validation": data_args.max_eval_samples,
diff --git a/examples/tensorflow/text-classification/run_text_classification.py b/examples/tensorflow/text-classification/run_text_classification.py
index 0cf1972e93..f46d11c61c 100644
--- a/examples/tensorflow/text-classification/run_text_classification.py
+++ b/examples/tensorflow/text-classification/run_text_classification.py
@@ -343,13 +343,13 @@ def main():
if "train" in datasets:
if not is_regression and config.label2id != PretrainedConfig(num_labels=num_labels).label2id:
label_name_to_id = config.label2id
- if list(sorted(label_name_to_id.keys())) == list(sorted(label_list)):
+ if sorted(label_name_to_id.keys()) == sorted(label_list):
label_to_id = label_name_to_id # Use the model's labels
else:
logger.warning(
"Your model seems to have been trained with labels, but they don't match the dataset: ",
- f"model labels: {list(sorted(label_name_to_id.keys()))}, dataset labels:"
- f" {list(sorted(label_list))}.\nIgnoring the model labels as a result.",
+ f"model labels: {sorted(label_name_to_id.keys())}, dataset labels:"
+ f" {sorted(label_list)}.\nIgnoring the model labels as a result.",
)
label_to_id = {v: i for i, v in enumerate(label_list)}
elif not is_regression:
@@ -411,7 +411,7 @@ def main():
dataset_options.experimental_distribute.auto_shard_policy = tf.data.experimental.AutoShardPolicy.OFF
num_replicas = training_args.strategy.num_replicas_in_sync
- tf_data = dict()
+ tf_data = {}
max_samples = {
"train": data_args.max_train_samples,
"validation": data_args.max_val_samples,
diff --git a/pyproject.toml b/pyproject.toml
index 26fa9e0bb0..1a488dbba9 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -5,7 +5,7 @@ target-version = ['py37']
[tool.ruff]
# Never enforce `E501` (line length violations).
ignore = ["E501", "E741", "W605"]
-select = ["E", "F", "I", "W"]
+select = ["C", "E", "F", "I", "W"]
line-length = 119
# Ignore import violations in all `__init__.py` files.
diff --git a/src/transformers/benchmark/benchmark_utils.py b/src/transformers/benchmark/benchmark_utils.py
index a6c6353c19..bde10f6712 100644
--- a/src/transformers/benchmark/benchmark_utils.py
+++ b/src/transformers/benchmark/benchmark_utils.py
@@ -557,9 +557,9 @@ def stop_memory_tracing(
cumulative_memory_dict[frame][2] += cpu_gpu_mem_inc
cumulative_memory = sorted(
- list(cumulative_memory_dict.items()), key=lambda x: x[1][2], reverse=True
+ cumulative_memory_dict.items(), key=lambda x: x[1][2], reverse=True
) # order by the total CPU + GPU memory increase
- cumulative_memory = list(
+ cumulative_memory = [
MemoryState(
frame=frame,
cpu=Memory(cpu_mem_inc),
@@ -567,7 +567,7 @@ def stop_memory_tracing(
cpu_gpu=Memory(cpu_gpu_mem_inc),
)
for frame, (cpu_mem_inc, gpu_mem_inc, cpu_gpu_mem_inc) in cumulative_memory
- )
+ ]
memory_curr_trace = sorted(memory_curr_trace, key=lambda x: x.cpu_gpu.bytes, reverse=True)
diff --git a/src/transformers/configuration_utils.py b/src/transformers/configuration_utils.py
index e3b4148b39..37268ea34b 100755
--- a/src/transformers/configuration_utils.py
+++ b/src/transformers/configuration_utils.py
@@ -324,7 +324,7 @@ class PretrainedConfig(PushToHubMixin):
f"You passed along `num_labels={num_labels}` with an incompatible id to label map: "
f"{self.id2label}. The number of labels wil be overwritten to {self.num_labels}."
)
- self.id2label = dict((int(key), value) for key, value in self.id2label.items())
+ self.id2label = {int(key): value for key, value in self.id2label.items()}
# Keys are always strings in JSON so convert ids to int here.
else:
self.num_labels = kwargs.pop("num_labels", 2)
@@ -696,7 +696,7 @@ class PretrainedConfig(PushToHubMixin):
config = cls(**config_dict)
if hasattr(config, "pruned_heads"):
- config.pruned_heads = dict((int(key), value) for key, value in config.pruned_heads.items())
+ config.pruned_heads = {int(key): value for key, value in config.pruned_heads.items()}
# Update config with kwargs if needed
if "num_labels" in kwargs and "id2label" in kwargs:
diff --git a/src/transformers/deepspeed.py b/src/transformers/deepspeed.py
index 5a76cdf8e1..9dcd7be7f4 100644
--- a/src/transformers/deepspeed.py
+++ b/src/transformers/deepspeed.py
@@ -367,13 +367,13 @@ def deepspeed_init(trainer, num_training_steps, resume_from_checkpoint=None, inf
# keep for quick debug:
# from pprint import pprint; pprint(config)
- kwargs = dict(
- model=model,
- model_parameters=model_parameters,
- config_params=config,
- optimizer=optimizer,
- lr_scheduler=lr_scheduler,
- )
+ kwargs = {
+ "model": model,
+ "model_parameters": model_parameters,
+ "config_params": config,
+ "optimizer": optimizer,
+ "lr_scheduler": lr_scheduler,
+ }
deepspeed_engine, optimizer, _, lr_scheduler = deepspeed.initialize(**kwargs)
diff --git a/src/transformers/feature_extraction_sequence_utils.py b/src/transformers/feature_extraction_sequence_utils.py
index 831d30e390..2121261be0 100644
--- a/src/transformers/feature_extraction_sequence_utils.py
+++ b/src/transformers/feature_extraction_sequence_utils.py
@@ -188,7 +188,7 @@ class SequenceFeatureExtractor(FeatureExtractionMixin):
truncated_inputs = []
for i in range(batch_size):
- inputs = dict((k, v[i]) for k, v in processed_features.items())
+ inputs = {k: v[i] for k, v in processed_features.items()}
# truncation
inputs_slice = self._truncate(
inputs,
diff --git a/src/transformers/generation/beam_constraints.py b/src/transformers/generation/beam_constraints.py
index baf7e3b71e..2563ac23cd 100644
--- a/src/transformers/generation/beam_constraints.py
+++ b/src/transformers/generation/beam_constraints.py
@@ -208,12 +208,12 @@ class DisjunctiveTrie:
"""
self.max_height = max([len(one) for one in nested_token_ids])
- root = dict()
+ root = {}
for token_ids in nested_token_ids:
level = root
for tidx, token_id in enumerate(token_ids):
if token_id not in level:
- level[token_id] = dict()
+ level[token_id] = {}
level = level[token_id]
diff --git a/src/transformers/generation/logits_process.py b/src/transformers/generation/logits_process.py
index 0bd6095f44..ba777f1e8e 100644
--- a/src/transformers/generation/logits_process.py
+++ b/src/transformers/generation/logits_process.py
@@ -951,7 +951,7 @@ class WhisperTimeStampLogitsProcessor(LogitsProcessor):
# timestamps have to appear in pairs, except directly before eos_token; mask logits accordingly
for k in range(input_ids.shape[0]):
- seq = [t for t in input_ids[k, self.begin_index :].tolist()]
+ seq = list(input_ids[k, self.begin_index :].tolist())
last_was_timestamp = len(seq) >= 1 and seq[-1] >= self.timestamp_begin
penultimate_was_timestamp = len(seq) < 2 or seq[-2] >= self.timestamp_begin
diff --git a/src/transformers/image_utils.py b/src/transformers/image_utils.py
index b8db1115af..08ec05fa09 100644
--- a/src/transformers/image_utils.py
+++ b/src/transformers/image_utils.py
@@ -115,7 +115,7 @@ def make_list_of_images(images, expected_ndims: int = 3) -> List[ImageInput]:
if is_valid_image(images):
if images.ndim == expected_ndims + 1:
# Batch of images
- images = [image for image in images]
+ images = list(images)
elif images.ndim == expected_ndims:
# Single image
images = [images]
diff --git a/src/transformers/integrations.py b/src/transformers/integrations.py
index 38e23ea5b0..a2effeac63 100644
--- a/src/transformers/integrations.py
+++ b/src/transformers/integrations.py
@@ -365,7 +365,7 @@ def run_hp_search_sigopt(trainer, n_trials: int, direction: str, **kwargs) -> Be
name="huggingface-tune",
type="offline",
parameters=trainer.hp_space(None),
- metrics=[dict(name="objective", objective=direction, strategy="optimize")],
+ metrics=[{"name": "objective", "objective": direction, "strategy": "optimize"}],
parallel_bandwidth=1,
budget=n_trials,
)
@@ -402,7 +402,7 @@ def run_hp_search_sigopt(trainer, n_trials: int, direction: str, **kwargs) -> Be
experiment = conn.experiments().create(
name="huggingface-tune",
parameters=trainer.hp_space(None),
- metrics=[dict(name="objective", objective=direction, strategy="optimize")],
+ metrics=[{"name": "objective", "objective": direction, "strategy": "optimize"}],
parallel_bandwidth=1,
observation_budget=n_trials,
project="huggingface",
@@ -425,7 +425,7 @@ def run_hp_search_sigopt(trainer, n_trials: int, direction: str, **kwargs) -> Be
metrics = trainer.evaluate()
trainer.objective = trainer.compute_objective(metrics)
- values = [dict(name="objective", value=trainer.objective)]
+ values = [{"name": "objective", "value": trainer.objective}]
obs = conn.experiments(experiment.id).observations().create(suggestion=suggestion.id, values=values)
logger.info(f"[suggestion_id, observation_id]: [{suggestion.id}, {obs.id}]")
experiment = conn.experiments(experiment.id).fetch()
diff --git a/src/transformers/keras_callbacks.py b/src/transformers/keras_callbacks.py
index 4fd2da18a6..c553b0c1e3 100644
--- a/src/transformers/keras_callbacks.py
+++ b/src/transformers/keras_callbacks.py
@@ -162,7 +162,7 @@ class KerasMetricCallback(Callback):
def _postprocess_predictions_or_labels(self, inputs):
if isinstance(inputs[0], dict):
- outputs = dict()
+ outputs = {}
for key in inputs[0].keys():
outputs[key] = self._concatenate_batches([batch[key] for batch in inputs])
# If it's a dict with only one key, just return the array
diff --git a/src/transformers/modelcard.py b/src/transformers/modelcard.py
index 4c93b810ec..ac954272cd 100644
--- a/src/transformers/modelcard.py
+++ b/src/transformers/modelcard.py
@@ -677,7 +677,7 @@ class TrainingSummary:
_, eval_lines, eval_results = parse_keras_history(keras_history)
else:
eval_lines = []
- eval_results = dict()
+ eval_results = {}
hyperparameters = extract_hyperparameters_from_keras(model)
return cls(
@@ -706,7 +706,7 @@ def parse_keras_history(logs):
# This looks like a `History` object
if not hasattr(logs, "epoch"):
# This history looks empty, return empty results
- return None, [], dict()
+ return None, [], {}
logs.history["epoch"] = logs.epoch
logs = logs.history
else:
@@ -716,7 +716,7 @@ def parse_keras_history(logs):
lines = []
for i in range(len(logs["epoch"])):
epoch_dict = {log_key: log_value_list[i] for log_key, log_value_list in logs.items()}
- values = dict()
+ values = {}
for k, v in epoch_dict.items():
if k.startswith("val_"):
k = "validation_" + k[4:]
@@ -797,7 +797,7 @@ def parse_log_history(log_history):
def extract_hyperparameters_from_keras(model):
import tensorflow as tf
- hyperparameters = dict()
+ hyperparameters = {}
if hasattr(model, "optimizer") and model.optimizer is not None:
hyperparameters["optimizer"] = model.optimizer.get_config()
else:
diff --git a/src/transformers/modeling_flax_pytorch_utils.py b/src/transformers/modeling_flax_pytorch_utils.py
index e013e74eef..c78b1b44cd 100644
--- a/src/transformers/modeling_flax_pytorch_utils.py
+++ b/src/transformers/modeling_flax_pytorch_utils.py
@@ -76,7 +76,7 @@ def rename_key_and_reshape_tensor(
def is_key_or_prefix_key_in_dict(key: Tuple[str]) -> bool:
"""Checks if `key` of `(prefix,) + key` is in random_flax_state_dict"""
- return len(set(random_flax_state_dict) & set([key, (model_prefix,) + key])) > 0
+ return len(set(random_flax_state_dict) & {key, (model_prefix,) + key}) > 0
# layer norm
renamed_pt_tuple_key = pt_tuple_key[:-1] + ("scale",)
@@ -122,10 +122,10 @@ def convert_pytorch_state_dict_to_flax(pt_state_dict, flax_model):
flax_state_dict = {}
load_model_with_head_into_base_model = (model_prefix not in flax_model.params) and (
- model_prefix in set([k.split(".")[0] for k in pt_state_dict.keys()])
+ model_prefix in {k.split(".")[0] for k in pt_state_dict.keys()}
)
load_base_model_into_model_with_head = (model_prefix in flax_model.params) and (
- model_prefix not in set([k.split(".")[0] for k in pt_state_dict.keys()])
+ model_prefix not in {k.split(".")[0] for k in pt_state_dict.keys()}
)
# Need to change some parameters name to match Flax names
@@ -179,10 +179,10 @@ def convert_pytorch_sharded_state_dict_to_flax(shard_filenames, flax_model):
random_flax_state_dict = flatten_dict(flax_model.params)
load_model_with_head_into_base_model = (model_prefix not in flax_model.params) and (
- model_prefix in set([k.split(".")[0] for k in pt_state_dict.keys()])
+ model_prefix in {k.split(".")[0] for k in pt_state_dict.keys()}
)
load_base_model_into_model_with_head = (model_prefix in flax_model.params) and (
- model_prefix not in set([k.split(".")[0] for k in pt_state_dict.keys()])
+ model_prefix not in {k.split(".")[0] for k in pt_state_dict.keys()}
)
# Need to change some parameters name to match Flax names
for pt_key, pt_tensor in pt_state_dict.items():
@@ -267,10 +267,10 @@ def load_flax_weights_in_pytorch_model(pt_model, flax_state):
pt_model_dict = pt_model.state_dict()
load_model_with_head_into_base_model = (pt_model.base_model_prefix in flax_state) and (
- pt_model.base_model_prefix not in set([k.split(".")[0] for k in pt_model_dict.keys()])
+ pt_model.base_model_prefix not in {k.split(".")[0] for k in pt_model_dict.keys()}
)
load_base_model_into_model_with_head = (pt_model.base_model_prefix not in flax_state) and (
- pt_model.base_model_prefix in set([k.split(".")[0] for k in pt_model_dict.keys()])
+ pt_model.base_model_prefix in {k.split(".")[0] for k in pt_model_dict.keys()}
)
# keep track of unexpected & missing keys
diff --git a/src/transformers/modeling_flax_utils.py b/src/transformers/modeling_flax_utils.py
index a635c7b62b..466f324ce8 100644
--- a/src/transformers/modeling_flax_utils.py
+++ b/src/transformers/modeling_flax_utils.py
@@ -440,7 +440,7 @@ class FlaxPreTrainedModel(PushToHubMixin, FlaxGenerationMixin):
"""
# Load the index
- state_sharded_dict = dict()
+ state_sharded_dict = {}
for shard_file in shard_files:
# load using msgpack utils
@@ -708,19 +708,19 @@ class FlaxPreTrainedModel(PushToHubMixin, FlaxGenerationMixin):
filename = WEIGHTS_NAME if from_pt else FLAX_WEIGHTS_NAME
try:
# Load from URL or cache if already cached
- cached_file_kwargs = dict(
- cache_dir=cache_dir,
- force_download=force_download,
- proxies=proxies,
- resume_download=resume_download,
- local_files_only=local_files_only,
- use_auth_token=use_auth_token,
- user_agent=user_agent,
- revision=revision,
- subfolder=subfolder,
- _raise_exceptions_for_missing_entries=False,
- _commit_hash=commit_hash,
- )
+ cached_file_kwargs = {
+ "cache_dir": cache_dir,
+ "force_download": force_download,
+ "proxies": proxies,
+ "resume_download": resume_download,
+ "local_files_only": local_files_only,
+ "use_auth_token": use_auth_token,
+ "user_agent": user_agent,
+ "revision": revision,
+ "subfolder": subfolder,
+ "_raise_exceptions_for_missing_entries": False,
+ "_commit_hash": commit_hash,
+ }
resolved_archive_file = cached_file(pretrained_model_name_or_path, filename, **cached_file_kwargs)
# Since we set _raise_exceptions_for_missing_entries=False, we don't get an expection but a None
diff --git a/src/transformers/modeling_tf_pytorch_utils.py b/src/transformers/modeling_tf_pytorch_utils.py
index 9db0f582e2..5465da7427 100644
--- a/src/transformers/modeling_tf_pytorch_utils.py
+++ b/src/transformers/modeling_tf_pytorch_utils.py
@@ -258,7 +258,7 @@ def load_pytorch_state_dict_in_tf2_model(
symbolic_weights = tf_model.trainable_weights + tf_model.non_trainable_weights
tf_loaded_numel = 0
weight_value_tuples = []
- all_pytorch_weights = set(list(pt_state_dict.keys()))
+ all_pytorch_weights = set(pt_state_dict.keys())
missing_keys = []
for symbolic_weight in symbolic_weights:
sw_name = symbolic_weight.name
@@ -425,7 +425,7 @@ def load_tf2_state_dict_in_pytorch_model(pt_model, tf_state_dict, allow_missing_
)
tf_weights_map[pt_name] = (tf_weight, transpose)
- all_tf_weights = set(list(tf_weights_map.keys()))
+ all_tf_weights = set(tf_weights_map.keys())
loaded_pt_weights_data_ptr = {}
missing_keys_pt = []
for pt_weight_name, pt_weight in current_pt_params_dict.items():
diff --git a/src/transformers/modeling_tf_utils.py b/src/transformers/modeling_tf_utils.py
index 1a313ec959..c469c13ff0 100644
--- a/src/transformers/modeling_tf_utils.py
+++ b/src/transformers/modeling_tf_utils.py
@@ -584,7 +584,7 @@ def input_processing(func, config, **kwargs):
if "kwargs" in output:
del output["kwargs"]
- cast_output = dict()
+ cast_output = {}
for key, val in output.items():
if isinstance(val, tf.Tensor) and val.dtype == tf.int64:
cast_output[key] = tf.cast(val, tf.int32)
@@ -737,7 +737,7 @@ def load_tf_sharded_weights(model, shard_files, ignore_mismatched_sizes=False, s
# Since TF adds the name of the class to its weights, and uses the index and not the name of the layer to load
# the weight, we have to get rid of the first prefix of the name of the layer.
model_keys = set()
- model_layer_map = dict()
+ model_layer_map = {}
for i, k in enumerate(model.weights):
if "model." in k.name or len(k.name.split("/")) == 1:
layer_name = k.name
@@ -901,10 +901,10 @@ def load_tf_weights_from_h5(model, resolved_archive_file, ignore_mismatched_size
)
# Find the missing layers from the high level list of layers
- missing_layers = list(set([layer.name for layer in model.layers]) - saved_h5_model_layers_name)
+ missing_layers = list({layer.name for layer in model.layers} - saved_h5_model_layers_name)
# Find the unexpected layers from the high level list of layers
- unexpected_layers = list(saved_h5_model_layers_name - set([layer.name for layer in model.layers]))
+ unexpected_layers = list(saved_h5_model_layers_name - {layer.name for layer in model.layers})
saved_weight_names_set = set()
symbolic_weights_names = set()
weight_value_tuples = []
@@ -1349,7 +1349,7 @@ class TFPreTrainedModel(tf.keras.Model, TFModelUtilsMixin, TFGenerationMixin, Pu
else:
collate_fn = DataCollatorWithPadding(tokenizer=tokenizer, return_tensors="np")
if collate_fn_args is None:
- collate_fn_args = dict()
+ collate_fn_args = {}
if not isinstance(dataset, datasets.Dataset):
raise TypeError("Dataset argument should be a datasets.Dataset!")
@@ -1471,7 +1471,7 @@ class TFPreTrainedModel(tf.keras.Model, TFModelUtilsMixin, TFGenerationMixin, Pu
elif "mc_labels" in arg_names:
return {"labels": "logits", "mc_labels": "mc_logits"}
else:
- return dict()
+ return {}
def train_step(self, data):
"""
@@ -2613,19 +2613,19 @@ class TFPreTrainedModel(tf.keras.Model, TFModelUtilsMixin, TFGenerationMixin, Pu
try:
# Load from URL or cache if already cached
- cached_file_kwargs = dict(
- cache_dir=cache_dir,
- force_download=force_download,
- proxies=proxies,
- resume_download=resume_download,
- local_files_only=local_files_only,
- use_auth_token=use_auth_token,
- user_agent=user_agent,
- revision=revision,
- subfolder=subfolder,
- _raise_exceptions_for_missing_entries=False,
- _commit_hash=commit_hash,
- )
+ cached_file_kwargs = {
+ "cache_dir": cache_dir,
+ "force_download": force_download,
+ "proxies": proxies,
+ "resume_download": resume_download,
+ "local_files_only": local_files_only,
+ "use_auth_token": use_auth_token,
+ "user_agent": user_agent,
+ "revision": revision,
+ "subfolder": subfolder,
+ "_raise_exceptions_for_missing_entries": False,
+ "_commit_hash": commit_hash,
+ }
resolved_archive_file = cached_file(pretrained_model_name_or_path, filename, **cached_file_kwargs)
# Since we set _raise_exceptions_for_missing_entries=False, we don't get an exception but a None
diff --git a/src/transformers/modeling_utils.py b/src/transformers/modeling_utils.py
index bc12cbc668..73e6cf00ef 100644
--- a/src/transformers/modeling_utils.py
+++ b/src/transformers/modeling_utils.py
@@ -1271,7 +1271,7 @@ class PreTrainedModel(nn.Module, ModuleUtilsMixin, GenerationMixin, PushToHubMix
len(encoder_modules) > 0
), f"Encoder module {encoder_pointer} does not match decoder module {decoder_pointer}"
- all_encoder_weights = set([module_name + "/" + sub_name for sub_name in encoder_modules.keys()])
+ all_encoder_weights = {module_name + "/" + sub_name for sub_name in encoder_modules.keys()}
encoder_layer_pos = 0
for name, module in decoder_modules.items():
if name.isdigit():
@@ -2304,19 +2304,19 @@ class PreTrainedModel(nn.Module, ModuleUtilsMixin, GenerationMixin, PushToHubMix
try:
# Load from URL or cache if already cached
- cached_file_kwargs = dict(
- cache_dir=cache_dir,
- force_download=force_download,
- proxies=proxies,
- resume_download=resume_download,
- local_files_only=local_files_only,
- use_auth_token=use_auth_token,
- user_agent=user_agent,
- revision=revision,
- subfolder=subfolder,
- _raise_exceptions_for_missing_entries=False,
- _commit_hash=commit_hash,
- )
+ cached_file_kwargs = {
+ "cache_dir": cache_dir,
+ "force_download": force_download,
+ "proxies": proxies,
+ "resume_download": resume_download,
+ "local_files_only": local_files_only,
+ "use_auth_token": use_auth_token,
+ "user_agent": user_agent,
+ "revision": revision,
+ "subfolder": subfolder,
+ "_raise_exceptions_for_missing_entries": False,
+ "_commit_hash": commit_hash,
+ }
resolved_archive_file = cached_file(pretrained_model_name_or_path, filename, **cached_file_kwargs)
# Since we set _raise_exceptions_for_missing_entries=False, we don't get an exception but a None
@@ -2474,7 +2474,7 @@ class PreTrainedModel(nn.Module, ModuleUtilsMixin, GenerationMixin, PushToHubMix
if is_sharded:
loaded_state_dict_keys = sharded_metadata["all_checkpoint_keys"]
else:
- loaded_state_dict_keys = [k for k in state_dict.keys()]
+ loaded_state_dict_keys = list(state_dict.keys())
if low_cpu_mem_usage or use_keep_in_fp32_modules:
state_dict = None
@@ -3046,12 +3046,12 @@ class PreTrainedModel(nn.Module, ModuleUtilsMixin, GenerationMixin, PushToHubMix
return model, missing_keys, unexpected_keys, mismatched_keys, offload_index, error_msgs
def retrieve_modules_from_names(self, names, add_prefix=False, remove_prefix=False):
- module_keys = set([".".join(key.split(".")[:-1]) for key in names])
+ module_keys = {".".join(key.split(".")[:-1]) for key in names}
# torch.nn.ParameterList is a special case where two parameter keywords
# are appended to the module name, *e.g.* bert.special_embeddings.0
module_keys = module_keys.union(
- set([".".join(key.split(".")[:-2]) for key in names if len(key) > 0 and key[-1].isdigit()])
+ {".".join(key.split(".")[:-2]) for key in names if len(key) > 0 and key[-1].isdigit()}
)
retrieved_modules = []
diff --git a/src/transformers/models/beit/modeling_flax_beit.py b/src/transformers/models/beit/modeling_flax_beit.py
index 02fb2e5e33..328f759901 100644
--- a/src/transformers/models/beit/modeling_flax_beit.py
+++ b/src/transformers/models/beit/modeling_flax_beit.py
@@ -555,7 +555,7 @@ class FlaxBeitEncoder(nn.Module):
)
# stochastic depth decay rule
- drop_path_rates = [x for x in np.linspace(0, self.config.drop_path_rate, self.config.num_hidden_layers)]
+ drop_path_rates = list(np.linspace(0, self.config.drop_path_rate, self.config.num_hidden_layers))
self.layer = FlaxBeitLayerCollection(
self.config,
window_size=self.window_size,
diff --git a/src/transformers/models/bertweet/tokenization_bertweet.py b/src/transformers/models/bertweet/tokenization_bertweet.py
index 837fea1367..129806ebd3 100644
--- a/src/transformers/models/bertweet/tokenization_bertweet.py
+++ b/src/transformers/models/bertweet/tokenization_bertweet.py
@@ -318,7 +318,7 @@ class BertweetTokenizer(PreTrainedTokenizer):
split_tokens = []
words = re.findall(r"\S+\n?", text)
for token in words:
- split_tokens.extend([t for t in self.bpe(token).split(" ")])
+ split_tokens.extend(list(self.bpe(token).split(" ")))
return split_tokens
def normalizeTweet(self, tweet):
@@ -726,7 +726,7 @@ class TweetTokenizer:
words = WORD_RE.findall(safe_text)
# Possibly alter the case, but avoid changing emoticons like :D into :d:
if not self.preserve_case:
- words = list(map((lambda x: x if EMOTICON_RE.search(x) else x.lower()), words))
+ words = [x if EMOTICON_RE.search(x) else x.lower() for x in words]
return words
diff --git a/src/transformers/models/big_bird/tokenization_big_bird_fast.py b/src/transformers/models/big_bird/tokenization_big_bird_fast.py
index 11c3386794..c41c257d53 100644
--- a/src/transformers/models/big_bird/tokenization_big_bird_fast.py
+++ b/src/transformers/models/big_bird/tokenization_big_bird_fast.py
@@ -202,7 +202,7 @@ class BigBirdTokenizerFast(PreTrainedTokenizerFast):
"You should not supply a second sequence if the provided sequence of "
"ids is already formatted with special tokens for the model."
)
- return list(map(lambda x: 1 if x in [self.sep_token_id, self.cls_token_id] else 0, token_ids_0))
+ return [1 if x in [self.sep_token_id, self.cls_token_id] else 0 for x in token_ids_0]
if token_ids_1 is None:
return [1] + ([0] * len(token_ids_0)) + [1]
diff --git a/src/transformers/models/biogpt/tokenization_biogpt.py b/src/transformers/models/biogpt/tokenization_biogpt.py
index 55f337f2ec..d050fa699c 100644
--- a/src/transformers/models/biogpt/tokenization_biogpt.py
+++ b/src/transformers/models/biogpt/tokenization_biogpt.py
@@ -132,8 +132,8 @@ class BioGptTokenizer(PreTrainedTokenizer):
self.lang = "en"
self.sm = sacremoses
# cache of sm.MosesTokenizer instance
- self.cache_moses_tokenizer = dict()
- self.cache_moses_detokenizer = dict()
+ self.cache_moses_tokenizer = {}
+ self.cache_moses_detokenizer = {}
""" Initialisation"""
with open(vocab_file, encoding="utf-8") as vocab_handle:
@@ -221,7 +221,7 @@ class BioGptTokenizer(PreTrainedTokenizer):
split_tokens = []
for token in text:
if token:
- split_tokens.extend([t for t in self.bpe(token).split(" ")])
+ split_tokens.extend(list(self.bpe(token).split(" ")))
return split_tokens
diff --git a/src/transformers/models/blenderbot_small/tokenization_blenderbot_small.py b/src/transformers/models/blenderbot_small/tokenization_blenderbot_small.py
index a0b45bff1d..e26cdfbd98 100644
--- a/src/transformers/models/blenderbot_small/tokenization_blenderbot_small.py
+++ b/src/transformers/models/blenderbot_small/tokenization_blenderbot_small.py
@@ -191,7 +191,7 @@ class BlenderbotSmallTokenizer(PreTrainedTokenizer):
words = re.findall(r"\S+\n?", text)
for token in words:
- split_tokens.extend([t for t in self.bpe(token).split(" ")])
+ split_tokens.extend(list(self.bpe(token).split(" ")))
return split_tokens
def _convert_token_to_id(self, token: str) -> int:
diff --git a/src/transformers/models/bloom/convert_bloom_original_checkpoint_to_pytorch.py b/src/transformers/models/bloom/convert_bloom_original_checkpoint_to_pytorch.py
index c8a069784d..3942de2358 100644
--- a/src/transformers/models/bloom/convert_bloom_original_checkpoint_to_pytorch.py
+++ b/src/transformers/models/bloom/convert_bloom_original_checkpoint_to_pytorch.py
@@ -89,7 +89,7 @@ def convert_bloom_checkpoint_to_pytorch(
if shard_model:
file_names = os.listdir(bloom_checkpoint_path)
- file_names = list(sorted(filter(lambda s: s.startswith("layer") and "model_00" in s, file_names)))
+ file_names = sorted(filter(lambda s: s.startswith("layer") and "model_00" in s, file_names))
index_dict = {"weight_map": {}, "metadata": {}}
total_size = 0
@@ -157,7 +157,7 @@ def convert_bloom_checkpoint_to_pytorch(
model = BloomModel(config)
file_names = os.listdir(bloom_checkpoint_path)
- file_names = list(sorted(filter(lambda s: s.startswith("layer") and "model_00" in s, file_names)))
+ file_names = sorted(filter(lambda s: s.startswith("layer") and "model_00" in s, file_names))
missing_keys = None
for i, file in enumerate(file_names):
diff --git a/src/transformers/models/codegen/modeling_codegen.py b/src/transformers/models/codegen/modeling_codegen.py
index fb7716a00e..b564dcdb68 100644
--- a/src/transformers/models/codegen/modeling_codegen.py
+++ b/src/transformers/models/codegen/modeling_codegen.py
@@ -85,7 +85,7 @@ def duplicate_interleave(m):
# Copied from transformers.models.gptj.modeling_gptj.apply_rotary_pos_emb
def apply_rotary_pos_emb(x, sincos, offset=0):
- sin, cos = map(lambda t: duplicate_interleave(t)[None, offset : x.shape[1] + offset, None, :], sincos)
+ sin, cos = (duplicate_interleave(t)[None, offset : x.shape[1] + offset, None, :] for t in sincos)
# einsum notation for lambda t: repeat(t[offset:x.shape[1]+offset,:], "n d -> () n () (d j)", j=2)
return (x * cos) + (rotate_every_two(x) * sin)
diff --git a/src/transformers/models/conditional_detr/image_processing_conditional_detr.py b/src/transformers/models/conditional_detr/image_processing_conditional_detr.py
index d4e2f9dd5f..0d7e9aa0da 100644
--- a/src/transformers/models/conditional_detr/image_processing_conditional_detr.py
+++ b/src/transformers/models/conditional_detr/image_processing_conditional_detr.py
@@ -604,7 +604,7 @@ def binary_mask_to_rle(mask):
pixels = np.concatenate([[0], pixels, [0]])
runs = np.where(pixels[1:] != pixels[:-1])[0] + 1
runs[1::2] -= runs[::2]
- return [x for x in runs]
+ return list(runs)
# Copied from transformers.models.detr.image_processing_detr.convert_segmentation_to_rle
diff --git a/src/transformers/models/convnext/modeling_convnext.py b/src/transformers/models/convnext/modeling_convnext.py
index 5e60ddfe6d..3ba8062b77 100755
--- a/src/transformers/models/convnext/modeling_convnext.py
+++ b/src/transformers/models/convnext/modeling_convnext.py
@@ -495,7 +495,7 @@ class ConvNextBackbone(ConvNextPreTrainedModel, BackboneMixin):
self.out_feature_channels = out_feature_channels
# Add layer norms to hidden states of out_features
- hidden_states_norms = dict()
+ hidden_states_norms = {}
for stage, num_channels in zip(self.out_features, self.channels):
hidden_states_norms[stage] = ConvNextLayerNorm(num_channels, data_format="channels_first")
self.hidden_states_norms = nn.ModuleDict(hidden_states_norms)
diff --git a/src/transformers/models/ctrl/tokenization_ctrl.py b/src/transformers/models/ctrl/tokenization_ctrl.py
index f8524bdf1f..7a81bf8572 100644
--- a/src/transformers/models/ctrl/tokenization_ctrl.py
+++ b/src/transformers/models/ctrl/tokenization_ctrl.py
@@ -208,7 +208,7 @@ class CTRLTokenizer(PreTrainedTokenizer):
words = re.findall(r"\S+\n?", text)
for token in words:
- split_tokens.extend([t for t in self.bpe(token).split(" ")])
+ split_tokens.extend(list(self.bpe(token).split(" ")))
return split_tokens
def _convert_token_to_id(self, token):
diff --git a/src/transformers/models/data2vec/modeling_tf_data2vec_vision.py b/src/transformers/models/data2vec/modeling_tf_data2vec_vision.py
index c837670b1a..8bf8a88550 100644
--- a/src/transformers/models/data2vec/modeling_tf_data2vec_vision.py
+++ b/src/transformers/models/data2vec/modeling_tf_data2vec_vision.py
@@ -596,7 +596,7 @@ class TFData2VecVisionEncoder(tf.keras.layers.Layer):
self.relative_position_bias = None
# stochastic depth decay rule
- dpr = [x for x in tf.linspace(0.0, config.drop_path_rate, config.num_hidden_layers)]
+ dpr = list(tf.linspace(0.0, config.drop_path_rate, config.num_hidden_layers))
self.layer = [
TFData2VecVisionLayer(
config,
diff --git a/src/transformers/models/deformable_detr/image_processing_deformable_detr.py b/src/transformers/models/deformable_detr/image_processing_deformable_detr.py
index 3601a2aad1..5b6d9839e9 100644
--- a/src/transformers/models/deformable_detr/image_processing_deformable_detr.py
+++ b/src/transformers/models/deformable_detr/image_processing_deformable_detr.py
@@ -602,7 +602,7 @@ def binary_mask_to_rle(mask):
pixels = np.concatenate([[0], pixels, [0]])
runs = np.where(pixels[1:] != pixels[:-1])[0] + 1
runs[1::2] -= runs[::2]
- return [x for x in runs]
+ return list(runs)
# Copied from transformers.models.detr.image_processing_detr.convert_segmentation_to_rle
diff --git a/src/transformers/models/detr/image_processing_detr.py b/src/transformers/models/detr/image_processing_detr.py
index 433853efef..75132b9a2f 100644
--- a/src/transformers/models/detr/image_processing_detr.py
+++ b/src/transformers/models/detr/image_processing_detr.py
@@ -590,7 +590,7 @@ def binary_mask_to_rle(mask):
pixels = np.concatenate([[0], pixels, [0]])
runs = np.where(pixels[1:] != pixels[:-1])[0] + 1
runs[1::2] -= runs[::2]
- return [x for x in runs]
+ return list(runs)
# TODO - (Amy) make compatible with other frameworks
diff --git a/src/transformers/models/dinat/modeling_dinat.py b/src/transformers/models/dinat/modeling_dinat.py
index ef19005834..95191d52b5 100644
--- a/src/transformers/models/dinat/modeling_dinat.py
+++ b/src/transformers/models/dinat/modeling_dinat.py
@@ -899,7 +899,7 @@ class DinatBackbone(DinatPreTrainedModel, BackboneMixin):
self.out_feature_channels[stage] = num_features[i]
# Add layer norms to hidden states of out_features
- hidden_states_norms = dict()
+ hidden_states_norms = {}
for stage, num_channels in zip(self.out_features, self.channels):
hidden_states_norms[stage] = nn.LayerNorm(num_channels)
self.hidden_states_norms = nn.ModuleDict(hidden_states_norms)
diff --git a/src/transformers/models/donut/processing_donut.py b/src/transformers/models/donut/processing_donut.py
index 87f2dd34f9..5693fe110d 100644
--- a/src/transformers/models/donut/processing_donut.py
+++ b/src/transformers/models/donut/processing_donut.py
@@ -130,7 +130,7 @@ class DonutProcessor(ProcessorMixin):
if added_vocab is None:
added_vocab = self.tokenizer.get_added_vocab()
- output = dict()
+ output = {}
while tokens:
start_token = re.search(r"", tokens, re.IGNORECASE)
diff --git a/src/transformers/models/ernie_m/tokenization_ernie_m.py b/src/transformers/models/ernie_m/tokenization_ernie_m.py
index e56451dd20..1acc113dca 100644
--- a/src/transformers/models/ernie_m/tokenization_ernie_m.py
+++ b/src/transformers/models/ernie_m/tokenization_ernie_m.py
@@ -133,8 +133,8 @@ class ErnieMTokenizer(PreTrainedTokenizer):
if vocab_file is not None:
self.vocab = self.load_vocab(filepath=vocab_file)
else:
- self.vocab = dict((self.sp_model.id_to_piece(id), id) for id in range(self.sp_model.get_piece_size()))
- self.reverse_vocab = dict((v, k) for k, v in self.vocab.items())
+ self.vocab = {self.sp_model.id_to_piece(id): id for id in range(self.sp_model.get_piece_size())}
+ self.reverse_vocab = {v: k for k, v in self.vocab.items()}
def get_offset_mapping(self, text):
if text is None:
@@ -325,7 +325,7 @@ class ErnieMTokenizer(PreTrainedTokenizer):
"You should not supply a second sequence if the provided sequence of "
"ids is already formatted with special tokens for the model."
)
- return list(map(lambda x: 1 if x in [self.sep_token_id, self.cls_token_id] else 0, token_ids_0))
+ return [1 if x in [self.sep_token_id, self.cls_token_id] else 0 for x in token_ids_0]
if token_ids_1 is not None:
return [1] + ([0] * len(token_ids_0)) + [1, 1] + ([0] * len(token_ids_1)) + [1]
diff --git a/src/transformers/models/esm/modeling_esmfold.py b/src/transformers/models/esm/modeling_esmfold.py
index d37891df35..05c165f586 100644
--- a/src/transformers/models/esm/modeling_esmfold.py
+++ b/src/transformers/models/esm/modeling_esmfold.py
@@ -201,9 +201,9 @@ def collate_dense_tensors(samples: List[torch.Tensor], pad_v: float = 0) -> torc
"""
if len(samples) == 0:
return torch.Tensor()
- if len(set(x.dim() for x in samples)) != 1:
+ if len({x.dim() for x in samples}) != 1:
raise RuntimeError(f"Samples has varying dimensions: {[x.dim() for x in samples]}")
- (device,) = tuple(set(x.device for x in samples)) # assumes all on same device
+ (device,) = tuple({x.device for x in samples}) # assumes all on same device
max_shape = [max(lst) for lst in zip(*[x.shape for x in samples])]
result = torch.empty(len(samples), *max_shape, dtype=samples[0].dtype, device=device)
result.fill_(pad_v)
diff --git a/src/transformers/models/esm/openfold_utils/chunk_utils.py b/src/transformers/models/esm/openfold_utils/chunk_utils.py
index 4b60373438..301721d135 100644
--- a/src/transformers/models/esm/openfold_utils/chunk_utils.py
+++ b/src/transformers/models/esm/openfold_utils/chunk_utils.py
@@ -83,7 +83,7 @@ def _get_minimal_slice_set(
# Base cases. Either start/end are empty and we're done, or the final,
# one-dimensional tensor can be simply sliced
if len(start) == 0:
- return [tuple()]
+ return [()]
elif len(start) == 1:
return [(slice(start[0], end[0] + 1),)]
diff --git a/src/transformers/models/flaubert/tokenization_flaubert.py b/src/transformers/models/flaubert/tokenization_flaubert.py
index 26f68e75d7..ea3f1c8bfd 100644
--- a/src/transformers/models/flaubert/tokenization_flaubert.py
+++ b/src/transformers/models/flaubert/tokenization_flaubert.py
@@ -282,10 +282,10 @@ class FlaubertTokenizer(PreTrainedTokenizer):
self.sm = sacremoses
# cache of sm.MosesPunctNormalizer instance
- self.cache_moses_punct_normalizer = dict()
+ self.cache_moses_punct_normalizer = {}
# cache of sm.MosesTokenizer instance
- self.cache_moses_tokenizer = dict()
- self.lang_with_custom_tokenizer = set(["zh", "th", "ja"])
+ self.cache_moses_tokenizer = {}
+ self.lang_with_custom_tokenizer = {"zh", "th", "ja"}
self.lang2id = lang2id
self.id2lang = id2lang
if lang2id is not None and id2lang is not None:
@@ -452,7 +452,7 @@ class FlaubertTokenizer(PreTrainedTokenizer):
split_tokens = []
for token in text:
if token:
- split_tokens.extend([t for t in self.bpe(token).split(" ")])
+ split_tokens.extend(list(self.bpe(token).split(" ")))
return split_tokens
diff --git a/src/transformers/models/fsmt/tokenization_fsmt.py b/src/transformers/models/fsmt/tokenization_fsmt.py
index 1c401c1faa..523f2ed588 100644
--- a/src/transformers/models/fsmt/tokenization_fsmt.py
+++ b/src/transformers/models/fsmt/tokenization_fsmt.py
@@ -226,10 +226,10 @@ class FSMTTokenizer(PreTrainedTokenizer):
self.do_lower_case = do_lower_case
# cache of sm.MosesPunctNormalizer instance
- self.cache_moses_punct_normalizer = dict()
+ self.cache_moses_punct_normalizer = {}
# cache of sm.MosesTokenizer instance
- self.cache_moses_tokenizer = dict()
- self.cache_moses_detokenizer = dict()
+ self.cache_moses_tokenizer = {}
+ self.cache_moses_detokenizer = {}
if langs and len(langs) == 2:
self.src_lang, self.tgt_lang = langs
@@ -379,7 +379,7 @@ class FSMTTokenizer(PreTrainedTokenizer):
split_tokens = []
for token in text:
if token:
- split_tokens.extend([t for t in self.bpe(token).split(" ")])
+ split_tokens.extend(list(self.bpe(token).split(" ")))
return split_tokens
diff --git a/src/transformers/models/gptj/modeling_gptj.py b/src/transformers/models/gptj/modeling_gptj.py
index b7070fa0ac..f9c49db52d 100755
--- a/src/transformers/models/gptj/modeling_gptj.py
+++ b/src/transformers/models/gptj/modeling_gptj.py
@@ -78,7 +78,7 @@ def duplicate_interleave(m):
def apply_rotary_pos_emb(x, sincos, offset=0):
- sin, cos = map(lambda t: duplicate_interleave(t)[None, offset : x.shape[1] + offset, None, :], sincos)
+ sin, cos = (duplicate_interleave(t)[None, offset : x.shape[1] + offset, None, :] for t in sincos)
# einsum notation for lambda t: repeat(t[offset:x.shape[1]+offset,:], "n d -> () n () (d j)", j=2)
return (x * cos) + (rotate_every_two(x) * sin)
diff --git a/src/transformers/models/herbert/tokenization_herbert.py b/src/transformers/models/herbert/tokenization_herbert.py
index 80c6cb6d63..3d07e68e18 100644
--- a/src/transformers/models/herbert/tokenization_herbert.py
+++ b/src/transformers/models/herbert/tokenization_herbert.py
@@ -348,10 +348,10 @@ class HerbertTokenizer(PreTrainedTokenizer):
self.sm = sacremoses
# cache of sm.MosesPunctNormalizer instance
- self.cache_moses_punct_normalizer = dict()
+ self.cache_moses_punct_normalizer = {}
# cache of sm.MosesTokenizer instance
- self.cache_moses_tokenizer = dict()
- self.lang_with_custom_tokenizer = set(["zh", "th", "ja"])
+ self.cache_moses_tokenizer = {}
+ self.lang_with_custom_tokenizer = {"zh", "th", "ja"}
# True for current supported model (v1.2.0), False for XLM-17 & 100
self.do_lowercase_and_remove_accent = do_lowercase_and_remove_accent
self.lang2id = lang2id
@@ -490,7 +490,7 @@ class HerbertTokenizer(PreTrainedTokenizer):
split_tokens = []
for token in pre_tokens:
if token:
- split_tokens.extend([t for t in self.bpe(token).split(" ")])
+ split_tokens.extend(list(self.bpe(token).split(" ")))
return split_tokens
diff --git a/src/transformers/models/jukebox/modeling_jukebox.py b/src/transformers/models/jukebox/modeling_jukebox.py
index 2528f1aa22..cac9300539 100755
--- a/src/transformers/models/jukebox/modeling_jukebox.py
+++ b/src/transformers/models/jukebox/modeling_jukebox.py
@@ -138,7 +138,7 @@ def get_alignment(music_tokens, labels, prior, config):
hop_length = int(config.hop_fraction[-level - 1] * prior.n_ctx)
alignment_head, alignment_layer = config.prior_alignment_head[0], config.prior_alignment_layer[0]
- attn_layers = set([alignment_layer])
+ attn_layers = {alignment_layer}
alignment_hops = {}
indices_hops = {}
for start in tqdm(get_starts(total_length, n_ctx, hop_length), desc="Computing lyric to music alignment "):
@@ -436,7 +436,7 @@ class JukeboxBottleneckBlock(nn.Module):
used_curr = (_codebook_elem >= self.threshold).sum()
usage = torch.sum(usage)
dk = torch.norm(self.codebook - old_codebook) / np.sqrt(np.prod(old_codebook.shape))
- return dict(entropy=entropy, used_curr=used_curr, usage=usage, dk=dk)
+ return {"entropy": entropy, "used_curr": used_curr, "usage": usage, "dk": dk}
def preprocess(self, hidden_states):
hidden_states = hidden_states.permute(0, 2, 1).contiguous()
@@ -2213,11 +2213,11 @@ class JukeboxPrior(PreTrainedModel):
loss = self.encoder_loss_fraction * encoder_loss * self.nb_relevant_lyric_tokens / self.total_loss_dims
loss += next_token_prediction_loss * self.next_token_prediction_loss_dims / self.total_loss_dims
- metrics = dict(
- bpd=next_token_prediction_loss.clone().detach(),
- encoder_loss=encoder_loss.clone().detach(),
- next_token_prediction_loss=next_token_prediction_loss.clone().detach(),
- )
+ metrics = {
+ "bpd": next_token_prediction_loss.clone().detach(),
+ "encoder_loss": encoder_loss.clone().detach(),
+ "next_token_prediction_loss": next_token_prediction_loss.clone().detach(),
+ }
if get_preds:
metrics["preds"] = preds.clone().detach()
if get_attn_weights:
@@ -2533,11 +2533,11 @@ class JukeboxModel(JukeboxPreTrainedModel):
# total length of the signal, might be bit different from the actual generated length
self.total_length = total_length
for level in sample_levels:
- sampling_kwargs = dict(
- temp=0.99 if level == len(self.priors) - 1 else sampling_temperature,
- chunk_size=chunk_size,
- sample_tokens=sample_tokens,
- )
+ sampling_kwargs = {
+ "temp": 0.99 if level == len(self.priors) - 1 else sampling_temperature,
+ "chunk_size": chunk_size,
+ "sample_tokens": sample_tokens,
+ }
# Set correct total_length, hop_length, labels and sampling_kwargs for level
total_token_to_sample = total_length // self.priors[level].raw_to_tokens
diff --git a/src/transformers/models/jukebox/tokenization_jukebox.py b/src/transformers/models/jukebox/tokenization_jukebox.py
index 85835c6cdf..bd4d6721da 100644
--- a/src/transformers/models/jukebox/tokenization_jukebox.py
+++ b/src/transformers/models/jukebox/tokenization_jukebox.py
@@ -187,7 +187,7 @@ class JukeboxTokenizer(PreTrainedTokenizer):
Do NOT take care of added tokens. Only the lyrics are split into character for the character-based vocabulary.
"""
# only lyrics are not tokenized, but character based is easily handled
- return [character for character in lyrics]
+ return list(lyrics)
def tokenize(self, artist, genre, lyrics, **kwargs):
"""
diff --git a/src/transformers/models/luke/convert_luke_original_pytorch_checkpoint_to_pytorch.py b/src/transformers/models/luke/convert_luke_original_pytorch_checkpoint_to_pytorch.py
index d2b2323b28..c86fa6e308 100644
--- a/src/transformers/models/luke/convert_luke_original_pytorch_checkpoint_to_pytorch.py
+++ b/src/transformers/models/luke/convert_luke_original_pytorch_checkpoint_to_pytorch.py
@@ -42,7 +42,7 @@ def convert_luke_checkpoint(checkpoint_path, metadata_path, entity_vocab_path, p
# Add special tokens to the token vocabulary for downstream tasks
entity_token_1 = AddedToken("", lstrip=False, rstrip=False)
entity_token_2 = AddedToken("", lstrip=False, rstrip=False)
- tokenizer.add_special_tokens(dict(additional_special_tokens=[entity_token_1, entity_token_2]))
+ tokenizer.add_special_tokens({"additional_special_tokens": [entity_token_1, entity_token_2]})
config.vocab_size += 2
print(f"Saving tokenizer to {pytorch_dump_folder_path}")
diff --git a/src/transformers/models/luke/tokenization_luke.py b/src/transformers/models/luke/tokenization_luke.py
index ff177a4444..89fb9b63e8 100644
--- a/src/transformers/models/luke/tokenization_luke.py
+++ b/src/transformers/models/luke/tokenization_luke.py
@@ -1529,7 +1529,7 @@ class LukeTokenizer(PreTrainedTokenizer):
batch_outputs = {}
for i in range(batch_size):
- inputs = dict((k, v[i]) for k, v in encoded_inputs.items())
+ inputs = {k: v[i] for k, v in encoded_inputs.items()}
outputs = self._pad(
inputs,
max_length=max_length,
diff --git a/src/transformers/models/marian/convert_marian_to_pytorch.py b/src/transformers/models/marian/convert_marian_to_pytorch.py
index 1662ffb358..0eb17063c2 100644
--- a/src/transformers/models/marian/convert_marian_to_pytorch.py
+++ b/src/transformers/models/marian/convert_marian_to_pytorch.py
@@ -185,12 +185,12 @@ def convert_hf_name_to_opus_name(hf_model_name):
def get_system_metadata(repo_root):
import git
- return dict(
- helsinki_git_sha=git.Repo(path=repo_root, search_parent_directories=True).head.object.hexsha,
- transformers_git_sha=git.Repo(path=".", search_parent_directories=True).head.object.hexsha,
- port_machine=socket.gethostname(),
- port_time=time.strftime("%Y-%m-%d-%H:%M"),
- )
+ return {
+ "helsinki_git_sha": git.Repo(path=repo_root, search_parent_directories=True).head.object.hexsha,
+ "transformers_git_sha": git.Repo(path=".", search_parent_directories=True).head.object.hexsha,
+ "port_machine": socket.gethostname(),
+ "port_time": time.strftime("%Y-%m-%d-%H:%M"),
+ }
# docstyle-ignore
@@ -366,7 +366,7 @@ def _parse_readme(lns):
def save_tokenizer_config(dest_dir: Path, separate_vocabs=False):
dname = dest_dir.name.split("-")
- dct = dict(target_lang=dname[-1], source_lang="-".join(dname[:-1]), separate_vocabs=separate_vocabs)
+ dct = {"target_lang": dname[-1], "source_lang": "-".join(dname[:-1]), "separate_vocabs": separate_vocabs}
save_json(dct, dest_dir / "tokenizer_config.json")
diff --git a/src/transformers/models/mask2former/convert_mask2former_original_pytorch_checkpoint_to_pytorch.py b/src/transformers/models/mask2former/convert_mask2former_original_pytorch_checkpoint_to_pytorch.py
index ea3e530ded..20ff7e780d 100644
--- a/src/transformers/models/mask2former/convert_mask2former_original_pytorch_checkpoint_to_pytorch.py
+++ b/src/transformers/models/mask2former/convert_mask2former_original_pytorch_checkpoint_to_pytorch.py
@@ -76,7 +76,7 @@ class TrackedStateDict:
Returns:
List[str]: List of keys not yet updated
"""
- return set(list(self.to_track.keys())) - self._seen
+ return set(self.to_track.keys()) - self._seen
def copy(self) -> Dict:
# proxy the call to the internal dictionary
diff --git a/src/transformers/models/mask2former/image_processing_mask2former.py b/src/transformers/models/mask2former/image_processing_mask2former.py
index eb93391fb3..501c4ccce7 100644
--- a/src/transformers/models/mask2former/image_processing_mask2former.py
+++ b/src/transformers/models/mask2former/image_processing_mask2former.py
@@ -119,7 +119,7 @@ def binary_mask_to_rle(mask):
pixels = np.concatenate([[0], pixels, [0]])
runs = np.where(pixels[1:] != pixels[:-1])[0] + 1
runs[1::2] -= runs[::2]
- return [x for x in runs]
+ return list(runs)
# Copied from transformers.models.detr.image_processing_detr.convert_segmentation_to_rle
diff --git a/src/transformers/models/maskformer/convert_maskformer_original_pytorch_checkpoint_to_pytorch.py b/src/transformers/models/maskformer/convert_maskformer_original_pytorch_checkpoint_to_pytorch.py
index d56777d452..1942f03666 100644
--- a/src/transformers/models/maskformer/convert_maskformer_original_pytorch_checkpoint_to_pytorch.py
+++ b/src/transformers/models/maskformer/convert_maskformer_original_pytorch_checkpoint_to_pytorch.py
@@ -72,7 +72,7 @@ class TrackedStateDict:
Returns:
List[str]: List of keys not yet updated
"""
- return set(list(self.to_track.keys())) - self._seen
+ return set(self.to_track.keys()) - self._seen
def copy(self) -> Dict:
# proxy the call to the internal dictionary
@@ -120,43 +120,43 @@ class OriginalMaskFormerConfigToOursConverter:
num_labels=model.SEM_SEG_HEAD.NUM_CLASSES,
no_object_weight=mask_former.NO_OBJECT_WEIGHT,
num_queries=mask_former.NUM_OBJECT_QUERIES,
- backbone_config=dict(
- pretrain_img_size=swin.PRETRAIN_IMG_SIZE,
- image_size=swin.PRETRAIN_IMG_SIZE,
- in_channels=3,
- patch_size=swin.PATCH_SIZE,
- embed_dim=swin.EMBED_DIM,
- depths=swin.DEPTHS,
- num_heads=swin.NUM_HEADS,
- window_size=swin.WINDOW_SIZE,
- drop_path_rate=swin.DROP_PATH_RATE,
- model_type="swin",
- ),
+ backbone_config={
+ "pretrain_img_size": swin.PRETRAIN_IMG_SIZE,
+ "image_size": swin.PRETRAIN_IMG_SIZE,
+ "in_channels": 3,
+ "patch_size": swin.PATCH_SIZE,
+ "embed_dim": swin.EMBED_DIM,
+ "depths": swin.DEPTHS,
+ "num_heads": swin.NUM_HEADS,
+ "window_size": swin.WINDOW_SIZE,
+ "drop_path_rate": swin.DROP_PATH_RATE,
+ "model_type": "swin",
+ },
dice_weight=mask_former.DICE_WEIGHT,
ce_weight=1.0,
mask_weight=mask_former.MASK_WEIGHT,
- decoder_config=dict(
- model_type="detr",
- max_position_embeddings=1024,
- encoder_layers=6,
- encoder_ffn_dim=2048,
- encoder_attention_heads=8,
- decoder_layers=mask_former.DEC_LAYERS,
- decoder_ffn_dim=mask_former.DIM_FEEDFORWARD,
- decoder_attention_heads=mask_former.NHEADS,
- encoder_layerdrop=0.0,
- decoder_layerdrop=0.0,
- d_model=mask_former.HIDDEN_DIM,
- dropout=mask_former.DROPOUT,
- attention_dropout=0.0,
- activation_dropout=0.0,
- init_std=0.02,
- init_xavier_std=1.0,
- scale_embedding=False,
- auxiliary_loss=False,
- dilation=False,
+ decoder_config={
+ "model_type": "detr",
+ "max_position_embeddings": 1024,
+ "encoder_layers": 6,
+ "encoder_ffn_dim": 2048,
+ "encoder_attention_heads": 8,
+ "decoder_layers": mask_former.DEC_LAYERS,
+ "decoder_ffn_dim": mask_former.DIM_FEEDFORWARD,
+ "decoder_attention_heads": mask_former.NHEADS,
+ "encoder_layerdrop": 0.0,
+ "decoder_layerdrop": 0.0,
+ "d_model": mask_former.HIDDEN_DIM,
+ "dropout": mask_former.DROPOUT,
+ "attention_dropout": 0.0,
+ "activation_dropout": 0.0,
+ "init_std": 0.02,
+ "init_xavier_std": 1.0,
+ "scale_embedding": False,
+ "auxiliary_loss": False,
+ "dilation": False,
# default pretrained config values
- ),
+ },
id2label=id2label,
label2id=label2id,
)
diff --git a/src/transformers/models/maskformer/image_processing_maskformer.py b/src/transformers/models/maskformer/image_processing_maskformer.py
index 6c3119fd30..7457d1eacd 100644
--- a/src/transformers/models/maskformer/image_processing_maskformer.py
+++ b/src/transformers/models/maskformer/image_processing_maskformer.py
@@ -123,7 +123,7 @@ def binary_mask_to_rle(mask):
pixels = np.concatenate([[0], pixels, [0]])
runs = np.where(pixels[1:] != pixels[:-1])[0] + 1
runs[1::2] -= runs[::2]
- return [x for x in runs]
+ return list(runs)
# Copied from transformers.models.detr.image_processing_detr.convert_segmentation_to_rle
diff --git a/src/transformers/models/mluke/convert_mluke_original_pytorch_checkpoint_to_pytorch.py b/src/transformers/models/mluke/convert_mluke_original_pytorch_checkpoint_to_pytorch.py
index 9d61c3bc8e..f361082fb3 100644
--- a/src/transformers/models/mluke/convert_mluke_original_pytorch_checkpoint_to_pytorch.py
+++ b/src/transformers/models/mluke/convert_mluke_original_pytorch_checkpoint_to_pytorch.py
@@ -46,7 +46,7 @@ def convert_luke_checkpoint(checkpoint_path, metadata_path, entity_vocab_path, p
# Add special tokens to the token vocabulary for downstream tasks
entity_token_1 = AddedToken("", lstrip=False, rstrip=False)
entity_token_2 = AddedToken("", lstrip=False, rstrip=False)
- tokenizer.add_special_tokens(dict(additional_special_tokens=[entity_token_1, entity_token_2]))
+ tokenizer.add_special_tokens({"additional_special_tokens": [entity_token_1, entity_token_2]})
config.vocab_size += 2
print(f"Saving tokenizer to {pytorch_dump_folder_path}")
diff --git a/src/transformers/models/mluke/tokenization_mluke.py b/src/transformers/models/mluke/tokenization_mluke.py
index 58cc9f11ab..c95bd69848 100644
--- a/src/transformers/models/mluke/tokenization_mluke.py
+++ b/src/transformers/models/mluke/tokenization_mluke.py
@@ -1328,7 +1328,7 @@ class MLukeTokenizer(PreTrainedTokenizer):
batch_outputs = {}
for i in range(batch_size):
- inputs = dict((k, v[i]) for k, v in encoded_inputs.items())
+ inputs = {k: v[i] for k, v in encoded_inputs.items()}
outputs = self._pad(
inputs,
max_length=max_length,
diff --git a/src/transformers/models/nat/modeling_nat.py b/src/transformers/models/nat/modeling_nat.py
index d455d9e5ee..4b34fe730c 100644
--- a/src/transformers/models/nat/modeling_nat.py
+++ b/src/transformers/models/nat/modeling_nat.py
@@ -877,7 +877,7 @@ class NatBackbone(NatPreTrainedModel, BackboneMixin):
self.out_feature_channels[stage] = num_features[i]
# Add layer norms to hidden states of out_features
- hidden_states_norms = dict()
+ hidden_states_norms = {}
for stage, num_channels in zip(self.out_features, self.channels):
hidden_states_norms[stage] = nn.LayerNorm(num_channels)
self.hidden_states_norms = nn.ModuleDict(hidden_states_norms)
diff --git a/src/transformers/models/oneformer/convert_to_hf_oneformer.py b/src/transformers/models/oneformer/convert_to_hf_oneformer.py
index bfe2aee5e2..9dbd32f9d3 100644
--- a/src/transformers/models/oneformer/convert_to_hf_oneformer.py
+++ b/src/transformers/models/oneformer/convert_to_hf_oneformer.py
@@ -82,7 +82,7 @@ class TrackedStateDict:
Returns:
List[str]: List of keys not yet updated
"""
- return set(list(self.to_track.keys())) - self._seen
+ return set(self.to_track.keys()) - self._seen
def copy(self) -> Dict:
# proxy the call to the internal dictionary
diff --git a/src/transformers/models/oneformer/image_processing_oneformer.py b/src/transformers/models/oneformer/image_processing_oneformer.py
index b1e93c9e39..2573844995 100644
--- a/src/transformers/models/oneformer/image_processing_oneformer.py
+++ b/src/transformers/models/oneformer/image_processing_oneformer.py
@@ -120,7 +120,7 @@ def binary_mask_to_rle(mask):
pixels = np.concatenate([[0], pixels, [0]])
runs = np.where(pixels[1:] != pixels[:-1])[0] + 1
runs[1::2] -= runs[::2]
- return [x for x in runs]
+ return list(runs)
# Copied from transformers.models.detr.image_processing_detr.convert_segmentation_to_rle
diff --git a/src/transformers/models/openai/tokenization_openai.py b/src/transformers/models/openai/tokenization_openai.py
index 96fd492dbb..36035eafec 100644
--- a/src/transformers/models/openai/tokenization_openai.py
+++ b/src/transformers/models/openai/tokenization_openai.py
@@ -342,12 +342,12 @@ class OpenAIGPTTokenizer(PreTrainedTokenizer):
# Using BERT's BasicTokenizer
text = self.nlp.tokenize(text)
for token in text:
- split_tokens.extend([t for t in self.bpe(token).split(" ")])
+ split_tokens.extend(list(self.bpe(token).split(" ")))
else:
# Using SpaCy & ftfy (original tokenization process of OpenAI GPT)
text = self.nlp(text_standardize(self.fix_text(text)))
for token in text:
- split_tokens.extend([t for t in self.bpe(token.text.lower()).split(" ")])
+ split_tokens.extend(list(self.bpe(token.text.lower()).split(" ")))
return split_tokens
def _convert_token_to_id(self, token):
diff --git a/src/transformers/models/owlvit/convert_owlvit_original_flax_to_hf.py b/src/transformers/models/owlvit/convert_owlvit_original_flax_to_hf.py
index d2ea6b0a6c..934c23b4d3 100644
--- a/src/transformers/models/owlvit/convert_owlvit_original_flax_to_hf.py
+++ b/src/transformers/models/owlvit/convert_owlvit_original_flax_to_hf.py
@@ -37,42 +37,42 @@ from transformers import (
CONFIGS = {
- "vit_b32": dict(
- embed_dim=512,
- image_resolution=768,
- context_length=16,
- vocab_size=49408,
- vision_layers=12,
- vision_width=768,
- vision_patch_size=32,
- transformer_width=512,
- transformer_heads=8,
- transformer_layers=12,
- ),
- "vit_b16": dict(
- embed_dim=512,
- image_resolution=768,
- context_length=16,
- vocab_size=49408,
- vision_layers=12,
- vision_width=768,
- vision_patch_size=16,
- transformer_width=512,
- transformer_heads=8,
- transformer_layers=12,
- ),
- "vit_l14": dict(
- embed_dim=768,
- image_resolution=840,
- context_length=16,
- vocab_size=49408,
- vision_layers=24,
- vision_width=1024,
- vision_patch_size=14,
- transformer_width=768,
- transformer_heads=12,
- transformer_layers=12,
- ),
+ "vit_b32": {
+ "embed_dim": 512,
+ "image_resolution": 768,
+ "context_length": 16,
+ "vocab_size": 49408,
+ "vision_layers": 12,
+ "vision_width": 768,
+ "vision_patch_size": 32,
+ "transformer_width": 512,
+ "transformer_heads": 8,
+ "transformer_layers": 12,
+ },
+ "vit_b16": {
+ "embed_dim": 512,
+ "image_resolution": 768,
+ "context_length": 16,
+ "vocab_size": 49408,
+ "vision_layers": 12,
+ "vision_width": 768,
+ "vision_patch_size": 16,
+ "transformer_width": 512,
+ "transformer_heads": 8,
+ "transformer_layers": 12,
+ },
+ "vit_l14": {
+ "embed_dim": 768,
+ "image_resolution": 840,
+ "context_length": 16,
+ "vocab_size": 49408,
+ "vision_layers": 24,
+ "vision_width": 1024,
+ "vision_patch_size": 14,
+ "transformer_width": 768,
+ "transformer_heads": 12,
+ "transformer_layers": 12,
+ },
}
diff --git a/src/transformers/models/perceiver/convert_perceiver_haiku_to_pytorch.py b/src/transformers/models/perceiver/convert_perceiver_haiku_to_pytorch.py
index 9c925313a3..9b9b3cb454 100644
--- a/src/transformers/models/perceiver/convert_perceiver_haiku_to_pytorch.py
+++ b/src/transformers/models/perceiver/convert_perceiver_haiku_to_pytorch.py
@@ -283,7 +283,7 @@ def convert_perceiver_checkpoint(pickle_file, pytorch_dump_folder_path, architec
params = checkpoint
# turn into initial state dict
- state_dict = dict()
+ state_dict = {}
for scope_name, parameters in hk.data_structures.to_mutable_dict(params).items():
for param_name, param in parameters.items():
state_dict[scope_name + "/" + param_name] = param
@@ -398,7 +398,7 @@ def convert_perceiver_checkpoint(pickle_file, pytorch_dump_folder_path, architec
elif architecture == "multimodal_autoencoding":
images = torch.randn((1, 16, 3, 224, 224))
audio = torch.randn((1, 30720, 1))
- inputs = dict(image=images, audio=audio, label=torch.zeros((images.shape[0], 700)))
+ inputs = {"image": images, "audio": audio, "label": torch.zeros((images.shape[0], 700))}
# forward pass
if architecture == "multimodal_autoencoding":
diff --git a/src/transformers/models/perceiver/modeling_perceiver.py b/src/transformers/models/perceiver/modeling_perceiver.py
index c9b06fcded..7008b04ec8 100755
--- a/src/transformers/models/perceiver/modeling_perceiver.py
+++ b/src/transformers/models/perceiver/modeling_perceiver.py
@@ -957,9 +957,10 @@ class PerceiverForMaskedLM(PerceiverPreTrainedModel):
text_preprocessor = PerceiverTextPreprocessor(config)
- trainable_position_encoding_kwargs_decoder = dict(
- num_channels=text_preprocessor.num_channels, index_dims=config.max_position_embeddings
- )
+ trainable_position_encoding_kwargs_decoder = {
+ "num_channels": text_preprocessor.num_channels,
+ "index_dims": config.max_position_embeddings,
+ }
self.perceiver = PerceiverModel(
config,
@@ -1089,7 +1090,7 @@ class PerceiverForSequenceClassification(PerceiverPreTrainedModel):
def __init__(self, config):
super().__init__(config)
- trainable_position_encoding_kwargs_decoder = dict(num_channels=config.d_latents, index_dims=1)
+ trainable_position_encoding_kwargs_decoder = {"num_channels": config.d_latents, "index_dims": 1}
self.num_labels = config.num_labels
self.perceiver = PerceiverModel(
@@ -1214,8 +1215,8 @@ class PerceiverForImageClassificationLearned(PerceiverPreTrainedModel):
def __init__(self, config):
super().__init__(config)
- trainable_position_encoding_kwargs_preprocessor = dict(num_channels=256, index_dims=config.image_size**2)
- trainable_position_encoding_kwargs_decoder = dict(num_channels=config.d_latents, index_dims=1)
+ trainable_position_encoding_kwargs_preprocessor = {"num_channels": 256, "index_dims": config.image_size**2}
+ trainable_position_encoding_kwargs_decoder = {"num_channels": config.d_latents, "index_dims": 1}
self.num_labels = config.num_labels
self.perceiver = PerceiverModel(
@@ -1357,10 +1358,13 @@ class PerceiverForImageClassificationFourier(PerceiverPreTrainedModel):
def __init__(self, config):
super().__init__(config)
- fourier_position_encoding_kwargs_preprocessor = dict(
- concat_pos=True, max_resolution=(224, 224), num_bands=64, sine_only=False
- )
- trainable_position_encoding_kwargs_decoder = dict(num_channels=config.d_latents, index_dims=1)
+ fourier_position_encoding_kwargs_preprocessor = {
+ "concat_pos": True,
+ "max_resolution": (224, 224),
+ "num_bands": 64,
+ "sine_only": False,
+ }
+ trainable_position_encoding_kwargs_decoder = {"num_channels": config.d_latents, "index_dims": 1}
self.num_labels = config.num_labels
self.perceiver = PerceiverModel(
@@ -1497,10 +1501,13 @@ class PerceiverForImageClassificationConvProcessing(PerceiverPreTrainedModel):
def __init__(self, config):
super().__init__(config)
- fourier_position_encoding_kwargs_preprocessor = dict(
- concat_pos=True, max_resolution=(56, 56), num_bands=64, sine_only=False
- )
- trainable_position_encoding_kwargs_decoder = dict(num_channels=config.d_latents, index_dims=1)
+ fourier_position_encoding_kwargs_preprocessor = {
+ "concat_pos": True,
+ "max_resolution": (56, 56),
+ "num_bands": 64,
+ "sine_only": False,
+ }
+ trainable_position_encoding_kwargs_decoder = {"num_channels": config.d_latents, "index_dims": 1}
self.num_labels = config.num_labels
self.perceiver = PerceiverModel(
@@ -1638,15 +1645,18 @@ class PerceiverForOpticalFlow(PerceiverPreTrainedModel):
def __init__(self, config):
super().__init__(config)
- fourier_position_encoding_kwargs_preprocessor = dict(
- num_bands=64,
- max_resolution=config.train_size,
- sine_only=False,
- concat_pos=True,
- )
- fourier_position_encoding_kwargs_decoder = dict(
- concat_pos=True, max_resolution=config.train_size, num_bands=64, sine_only=False
- )
+ fourier_position_encoding_kwargs_preprocessor = {
+ "num_bands": 64,
+ "max_resolution": config.train_size,
+ "sine_only": False,
+ "concat_pos": True,
+ }
+ fourier_position_encoding_kwargs_decoder = {
+ "concat_pos": True,
+ "max_resolution": config.train_size,
+ "num_bands": 64,
+ "sine_only": False,
+ }
image_preprocessor = PerceiverImagePreprocessor(
config,
@@ -1788,24 +1798,24 @@ class PerceiverForMultimodalAutoencoding(PerceiverPreTrainedModel):
"audio": PerceiverAudioPreprocessor(
config,
position_encoding_type="fourier",
- fourier_position_encoding_kwargs=dict(
- num_bands=192,
- max_resolution=(n_audio_samples,),
- sine_only=False,
- concat_pos=True,
- ),
+ fourier_position_encoding_kwargs={
+ "num_bands": 192,
+ "max_resolution": (n_audio_samples,),
+ "sine_only": False,
+ "concat_pos": True,
+ },
prep_type="patches",
samples_per_patch=config.samples_per_patch,
),
"image": PerceiverImagePreprocessor(
config,
position_encoding_type="fourier",
- fourier_position_encoding_kwargs=dict(
- num_bands=32,
- max_resolution=(config.num_frames, config.image_size, config.image_size),
- sine_only=False,
- concat_pos=True,
- ),
+ fourier_position_encoding_kwargs={
+ "num_bands": 32,
+ "max_resolution": (config.num_frames, config.image_size, config.image_size),
+ "sine_only": False,
+ "concat_pos": True,
+ },
prep_type="patches",
spatial_downsample=4,
temporal_downsample=1,
@@ -1824,12 +1834,12 @@ class PerceiverForMultimodalAutoencoding(PerceiverPreTrainedModel):
use_query_residual=False,
position_encoding_only=True,
position_encoding_type="fourier",
- fourier_position_encoding_kwargs=dict(
- num_bands=32,
- max_resolution=(config.num_frames, config.image_size, config.image_size),
- sine_only=False,
- concat_pos=True,
- ),
+ fourier_position_encoding_kwargs={
+ "num_bands": 32,
+ "max_resolution": (config.num_frames, config.image_size, config.image_size),
+ "sine_only": False,
+ "concat_pos": True,
+ },
)
decoder = PerceiverMultimodalDecoder(
@@ -1848,12 +1858,12 @@ class PerceiverForMultimodalAutoencoding(PerceiverPreTrainedModel):
use_query_residual=False,
position_encoding_only=True,
position_encoding_type="fourier",
- fourier_position_encoding_kwargs=dict(
- num_bands=192,
- max_resolution=(n_audio_samples,),
- sine_only=False,
- concat_pos=True,
- ),
+ fourier_position_encoding_kwargs={
+ "num_bands": 192,
+ "max_resolution": (n_audio_samples,),
+ "sine_only": False,
+ "concat_pos": True,
+ },
),
"image": image_decoder,
"label": PerceiverClassificationDecoder(
@@ -1863,10 +1873,10 @@ class PerceiverForMultimodalAutoencoding(PerceiverPreTrainedModel):
use_query_residual=False,
position_encoding_only=True,
position_encoding_type="trainable",
- trainable_position_encoding_kwargs=dict(
- num_channels=1024,
- index_dims=1,
- ),
+ trainable_position_encoding_kwargs={
+ "num_channels": 1024,
+ "index_dims": 1,
+ },
),
},
num_outputs=None,
@@ -2180,9 +2190,7 @@ class PerceiverBasicDecoder(PerceiverAbstractDecoder):
# to get the indices for the unflattened array
# unravel_index returns a tuple (x_idx, y_idx, ...)
# stack to get the [n, d] tensor of coordinates
- indices = list(
- torch.from_numpy(x) for x in np.unravel_index(subsampled_points.cpu(), self.output_index_dims)
- )
+ indices = [torch.from_numpy(x) for x in np.unravel_index(subsampled_points.cpu(), self.output_index_dims)]
pos = torch.stack(indices, dim=1)
batch_size = inputs.shape[0]
# Map these coordinates to [-1, 1]
@@ -2476,9 +2484,9 @@ class PerceiverMultimodalDecoder(PerceiverAbstractDecoder):
inputs = restructure(modality_sizes, inputs)
# Obtain modality-specific decoders' queries
- subsampled_points = subsampled_points or dict()
+ subsampled_points = subsampled_points or {}
- decoder_queries = dict()
+ decoder_queries = {}
for modality, decoder in self.modalities.items():
# Get input_without_pos for this modality if it exists.
input_without_pos = None
@@ -3363,7 +3371,7 @@ class PerceiverMultimodalPreprocessor(AbstractPreprocessor):
super().__init__()
self.modalities = nn.ModuleDict(modalities)
self.min_padding_size = min_padding_size
- self.mask_probs = mask_probs if mask_probs is not None else dict()
+ self.mask_probs = mask_probs if mask_probs is not None else {}
self.padding = nn.ParameterDict(
{
modality: nn.Parameter(torch.randn(1, self.num_channels - preprocessor.num_channels))
diff --git a/src/transformers/models/phobert/tokenization_phobert.py b/src/transformers/models/phobert/tokenization_phobert.py
index dd294ac43a..4011ea8b56 100644
--- a/src/transformers/models/phobert/tokenization_phobert.py
+++ b/src/transformers/models/phobert/tokenization_phobert.py
@@ -297,7 +297,7 @@ class PhobertTokenizer(PreTrainedTokenizer):
words = re.findall(r"\S+\n?", text)
for token in words:
- split_tokens.extend([t for t in self.bpe(token).split(" ")])
+ split_tokens.extend(list(self.bpe(token).split(" ")))
return split_tokens
def _convert_token_to_id(self, token):
diff --git a/src/transformers/models/realm/tokenization_realm.py b/src/transformers/models/realm/tokenization_realm.py
index de067b0594..a6c09f1b97 100644
--- a/src/transformers/models/realm/tokenization_realm.py
+++ b/src/transformers/models/realm/tokenization_realm.py
@@ -294,7 +294,7 @@ class RealmTokenizer(PreTrainedTokenizer):
if encoded_token_type_ids is not None:
output_data["token_type_ids"].append(encoded_token_type_ids)
- output_data = dict((key, item) for key, item in output_data.items() if len(item) != 0)
+ output_data = {key: item for key, item in output_data.items() if len(item) != 0}
return BatchEncoding(output_data, tensor_type=return_tensors)
diff --git a/src/transformers/models/realm/tokenization_realm_fast.py b/src/transformers/models/realm/tokenization_realm_fast.py
index 4db8b165b9..1cc1a99665 100644
--- a/src/transformers/models/realm/tokenization_realm_fast.py
+++ b/src/transformers/models/realm/tokenization_realm_fast.py
@@ -259,7 +259,7 @@ class RealmTokenizerFast(PreTrainedTokenizerFast):
if encoded_token_type_ids is not None:
output_data["token_type_ids"].append(encoded_token_type_ids)
- output_data = dict((key, item) for key, item in output_data.items() if len(item) != 0)
+ output_data = {key: item for key, item in output_data.items() if len(item) != 0}
return BatchEncoding(output_data, tensor_type=return_tensors)
diff --git a/src/transformers/models/reformer/modeling_reformer.py b/src/transformers/models/reformer/modeling_reformer.py
index 9b24b342bf..ff90b9ac9a 100755
--- a/src/transformers/models/reformer/modeling_reformer.py
+++ b/src/transformers/models/reformer/modeling_reformer.py
@@ -87,7 +87,7 @@ def _get_least_common_mult_chunk_len(config):
return config.lsh_attn_chunk_length
elif len(attn_types_set) == 1 and attn_types[0] == "local":
return config.local_attn_chunk_length
- elif len(attn_types_set) == 2 and attn_types_set == set(["lsh", "local"]):
+ elif len(attn_types_set) == 2 and attn_types_set == {"lsh", "local"}:
return np.lcm(config.lsh_attn_chunk_length, config.local_attn_chunk_length)
else:
raise NotImplementedError(
@@ -103,7 +103,7 @@ def _get_min_chunk_len(config):
return config.lsh_attn_chunk_length
elif len(attn_types_set) == 1 and attn_types[0] == "local":
return config.local_attn_chunk_length
- elif len(attn_types_set) == 2 and attn_types_set == set(["lsh", "local"]):
+ elif len(attn_types_set) == 2 and attn_types_set == {"lsh", "local"}:
return min(config.lsh_attn_chunk_length, config.local_attn_chunk_length)
else:
raise NotImplementedError(
@@ -1277,7 +1277,7 @@ class ReformerAttention(nn.Module):
self.self_attention = LSHSelfAttention(config)
elif len(set(self.attn_layers)) == 1 and self.attn_layers[0] == "local":
self.self_attention = LocalSelfAttention(config)
- elif len(set(self.attn_layers)) == 2 and set(self.attn_layers) == set(["lsh", "local"]):
+ elif len(set(self.attn_layers)) == 2 and set(self.attn_layers) == {"lsh", "local"}:
# get correct attn layers
if self.attn_layers[self.layer_id] == "lsh":
self.self_attention = LSHSelfAttention(config)
diff --git a/src/transformers/models/regnet/convert_regnet_seer_10b_to_pytorch.py b/src/transformers/models/regnet/convert_regnet_seer_10b_to_pytorch.py
index 22a8a99ca2..f379b40d2a 100644
--- a/src/transformers/models/regnet/convert_regnet_seer_10b_to_pytorch.py
+++ b/src/transformers/models/regnet/convert_regnet_seer_10b_to_pytorch.py
@@ -60,7 +60,7 @@ class Tracker:
for name, m in self.module.named_modules():
self.handles.append(m.register_forward_hook(partial(self._forward_hook, name=name)))
self.module(x)
- list(map(lambda x: x.remove(), self.handles))
+ [x.remove() for x in self.handles]
return self
@property
diff --git a/src/transformers/models/regnet/convert_regnet_to_pytorch.py b/src/transformers/models/regnet/convert_regnet_to_pytorch.py
index 6b34c6aa19..1228e65c46 100644
--- a/src/transformers/models/regnet/convert_regnet_to_pytorch.py
+++ b/src/transformers/models/regnet/convert_regnet_to_pytorch.py
@@ -53,7 +53,7 @@ class Tracker:
for m in self.module.modules():
self.handles.append(m.register_forward_hook(self._forward_hook))
self.module(x)
- list(map(lambda x: x.remove(), self.handles))
+ [x.remove() for x in self.handles]
return self
@property
diff --git a/src/transformers/models/regnet/modeling_tf_regnet.py b/src/transformers/models/regnet/modeling_tf_regnet.py
index b1759d71b0..2c3a1ac42e 100644
--- a/src/transformers/models/regnet/modeling_tf_regnet.py
+++ b/src/transformers/models/regnet/modeling_tf_regnet.py
@@ -247,7 +247,7 @@ class TFRegNetStage(tf.keras.layers.Layer):
class TFRegNetEncoder(tf.keras.layers.Layer):
def __init__(self, config: RegNetConfig, **kwargs):
super().__init__(**kwargs)
- self.stages = list()
+ self.stages = []
# based on `downsample_in_first_stage`, the first layer of the first stage may or may not downsample the input
self.stages.append(
TFRegNetStage(
diff --git a/src/transformers/models/rembert/tokenization_rembert.py b/src/transformers/models/rembert/tokenization_rembert.py
index cff101451b..2a3c6e4faf 100644
--- a/src/transformers/models/rembert/tokenization_rembert.py
+++ b/src/transformers/models/rembert/tokenization_rembert.py
@@ -219,7 +219,7 @@ class RemBertTokenizer(PreTrainedTokenizer):
"You should not supply a second sequence if the provided sequence of "
"ids is already formatted with special tokens for the model."
)
- return list(map(lambda x: 1 if x in [self.sep_token_id, self.cls_token_id] else 0, token_ids_0))
+ return [1 if x in [self.sep_token_id, self.cls_token_id] else 0 for x in token_ids_0]
if token_ids_1 is not None:
return [1] + ([0] * len(token_ids_0)) + [1] + ([0] * len(token_ids_1)) + [1]
diff --git a/src/transformers/models/rembert/tokenization_rembert_fast.py b/src/transformers/models/rembert/tokenization_rembert_fast.py
index 5d5032f411..bc9593c0b5 100644
--- a/src/transformers/models/rembert/tokenization_rembert_fast.py
+++ b/src/transformers/models/rembert/tokenization_rembert_fast.py
@@ -191,7 +191,7 @@ class RemBertTokenizerFast(PreTrainedTokenizerFast):
"You should not supply a second sequence if the provided sequence of "
"ids is already formatted with special tokens for the model."
)
- return list(map(lambda x: 1 if x in [self.sep_token_id, self.cls_token_id] else 0, token_ids_0))
+ return [1 if x in [self.sep_token_id, self.cls_token_id] else 0 for x in token_ids_0]
if token_ids_1 is not None:
return [1] + ([0] * len(token_ids_0)) + [1] + ([0] * len(token_ids_1)) + [1]
diff --git a/src/transformers/models/resnet/convert_resnet_to_pytorch.py b/src/transformers/models/resnet/convert_resnet_to_pytorch.py
index 5f836c9d2a..f32887c964 100644
--- a/src/transformers/models/resnet/convert_resnet_to_pytorch.py
+++ b/src/transformers/models/resnet/convert_resnet_to_pytorch.py
@@ -51,7 +51,7 @@ class Tracker:
for m in self.module.modules():
self.handles.append(m.register_forward_hook(self._forward_hook))
self.module(x)
- list(map(lambda x: x.remove(), self.handles))
+ [x.remove() for x in self.handles]
return self
@property
diff --git a/src/transformers/models/roc_bert/modeling_roc_bert.py b/src/transformers/models/roc_bert/modeling_roc_bert.py
index c8c85ff142..af7ac57410 100644
--- a/src/transformers/models/roc_bert/modeling_roc_bert.py
+++ b/src/transformers/models/roc_bert/modeling_roc_bert.py
@@ -1240,7 +1240,7 @@ class RoCBertForPreTraining(RoCBertPreTrainedModel):
sim_matrix = torch.matmul(pooled_output_norm, attack_pooled_output_norm.T) # batch_size * hidden_dim
sim_matrix_target = torch.matmul(labels_pooled_output_norm, attack_pooled_output_norm.T)
- batch_labels = torch.tensor([i for i in range(batch_size)], device=device)
+ batch_labels = torch.tensor(list(range(batch_size)), device=device)
contrastive_loss = (
loss_fct(100 * sim_matrix.view(batch_size, -1), batch_labels.view(-1))
+ loss_fct(100 * sim_matrix_target.view(batch_size, -1), batch_labels.view(-1))
diff --git a/src/transformers/models/speech_to_text/convert_s2t_fairseq_to_tfms.py b/src/transformers/models/speech_to_text/convert_s2t_fairseq_to_tfms.py
index 6c1cd993fe..eb4d852624 100644
--- a/src/transformers/models/speech_to_text/convert_s2t_fairseq_to_tfms.py
+++ b/src/transformers/models/speech_to_text/convert_s2t_fairseq_to_tfms.py
@@ -95,12 +95,10 @@ def convert_fairseq_s2t_checkpoint_to_tfms(checkpoint_path, pytorch_dump_folder_
model = Speech2TextForConditionalGeneration(config)
missing, unexpected = model.model.load_state_dict(state_dict, strict=False)
- if len(missing) > 0 and not set(missing) <= set(
- [
- "encoder.embed_positions.weights",
- "decoder.embed_positions.weights",
- ]
- ):
+ if len(missing) > 0 and not set(missing) <= {
+ "encoder.embed_positions.weights",
+ "decoder.embed_positions.weights",
+ }:
raise ValueError(
"Only `encoder.embed_positions.weights` and `decoder.embed_positions.weights` are allowed to be missing,"
f" but all the following weights are missing {missing}"
diff --git a/src/transformers/models/speech_to_text_2/tokenization_speech_to_text_2.py b/src/transformers/models/speech_to_text_2/tokenization_speech_to_text_2.py
index 4c90ba05ba..c021619cd0 100644
--- a/src/transformers/models/speech_to_text_2/tokenization_speech_to_text_2.py
+++ b/src/transformers/models/speech_to_text_2/tokenization_speech_to_text_2.py
@@ -213,7 +213,7 @@ class Speech2Text2Tokenizer(PreTrainedTokenizer):
split_tokens = []
for token in text:
if token:
- split_tokens.extend([t for t in self.bpe(token).split(" ")])
+ split_tokens.extend(list(self.bpe(token).split(" ")))
return split_tokens
diff --git a/src/transformers/models/swin/modeling_swin.py b/src/transformers/models/swin/modeling_swin.py
index abf47cf831..5f572c23a8 100644
--- a/src/transformers/models/swin/modeling_swin.py
+++ b/src/transformers/models/swin/modeling_swin.py
@@ -1259,7 +1259,7 @@ class SwinBackbone(SwinPreTrainedModel, BackboneMixin):
self.out_feature_channels[stage] = num_features[i]
# Add layer norms to hidden states of out_features
- hidden_states_norms = dict()
+ hidden_states_norms = {}
for stage, num_channels in zip(self.out_features, self.channels):
hidden_states_norms[stage] = nn.LayerNorm(num_channels)
self.hidden_states_norms = nn.ModuleDict(hidden_states_norms)
diff --git a/src/transformers/models/tapas/tokenization_tapas.py b/src/transformers/models/tapas/tokenization_tapas.py
index 395ec876c9..0bd558aee8 100644
--- a/src/transformers/models/tapas/tokenization_tapas.py
+++ b/src/transformers/models/tapas/tokenization_tapas.py
@@ -1688,7 +1688,7 @@ class TapasTokenizer(PreTrainedTokenizer):
for col_index in range(num_columns):
for row_index in range(num_rows):
- indices = [index for index in self._get_cell_token_indexes(column_ids, row_ids, col_index, row_index)]
+ indices = list(self._get_cell_token_indexes(column_ids, row_ids, col_index, row_index))
num_indices = len(indices)
if num_indices > 1:
for index in indices:
diff --git a/src/transformers/models/tapex/tokenization_tapex.py b/src/transformers/models/tapex/tokenization_tapex.py
index c41c6cbe47..e2543a3378 100644
--- a/src/transformers/models/tapex/tokenization_tapex.py
+++ b/src/transformers/models/tapex/tokenization_tapex.py
@@ -1453,16 +1453,16 @@ class TapexTokenizer(PreTrainedTokenizer):
truncated_unrelated_indices = []
related_indices = []
if answer is None or len(answer) == 0:
- answer_set = set([])
+ answer_set = set()
else:
- answer_set = set([ans_ex.lower() for ans_ex in answer])
+ answer_set = {ans_ex.lower() for ans_ex in answer}
# add question key words into answer set
if question is not None:
answer_set.update(question.split())
question_set = set(question.strip("?!.,").split(" "))
row_max_len = len(table_content["rows"])
for _row_idx, row in enumerate(table_content["rows"]):
- lower_row = set([str(cell).lower() for cell in row])
+ lower_row = {str(cell).lower() for cell in row}
if len(lower_row & answer_set) == 0 and len(lower_row & question_set) == 0:
truncated_unrelated_indices.append(_row_idx)
else:
diff --git a/src/transformers/models/van/convert_van_to_pytorch.py b/src/transformers/models/van/convert_van_to_pytorch.py
index a8086e6d1b..0cb51e59e6 100644
--- a/src/transformers/models/van/convert_van_to_pytorch.py
+++ b/src/transformers/models/van/convert_van_to_pytorch.py
@@ -55,7 +55,7 @@ class Tracker:
for m in self.module.modules():
self.handles.append(m.register_forward_hook(self._forward_hook))
self.module(x)
- list(map(lambda x: x.remove(), self.handles))
+ [x.remove() for x in self.handles]
return self
@property
diff --git a/src/transformers/models/vilt/modeling_vilt.py b/src/transformers/models/vilt/modeling_vilt.py
index 61cc69b694..6704fe42b1 100755
--- a/src/transformers/models/vilt/modeling_vilt.py
+++ b/src/transformers/models/vilt/modeling_vilt.py
@@ -171,7 +171,7 @@ class ViltEmbeddings(nn.Module):
non_valid_nums = [v.size(0) for v in non_valid_row_idx]
pad_nums = [max_image_length - v for v in valid_nums]
- select = list()
+ select = []
for i, (v, nv, p) in enumerate(zip(valid_nums, non_valid_nums, pad_nums)):
if p <= 0:
valid_choice = torch.multinomial(torch.ones(v).float(), max_image_length)
diff --git a/src/transformers/models/wav2vec2/tokenization_wav2vec2.py b/src/transformers/models/wav2vec2/tokenization_wav2vec2.py
index 42fd1131cf..54888aea2c 100644
--- a/src/transformers/models/wav2vec2/tokenization_wav2vec2.py
+++ b/src/transformers/models/wav2vec2/tokenization_wav2vec2.py
@@ -648,7 +648,7 @@ class Wav2Vec2CTCTokenizer(PreTrainedTokenizer):
if self.verbose:
logger.info(f"Adding {token} to the vocabulary")
- added_tok_encoder = dict((tok, len(self) + i) for i, tok in enumerate(tokens_to_add))
+ added_tok_encoder = {tok: len(self) + i for i, tok in enumerate(tokens_to_add)}
added_tok_decoder = {v: k for k, v in added_tok_encoder.items()}
self.added_tokens_encoder.update(added_tok_encoder)
self.added_tokens_decoder.update(added_tok_decoder)
diff --git a/src/transformers/models/wav2vec2_phoneme/tokenization_wav2vec2_phoneme.py b/src/transformers/models/wav2vec2_phoneme/tokenization_wav2vec2_phoneme.py
index 74e2d3525b..f3ad23a1cd 100644
--- a/src/transformers/models/wav2vec2_phoneme/tokenization_wav2vec2_phoneme.py
+++ b/src/transformers/models/wav2vec2_phoneme/tokenization_wav2vec2_phoneme.py
@@ -615,7 +615,7 @@ class Wav2Vec2PhonemeCTCTokenizer(PreTrainedTokenizer):
if self.verbose:
logger.info(f"Adding {token} to the vocabulary")
- added_tok_encoder = dict((tok, len(self) + i) for i, tok in enumerate(tokens_to_add))
+ added_tok_encoder = {tok: len(self) + i for i, tok in enumerate(tokens_to_add)}
added_tok_decoder = {v: k for k, v in added_tok_encoder.items()}
self.added_tokens_encoder.update(added_tok_encoder)
self.added_tokens_decoder.update(added_tok_decoder)
diff --git a/src/transformers/models/whisper/convert_openai_to_hf.py b/src/transformers/models/whisper/convert_openai_to_hf.py
index 7c2e0c40a0..3e7d42634b 100644
--- a/src/transformers/models/whisper/convert_openai_to_hf.py
+++ b/src/transformers/models/whisper/convert_openai_to_hf.py
@@ -157,12 +157,10 @@ def convert_openai_whisper_to_tfms(checkpoint_path, pytorch_dump_folder_path):
model = WhisperForConditionalGeneration(config)
missing, unexpected = model.model.load_state_dict(state_dict, strict=False)
- if len(missing) > 0 and not set(missing) <= set(
- [
- "encoder.embed_positions.weights",
- "decoder.embed_positions.weights",
- ]
- ):
+ if len(missing) > 0 and not set(missing) <= {
+ "encoder.embed_positions.weights",
+ "decoder.embed_positions.weights",
+ }:
raise ValueError(
"Only `encoder.embed_positions.weights` and `decoder.embed_positions.weights` are allowed to be missing,"
f" but all the following weights are missing {missing}"
diff --git a/src/transformers/models/whisper/english_normalizer.py b/src/transformers/models/whisper/english_normalizer.py
index e72d2e89b2..7f6aab4ad2 100644
--- a/src/transformers/models/whisper/english_normalizer.py
+++ b/src/transformers/models/whisper/english_normalizer.py
@@ -189,25 +189,23 @@ class EnglishNumberNormalizer:
}
self.specials = {"and", "double", "triple", "point"}
- self.words = set(
- [
- key
- for mapping in [
- self.zeros,
- self.ones,
- self.ones_suffixed,
- self.tens,
- self.tens_suffixed,
- self.multipliers,
- self.multipliers_suffixed,
- self.preceding_prefixers,
- self.following_prefixers,
- self.suffixers,
- self.specials,
- ]
- for key in mapping
+ self.words = {
+ key
+ for mapping in [
+ self.zeros,
+ self.ones,
+ self.ones_suffixed,
+ self.tens,
+ self.tens_suffixed,
+ self.multipliers,
+ self.multipliers_suffixed,
+ self.preceding_prefixers,
+ self.following_prefixers,
+ self.suffixers,
+ self.specials,
]
- )
+ for key in mapping
+ }
self.literal_words = {"one", "ones"}
def process_words(self, words: List[str]) -> Iterator[str]:
diff --git a/src/transformers/models/xlm/convert_xlm_original_pytorch_checkpoint_to_pytorch.py b/src/transformers/models/xlm/convert_xlm_original_pytorch_checkpoint_to_pytorch.py
index 4221cdfc90..6f3cdf920a 100755
--- a/src/transformers/models/xlm/convert_xlm_original_pytorch_checkpoint_to_pytorch.py
+++ b/src/transformers/models/xlm/convert_xlm_original_pytorch_checkpoint_to_pytorch.py
@@ -43,10 +43,10 @@ def convert_xlm_checkpoint_to_pytorch(xlm_checkpoint_path, pytorch_dump_folder_p
two_levels_state_dict["transformer." + k] = v
config = chkpt["params"]
- config = dict((n, v) for n, v in config.items() if not isinstance(v, (torch.FloatTensor, numpy.ndarray)))
+ config = {n: v for n, v in config.items() if not isinstance(v, (torch.FloatTensor, numpy.ndarray))}
vocab = chkpt["dico_word2id"]
- vocab = dict((s + "" if s.find("@@") == -1 and i > 13 else s.replace("@@", ""), i) for s, i in vocab.items())
+ vocab = {s + "" if s.find("@@") == -1 and i > 13 else s.replace("@@", ""): i for s, i in vocab.items()}
# Save pytorch-model
pytorch_weights_dump_path = pytorch_dump_folder_path + "/" + WEIGHTS_NAME
diff --git a/src/transformers/models/xlm/tokenization_xlm.py b/src/transformers/models/xlm/tokenization_xlm.py
index cbfb2b48ff..5cab4fc996 100644
--- a/src/transformers/models/xlm/tokenization_xlm.py
+++ b/src/transformers/models/xlm/tokenization_xlm.py
@@ -638,10 +638,10 @@ class XLMTokenizer(PreTrainedTokenizer):
self.sm = sacremoses
# cache of sm.MosesPunctNormalizer instance
- self.cache_moses_punct_normalizer = dict()
+ self.cache_moses_punct_normalizer = {}
# cache of sm.MosesTokenizer instance
- self.cache_moses_tokenizer = dict()
- self.lang_with_custom_tokenizer = set(["zh", "th", "ja"])
+ self.cache_moses_tokenizer = {}
+ self.lang_with_custom_tokenizer = {"zh", "th", "ja"}
# True for current supported model (v1.2.0), False for XLM-17 & 100
self.do_lowercase_and_remove_accent = do_lowercase_and_remove_accent
self.lang2id = lang2id
@@ -851,7 +851,7 @@ class XLMTokenizer(PreTrainedTokenizer):
split_tokens = []
for token in text:
if token:
- split_tokens.extend([t for t in self.bpe(token).split(" ")])
+ split_tokens.extend(list(self.bpe(token).split(" ")))
return split_tokens
diff --git a/src/transformers/models/xmod/convert_xmod_original_pytorch_checkpoint_to_pytorch.py b/src/transformers/models/xmod/convert_xmod_original_pytorch_checkpoint_to_pytorch.py
index 151606d196..6352b71300 100644
--- a/src/transformers/models/xmod/convert_xmod_original_pytorch_checkpoint_to_pytorch.py
+++ b/src/transformers/models/xmod/convert_xmod_original_pytorch_checkpoint_to_pytorch.py
@@ -142,7 +142,7 @@ def convert_xmod_checkpoint_to_pytorch(
bert_output.adapter_layer_norm.weight = xmod_layer.adapter_layer_norm.weight
bert_output.adapter_layer_norm.bias = xmod_layer.adapter_layer_norm.bias
- if list(sorted(bert_output.adapter_modules.keys())) != list(sorted(xmod_layer.adapter_modules.keys())):
+ if sorted(bert_output.adapter_modules.keys()) != sorted(xmod_layer.adapter_modules.keys()):
raise AssertionError("Lists of language adapters do not match.")
for lang_code, adapter in xmod_layer.adapter_modules.items():
to_adapter = bert_output.adapter_modules[lang_code]
diff --git a/src/transformers/models/xmod/modeling_xmod.py b/src/transformers/models/xmod/modeling_xmod.py
index 354d04bac6..c19b8fabaa 100644
--- a/src/transformers/models/xmod/modeling_xmod.py
+++ b/src/transformers/models/xmod/modeling_xmod.py
@@ -395,7 +395,7 @@ class XmodOutput(nn.Module):
else:
self.adapter_layer_norm = None
self.adapter_reuse_layer_norm = config.adapter_reuse_layer_norm
- self.adapter_modules = nn.ModuleDict(dict())
+ self.adapter_modules = nn.ModuleDict({})
for language in config.languages:
self.adapter_modules[str(language)] = XmodAdapter(config)
diff --git a/src/transformers/models/yolos/image_processing_yolos.py b/src/transformers/models/yolos/image_processing_yolos.py
index f49d5d14fd..a8fb00aee5 100644
--- a/src/transformers/models/yolos/image_processing_yolos.py
+++ b/src/transformers/models/yolos/image_processing_yolos.py
@@ -515,7 +515,7 @@ def binary_mask_to_rle(mask):
pixels = np.concatenate([[0], pixels, [0]])
runs = np.where(pixels[1:] != pixels[:-1])[0] + 1
runs[1::2] -= runs[::2]
- return [x for x in runs]
+ return list(runs)
# Copied from transformers.models.detr.image_processing_detr.convert_segmentation_to_rle
diff --git a/src/transformers/onnx/convert.py b/src/transformers/onnx/convert.py
index ee9c498e73..918134d311 100644
--- a/src/transformers/onnx/convert.py
+++ b/src/transformers/onnx/convert.py
@@ -145,7 +145,7 @@ def export_pytorch(
device = torch.device(device)
if device.type == "cuda" and torch.cuda.is_available():
model.to(device)
- model_inputs_device = dict()
+ model_inputs_device = {}
for k, v in model_inputs.items():
if isinstance(v, Tuple):
model_inputs_device[k] = tuple(
diff --git a/src/transformers/optimization.py b/src/transformers/optimization.py
index 47201b0924..659b92a59b 100644
--- a/src/transformers/optimization.py
+++ b/src/transformers/optimization.py
@@ -358,7 +358,7 @@ class AdamW(Optimizer):
raise ValueError(f"Invalid beta parameter: {betas[1]} - should be in [0.0, 1.0)")
if not 0.0 <= eps:
raise ValueError(f"Invalid epsilon value: {eps} - should be >= 0.0")
- defaults = dict(lr=lr, betas=betas, eps=eps, weight_decay=weight_decay, correct_bias=correct_bias)
+ defaults = {"lr": lr, "betas": betas, "eps": eps, "weight_decay": weight_decay, "correct_bias": correct_bias}
super().__init__(params, defaults)
def step(self, closure: Callable = None):
@@ -527,17 +527,17 @@ class Adafactor(Optimizer):
if warmup_init and not relative_step:
raise ValueError("`warmup_init=True` requires `relative_step=True`")
- defaults = dict(
- lr=lr,
- eps=eps,
- clip_threshold=clip_threshold,
- decay_rate=decay_rate,
- beta1=beta1,
- weight_decay=weight_decay,
- scale_parameter=scale_parameter,
- relative_step=relative_step,
- warmup_init=warmup_init,
- )
+ defaults = {
+ "lr": lr,
+ "eps": eps,
+ "clip_threshold": clip_threshold,
+ "decay_rate": decay_rate,
+ "beta1": beta1,
+ "weight_decay": weight_decay,
+ "scale_parameter": scale_parameter,
+ "relative_step": relative_step,
+ "warmup_init": warmup_init,
+ }
super().__init__(params, defaults)
@staticmethod
diff --git a/src/transformers/optimization_tf.py b/src/transformers/optimization_tf.py
index db7238d7f4..b42e04041b 100644
--- a/src/transformers/optimization_tf.py
+++ b/src/transformers/optimization_tf.py
@@ -262,7 +262,7 @@ class AdamWeightDecay(Adam):
coefficients = self._fallback_apply_state(var_device, var_dtype)
apply_state[(var_device, var_dtype)] = coefficients
- return coefficients["lr_t"], dict(apply_state=apply_state)
+ return coefficients["lr_t"], {"apply_state": apply_state}
def _resource_apply_dense(self, grad, var, apply_state=None):
lr_t, kwargs = self._get_lr(var.device, var.dtype.base_dtype, apply_state)
@@ -333,7 +333,7 @@ class GradientAccumulator(object):
"""The accumulated gradients on the current replica."""
if not self._gradients:
raise ValueError("The accumulator should be called first to initialize the gradients")
- return list(gradient.value() if gradient is not None else gradient for gradient in self._gradients)
+ return [gradient.value() if gradient is not None else gradient for gradient in self._gradients]
def __call__(self, gradients):
"""Accumulates `gradients` on the current replica."""
diff --git a/src/transformers/pipelines/base.py b/src/transformers/pipelines/base.py
index 528e83d8f1..054c7e57a7 100644
--- a/src/transformers/pipelines/base.py
+++ b/src/transformers/pipelines/base.py
@@ -1083,7 +1083,7 @@ class Pipeline(_ScikitCompat):
final_iterator = self.get_iterator(
inputs, num_workers, batch_size, preprocess_params, forward_params, postprocess_params
)
- outputs = [output for output in final_iterator]
+ outputs = list(final_iterator)
return outputs
else:
return self.run_multi(inputs, preprocess_params, forward_params, postprocess_params)
diff --git a/src/transformers/pipelines/question_answering.py b/src/transformers/pipelines/question_answering.py
index 746d3c1eae..884cee78ca 100644
--- a/src/transformers/pipelines/question_answering.py
+++ b/src/transformers/pipelines/question_answering.py
@@ -210,7 +210,7 @@ class QuestionAnsweringArgumentHandler(ArgumentHandler):
inputs = [inputs]
elif isinstance(inputs, Iterable):
# Copy to avoid overriding arguments
- inputs = [i for i in inputs]
+ inputs = list(inputs)
else:
raise ValueError(f"Invalid arguments {kwargs}")
diff --git a/src/transformers/tokenization_utils.py b/src/transformers/tokenization_utils.py
index 4dbbee4144..3398ee3091 100644
--- a/src/transformers/tokenization_utils.py
+++ b/src/transformers/tokenization_utils.py
@@ -425,7 +425,7 @@ class PreTrainedTokenizer(PreTrainedTokenizerBase):
if self.verbose:
logger.info(f"Adding {token} to the vocabulary")
- added_tok_encoder = dict((tok, len(self) + i) for i, tok in enumerate(tokens_to_add))
+ added_tok_encoder = {tok: len(self) + i for i, tok in enumerate(tokens_to_add)}
added_tok_decoder = {v: k for k, v in added_tok_encoder.items()}
self.added_tokens_encoder.update(added_tok_encoder)
self.added_tokens_decoder.update(added_tok_decoder)
@@ -495,9 +495,9 @@ class PreTrainedTokenizer(PreTrainedTokenizerBase):
`List[str]`: The list of tokens.
"""
# Simple mapping string => AddedToken for special tokens with specific tokenization behaviors
- all_special_tokens_extended = dict(
- (str(t), t) for t in self.all_special_tokens_extended if isinstance(t, AddedToken)
- )
+ all_special_tokens_extended = {
+ str(t): t for t in self.all_special_tokens_extended if isinstance(t, AddedToken)
+ }
text, kwargs = self.prepare_for_tokenization(text, **kwargs)
diff --git a/src/transformers/tokenization_utils_base.py b/src/transformers/tokenization_utils_base.py
index c11000111b..eb52ef0adb 100644
--- a/src/transformers/tokenization_utils_base.py
+++ b/src/transformers/tokenization_utils_base.py
@@ -1918,7 +1918,7 @@ class PreTrainedTokenizerBase(SpecialTokensMixin, PushToHubMixin):
obj.pop("__type")
return AddedToken(**obj)
elif isinstance(obj, (list, tuple)):
- return list(convert_added_tokens(o) for o in obj)
+ return [convert_added_tokens(o) for o in obj]
elif isinstance(obj, dict):
return {k: convert_added_tokens(v) for k, v in obj.items()}
return obj
@@ -1992,7 +1992,7 @@ class PreTrainedTokenizerBase(SpecialTokensMixin, PushToHubMixin):
added_tok_encoder = json.load(added_tokens_handle)
# Sort added tokens by index
- added_tok_encoder_sorted = list(sorted(added_tok_encoder.items(), key=lambda x: x[1]))
+ added_tok_encoder_sorted = sorted(added_tok_encoder.items(), key=lambda x: x[1])
# Accumulate added tokens into batches of special/non-special tokens, because calling add_tokens() for
# individual tokens would repeatedly rebuild a trie, which can be slow.
@@ -2129,7 +2129,7 @@ class PreTrainedTokenizerBase(SpecialTokensMixin, PushToHubMixin):
out["__type"] = "AddedToken"
return out
elif isinstance(obj, (list, tuple)):
- return list(convert_added_tokens(o, add_type_field=add_type_field) for o in obj)
+ return [convert_added_tokens(o, add_type_field=add_type_field) for o in obj]
elif isinstance(obj, dict):
return {k: convert_added_tokens(v, add_type_field=add_type_field) for k, v in obj.items()}
return obj
@@ -2502,23 +2502,23 @@ class PreTrainedTokenizerBase(SpecialTokensMixin, PushToHubMixin):
you must set `is_split_into_words=True` (to lift the ambiguity with a batch of sequences).
"""
# To avoid duplicating
- all_kwargs = dict(
- add_special_tokens=add_special_tokens,
- padding=padding,
- truncation=truncation,
- max_length=max_length,
- stride=stride,
- is_split_into_words=is_split_into_words,
- pad_to_multiple_of=pad_to_multiple_of,
- return_tensors=return_tensors,
- return_token_type_ids=return_token_type_ids,
- return_attention_mask=return_attention_mask,
- return_overflowing_tokens=return_overflowing_tokens,
- return_special_tokens_mask=return_special_tokens_mask,
- return_offsets_mapping=return_offsets_mapping,
- return_length=return_length,
- verbose=verbose,
- )
+ all_kwargs = {
+ "add_special_tokens": add_special_tokens,
+ "padding": padding,
+ "truncation": truncation,
+ "max_length": max_length,
+ "stride": stride,
+ "is_split_into_words": is_split_into_words,
+ "pad_to_multiple_of": pad_to_multiple_of,
+ "return_tensors": return_tensors,
+ "return_token_type_ids": return_token_type_ids,
+ "return_attention_mask": return_attention_mask,
+ "return_overflowing_tokens": return_overflowing_tokens,
+ "return_special_tokens_mask": return_special_tokens_mask,
+ "return_offsets_mapping": return_offsets_mapping,
+ "return_length": return_length,
+ "verbose": verbose,
+ }
all_kwargs.update(kwargs)
if text is None and text_target is None:
raise ValueError("You need to specify either `text` or `text_target`.")
@@ -3010,7 +3010,7 @@ class PreTrainedTokenizerBase(SpecialTokensMixin, PushToHubMixin):
batch_outputs = {}
for i in range(batch_size):
- inputs = dict((k, v[i]) for k, v in encoded_inputs.items())
+ inputs = {k: v[i] for k, v in encoded_inputs.items()}
outputs = self._pad(
inputs,
max_length=max_length,
diff --git a/src/transformers/tokenization_utils_fast.py b/src/transformers/tokenization_utils_fast.py
index bcdbd8325b..b484464f68 100644
--- a/src/transformers/tokenization_utils_fast.py
+++ b/src/transformers/tokenization_utils_fast.py
@@ -162,7 +162,7 @@ class PreTrainedTokenizerFast(PreTrainedTokenizerBase):
"""
base_vocab = self._tokenizer.get_vocab(with_added_tokens=False)
full_vocab = self._tokenizer.get_vocab(with_added_tokens=True)
- added_vocab = dict((tok, index) for tok, index in full_vocab.items() if tok not in base_vocab)
+ added_vocab = {tok: index for tok, index in full_vocab.items() if tok not in base_vocab}
return added_vocab
def __len__(self) -> int:
diff --git a/src/transformers/trainer.py b/src/transformers/trainer.py
index e4aa3f40a3..1f7df7e9f3 100755
--- a/src/transformers/trainer.py
+++ b/src/transformers/trainer.py
@@ -1081,7 +1081,7 @@ class Trainer:
skipped = 0
for module in opt_model.modules():
if isinstance(module, nn.Embedding):
- skipped += sum(dict((p.data_ptr(), p.numel()) for p in module.parameters()).values())
+ skipped += sum({p.data_ptr(): p.numel() for p in module.parameters()}.values())
print(f"skipped {module}: {skipped/2**20}M params")
manager.register_module_override(module, "weight", {"optim_bits": 32})
logger.debug(f"bitsandbytes: will optimize {module} in fp32")
@@ -2564,12 +2564,12 @@ class Trainer:
elif isinstance(data, (tuple, list)):
return type(data)(self._prepare_input(v) for v in data)
elif isinstance(data, torch.Tensor):
- kwargs = dict(device=self.args.device)
+ kwargs = {"device": self.args.device}
if self.deepspeed and data.dtype != torch.int64:
# NLP models inputs are int64 and those get adjusted to the right dtype of the
# embedding. Other models such as wav2vec2's inputs are already float and thus
# may need special handling to match the dtypes of the model
- kwargs.update(dict(dtype=self.args.hf_deepspeed_config.dtype()))
+ kwargs.update({"dtype": self.args.hf_deepspeed_config.dtype()})
return data.to(**kwargs)
return data
diff --git a/src/transformers/trainer_pt_utils.py b/src/transformers/trainer_pt_utils.py
index e6e5cca950..eefbb52683 100644
--- a/src/transformers/trainer_pt_utils.py
+++ b/src/transformers/trainer_pt_utils.py
@@ -534,7 +534,7 @@ def get_length_grouped_indices(lengths, batch_size, mega_batch_mult=None, genera
indices = torch.randperm(len(lengths), generator=generator)
megabatch_size = mega_batch_mult * batch_size
megabatches = [indices[i : i + megabatch_size].tolist() for i in range(0, len(lengths), megabatch_size)]
- megabatches = [list(sorted(megabatch, key=lambda i: lengths[i], reverse=True)) for megabatch in megabatches]
+ megabatches = [sorted(megabatch, key=lambda i: lengths[i], reverse=True) for megabatch in megabatches]
# The rest is to get the biggest batch first.
# Since each megabatch is sorted by descending length, the longest element is the first
diff --git a/src/transformers/trainer_utils.py b/src/transformers/trainer_utils.py
index af63761d82..9f273ab1ed 100644
--- a/src/transformers/trainer_utils.py
+++ b/src/transformers/trainer_utils.py
@@ -505,21 +505,21 @@ class TrainerMemoryTracker:
if self.torch is not None:
self.gpu_mem_used_now = self.torch.cuda.memory_allocated()
self.gpu_mem_used_peak = self.torch.cuda.max_memory_allocated()
- self.gpu[self.cur_stage] = dict(
- begin=self.gpu_mem_used_at_start,
- end=self.gpu_mem_used_now,
- alloc=(self.gpu_mem_used_now - self.gpu_mem_used_at_start),
- peaked=max(0, self.gpu_mem_used_peak - self.gpu_mem_used_now),
- )
+ self.gpu[self.cur_stage] = {
+ "begin": self.gpu_mem_used_at_start,
+ "end": self.gpu_mem_used_now,
+ "alloc": (self.gpu_mem_used_now - self.gpu_mem_used_at_start),
+ "peaked": max(0, self.gpu_mem_used_peak - self.gpu_mem_used_now),
+ }
# cpu
self.cpu_mem_used_now = self.cpu_mem_used()
- self.cpu[self.cur_stage] = dict(
- begin=self.cpu_mem_used_at_start,
- end=self.cpu_mem_used_now,
- alloc=(self.cpu_mem_used_now - self.cpu_mem_used_at_start),
- peaked=max(0, self.cpu_mem_used_peak - self.cpu_mem_used_now),
- )
+ self.cpu[self.cur_stage] = {
+ "begin": self.cpu_mem_used_at_start,
+ "end": self.cpu_mem_used_now,
+ "alloc": (self.cpu_mem_used_now - self.cpu_mem_used_at_start),
+ "peaked": max(0, self.cpu_mem_used_peak - self.cpu_mem_used_now),
+ }
# reset - cycle finished
self.cur_stage = None
diff --git a/src/transformers/training_args.py b/src/transformers/training_args.py
index 28ba71f6af..dc3c0c4244 100644
--- a/src/transformers/training_args.py
+++ b/src/transformers/training_args.py
@@ -1874,7 +1874,7 @@ class TrainingArguments:
the token values by removing their value.
"""
# filter out fields that are defined as field(init=False)
- d = dict((field.name, getattr(self, field.name)) for field in fields(self) if field.init)
+ d = {field.name: getattr(self, field.name) for field in fields(self) if field.init}
for k, v in d.items():
if isinstance(v, Enum):
diff --git a/src/transformers/utils/doc.py b/src/transformers/utils/doc.py
index 2e6264c508..f5eea7ae4e 100644
--- a/src/transformers/utils/doc.py
+++ b/src/transformers/utils/doc.py
@@ -1085,19 +1085,19 @@ def add_code_sample_docstrings(
# putting all kwargs for docstrings in a dict to be used
# with the `.format(**doc_kwargs)`. Note that string might
# be formatted with non-existing keys, which is fine.
- doc_kwargs = dict(
- model_class=model_class,
- processor_class=processor_class,
- checkpoint=checkpoint,
- mask=mask,
- qa_target_start_index=qa_target_start_index,
- qa_target_end_index=qa_target_end_index,
- expected_output=expected_output,
- expected_loss=expected_loss,
- real_checkpoint=real_checkpoint,
- fake_checkpoint=checkpoint,
- true="{true}", # For syntax that conflicts with formatting.
- )
+ doc_kwargs = {
+ "model_class": model_class,
+ "processor_class": processor_class,
+ "checkpoint": checkpoint,
+ "mask": mask,
+ "qa_target_start_index": qa_target_start_index,
+ "qa_target_end_index": qa_target_end_index,
+ "expected_output": expected_output,
+ "expected_loss": expected_loss,
+ "real_checkpoint": real_checkpoint,
+ "fake_checkpoint": checkpoint,
+ "true": "{true}", # For syntax that conflicts with formatting.
+ }
if ("SequenceClassification" in model_class or "AudioClassification" in model_class) and modality == "audio":
code_sample = sample_docstrings["AudioClassification"]
diff --git a/src/transformers/utils/hp_naming.py b/src/transformers/utils/hp_naming.py
index bc806e8222..f7c5cb5259 100644
--- a/src/transformers/utils/hp_naming.py
+++ b/src/transformers/utils/hp_naming.py
@@ -96,12 +96,12 @@ class TrialShortNamer:
if cls.NAMING_INFO is not None:
return
- info = dict(
- short_word={},
- reverse_short_word={},
- short_param={},
- reverse_short_param={},
- )
+ info = {
+ "short_word": {},
+ "reverse_short_word": {},
+ "short_param": {},
+ "reverse_short_param": {},
+ }
field_keys = list(cls.DEFAULTS.keys())
diff --git a/src/transformers/utils/hub.py b/src/transformers/utils/hub.py
index bb3575edf2..2bee24324c 100644
--- a/src/transformers/utils/hub.py
+++ b/src/transformers/utils/hub.py
@@ -902,7 +902,7 @@ def get_checkpoint_shard_files(
with open(index_filename, "r") as f:
index = json.loads(f.read())
- shard_filenames = sorted(list(set(index["weight_map"].values())))
+ shard_filenames = sorted(set(index["weight_map"].values()))
sharded_metadata = index["metadata"]
sharded_metadata["all_checkpoint_keys"] = list(index["weight_map"].keys())
sharded_metadata["weight_map"] = index["weight_map"].copy()
diff --git a/src/transformers/utils/model_parallel_utils.py b/src/transformers/utils/model_parallel_utils.py
index b5d23417ce..7ec79a5e23 100644
--- a/src/transformers/utils/model_parallel_utils.py
+++ b/src/transformers/utils/model_parallel_utils.py
@@ -51,6 +51,6 @@ def get_device_map(n_layers, devices):
"""Returns a dictionary of layers distributed evenly across all devices."""
layers = list(range(n_layers))
n_blocks = int(ceil(n_layers / len(devices)))
- layers_list = list(layers[i : i + n_blocks] for i in range(0, n_layers, n_blocks))
+ layers_list = [layers[i : i + n_blocks] for i in range(0, n_layers, n_blocks)]
return dict(zip(devices, layers_list))
diff --git a/tests/deepspeed/test_deepspeed.py b/tests/deepspeed/test_deepspeed.py
index 80dc017eea..60cec456c3 100644
--- a/tests/deepspeed/test_deepspeed.py
+++ b/tests/deepspeed/test_deepspeed.py
@@ -157,9 +157,13 @@ class CoreIntegrationDeepSpeed(TestCasePlus, TrainerIntegrationCommon):
super().setUp()
master_port = get_master_port(real_launcher=False)
- self.dist_env_1_gpu = dict(
- MASTER_ADDR="localhost", MASTER_PORT=master_port, RANK="0", LOCAL_RANK="0", WORLD_SIZE="1"
- )
+ self.dist_env_1_gpu = {
+ "MASTER_ADDR": "localhost",
+ "MASTER_PORT": master_port,
+ "RANK": "0",
+ "LOCAL_RANK": "0",
+ "WORLD_SIZE": "1",
+ }
def tearDown(self):
super().tearDown()
@@ -212,14 +216,18 @@ class TrainerIntegrationDeepSpeedWithCustomConfig(TestCasePlus):
self.batch_size = args.train_batch_size
master_port = get_master_port(real_launcher=False)
- self.dist_env_1_gpu = dict(
- MASTER_ADDR="localhost", MASTER_PORT=master_port, RANK="0", LOCAL_RANK="0", WORLD_SIZE="1"
- )
+ self.dist_env_1_gpu = {
+ "MASTER_ADDR": "localhost",
+ "MASTER_PORT": master_port,
+ "RANK": "0",
+ "LOCAL_RANK": "0",
+ "WORLD_SIZE": "1",
+ }
- self.ds_config_file = dict(
- zero2=f"{self.test_file_dir_str}/ds_config_zero2.json",
- zero3=f"{self.test_file_dir_str}/ds_config_zero3.json",
- )
+ self.ds_config_file = {
+ "zero2": f"{self.test_file_dir_str}/ds_config_zero2.json",
+ "zero3": f"{self.test_file_dir_str}/ds_config_zero3.json",
+ }
# use self.get_config_dict(stage) to use these to ensure the original is not modified
with io.open(self.ds_config_file[ZERO2], "r", encoding="utf-8") as f:
@@ -230,10 +238,10 @@ class TrainerIntegrationDeepSpeedWithCustomConfig(TestCasePlus):
# It's in the file as a demo for users since we want everything to work out of the box even if slower.
config_zero3["zero_optimization"]["stage3_gather_16bit_weights_on_model_save"] = False
- self.ds_config_dict = dict(
- zero2=config_zero2,
- zero3=config_zero3,
- )
+ self.ds_config_dict = {
+ "zero2": config_zero2,
+ "zero3": config_zero3,
+ }
def tearDown(self):
super().tearDown()
@@ -370,7 +378,7 @@ class TrainerIntegrationDeepSpeed(TrainerIntegrationDeepSpeedWithCustomConfig, T
# this actually doesn't have to be on NVMe, any storage will do since this test only
# runs a simple check that we can use some directory as if it were NVMe
nvme_path = self.get_auto_remove_tmp_dir()
- nvme_config = dict(device="nvme", nvme_path=nvme_path)
+ nvme_config = {"device": "nvme", "nvme_path": nvme_path}
ds_config_zero3_dict = self.get_config_dict(ZERO3)
ds_config_zero3_dict["zero_optimization"]["offload_optimizer"] = nvme_config
ds_config_zero3_dict["zero_optimization"]["offload_param"] = nvme_config
@@ -415,7 +423,7 @@ class TrainerIntegrationDeepSpeed(TrainerIntegrationDeepSpeedWithCustomConfig, T
# force cpu offload
ds_config_dict["zero_optimization"]["offload_optimizer"]["device"] = "cpu"
with mockenv_context(**self.dist_env_1_gpu):
- kwargs = dict(local_rank=0, deepspeed=ds_config_dict)
+ kwargs = {"local_rank": 0, "deepspeed": ds_config_dict}
kwargs[dtype] = True
trainer = get_regression_trainer(**kwargs)
with CaptureLogger(deepspeed_logger) as cl:
@@ -431,7 +439,7 @@ class TrainerIntegrationDeepSpeed(TrainerIntegrationDeepSpeedWithCustomConfig, T
# it's run not as a first test as `sys.stdout` will no longer be the same. So we either have
# to reset `deepspeed_logger.handlers[0].setStream(sys.stdout)` or directly capture from the deepspeed_logger.
with mockenv_context(**self.dist_env_1_gpu):
- kwargs = dict(local_rank=0, deepspeed=self.get_config_dict(stage))
+ kwargs = {"local_rank": 0, "deepspeed": self.get_config_dict(stage)}
kwargs[dtype] = True
trainer = get_regression_trainer(**kwargs)
@@ -449,15 +457,15 @@ class TrainerIntegrationDeepSpeed(TrainerIntegrationDeepSpeedWithCustomConfig, T
# `self.lr_scheduler.get_last_lr()` and originally it'd fail on the very first step.
with mockenv_context(**self.dist_env_1_gpu):
a = b = 0.0
- kwargs = dict(
- a=a,
- b=b,
- local_rank=0,
- train_len=8,
- deepspeed=self.get_config_dict(stage),
- per_device_train_batch_size=8,
- logging_steps=1,
- )
+ kwargs = {
+ "a": a,
+ "b": b,
+ "local_rank": 0,
+ "train_len": 8,
+ "deepspeed": self.get_config_dict(stage),
+ "per_device_train_batch_size": 8,
+ "logging_steps": 1,
+ }
kwargs[dtype] = True
trainer = get_regression_trainer(**kwargs)
@@ -494,13 +502,13 @@ class TrainerIntegrationDeepSpeed(TrainerIntegrationDeepSpeedWithCustomConfig, T
train_len = 64
a = b = 0.0
- kwargs = dict(
- a=a,
- b=b,
- local_rank=0,
- train_len=train_len,
- deepspeed=self.get_config_dict(stage),
- )
+ kwargs = {
+ "a": a,
+ "b": b,
+ "local_rank": 0,
+ "train_len": train_len,
+ "deepspeed": self.get_config_dict(stage),
+ }
kwargs[dtype] = True
with mockenv_context(**self.dist_env_1_gpu):
@@ -583,11 +591,11 @@ class TrainerIntegrationDeepSpeed(TrainerIntegrationDeepSpeedWithCustomConfig, T
# save checkpoints
with mockenv_context(**self.dist_env_1_gpu):
- kwargs = dict(
- output_dir=output_dir,
- save_steps=freq,
- deepspeed=ds_config_dict,
- )
+ kwargs = {
+ "output_dir": output_dir,
+ "save_steps": freq,
+ "deepspeed": ds_config_dict,
+ }
kwargs[dtype] = True
trainer = get_regression_trainer(**kwargs)
trainer.train()
@@ -600,7 +608,7 @@ class TrainerIntegrationDeepSpeed(TrainerIntegrationDeepSpeedWithCustomConfig, T
with mockenv_context(**self.dist_env_1_gpu):
ds_config_dict = self.get_config_dict(stage)
output_dir = self.get_auto_remove_tmp_dir()
- kwargs = dict(output_dir=output_dir, deepspeed=ds_config_dict)
+ kwargs = {"output_dir": output_dir, "deepspeed": ds_config_dict}
kwargs[dtype] = True
trainer = get_regression_trainer(**kwargs)
@@ -632,7 +640,13 @@ class TrainerIntegrationDeepSpeed(TrainerIntegrationDeepSpeedWithCustomConfig, T
if stage == ZERO3:
ds_config_dict["zero_optimization"]["stage3_gather_16bit_weights_on_model_save"] = True
- kwargs = dict(output_dir=output_dir, train_len=128, save_steps=5, learning_rate=0.1, deepspeed=ds_config_dict)
+ kwargs = {
+ "output_dir": output_dir,
+ "train_len": 128,
+ "save_steps": 5,
+ "learning_rate": 0.1,
+ "deepspeed": ds_config_dict,
+ }
kwargs[dtype] = True
with mockenv_context(**self.dist_env_1_gpu):
@@ -679,16 +693,16 @@ class TrainerIntegrationDeepSpeed(TrainerIntegrationDeepSpeedWithCustomConfig, T
ds_config_dict = self.get_config_dict(stage)
- kwargs = dict(
- output_dir=output_dir,
- train_len=4,
- per_device_train_batch_size=4,
- num_train_epochs=1,
- save_strategy="steps",
- save_steps=1,
- learning_rate=0.1,
- deepspeed=ds_config_dict,
- )
+ kwargs = {
+ "output_dir": output_dir,
+ "train_len": 4,
+ "per_device_train_batch_size": 4,
+ "num_train_epochs": 1,
+ "save_strategy": "steps",
+ "save_steps": 1,
+ "learning_rate": 0.1,
+ "deepspeed": ds_config_dict,
+ }
kwargs[dtype] = True
with mockenv_context(**self.dist_env_1_gpu):
@@ -710,7 +724,7 @@ class TrainerIntegrationDeepSpeed(TrainerIntegrationDeepSpeedWithCustomConfig, T
# test that we can switch from zero2 to zero3 in the same process for example
# test is_zero, etc.
output_dir = self.get_auto_remove_tmp_dir()
- kwargs = dict(output_dir=output_dir, train_len=8, fp16=True)
+ kwargs = {"output_dir": output_dir, "train_len": 8, "fp16": True}
ds_config_zero3_dict = self.get_config_dict(ZERO3)
ds_config_zero2_dict = self.get_config_dict(ZERO2)
@@ -808,7 +822,7 @@ class TrainerIntegrationDeepSpeed(TrainerIntegrationDeepSpeedWithCustomConfig, T
def get_dataset():
data_file = str(self.tests_dir / "fixtures/tests_samples/SQUAD/sample.json")
- data_files = dict(train=data_file, validation=data_file)
+ data_files = {"train": data_file, "validation": data_file}
raw_datasets = datasets.load_dataset("json", data_files=data_files, field="data")
train_dataset = raw_datasets["train"].map(_add_eos_to_examples).map(_convert_to_features, batched=True)
valid_dataset = deepcopy(train_dataset)
@@ -903,7 +917,14 @@ class TestDeepSpeedWithLauncher(TestCasePlus):
do_train = True
do_eval = False
- kwargs = dict(stage=stage, dtype=dtype, eval_steps=1, distributed=True, do_train=do_train, do_eval=do_eval)
+ kwargs = {
+ "stage": stage,
+ "dtype": dtype,
+ "eval_steps": 1,
+ "distributed": True,
+ "do_train": do_train,
+ "do_eval": do_eval,
+ }
# 1. normal training
output_dir = self.run_and_check(**kwargs)
diff --git a/tests/deepspeed/test_model_zoo.py b/tests/deepspeed/test_model_zoo.py
index 984c7e7565..e51fe1e7cf 100644
--- a/tests/deepspeed/test_model_zoo.py
+++ b/tests/deepspeed/test_model_zoo.py
@@ -166,8 +166,8 @@ def make_task_cmds():
# but need a tiny model for each
#
# should have "{model_type.upper()}_TINY" corresponding vars defined, e.g., T5_TINY, etc.
- tasks2models = dict(
- trans=[
+ tasks2models = {
+ "trans": [
"bart",
"fsmt",
"m2m_100",
@@ -177,10 +177,10 @@ def make_task_cmds():
"t5_v1",
# "mt5", missing model files
],
- sum=[
+ "sum": [
"pegasus",
],
- clm=[
+ "clm": [
"big_bird",
"bigbird_pegasus",
"blenderbot",
@@ -192,7 +192,7 @@ def make_task_cmds():
"prophetnet",
# "camembert", missing model files
],
- mlm=[
+ "mlm": [
"albert",
"deberta",
"deberta-v2",
@@ -203,7 +203,7 @@ def make_task_cmds():
"layoutlm",
# "reformer", # multiple issues with either mlm/qa/clas
],
- qa=[
+ "qa": [
"led",
"longformer",
"mobilebert",
@@ -213,7 +213,7 @@ def make_task_cmds():
# "convbert", # missing tokenizer files
# "layoutlmv2", missing model files
],
- clas=[
+ "clas": [
"bert",
"xlnet",
# "hubert", # missing tokenizer files
@@ -223,54 +223,54 @@ def make_task_cmds():
# "openai-gpt", missing model files
# "tapas", multiple issues
],
- img_clas=[
+ "img_clas": [
"vit",
],
- )
+ }
scripts_dir = f"{ROOT_DIRECTORY}/examples/pytorch"
- tasks = dict(
- trans=f"""
+ tasks = {
+ "trans": f"""
{scripts_dir}/translation/run_translation.py
--train_file {data_dir_wmt}/train.json
--source_lang en
--target_lang ro
""",
- sum=f"""
+ "sum": f"""
{scripts_dir}/summarization/run_summarization.py
--train_file {data_dir_xsum}/sample.json
--max_source_length 12
--max_target_length 12
--lang en
""",
- clm=f"""
+ "clm": f"""
{scripts_dir}/language-modeling/run_clm.py
--train_file {FIXTURE_DIRECTORY}/sample_text.txt
--block_size 8
""",
- mlm=f"""
+ "mlm": f"""
{scripts_dir}/language-modeling/run_mlm.py
--train_file {FIXTURE_DIRECTORY}/sample_text.txt
""",
- qa=f"""
+ "qa": f"""
{scripts_dir}/question-answering/run_qa.py
--train_file {data_dir_samples}/SQUAD/sample.json
""",
- clas=f"""
+ "clas": f"""
{scripts_dir}/text-classification/run_glue.py
--train_file {data_dir_samples}/MRPC/train.csv
--max_seq_length 12
--task_name MRPC
""",
- img_clas=f"""
+ "img_clas": f"""
{scripts_dir}/image-classification/run_image_classification.py
--dataset_name hf-internal-testing/cats_vs_dogs_sample
--remove_unused_columns False
--max_steps 10
--image_processor_name {DS_TESTS_DIRECTORY}/vit_feature_extractor.json
""",
- )
+ }
launcher = get_launcher(distributed=True)
diff --git a/tests/extended/test_trainer_ext.py b/tests/extended/test_trainer_ext.py
index d86fb337af..8953adaa24 100644
--- a/tests/extended/test_trainer_ext.py
+++ b/tests/extended/test_trainer_ext.py
@@ -155,21 +155,21 @@ class TestTrainerExt(TestCasePlus):
@require_torch_multi_gpu
def test_trainer_log_level_replica(self, experiment_id):
# as each sub-test is slow-ish split into multiple sub-tests to avoid CI timeout
- experiments = dict(
+ experiments = {
# test with the default log_level - should be info and thus log info once
- base=dict(extra_args_str="", n_matches=1),
+ "base": {"extra_args_str": "", "n_matches": 1},
# test with low log_level and log_level_replica - should be noisy on all processes
# now the info string should appear twice on 2 processes
- low=dict(extra_args_str="--log_level debug --log_level_replica debug", n_matches=2),
+ "low": {"extra_args_str": "--log_level debug --log_level_replica debug", "n_matches": 2},
# test with high log_level and low log_level_replica
# now the info string should appear once only on the replica
- high=dict(extra_args_str="--log_level error --log_level_replica debug", n_matches=1),
+ "high": {"extra_args_str": "--log_level error --log_level_replica debug", "n_matches": 1},
# test with high log_level and log_level_replica - should be quiet on all processes
- mixed=dict(extra_args_str="--log_level error --log_level_replica error", n_matches=0),
- )
+ "mixed": {"extra_args_str": "--log_level error --log_level_replica error", "n_matches": 0},
+ }
data = experiments[experiment_id]
- kwargs = dict(distributed=True, predict_with_generate=False, do_eval=False, do_predict=False)
+ kwargs = {"distributed": True, "predict_with_generate": False, "do_eval": False, "do_predict": False}
log_info_string = "Running training"
with CaptureStderr() as cl:
self.run_seq2seq_quick(**kwargs, extra_args_str=data["extra_args_str"])
diff --git a/tests/generation/test_utils.py b/tests/generation/test_utils.py
index 1287e4a876..b0d23b6fff 100644
--- a/tests/generation/test_utils.py
+++ b/tests/generation/test_utils.py
@@ -1480,7 +1480,7 @@ class GenerationTesterMixin:
signature = inspect.signature(model.forward)
# We want to test only models where encoder/decoder head masking is implemented
- if not set(head_masking.keys()) < set([*signature.parameters.keys()]):
+ if not set(head_masking.keys()) < {*signature.parameters.keys()}:
continue
for attn_name, (name, mask) in zip(attention_names, head_masking.items()):
diff --git a/tests/models/bart/test_modeling_bart.py b/tests/models/bart/test_modeling_bart.py
index b8f045442d..e1e525be3d 100644
--- a/tests/models/bart/test_modeling_bart.py
+++ b/tests/models/bart/test_modeling_bart.py
@@ -939,7 +939,7 @@ class BartModelIntegrationTests(unittest.TestCase):
def test_xsum_config_generation_params(self):
config = BartConfig.from_pretrained("facebook/bart-large-xsum")
- expected_params = dict(num_beams=6, do_sample=False, early_stopping=True, length_penalty=1.0)
+ expected_params = {"num_beams": 6, "do_sample": False, "early_stopping": True, "length_penalty": 1.0}
config_params = {k: getattr(config, k, "MISSING") for k, v in expected_params.items()}
self.assertDictEqual(expected_params, config_params)
diff --git a/tests/models/blenderbot/test_modeling_blenderbot.py b/tests/models/blenderbot/test_modeling_blenderbot.py
index 671541328d..1cc5377cf2 100644
--- a/tests/models/blenderbot/test_modeling_blenderbot.py
+++ b/tests/models/blenderbot/test_modeling_blenderbot.py
@@ -299,8 +299,8 @@ class Blenderbot3BIntegrationTests(unittest.TestCase):
@slow
def test_generation_from_short_input_same_as_parlai_3B(self):
- FASTER_GEN_KWARGS = dict(num_beams=1, early_stopping=True, min_length=15, max_length=25)
- TOK_DECODE_KW = dict(skip_special_tokens=True, clean_up_tokenization_spaces=True)
+ FASTER_GEN_KWARGS = {"num_beams": 1, "early_stopping": True, "min_length": 15, "max_length": 25}
+ TOK_DECODE_KW = {"skip_special_tokens": True, "clean_up_tokenization_spaces": True}
torch.cuda.empty_cache()
model = BlenderbotForConditionalGeneration.from_pretrained(self.ckpt).half().to(torch_device)
diff --git a/tests/models/blenderbot/test_modeling_flax_blenderbot.py b/tests/models/blenderbot/test_modeling_flax_blenderbot.py
index 771a388d4a..ffcc9a7d04 100644
--- a/tests/models/blenderbot/test_modeling_flax_blenderbot.py
+++ b/tests/models/blenderbot/test_modeling_flax_blenderbot.py
@@ -402,8 +402,8 @@ class FlaxBlenderbotModelTest(FlaxModelTesterMixin, unittest.TestCase, FlaxGener
@unittest.skipUnless(jax_device != "cpu", "3B test too slow on CPU.")
@slow
def test_generation_from_short_input_same_as_parlai_3B(self):
- FASTER_GEN_KWARGS = dict(num_beams=1, early_stopping=True, min_length=15, max_length=25)
- TOK_DECODE_KW = dict(skip_special_tokens=True, clean_up_tokenization_spaces=True)
+ FASTER_GEN_KWARGS = {"num_beams": 1, "early_stopping": True, "min_length": 15, "max_length": 25}
+ TOK_DECODE_KW = {"skip_special_tokens": True, "clean_up_tokenization_spaces": True}
model = FlaxBlenderbotForConditionalGeneration.from_pretrained("facebook/blenderbot-3B", from_pt=True)
tokenizer = BlenderbotTokenizer.from_pretrained("facebook/blenderbot-3B")
diff --git a/tests/models/bloom/test_tokenization_bloom.py b/tests/models/bloom/test_tokenization_bloom.py
index 88ead384e0..4857e2ab5f 100644
--- a/tests/models/bloom/test_tokenization_bloom.py
+++ b/tests/models/bloom/test_tokenization_bloom.py
@@ -124,7 +124,7 @@ class BloomTokenizationTest(TokenizerTesterMixin, unittest.TestCase):
input_text = list(sample_data.values())
output_tokens = list(map(tokenizer.encode, input_text))
- predicted_text = list(map(lambda x: tokenizer.decode(x, clean_up_tokenization_spaces=False), output_tokens))
+ predicted_text = [tokenizer.decode(x, clean_up_tokenization_spaces=False) for x in output_tokens]
self.assertListEqual(predicted_text, input_text)
def test_pretrained_model_lists(self):
diff --git a/tests/models/clip/test_modeling_tf_clip.py b/tests/models/clip/test_modeling_tf_clip.py
index 88ad5be374..cee1205db9 100644
--- a/tests/models/clip/test_modeling_tf_clip.py
+++ b/tests/models/clip/test_modeling_tf_clip.py
@@ -551,7 +551,7 @@ class TFCLIPModelTest(TFModelTesterMixin, unittest.TestCase):
if self.__class__.__name__ == "TFCLIPModelTest":
inputs_dict.pop("return_loss", None)
- tf_main_layer_classes = set(
+ tf_main_layer_classes = {
module_member
for model_class in self.all_model_classes
for module in (import_module(model_class.__module__),)
@@ -563,7 +563,7 @@ class TFCLIPModelTest(TFModelTesterMixin, unittest.TestCase):
if isinstance(module_member, type)
and tf.keras.layers.Layer in module_member.__bases__
and getattr(module_member, "_keras_serializable", False)
- )
+ }
for main_layer_class in tf_main_layer_classes:
# T5MainLayer needs an embed_tokens parameter when called without the inputs_embeds parameter
if "T5" in main_layer_class.__name__:
diff --git a/tests/models/data2vec/test_modeling_tf_data2vec_vision.py b/tests/models/data2vec/test_modeling_tf_data2vec_vision.py
index eb085af0d8..0fa14e526a 100644
--- a/tests/models/data2vec/test_modeling_tf_data2vec_vision.py
+++ b/tests/models/data2vec/test_modeling_tf_data2vec_vision.py
@@ -398,7 +398,7 @@ class TFData2VecVisionModelTest(TFModelTesterMixin, unittest.TestCase):
# The number of elements in the loss should be the same as the number of elements in the label
_, prepared_for_class = self.model_tester.prepare_config_and_inputs_for_keras_fit()
added_label = prepared_for_class[
- sorted(list(prepared_for_class.keys() - inputs_dict.keys()), reverse=True)[0]
+ sorted(prepared_for_class.keys() - inputs_dict.keys(), reverse=True)[0]
]
loss_size = tf.size(added_label)
diff --git a/tests/models/groupvit/test_modeling_tf_groupvit.py b/tests/models/groupvit/test_modeling_tf_groupvit.py
index 6283ab8988..24a493445c 100644
--- a/tests/models/groupvit/test_modeling_tf_groupvit.py
+++ b/tests/models/groupvit/test_modeling_tf_groupvit.py
@@ -628,7 +628,7 @@ class TFGroupViTModelTest(TFModelTesterMixin, unittest.TestCase):
if self.__class__.__name__ == "TFGroupViTModelTest":
inputs_dict.pop("return_loss", None)
- tf_main_layer_classes = set(
+ tf_main_layer_classes = {
module_member
for model_class in self.all_model_classes
for module in (import_module(model_class.__module__),)
@@ -640,7 +640,7 @@ class TFGroupViTModelTest(TFModelTesterMixin, unittest.TestCase):
if isinstance(module_member, type)
and tf.keras.layers.Layer in module_member.__bases__
and getattr(module_member, "_keras_serializable", False)
- )
+ }
for main_layer_class in tf_main_layer_classes:
# T5MainLayer needs an embed_tokens parameter when called without the inputs_embeds parameter
if "T5" in main_layer_class.__name__:
diff --git a/tests/models/jukebox/test_modeling_jukebox.py b/tests/models/jukebox/test_modeling_jukebox.py
index e77c8cb2eb..5f073bbd49 100644
--- a/tests/models/jukebox/test_modeling_jukebox.py
+++ b/tests/models/jukebox/test_modeling_jukebox.py
@@ -30,10 +30,10 @@ if is_torch_available():
class Jukebox1bModelTester(unittest.TestCase):
all_model_classes = (JukeboxModel,) if is_torch_available() else ()
model_id = "openai/jukebox-1b-lyrics"
- metas = dict(
- artist="Zac Brown Band",
- genres="Country",
- lyrics="""I met a traveller from an antique land,
+ metas = {
+ "artist": "Zac Brown Band",
+ "genres": "Country",
+ "lyrics": """I met a traveller from an antique land,
Who said "Two vast and trunkless legs of stone
Stand in the desert. . . . Near them, on the sand,
Half sunk a shattered visage lies, whose frown,
@@ -48,7 +48,7 @@ class Jukebox1bModelTester(unittest.TestCase):
Of that colossal Wreck, boundless and bare
The lone and level sands stretch far away
""",
- )
+ }
# fmt: off
EXPECTED_OUTPUT_2 = [
1864, 1536, 1213, 1870, 1357, 1536, 519, 880, 1323, 789, 1082, 534,
@@ -180,7 +180,7 @@ class Jukebox1bModelTester(unittest.TestCase):
model = JukeboxModel.from_pretrained(self.model_id, min_duration=0).eval()
set_seed(0)
waveform = torch.rand((1, 5120, 1))
- tokens = [i for i in self.prepare_inputs()]
+ tokens = list(self.prepare_inputs())
zs = [model.vqvae.encode(waveform, start_level=2, bs_chunks=waveform.shape[0])[0], None, None]
zs = model._sample(
@@ -220,10 +220,10 @@ class Jukebox1bModelTester(unittest.TestCase):
class Jukebox5bModelTester(unittest.TestCase):
all_model_classes = (JukeboxModel,) if is_torch_available() else ()
model_id = "openai/jukebox-5b-lyrics"
- metas = dict(
- artist="Zac Brown Band",
- genres="Country",
- lyrics="""I met a traveller from an antique land,
+ metas = {
+ "artist": "Zac Brown Band",
+ "genres": "Country",
+ "lyrics": """I met a traveller from an antique land,
Who said "Two vast and trunkless legs of stone
Stand in the desert. . . . Near them, on the sand,
Half sunk a shattered visage lies, whose frown,
@@ -238,7 +238,7 @@ class Jukebox5bModelTester(unittest.TestCase):
Of that colossal Wreck, boundless and bare
The lone and level sands stretch far away
""",
- )
+ }
# fmt: off
EXPECTED_OUTPUT_2 = [
diff --git a/tests/models/jukebox/test_tokenization_jukebox.py b/tests/models/jukebox/test_tokenization_jukebox.py
index 7ce2585bdd..c434cf6aa1 100644
--- a/tests/models/jukebox/test_tokenization_jukebox.py
+++ b/tests/models/jukebox/test_tokenization_jukebox.py
@@ -21,10 +21,10 @@ from transformers.testing_utils import require_torch
class JukeboxTokenizationTest(unittest.TestCase):
tokenizer_class = JukeboxTokenizer
- metas = dict(
- artist="Zac Brown Band",
- genres="Country",
- lyrics="""I met a traveller from an antique land,
+ metas = {
+ "artist": "Zac Brown Band",
+ "genres": "Country",
+ "lyrics": """I met a traveller from an antique land,
Who said "Two vast and trunkless legs of stone
Stand in the desert. . . . Near them, on the sand,
Half sunk a shattered visage lies, whose frown,
@@ -39,7 +39,7 @@ class JukeboxTokenizationTest(unittest.TestCase):
Of that colossal Wreck, boundless and bare
The lone and level sands stretch far away
""",
- )
+ }
@require_torch
def test_1b_lyrics_tokenizer(self):
diff --git a/tests/models/layoutlmv2/test_processor_layoutlmv2.py b/tests/models/layoutlmv2/test_processor_layoutlmv2.py
index 18f4f8d5ac..91a8da9caf 100644
--- a/tests/models/layoutlmv2/test_processor_layoutlmv2.py
+++ b/tests/models/layoutlmv2/test_processor_layoutlmv2.py
@@ -233,7 +233,7 @@ class LayoutLMv2ProcessorIntegrationTests(unittest.TestCase):
# verify keys
expected_keys = ["attention_mask", "bbox", "image", "input_ids", "token_type_ids"]
- actual_keys = sorted(list(input_processor.keys()))
+ actual_keys = sorted(input_processor.keys())
self.assertListEqual(actual_keys, expected_keys)
# verify image
@@ -253,7 +253,7 @@ class LayoutLMv2ProcessorIntegrationTests(unittest.TestCase):
# verify keys
expected_keys = ["attention_mask", "bbox", "image", "input_ids", "token_type_ids"]
- actual_keys = sorted(list(input_processor.keys()))
+ actual_keys = sorted(input_processor.keys())
self.assertListEqual(actual_keys, expected_keys)
# verify images
@@ -301,7 +301,7 @@ class LayoutLMv2ProcessorIntegrationTests(unittest.TestCase):
# verify keys
expected_keys = ["attention_mask", "bbox", "image", "input_ids", "token_type_ids"]
- actual_keys = sorted(list(input_processor.keys()))
+ actual_keys = sorted(input_processor.keys())
self.assertListEqual(actual_keys, expected_keys)
# verify input_ids
@@ -340,7 +340,7 @@ class LayoutLMv2ProcessorIntegrationTests(unittest.TestCase):
# verify keys
expected_keys = ["attention_mask", "bbox", "image", "input_ids", "labels", "token_type_ids"]
- actual_keys = sorted(list(input_processor.keys()))
+ actual_keys = sorted(input_processor.keys())
self.assertListEqual(actual_keys, expected_keys)
# verify input_ids
@@ -362,7 +362,7 @@ class LayoutLMv2ProcessorIntegrationTests(unittest.TestCase):
# verify keys
expected_keys = ["attention_mask", "bbox", "image", "input_ids", "labels", "token_type_ids"]
- actual_keys = sorted(list(input_processor.keys()))
+ actual_keys = sorted(input_processor.keys())
self.assertListEqual(actual_keys, expected_keys)
# verify input_ids
@@ -403,7 +403,7 @@ class LayoutLMv2ProcessorIntegrationTests(unittest.TestCase):
# verify keys
expected_keys = ["attention_mask", "bbox", "image", "input_ids", "token_type_ids"]
- actual_keys = sorted(list(input_processor.keys()))
+ actual_keys = sorted(input_processor.keys())
self.assertListEqual(actual_keys, expected_keys)
# verify input_ids
@@ -422,7 +422,7 @@ class LayoutLMv2ProcessorIntegrationTests(unittest.TestCase):
# verify keys
expected_keys = ["attention_mask", "bbox", "image", "input_ids", "token_type_ids"]
- actual_keys = sorted(list(input_processor.keys()))
+ actual_keys = sorted(input_processor.keys())
self.assertListEqual(actual_keys, expected_keys)
# verify input_ids
@@ -456,7 +456,7 @@ class LayoutLMv2ProcessorIntegrationTests(unittest.TestCase):
# verify keys
expected_keys = ["attention_mask", "bbox", "image", "input_ids", "token_type_ids"]
- actual_keys = sorted(list(input_processor.keys()))
+ actual_keys = sorted(input_processor.keys())
self.assertListEqual(actual_keys, expected_keys)
# verify input_ids
@@ -472,7 +472,7 @@ class LayoutLMv2ProcessorIntegrationTests(unittest.TestCase):
# verify keys
expected_keys = ["attention_mask", "bbox", "image", "input_ids", "token_type_ids"]
- actual_keys = sorted(list(input_processor.keys()))
+ actual_keys = sorted(input_processor.keys())
self.assertListEqual(actual_keys, expected_keys)
# verify input_ids
diff --git a/tests/models/layoutlmv3/test_modeling_tf_layoutlmv3.py b/tests/models/layoutlmv3/test_modeling_tf_layoutlmv3.py
index 39de55efad..f6b51c6d71 100644
--- a/tests/models/layoutlmv3/test_modeling_tf_layoutlmv3.py
+++ b/tests/models/layoutlmv3/test_modeling_tf_layoutlmv3.py
@@ -320,7 +320,7 @@ class TFLayoutLMv3ModelTest(TFModelTesterMixin, unittest.TestCase):
# The number of elements in the loss should be the same as the number of elements in the label
prepared_for_class = self._prepare_for_class(inputs_dict.copy(), model_class, return_labels=True)
added_label = prepared_for_class[
- sorted(list(prepared_for_class.keys() - inputs_dict.keys()), reverse=True)[0]
+ sorted(prepared_for_class.keys() - inputs_dict.keys(), reverse=True)[0]
]
expected_loss_size = added_label.shape.as_list()[:1]
diff --git a/tests/models/layoutlmv3/test_processor_layoutlmv3.py b/tests/models/layoutlmv3/test_processor_layoutlmv3.py
index 56f7925846..f649e0c275 100644
--- a/tests/models/layoutlmv3/test_processor_layoutlmv3.py
+++ b/tests/models/layoutlmv3/test_processor_layoutlmv3.py
@@ -213,7 +213,7 @@ class LayoutLMv3ProcessorIntegrationTests(unittest.TestCase):
# verify keys
expected_keys = ["attention_mask", "bbox", "input_ids", "pixel_values"]
- actual_keys = sorted(list(input_processor.keys()))
+ actual_keys = sorted(input_processor.keys())
self.assertListEqual(actual_keys, expected_keys)
# verify image
@@ -235,7 +235,7 @@ class LayoutLMv3ProcessorIntegrationTests(unittest.TestCase):
# verify keys
expected_keys = ["attention_mask", "bbox", "input_ids", "pixel_values"]
- actual_keys = sorted(list(input_processor.keys()))
+ actual_keys = sorted(input_processor.keys())
self.assertListEqual(actual_keys, expected_keys)
# verify images
@@ -285,7 +285,7 @@ class LayoutLMv3ProcessorIntegrationTests(unittest.TestCase):
# verify keys
expected_keys = ["attention_mask", "bbox", "input_ids", "pixel_values"]
- actual_keys = sorted(list(input_processor.keys()))
+ actual_keys = sorted(input_processor.keys())
self.assertListEqual(actual_keys, expected_keys)
# verify input_ids
@@ -324,7 +324,7 @@ class LayoutLMv3ProcessorIntegrationTests(unittest.TestCase):
# verify keys
expected_keys = ["attention_mask", "bbox", "input_ids", "labels", "pixel_values"]
- actual_keys = sorted(list(input_processor.keys()))
+ actual_keys = sorted(input_processor.keys())
self.assertListEqual(actual_keys, expected_keys)
# verify input_ids
@@ -346,7 +346,7 @@ class LayoutLMv3ProcessorIntegrationTests(unittest.TestCase):
# verify keys
expected_keys = ["attention_mask", "bbox", "input_ids", "labels", "pixel_values"]
- actual_keys = sorted(list(input_processor.keys()))
+ actual_keys = sorted(input_processor.keys())
self.assertListEqual(actual_keys, expected_keys)
# verify input_ids
@@ -387,7 +387,7 @@ class LayoutLMv3ProcessorIntegrationTests(unittest.TestCase):
# verify keys
expected_keys = ["attention_mask", "bbox", "input_ids", "pixel_values"]
- actual_keys = sorted(list(input_processor.keys()))
+ actual_keys = sorted(input_processor.keys())
self.assertListEqual(actual_keys, expected_keys)
# verify input_ids
@@ -406,7 +406,7 @@ class LayoutLMv3ProcessorIntegrationTests(unittest.TestCase):
# verify keys
expected_keys = ["attention_mask", "bbox", "input_ids", "pixel_values"]
- actual_keys = sorted(list(input_processor.keys()))
+ actual_keys = sorted(input_processor.keys())
self.assertListEqual(actual_keys, expected_keys)
# verify input_ids
@@ -440,7 +440,7 @@ class LayoutLMv3ProcessorIntegrationTests(unittest.TestCase):
# verify keys
expected_keys = ["attention_mask", "bbox", "input_ids", "pixel_values"]
- actual_keys = sorted(list(input_processor.keys()))
+ actual_keys = sorted(input_processor.keys())
self.assertListEqual(actual_keys, expected_keys)
# verify input_ids
@@ -456,7 +456,7 @@ class LayoutLMv3ProcessorIntegrationTests(unittest.TestCase):
# verify keys
expected_keys = ["attention_mask", "bbox", "input_ids", "pixel_values"]
- actual_keys = sorted(list(input_processor.keys()))
+ actual_keys = sorted(input_processor.keys())
self.assertListEqual(actual_keys, expected_keys)
# verify input_ids
diff --git a/tests/models/layoutxlm/test_processor_layoutxlm.py b/tests/models/layoutxlm/test_processor_layoutxlm.py
index 2843528bae..5d74bacfa0 100644
--- a/tests/models/layoutxlm/test_processor_layoutxlm.py
+++ b/tests/models/layoutxlm/test_processor_layoutxlm.py
@@ -228,7 +228,7 @@ class LayoutXLMProcessorIntegrationTests(unittest.TestCase):
# verify keys
expected_keys = ["attention_mask", "bbox", "image", "input_ids"]
- actual_keys = sorted(list(input_processor.keys()))
+ actual_keys = sorted(input_processor.keys())
self.assertListEqual(actual_keys, expected_keys)
# verify image
@@ -250,7 +250,7 @@ class LayoutXLMProcessorIntegrationTests(unittest.TestCase):
# verify keys
expected_keys = ["attention_mask", "bbox", "image", "input_ids"]
- actual_keys = sorted(list(input_processor.keys()))
+ actual_keys = sorted(input_processor.keys())
self.assertListEqual(actual_keys, expected_keys)
# verify images
@@ -300,7 +300,7 @@ class LayoutXLMProcessorIntegrationTests(unittest.TestCase):
# verify keys
expected_keys = ["attention_mask", "bbox", "image", "input_ids"]
- actual_keys = sorted(list(input_processor.keys()))
+ actual_keys = sorted(input_processor.keys())
self.assertListEqual(actual_keys, expected_keys)
# verify input_ids
@@ -339,7 +339,7 @@ class LayoutXLMProcessorIntegrationTests(unittest.TestCase):
# verify keys
expected_keys = ["attention_mask", "bbox", "image", "input_ids", "labels"]
- actual_keys = sorted(list(input_processor.keys()))
+ actual_keys = sorted(input_processor.keys())
self.assertListEqual(actual_keys, expected_keys)
# verify input_ids
@@ -361,7 +361,7 @@ class LayoutXLMProcessorIntegrationTests(unittest.TestCase):
# verify keys
expected_keys = ["attention_mask", "bbox", "image", "input_ids", "labels"]
- actual_keys = sorted(list(input_processor.keys()))
+ actual_keys = sorted(input_processor.keys())
self.assertListEqual(actual_keys, expected_keys)
# verify input_ids
@@ -402,7 +402,7 @@ class LayoutXLMProcessorIntegrationTests(unittest.TestCase):
# verify keys
expected_keys = ["attention_mask", "bbox", "image", "input_ids"]
- actual_keys = sorted(list(input_processor.keys()))
+ actual_keys = sorted(input_processor.keys())
self.assertListEqual(actual_keys, expected_keys)
# verify input_ids
@@ -421,7 +421,7 @@ class LayoutXLMProcessorIntegrationTests(unittest.TestCase):
# verify keys
expected_keys = ["attention_mask", "bbox", "image", "input_ids"]
- actual_keys = sorted(list(input_processor.keys()))
+ actual_keys = sorted(input_processor.keys())
self.assertListEqual(actual_keys, expected_keys)
# verify input_ids
@@ -455,7 +455,7 @@ class LayoutXLMProcessorIntegrationTests(unittest.TestCase):
# verify keys
expected_keys = ["attention_mask", "bbox", "image", "input_ids"]
- actual_keys = sorted(list(input_processor.keys()))
+ actual_keys = sorted(input_processor.keys())
self.assertListEqual(actual_keys, expected_keys)
# verify input_ids
@@ -471,7 +471,7 @@ class LayoutXLMProcessorIntegrationTests(unittest.TestCase):
# verify keys
expected_keys = ["attention_mask", "bbox", "image", "input_ids"]
- actual_keys = sorted(list(input_processor.keys()))
+ actual_keys = sorted(input_processor.keys())
self.assertListEqual(actual_keys, expected_keys)
# verify input_ids
diff --git a/tests/models/markuplm/test_processor_markuplm.py b/tests/models/markuplm/test_processor_markuplm.py
index 141d7bae18..eb09701593 100644
--- a/tests/models/markuplm/test_processor_markuplm.py
+++ b/tests/models/markuplm/test_processor_markuplm.py
@@ -204,7 +204,7 @@ class MarkupLMProcessorIntegrationTests(unittest.TestCase):
# verify keys
expected_keys = ["attention_mask", "input_ids", "token_type_ids", "xpath_subs_seq", "xpath_tags_seq"]
- actual_keys = sorted(list(inputs.keys()))
+ actual_keys = sorted(inputs.keys())
self.assertListEqual(actual_keys, expected_keys)
# verify input_ids
@@ -216,7 +216,7 @@ class MarkupLMProcessorIntegrationTests(unittest.TestCase):
# verify keys
expected_keys = ["attention_mask", "input_ids", "token_type_ids", "xpath_subs_seq", "xpath_tags_seq"]
- actual_keys = sorted(list(inputs.keys()))
+ actual_keys = sorted(inputs.keys())
self.assertListEqual(actual_keys, expected_keys)
# verify input_ids
@@ -260,7 +260,7 @@ class MarkupLMProcessorIntegrationTests(unittest.TestCase):
# verify keys
expected_keys = ["attention_mask", "input_ids", "token_type_ids", "xpath_subs_seq", "xpath_tags_seq"]
- actual_keys = sorted(list(inputs.keys()))
+ actual_keys = sorted(inputs.keys())
self.assertListEqual(actual_keys, expected_keys)
# verify input_ids
@@ -294,7 +294,7 @@ class MarkupLMProcessorIntegrationTests(unittest.TestCase):
"xpath_subs_seq",
"xpath_tags_seq",
]
- actual_keys = sorted(list(inputs.keys()))
+ actual_keys = sorted(inputs.keys())
self.assertListEqual(actual_keys, expected_keys)
# verify input_ids
@@ -331,7 +331,7 @@ class MarkupLMProcessorIntegrationTests(unittest.TestCase):
"xpath_subs_seq",
"xpath_tags_seq",
]
- actual_keys = sorted(list(inputs.keys()))
+ actual_keys = sorted(inputs.keys())
self.assertListEqual(actual_keys, expected_keys)
# verify input_ids
@@ -367,7 +367,7 @@ class MarkupLMProcessorIntegrationTests(unittest.TestCase):
# verify keys
expected_keys = ["attention_mask", "input_ids", "token_type_ids", "xpath_subs_seq", "xpath_tags_seq"]
- actual_keys = sorted(list(inputs.keys()))
+ actual_keys = sorted(inputs.keys())
self.assertListEqual(actual_keys, expected_keys)
# verify input_ids
@@ -390,7 +390,7 @@ class MarkupLMProcessorIntegrationTests(unittest.TestCase):
# verify keys
expected_keys = ["attention_mask", "input_ids", "token_type_ids", "xpath_subs_seq", "xpath_tags_seq"]
- actual_keys = sorted(list(inputs.keys()))
+ actual_keys = sorted(inputs.keys())
self.assertListEqual(actual_keys, expected_keys)
# verify input_ids
@@ -425,7 +425,7 @@ class MarkupLMProcessorIntegrationTests(unittest.TestCase):
# verify keys
expected_keys = ["attention_mask", "input_ids", "token_type_ids", "xpath_subs_seq", "xpath_tags_seq"]
- actual_keys = sorted(list(inputs.keys()))
+ actual_keys = sorted(inputs.keys())
self.assertListEqual(actual_keys, expected_keys)
# verify input_ids
@@ -444,7 +444,7 @@ class MarkupLMProcessorIntegrationTests(unittest.TestCase):
# verify keys
expected_keys = ["attention_mask", "input_ids", "token_type_ids", "xpath_subs_seq", "xpath_tags_seq"]
- actual_keys = sorted(list(inputs.keys()))
+ actual_keys = sorted(inputs.keys())
self.assertListEqual(actual_keys, expected_keys)
# verify input_ids
diff --git a/tests/models/mobilevit/test_modeling_tf_mobilevit.py b/tests/models/mobilevit/test_modeling_tf_mobilevit.py
index eea07f9413..9bb3872274 100644
--- a/tests/models/mobilevit/test_modeling_tf_mobilevit.py
+++ b/tests/models/mobilevit/test_modeling_tf_mobilevit.py
@@ -295,7 +295,7 @@ class MobileViTModelTest(TFModelTesterMixin, unittest.TestCase):
# The number of elements in the loss should be the same as the number of elements in the label
prepared_for_class = self._prepare_for_class(inputs_dict.copy(), model_class, return_labels=True)
added_label = prepared_for_class[
- sorted(list(prepared_for_class.keys() - inputs_dict.keys()), reverse=True)[0]
+ sorted(prepared_for_class.keys() - inputs_dict.keys(), reverse=True)[0]
]
expected_loss_size = added_label.shape.as_list()[:1]
diff --git a/tests/models/perceiver/test_modeling_perceiver.py b/tests/models/perceiver/test_modeling_perceiver.py
index f07b874676..872aed47e2 100644
--- a/tests/models/perceiver/test_modeling_perceiver.py
+++ b/tests/models/perceiver/test_modeling_perceiver.py
@@ -166,9 +166,11 @@ class PerceiverModelTester:
audio = torch.randn(
(self.batch_size, self.num_frames * self.audio_samples_per_frame, 1), device=torch_device
)
- inputs = dict(
- image=images, audio=audio, label=torch.zeros((self.batch_size, self.num_labels), device=torch_device)
- )
+ inputs = {
+ "image": images,
+ "audio": audio,
+ "label": torch.zeros((self.batch_size, self.num_labels), device=torch_device),
+ }
else:
raise ValueError(f"Model class {model_class} not supported")
@@ -734,7 +736,7 @@ class PerceiverModelTest(ModelTesterMixin, unittest.TestCase):
continue
config, inputs, input_mask, _, _ = self.model_tester.prepare_config_and_inputs(model_class=model_class)
- inputs_dict = dict(inputs=inputs, attention_mask=input_mask)
+ inputs_dict = {"inputs": inputs, "attention_mask": input_mask}
for problem_type in problem_types:
with self.subTest(msg=f"Testing {model_class} with {problem_type['title']}"):
diff --git a/tests/models/roc_bert/test_tokenization_roc_bert.py b/tests/models/roc_bert/test_tokenization_roc_bert.py
index 334a347a1e..0f8fe08efd 100644
--- a/tests/models/roc_bert/test_tokenization_roc_bert.py
+++ b/tests/models/roc_bert/test_tokenization_roc_bert.py
@@ -44,8 +44,8 @@ class BertTokenizationTest(TokenizerTesterMixin, unittest.TestCase):
super().setUp()
vocab_tokens = ["[UNK]", "[CLS]", "[SEP]", "[PAD]", "[MASK]", "你", "好", "是", "谁", "a", "b", "c", "d"]
- word_shape = dict()
- word_pronunciation = dict()
+ word_shape = {}
+ word_pronunciation = {}
for i, value in enumerate(vocab_tokens):
word_shape[value] = i
word_pronunciation[value] = i
diff --git a/tests/models/segformer/test_modeling_tf_segformer.py b/tests/models/segformer/test_modeling_tf_segformer.py
index bfcc580bb4..4bb423bfca 100644
--- a/tests/models/segformer/test_modeling_tf_segformer.py
+++ b/tests/models/segformer/test_modeling_tf_segformer.py
@@ -362,9 +362,7 @@ class TFSegformerModelTest(TFModelTesterMixin, unittest.TestCase):
_, prepared_for_class = self.model_tester.prepare_config_and_inputs_for_keras_fit(
for_segmentation=for_segmentation
)
- added_label = prepared_for_class[
- sorted(list(prepared_for_class.keys() - inputs_dict.keys()), reverse=True)[0]
- ]
+ added_label = prepared_for_class[sorted(prepared_for_class.keys() - inputs_dict.keys(), reverse=True)[0]]
loss_size = tf.size(added_label)
# Test that model correctly compute the loss with kwargs
diff --git a/tests/models/speecht5/test_feature_extraction_speecht5.py b/tests/models/speecht5/test_feature_extraction_speecht5.py
index 34cf071bd1..390b769b8d 100644
--- a/tests/models/speecht5/test_feature_extraction_speecht5.py
+++ b/tests/models/speecht5/test_feature_extraction_speecht5.py
@@ -372,7 +372,7 @@ class SpeechT5FeatureExtractionTest(SequenceFeatureExtractionTestMixin, unittest
)
self.assertIn("attention_mask", processed_pad)
self.assertListEqual(
- list(processed_pad.attention_mask.shape), list((processed_pad[input_name].shape[0], max_length))
+ list(processed_pad.attention_mask.shape), [processed_pad[input_name].shape[0], max_length]
)
self.assertListEqual(
processed_pad.attention_mask[:, :max_length].sum(-1).tolist(), [max_length for x in speech_inputs]
diff --git a/tests/models/t5/test_tokenization_t5.py b/tests/models/t5/test_tokenization_t5.py
index 8dbef67297..16ff9f04de 100644
--- a/tests/models/t5/test_tokenization_t5.py
+++ b/tests/models/t5/test_tokenization_t5.py
@@ -387,7 +387,7 @@ class T5TokenizationTest(TokenizerTesterMixin, unittest.TestCase):
def test_get_sentinel_token_ids(self):
tokenizer = T5Tokenizer(SAMPLE_VOCAB, extra_ids=10)
- self.assertListEqual(sorted(tokenizer.get_sentinel_token_ids()), sorted([i for i in range(1000, 1010)]))
+ self.assertListEqual(sorted(tokenizer.get_sentinel_token_ids()), sorted(range(1000, 1010)))
def test_get_sentinel_tokens_for_fasttokenizer(self):
tokenizer = T5TokenizerFast(SAMPLE_VOCAB, extra_ids=10)
@@ -398,4 +398,4 @@ class T5TokenizationTest(TokenizerTesterMixin, unittest.TestCase):
def test_get_sentinel_token_ids_for_fasttokenizer(self):
tokenizer = T5TokenizerFast(SAMPLE_VOCAB, extra_ids=10)
- self.assertListEqual(sorted(tokenizer.get_sentinel_token_ids()), sorted([i for i in range(1000, 1010)]))
+ self.assertListEqual(sorted(tokenizer.get_sentinel_token_ids()), sorted(range(1000, 1010)))
diff --git a/tests/models/transfo_xl/test_modeling_transfo_xl.py b/tests/models/transfo_xl/test_modeling_transfo_xl.py
index 7375475a95..89ac1d3b09 100644
--- a/tests/models/transfo_xl/test_modeling_transfo_xl.py
+++ b/tests/models/transfo_xl/test_modeling_transfo_xl.py
@@ -347,7 +347,7 @@ class TransfoXLModelTest(ModelTesterMixin, GenerationTesterMixin, unittest.TestC
# Retrieve the cutoffs and copy them
copied_cutoffs = copy.copy(model_embed.cutoffs)
- test_layers = [x for x in range(config.div_val)]
+ test_layers = list(range(config.div_val))
for layer in test_layers:
# Check that resizing the token embeddings with a larger vocab size increases the model's vocab size
model_embed = model.resize_token_embeddings(model_vocab_size + 10, layer)
diff --git a/tests/models/tvlt/test_modeling_tvlt.py b/tests/models/tvlt/test_modeling_tvlt.py
index 0f3d5ab68a..bb6d2df0d9 100644
--- a/tests/models/tvlt/test_modeling_tvlt.py
+++ b/tests/models/tvlt/test_modeling_tvlt.py
@@ -581,7 +581,7 @@ class TvltModelIntegrationTest(unittest.TestCase):
audio = prepare_audio()
video_inputs = image_processor(video, return_tensors="pt").to(torch_device)
audio_inputs = audio_feature_extractor(audio, return_tensors="pt").to(torch_device)
- inputs = dict()
+ inputs = {}
inputs.update(video_inputs)
inputs.update(audio_inputs)
@@ -606,7 +606,7 @@ class TvltModelIntegrationTest(unittest.TestCase):
video_mixed_inputs = image_processor(video_mixed, is_mixed=True, return_tensors="pt").to(torch_device)
audio_inputs = audio_feature_extractor(audio, return_tensors="pt", mask_audio=True).to(torch_device)
labels = torch.tensor([[0.0]], device=torch_device)
- inputs = dict()
+ inputs = {}
inputs.update(video_inputs)
inputs.update(video_mixed_inputs)
inputs.update(audio_inputs)
diff --git a/tests/models/vit_mae/test_modeling_tf_vit_mae.py b/tests/models/vit_mae/test_modeling_tf_vit_mae.py
index 8c19c01491..48bda3aec7 100644
--- a/tests/models/vit_mae/test_modeling_tf_vit_mae.py
+++ b/tests/models/vit_mae/test_modeling_tf_vit_mae.py
@@ -333,7 +333,7 @@ class TFViTMAEModelTest(TFModelTesterMixin, unittest.TestCase):
config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
- tf_main_layer_classes = set(
+ tf_main_layer_classes = {
module_member
for model_class in self.all_model_classes
for module in (import_module(model_class.__module__),)
@@ -345,7 +345,7 @@ class TFViTMAEModelTest(TFModelTesterMixin, unittest.TestCase):
if isinstance(module_member, type)
and tf.keras.layers.Layer in module_member.__bases__
and getattr(module_member, "_keras_serializable", False)
- )
+ }
num_patches = int((config.image_size // config.patch_size) ** 2)
noise = np.random.uniform(size=(self.model_tester.batch_size, num_patches))
diff --git a/tests/models/wav2vec2/test_tokenization_wav2vec2.py b/tests/models/wav2vec2/test_tokenization_wav2vec2.py
index 4027e0cefc..cf5dc100c2 100644
--- a/tests/models/wav2vec2/test_tokenization_wav2vec2.py
+++ b/tests/models/wav2vec2/test_tokenization_wav2vec2.py
@@ -231,7 +231,7 @@ class Wav2Vec2TokenizerTest(unittest.TestCase):
tokenizer_files = tokenizer.save_pretrained(tmpdirname2)
self.assertSequenceEqual(
sorted(tuple(VOCAB_FILES_NAMES.values()) + ("special_tokens_map.json", "added_tokens.json")),
- sorted(tuple(x.split(os.path.sep)[-1] for x in tokenizer_files)),
+ sorted(x.split(os.path.sep)[-1] for x in tokenizer_files),
)
# Checks everything loads correctly in the same way
@@ -456,7 +456,7 @@ class Wav2Vec2CTCTokenizerTest(TokenizerTesterMixin, unittest.TestCase):
def test_special_characters_in_vocab(self):
sent = "ʈʰ æ æ̃ ˧ kʰ"
- vocab_dict = {k: v for v, k in enumerate({phoneme for phoneme in sent.split()})}
+ vocab_dict = {k: v for v, k in enumerate(set(sent.split()))}
vocab_file = os.path.join(self.tmpdirname, "vocab_special.json")
with open(vocab_file, "w") as f:
diff --git a/tests/models/wav2vec2_with_lm/test_processor_wav2vec2_with_lm.py b/tests/models/wav2vec2_with_lm/test_processor_wav2vec2_with_lm.py
index df5db0a3e2..a98ea55d0b 100644
--- a/tests/models/wav2vec2_with_lm/test_processor_wav2vec2_with_lm.py
+++ b/tests/models/wav2vec2_with_lm/test_processor_wav2vec2_with_lm.py
@@ -215,7 +215,7 @@ class Wav2Vec2ProcessorWithLMTest(unittest.TestCase):
with get_context(pool_context).Pool() as pool:
decoded_processor = processor.batch_decode(logits, pool)
- logits_list = [array for array in logits]
+ logits_list = list(logits)
with get_context("fork").Pool() as p:
decoded_beams = decoder.decode_beams_batch(p, logits_list)
@@ -252,7 +252,7 @@ class Wav2Vec2ProcessorWithLMTest(unittest.TestCase):
)
decoded_processor = decoded_processor_out.text
- logits_list = [array for array in logits]
+ logits_list = list(logits)
with get_context("fork").Pool() as pool:
decoded_decoder_out = decoder.decode_beams_batch(
@@ -299,7 +299,7 @@ class Wav2Vec2ProcessorWithLMTest(unittest.TestCase):
)
decoded_processor = decoded_processor_out.text
- logits_list = [array for array in logits]
+ logits_list = list(logits)
decoder.reset_params(
alpha=alpha,
beta=beta,
diff --git a/tests/models/xlnet/test_modeling_tf_xlnet.py b/tests/models/xlnet/test_modeling_tf_xlnet.py
index a8686d4a2b..230ef7a28e 100644
--- a/tests/models/xlnet/test_modeling_tf_xlnet.py
+++ b/tests/models/xlnet/test_modeling_tf_xlnet.py
@@ -400,7 +400,7 @@ class TFXLNetModelTest(TFModelTesterMixin, unittest.TestCase):
# The number of elements in the loss should be the same as the number of elements in the label
prepared_for_class = self._prepare_for_class(inputs_dict.copy(), model_class, return_labels=True)
added_label = prepared_for_class[
- sorted(list(prepared_for_class.keys() - inputs_dict.keys()), reverse=True)[0]
+ sorted(prepared_for_class.keys() - inputs_dict.keys(), reverse=True)[0]
]
expected_loss_size = added_label.shape.as_list()[:1]
diff --git a/tests/pipelines/test_pipelines_common.py b/tests/pipelines/test_pipelines_common.py
index 6c61909527..4070966437 100644
--- a/tests/pipelines/test_pipelines_common.py
+++ b/tests/pipelines/test_pipelines_common.py
@@ -606,7 +606,7 @@ class PipelineUtilsTest(unittest.TestCase):
dataset = PipelineIterator(dummy_dataset, add, {"extra": 2})
self.assertEqual(len(dataset), 4)
- outputs = [item for item in dataset]
+ outputs = list(dataset)
self.assertEqual(outputs, [2, 3, 4, 5])
@require_torch
@@ -624,7 +624,7 @@ class PipelineUtilsTest(unittest.TestCase):
with self.assertRaises(TypeError):
len(dataset)
- outputs = [item for item in dataset]
+ outputs = list(dataset)
self.assertEqual(outputs, [2, 3, 4, 5])
@require_torch
@@ -638,7 +638,7 @@ class PipelineUtilsTest(unittest.TestCase):
dataset = PipelineIterator(dummy_dataset, add, {"extra": 2}, loader_batch_size=3)
- outputs = [item for item in dataset]
+ outputs = list(dataset)
self.assertEqual(outputs, [{"id": 2}, {"id": 3}, {"id": 4}, {"id": 5}])
@require_torch
@@ -654,7 +654,7 @@ class PipelineUtilsTest(unittest.TestCase):
dataset = PipelineIterator(dummy_dataset, add, {"extra": 2}, loader_batch_size=3)
- outputs = [item for item in dataset]
+ outputs = list(dataset)
self.assertEqual(
nested_simplify(outputs), [{"id": [[12, 22]]}, {"id": [[2, 3]]}, {"id": [[2, 4]]}, {"id": [[5]]}]
)
@@ -671,7 +671,7 @@ class PipelineUtilsTest(unittest.TestCase):
dataset = PipelineChunkIterator(dataset, preprocess_chunk, {}, loader_batch_size=3)
- outputs = [item for item in dataset]
+ outputs = list(dataset)
self.assertEqual(outputs, [0, 1, 0, 1, 2])
@@ -692,7 +692,7 @@ class PipelineUtilsTest(unittest.TestCase):
dataset = PipelinePackIterator(dataset, pack, {})
- outputs = [item for item in dataset]
+ outputs = list(dataset)
self.assertEqual(
outputs,
[
@@ -719,7 +719,7 @@ class PipelineUtilsTest(unittest.TestCase):
dataset = PipelinePackIterator(dummy_dataset, add, {"extra": 2}, loader_batch_size=3)
- outputs = [item for item in dataset]
+ outputs = list(dataset)
self.assertEqual(outputs, [[{"id": 2}, {"id": 3}], [{"id": 4}, {"id": 5}]])
# is_false Across batch
@@ -730,7 +730,7 @@ class PipelineUtilsTest(unittest.TestCase):
dataset = PipelinePackIterator(dummy_dataset, add, {"extra": 2}, loader_batch_size=3)
- outputs = [item for item in dataset]
+ outputs = list(dataset)
self.assertEqual(outputs, [[{"id": 2}, {"id": 3}, {"id": 4}, {"id": 5}]])
@slow
diff --git a/tests/pipelines/test_pipelines_fill_mask.py b/tests/pipelines/test_pipelines_fill_mask.py
index 43825ae0f5..b5260488fb 100644
--- a/tests/pipelines/test_pipelines_fill_mask.py
+++ b/tests/pipelines/test_pipelines_fill_mask.py
@@ -281,7 +281,7 @@ class FillMaskPipelineTests(unittest.TestCase, metaclass=PipelineTestCaseMeta):
def run_test_targets(self, model, tokenizer):
vocab = tokenizer.get_vocab()
- targets = list(sorted(vocab.keys()))[:2]
+ targets = sorted(vocab.keys())[:2]
# Pipeline argument
fill_masker = FillMaskPipeline(model=model, tokenizer=tokenizer, targets=targets)
outputs = fill_masker(f"This is a {tokenizer.mask_token}")
@@ -293,8 +293,8 @@ class FillMaskPipelineTests(unittest.TestCase, metaclass=PipelineTestCaseMeta):
],
)
target_ids = {vocab[el] for el in targets}
- self.assertEqual(set(el["token"] for el in outputs), target_ids)
- self.assertEqual(set(el["token_str"] for el in outputs), set(targets))
+ self.assertEqual({el["token"] for el in outputs}, target_ids)
+ self.assertEqual({el["token_str"] for el in outputs}, set(targets))
# Call argument
fill_masker = FillMaskPipeline(model=model, tokenizer=tokenizer)
@@ -307,8 +307,8 @@ class FillMaskPipelineTests(unittest.TestCase, metaclass=PipelineTestCaseMeta):
],
)
target_ids = {vocab[el] for el in targets}
- self.assertEqual(set(el["token"] for el in outputs), target_ids)
- self.assertEqual(set(el["token_str"] for el in outputs), set(targets))
+ self.assertEqual({el["token"] for el in outputs}, target_ids)
+ self.assertEqual({el["token_str"] for el in outputs}, set(targets))
# Score equivalence
outputs = fill_masker(f"This is a {tokenizer.mask_token}", targets=targets)
@@ -354,7 +354,7 @@ class FillMaskPipelineTests(unittest.TestCase, metaclass=PipelineTestCaseMeta):
fill_masker = FillMaskPipeline(model=model, tokenizer=tokenizer)
# top_k=2, ntargets=3
- targets = list(sorted(vocab.keys()))[:3]
+ targets = sorted(vocab.keys())[:3]
outputs = fill_masker(f"This is a {tokenizer.mask_token}", top_k=2, targets=targets)
# If we use the most probably targets, and filter differently, we should still
@@ -369,7 +369,7 @@ class FillMaskPipelineTests(unittest.TestCase, metaclass=PipelineTestCaseMeta):
fill_masker = FillMaskPipeline(model=model, tokenizer=tokenizer)
vocab = tokenizer.get_vocab()
# String duplicates + id duplicates
- targets = list(sorted(vocab.keys()))[:3]
+ targets = sorted(vocab.keys())[:3]
targets = [targets[0], targets[1], targets[0], targets[2], targets[1]]
outputs = fill_masker(f"My name is {tokenizer.mask_token}", targets=targets, top_k=10)
diff --git a/tests/pipelines/test_pipelines_video_classification.py b/tests/pipelines/test_pipelines_video_classification.py
index 9074196183..8390d21fc5 100644
--- a/tests/pipelines/test_pipelines_video_classification.py
+++ b/tests/pipelines/test_pipelines_video_classification.py
@@ -63,7 +63,7 @@ class VideoClassificationPipelineTests(unittest.TestCase, metaclass=PipelineTest
def test_small_model_pt(self):
small_model = "hf-internal-testing/tiny-random-VideoMAEForVideoClassification"
small_feature_extractor = VideoMAEFeatureExtractor(
- size=dict(shortest_edge=10), crop_size=dict(height=10, width=10)
+ size={"shortest_edge": 10}, crop_size={"height": 10, "width": 10}
)
video_classifier = pipeline(
"video-classification", model=small_model, feature_extractor=small_feature_extractor, frame_sampling_rate=4
diff --git a/tests/repo_utils/test_tests_fetcher.py b/tests/repo_utils/test_tests_fetcher.py
index 0541b72d95..cd0109b535 100644
--- a/tests/repo_utils/test_tests_fetcher.py
+++ b/tests/repo_utils/test_tests_fetcher.py
@@ -56,9 +56,9 @@ class CheckDummiesTester(unittest.TestCase):
"pytorch_utils.py",
"models/bert/configuration_bert.py",
]
- expected_deps = set(os.path.join(transformers_path, f) for f in expected_deps)
+ expected_deps = {os.path.join(transformers_path, f) for f in expected_deps}
repo = Repo(git_repo_path)
with checkout_commit(repo, GIT_TEST_SHA):
deps = get_module_dependencies(bert_module)
- deps = set(os.path.expanduser(f) for f in deps)
+ deps = {os.path.expanduser(f) for f in deps}
self.assertEqual(deps, expected_deps)
diff --git a/tests/sagemaker/scripts/pytorch/run_glue_model_parallelism.py b/tests/sagemaker/scripts/pytorch/run_glue_model_parallelism.py
index 01185fdaba..ecbe714a16 100644
--- a/tests/sagemaker/scripts/pytorch/run_glue_model_parallelism.py
+++ b/tests/sagemaker/scripts/pytorch/run_glue_model_parallelism.py
@@ -362,12 +362,12 @@ def main():
):
# Some have all caps in their config, some don't.
label_name_to_id = {k.lower(): v for k, v in model.config.label2id.items()}
- if list(sorted(label_name_to_id.keys())) == list(sorted(label_list)):
+ if sorted(label_name_to_id.keys()) == sorted(label_list):
label_to_id = {i: int(label_name_to_id[label_list[i]]) for i in range(num_labels)}
else:
logger.warning(
"Your model seems to have been trained with labels, but they don't match the dataset: ",
- f"model labels: {list(sorted(label_name_to_id.keys()))}, dataset labels: {list(sorted(label_list))}."
+ f"model labels: {sorted(label_name_to_id.keys())}, dataset labels: {sorted(label_list)}."
"\nIgnoring the model labels as a result.",
)
elif data_args.task_name is None and not is_regression:
diff --git a/tests/test_modeling_common.py b/tests/test_modeling_common.py
index 152ea7d6cd..eddf503334 100755
--- a/tests/test_modeling_common.py
+++ b/tests/test_modeling_common.py
@@ -1643,7 +1643,7 @@ class ModelTesterMixin:
params = dict(model_reloaded.named_parameters())
params.update(dict(model_reloaded.named_buffers()))
# param_names = set(k[len(prefix) :] if k.startswith(prefix) else k for k in params.keys())
- param_names = set(k[len(prefix) :] if k.startswith(prefix) else k for k in params.keys())
+ param_names = {k[len(prefix) :] if k.startswith(prefix) else k for k in params.keys()}
missing_keys = set(infos["missing_keys"])
@@ -1770,8 +1770,8 @@ class ModelTesterMixin:
def _postprocessing_to_ignore_test_cases(self, tf_outputs, pt_outputs, model_class):
"""For temporarily ignoring some failed test cases (issues to be fixed)"""
- tf_keys = set([k for k, v in tf_outputs.items() if v is not None])
- pt_keys = set([k for k, v in pt_outputs.items() if v is not None])
+ tf_keys = {k for k, v in tf_outputs.items() if v is not None}
+ pt_keys = {k for k, v in pt_outputs.items() if v is not None}
key_differences = tf_keys.symmetric_difference(pt_keys)
@@ -2995,7 +2995,7 @@ class ModelUtilsTest(TestCasePlus):
index = json.loads(f.read())
all_shards = set(index["weight_map"].values())
- shards_found = set(f for f in os.listdir(tmp_dir) if f.endswith(".bin"))
+ shards_found = {f for f in os.listdir(tmp_dir) if f.endswith(".bin")}
self.assertSetEqual(all_shards, shards_found)
# Finally, check the model can be reloaded
diff --git a/tests/test_modeling_flax_common.py b/tests/test_modeling_flax_common.py
index f6737d8649..f93228e9b8 100644
--- a/tests/test_modeling_flax_common.py
+++ b/tests/test_modeling_flax_common.py
@@ -1099,7 +1099,7 @@ class FlaxModelTesterMixin:
index = json.loads(f.read())
all_shards = set(index["weight_map"].values())
- shards_found = set(f for f in os.listdir(tmp_dir) if f.endswith(".msgpack"))
+ shards_found = {f for f in os.listdir(tmp_dir) if f.endswith(".msgpack")}
self.assertSetEqual(all_shards, shards_found)
# Finally, check the model can be reloaded
diff --git a/tests/test_modeling_tf_common.py b/tests/test_modeling_tf_common.py
index ced3c0f86a..afd74411be 100644
--- a/tests/test_modeling_tf_common.py
+++ b/tests/test_modeling_tf_common.py
@@ -398,7 +398,7 @@ class TFModelTesterMixin:
def test_keras_save_load(self):
config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
- tf_main_layer_classes = set(
+ tf_main_layer_classes = {
module_member
for model_class in self.all_model_classes
for module in (import_module(model_class.__module__),)
@@ -410,7 +410,7 @@ class TFModelTesterMixin:
if isinstance(module_member, type)
and tf.keras.layers.Layer in module_member.__bases__
and getattr(module_member, "_keras_serializable", False)
- )
+ }
for main_layer_class in tf_main_layer_classes:
# T5MainLayer needs an embed_tokens parameter when called without the inputs_embeds parameter
if "T5" in main_layer_class.__name__:
@@ -498,8 +498,8 @@ class TFModelTesterMixin:
def _postprocessing_to_ignore_test_cases(self, tf_outputs, pt_outputs, model_class):
"""For temporarily ignoring some failed test cases (issues to be fixed)"""
- tf_keys = set([k for k, v in tf_outputs.items() if v is not None])
- pt_keys = set([k for k, v in pt_outputs.items() if v is not None])
+ tf_keys = {k for k, v in tf_outputs.items() if v is not None}
+ pt_keys = {k for k, v in pt_outputs.items() if v is not None}
key_differences = tf_keys.symmetric_difference(pt_keys)
@@ -1455,7 +1455,7 @@ class TFModelTesterMixin:
continue
# The number of elements in the loss should be the same as the number of elements in the label
prepared_for_class = self._prepare_for_class(inputs_dict.copy(), model_class, return_labels=True)
- added_label_names = sorted(list(prepared_for_class.keys() - inputs_dict.keys()), reverse=True)
+ added_label_names = sorted(prepared_for_class.keys() - inputs_dict.keys(), reverse=True)
if not added_label_names:
continue # This test is only for models with easily-separable labels
added_label = prepared_for_class[added_label_names[0]]
@@ -1713,7 +1713,7 @@ class TFModelTesterMixin:
}
signature = inspect.signature(model.call)
- if set(head_masking.keys()) < set([*signature.parameters.keys()]):
+ if set(head_masking.keys()) < {*signature.parameters.keys()}:
continue
for attn_name, (name, mask) in zip(attention_names, head_masking.items()):
@@ -2274,7 +2274,7 @@ class UtilsFunctionsTest(unittest.TestCase):
index = json.loads(f.read())
all_shards = set(index["weight_map"].values())
- shards_found = set(f for f in os.listdir(tmp_dir) if f.endswith(".h5"))
+ shards_found = {f for f in os.listdir(tmp_dir) if f.endswith(".h5")}
self.assertSetEqual(all_shards, shards_found)
# Finally, check the model can be reloaded
diff --git a/tests/test_sequence_feature_extraction_common.py b/tests/test_sequence_feature_extraction_common.py
index 710ad01250..4c09c1c262 100644
--- a/tests/test_sequence_feature_extraction_common.py
+++ b/tests/test_sequence_feature_extraction_common.py
@@ -417,7 +417,7 @@ class SequenceFeatureExtractionTestMixin(FeatureExtractionSavingTestMixin):
)
self.assertIn("attention_mask", processed_pad)
self.assertListEqual(
- list(processed_pad.attention_mask.shape), list((processed_pad[input_name].shape[0], max_length))
+ list(processed_pad.attention_mask.shape), [processed_pad[input_name].shape[0], max_length]
)
self.assertListEqual(
processed_pad.attention_mask[:, :max_length].sum(-1).tolist(), [max_length for x in speech_inputs]
diff --git a/tests/trainer/test_trainer.py b/tests/trainer/test_trainer.py
index 2c26deeffe..d167b646c0 100644
--- a/tests/trainer/test_trainer.py
+++ b/tests/trainer/test_trainer.py
@@ -1148,7 +1148,13 @@ class TrainerIntegrationTest(TestCasePlus, TrainerIntegrationCommon):
# won't be the same since the training dataloader is shuffled).
with tempfile.TemporaryDirectory() as tmpdir:
- kwargs = dict(output_dir=tmpdir, train_len=128, save_steps=5, learning_rate=0.1, logging_steps=5)
+ kwargs = {
+ "output_dir": tmpdir,
+ "train_len": 128,
+ "save_steps": 5,
+ "learning_rate": 0.1,
+ "logging_steps": 5,
+ }
trainer = get_regression_trainer(**kwargs)
trainer.train()
(a, b) = trainer.model.a.item(), trainer.model.b.item()
@@ -1181,7 +1187,13 @@ class TrainerIntegrationTest(TestCasePlus, TrainerIntegrationCommon):
# With a regular model that is not a PreTrainedModel
with tempfile.TemporaryDirectory() as tmpdir:
- kwargs = dict(output_dir=tmpdir, train_len=128, save_steps=5, learning_rate=0.1, pretrained=False)
+ kwargs = {
+ "output_dir": tmpdir,
+ "train_len": 128,
+ "save_steps": 5,
+ "learning_rate": 0.1,
+ "pretrained": False,
+ }
trainer = get_regression_trainer(**kwargs)
trainer.train()
diff --git a/tests/trainer/test_trainer_callback.py b/tests/trainer/test_trainer_callback.py
index a88ca1cb0d..8e851132c2 100644
--- a/tests/trainer/test_trainer_callback.py
+++ b/tests/trainer/test_trainer_callback.py
@@ -108,8 +108,8 @@ class TrainerCallbackTest(unittest.TestCase):
self.assertEqual(len(cbs1), len(cbs2))
# Order doesn't matter
- cbs1 = list(sorted(cbs1, key=lambda cb: cb.__name__ if isinstance(cb, type) else cb.__class__.__name__))
- cbs2 = list(sorted(cbs2, key=lambda cb: cb.__name__ if isinstance(cb, type) else cb.__class__.__name__))
+ cbs1 = sorted(cbs1, key=lambda cb: cb.__name__ if isinstance(cb, type) else cb.__class__.__name__)
+ cbs2 = sorted(cbs2, key=lambda cb: cb.__name__ if isinstance(cb, type) else cb.__class__.__name__)
for cb1, cb2 in zip(cbs1, cbs2):
if isinstance(cb1, type) and isinstance(cb2, type):
diff --git a/tests/trainer/test_trainer_utils.py b/tests/trainer/test_trainer_utils.py
index 869d19b0a1..ccf162677e 100644
--- a/tests/trainer/test_trainer_utils.py
+++ b/tests/trainer/test_trainer_utils.py
@@ -189,7 +189,7 @@ class TrainerUtilsTest(unittest.TestCase):
# The biggest element should be first
self.assertEqual(lengths[indices[0]], 50)
# The indices should be a permutation of range(100)
- self.assertEqual(list(sorted(indices)), list(range(100)))
+ self.assertEqual(sorted(indices), list(range(100)))
def test_group_by_length_with_dict(self):
# Get some inputs of random lengths
@@ -204,7 +204,7 @@ class TrainerUtilsTest(unittest.TestCase):
# The biggest element should be first
self.assertEqual(len(data[indices[0]]["input_ids"]), 105)
# The indices should be a permutation of range(6)
- self.assertEqual(list(sorted(indices)), list(range(6)))
+ self.assertEqual(sorted(indices), list(range(6)))
def test_group_by_length_with_batch_encoding(self):
# Get some inputs of random lengths
@@ -219,7 +219,7 @@ class TrainerUtilsTest(unittest.TestCase):
# The biggest element should be first
self.assertEqual(len(data[indices[0]]["input_ids"]), 105)
# The indices should be a permutation of range(6)
- self.assertEqual(list(sorted(indices)), list(range(6)))
+ self.assertEqual(sorted(indices), list(range(6)))
def test_distributed_length_grouped(self):
# Get some inputs of random lengths
@@ -232,7 +232,7 @@ class TrainerUtilsTest(unittest.TestCase):
# The biggest element should be first
self.assertEqual(lengths[indices_process_0[0]], 50)
# The indices should be a permutation of range(100)
- self.assertEqual(list(sorted(indices_process_0 + indices_process_1)), list(range(100)))
+ self.assertEqual(sorted(indices_process_0 + indices_process_1), list(range(100)))
def test_get_parameter_names(self):
model = nn.Sequential(TstLayer(128), nn.ModuleList([TstLayer(128), TstLayer(128)]))
diff --git a/tests/utils/test_modeling_tf_core.py b/tests/utils/test_modeling_tf_core.py
index 7795833507..f144a7b8d9 100644
--- a/tests/utils/test_modeling_tf_core.py
+++ b/tests/utils/test_modeling_tf_core.py
@@ -285,7 +285,7 @@ class TFCoreModelTesterMixin:
del inputs_dict["decoder_head_mask"]
if "cross_attn_head_mask" in inputs_dict:
del inputs_dict["cross_attn_head_mask"]
- tf_main_layer_classes = set(
+ tf_main_layer_classes = {
module_member
for model_class in self.all_model_classes
for module in (import_module(model_class.__module__),)
@@ -295,7 +295,7 @@ class TFCoreModelTesterMixin:
if isinstance(module_member, type)
and tf.keras.layers.Layer in module_member.__bases__
and getattr(module_member, "_keras_serializable", False)
- )
+ }
for main_layer_class in tf_main_layer_classes:
# T5MainLayer needs an embed_tokens parameter when called without the inputs_embeds parameter
diff --git a/utils/check_copies.py b/utils/check_copies.py
index d32df3b870..4fd2017e60 100644
--- a/utils/check_copies.py
+++ b/utils/check_copies.py
@@ -385,7 +385,7 @@ def convert_to_localized_md(model_list, localized_model_list, format_str):
sorted_index = sorted(localized_model_index.items(), key=lambda x: x[0].lower())
- return readmes_match, "\n".join(map(lambda x: x[1], sorted_index)) + "\n"
+ return readmes_match, "\n".join((x[1] for x in sorted_index)) + "\n"
def convert_readme_to_index(model_list):
diff --git a/utils/check_doc_toc.py b/utils/check_doc_toc.py
index 67ec2f9466..a01804284c 100644
--- a/utils/check_doc_toc.py
+++ b/utils/check_doc_toc.py
@@ -33,7 +33,7 @@ def clean_model_doc_toc(model_doc):
new_doc = []
for duplicate_key in duplicates:
- titles = list(set(doc["title"] for doc in model_doc if doc["local"] == duplicate_key))
+ titles = list({doc["title"] for doc in model_doc if doc["local"] == duplicate_key})
if len(titles) > 1:
raise ValueError(
f"{duplicate_key} is present several times in the documentation table of content at "
diff --git a/utils/check_repo.py b/utils/check_repo.py
index 53717645cf..f7582f35ca 100644
--- a/utils/check_repo.py
+++ b/utils/check_repo.py
@@ -335,7 +335,7 @@ def check_model_list():
# Get the models from the directory structure of `src/transformers/models/`
models = [model for model in dir(transformers.models) if not model.startswith("__")]
- missing_models = sorted(list(set(_models).difference(models)))
+ missing_models = sorted(set(_models).difference(models))
if missing_models:
raise Exception(
f"The following models should be included in {models_dir}/__init__.py: {','.join(missing_models)}."
@@ -547,7 +547,7 @@ def get_all_auto_configured_models():
for attr_name in dir(transformers.models.auto.modeling_flax_auto):
if attr_name.startswith("FLAX_MODEL_") and attr_name.endswith("MAPPING_NAMES"):
result = result | set(get_values(getattr(transformers.models.auto.modeling_flax_auto, attr_name)))
- return [cls for cls in result]
+ return list(result)
def ignore_unautoclassed(model_name):
diff --git a/utils/create_dummy_models.py b/utils/create_dummy_models.py
index 47c150d6e8..162a310c65 100644
--- a/utils/create_dummy_models.py
+++ b/utils/create_dummy_models.py
@@ -413,10 +413,10 @@ def convert_processors(processors, tiny_config, output_folder, result):
feature_extractors.append(processor.feature_extractor)
# check the built processors have the unique type
- num_types = len(set([x.__class__.__name__ for x in feature_extractors]))
+ num_types = len({x.__class__.__name__ for x in feature_extractors})
if num_types >= 2:
raise ValueError(f"`feature_extractors` should contain at most 1 type, but it contains {num_types} types!")
- num_types = len(set([x.__class__.__name__.replace("Fast", "") for x in tokenizers]))
+ num_types = len({x.__class__.__name__.replace("Fast", "") for x in tokenizers})
if num_types >= 2:
raise ValueError(f"`tokenizers` should contain at most 1 tokenizer type, but it contains {num_types} types!")
@@ -712,7 +712,7 @@ def build_composite_models(config_class, output_dir):
shutil.copytree(decoder_processor_path, model_path, dirs_exist_ok=True)
# fill `result`
- result["processor"] = tuple(set([x.__name__ for x in encoder_processor + decoder_processor]))
+ result["processor"] = tuple({x.__name__ for x in encoder_processor + decoder_processor})
result["pytorch"] = {model_class.__name__: {"model": model_class.__name__, "checkpoint": model_path}}
diff --git a/utils/extract_warnings.py b/utils/extract_warnings.py
index cb609e8615..bc26e79366 100644
--- a/utils/extract_warnings.py
+++ b/utils/extract_warnings.py
@@ -134,6 +134,6 @@ if __name__ == "__main__":
# extract warnings from artifacts
selected_warnings = extract_warnings(args.output_dir, args.targets)
- selected_warnings = sorted(list(selected_warnings))
+ selected_warnings = sorted(selected_warnings)
with open(os.path.join(args.output_dir, "selected_warnings.json"), "w", encoding="UTF-8") as fp:
json.dump(selected_warnings, fp, ensure_ascii=False, indent=4)
diff --git a/utils/get_ci_error_statistics.py b/utils/get_ci_error_statistics.py
index b6642dce9c..5e2846ee39 100644
--- a/utils/get_ci_error_statistics.py
+++ b/utils/get_ci_error_statistics.py
@@ -166,7 +166,7 @@ def reduce_by_model(logs, error_filter=None):
logs = [(x[0], x[1], get_model(x[2])) for x in logs]
logs = [x for x in logs if x[2] is not None]
- tests = set([x[2] for x in logs])
+ tests = {x[2] for x in logs}
r = {}
for test in tests:
diff --git a/utils/tests_fetcher.py b/utils/tests_fetcher.py
index 84dd062a19..1d1df9e817 100644
--- a/utils/tests_fetcher.py
+++ b/utils/tests_fetcher.py
@@ -78,13 +78,11 @@ def get_all_tests():
# test folders/files directly under `tests` folder
tests = os.listdir(test_root_dir)
- tests = sorted(
- list(filter(lambda x: os.path.isdir(x) or x.startswith("tests/test_"), [f"tests/{x}" for x in tests]))
- )
+ tests = sorted(filter(lambda x: os.path.isdir(x) or x.startswith("tests/test_"), [f"tests/{x}" for x in tests]))
# model specific test folders
model_tests_folders = os.listdir(os.path.join(test_root_dir, "models"))
- model_test_folders = sorted(list(filter(os.path.isdir, [f"tests/models/{x}" for x in model_tests_folders])))
+ model_test_folders = sorted(filter(os.path.isdir, [f"tests/models/{x}" for x in model_tests_folders]))
tests.remove("tests/models")
tests = model_test_folders + tests
@@ -265,7 +263,7 @@ def get_tree_starting_at(module, edges):
tree = [module]
while len(new_edges) > 0:
tree.append(new_edges)
- final_vertices = list(set(edge[1] for edge in new_edges))
+ final_vertices = list({edge[1] for edge in new_edges})
vertices_seen.extend(final_vertices)
new_edges = [edge for edge in edges if edge[0] in final_vertices and edge[1] not in vertices_seen]
@@ -285,10 +283,10 @@ def print_tree_deps_of(module, all_edges=None):
lines = [(tree[0], tree[0])]
for index in range(1, len(tree)):
edges = tree[index]
- start_edges = set([edge[0] for edge in edges])
+ start_edges = {edge[0] for edge in edges}
for start in start_edges:
- end_edges = set([edge[1] for edge in edges if edge[0] == start])
+ end_edges = {edge[1] for edge in edges if edge[0] == start}
# We will insert all those edges just after the line showing start.
pos = 0
while lines[pos][1] != start:
@@ -547,7 +545,7 @@ def infer_tests_to_run(output_file, diff_with_last_commit=False, filters=None, j
impacted_files.extend(impacted_modules_map[f])
# Remove duplicates
- impacted_files = sorted(list(set(impacted_files)))
+ impacted_files = sorted(set(impacted_files))
print(f"\n### IMPACTED FILES ###\n{_print_list(impacted_files)}")
# Grab the corresponding test files:
@@ -578,7 +576,7 @@ def infer_tests_to_run(output_file, diff_with_last_commit=False, filters=None, j
test_files_to_run.extend(new_tests)
# Remove duplicates
- test_files_to_run = sorted(list(set(test_files_to_run)))
+ test_files_to_run = sorted(set(test_files_to_run))
# Make sure we did not end up with a test file that was removed
test_files_to_run = [f for f in test_files_to_run if os.path.isfile(f) or os.path.isdir(f)]
if filters is not None:
diff --git a/utils/update_metadata.py b/utils/update_metadata.py
index 6aeb767375..f95a4575d1 100644
--- a/utils/update_metadata.py
+++ b/utils/update_metadata.py
@@ -223,7 +223,7 @@ def update_metadata(token, commit_sha):
table = update_pipeline_and_auto_class_table(table)
# Sort the model classes to avoid some nondeterministic updates to create false update commits.
- model_classes = sorted(list(table.keys()))
+ model_classes = sorted(table.keys())
tags_table = pd.DataFrame(
{
"model_class": model_classes,