Apply ruff flake8-comprehensions (#21694)

This commit is contained in:
Aaron Gokaslan
2023-02-22 03:14:54 -05:00
committed by GitHub
parent df06fb1f0b
commit 5e8c8eb5ba
230 changed files with 971 additions and 955 deletions

View File

@@ -83,7 +83,7 @@ def can_convert_to_float(string):
class Plot:
def __init__(self, args):
self.args = args
self.result_dict = defaultdict(lambda: dict(bsz=[], seq_len=[], result={}))
self.result_dict = defaultdict(lambda: {"bsz": [], "seq_len": [], "result": {}})
with open(self.args.csv_file, newline="") as csv_file:
reader = csv.DictReader(csv_file)
@@ -116,8 +116,8 @@ class Plot:
axis.set_major_formatter(ScalarFormatter())
for model_name_idx, model_name in enumerate(self.result_dict.keys()):
batch_sizes = sorted(list(set(self.result_dict[model_name]["bsz"])))
sequence_lengths = sorted(list(set(self.result_dict[model_name]["seq_len"])))
batch_sizes = sorted(set(self.result_dict[model_name]["bsz"]))
sequence_lengths = sorted(set(self.result_dict[model_name]["seq_len"]))
results = self.result_dict[model_name]["result"]
(x_axis_array, inner_loop_array) = (

View File

@@ -300,7 +300,7 @@ def main():
# Prepare label mappings.
# We'll include these in the model's config to get human readable labels in the Inference API.
labels = dataset["train"].features["labels"].names
label2id, id2label = dict(), dict()
label2id, id2label = {}, {}
for i, label in enumerate(labels):
label2id[label] = str(i)
id2label[str(i)] = label

View File

@@ -600,7 +600,7 @@ def main():
if training_args.output_dir is not None:
output_eval_file = os.path.join(training_args.output_dir, "all_results.json")
results_dict = dict()
results_dict = {}
results_dict["train_loss"] = train_loss
results_dict["train_perplexity"] = train_perplexity
results_dict["eval_loss"] = validation_loss

View File

@@ -623,7 +623,7 @@ def main():
if training_args.output_dir is not None:
output_eval_file = os.path.join(training_args.output_dir, "all_results.json")
results_dict = dict()
results_dict = {}
results_dict["train_loss"] = train_loss
results_dict["train_perplexity"] = train_perplexity
results_dict["eval_loss"] = validation_loss

View File

@@ -464,7 +464,7 @@ def main():
return tokenized_examples
processed_datasets = dict()
processed_datasets = {}
if training_args.do_train:
if "train" not in datasets:
raise ValueError("--do_train requires a train dataset")

View File

@@ -310,12 +310,12 @@ def main():
if config.label2id != PretrainedConfig(num_labels=num_labels).label2id and not is_regression:
# Some have all caps in their config, some don't.
label_name_to_id = {k.lower(): v for k, v in config.label2id.items()}
if list(sorted(label_name_to_id.keys())) == list(sorted(label_list)):
if sorted(label_name_to_id.keys()) == sorted(label_list):
label_to_id = {i: int(label_name_to_id[label_list[i]]) for i in range(num_labels)}
else:
logger.warning(
"Your model seems to have been trained with labels, but they don't match the dataset: ",
f"model labels: {list(sorted(label_name_to_id.keys()))}, dataset labels: {list(sorted(label_list))}."
f"model labels: {sorted(label_name_to_id.keys())}, dataset labels: {sorted(label_list)}."
"\nIgnoring the model labels as a result.",
)
label_to_id = {label: i for i, label in enumerate(label_list)}
@@ -383,7 +383,7 @@ def main():
dataset_options.experimental_distribute.auto_shard_policy = tf.data.experimental.AutoShardPolicy.OFF
num_replicas = training_args.strategy.num_replicas_in_sync
tf_data = dict()
tf_data = {}
max_samples = {
"train": data_args.max_train_samples,
"validation": data_args.max_eval_samples,

View File

@@ -343,13 +343,13 @@ def main():
if "train" in datasets:
if not is_regression and config.label2id != PretrainedConfig(num_labels=num_labels).label2id:
label_name_to_id = config.label2id
if list(sorted(label_name_to_id.keys())) == list(sorted(label_list)):
if sorted(label_name_to_id.keys()) == sorted(label_list):
label_to_id = label_name_to_id # Use the model's labels
else:
logger.warning(
"Your model seems to have been trained with labels, but they don't match the dataset: ",
f"model labels: {list(sorted(label_name_to_id.keys()))}, dataset labels:"
f" {list(sorted(label_list))}.\nIgnoring the model labels as a result.",
f"model labels: {sorted(label_name_to_id.keys())}, dataset labels:"
f" {sorted(label_list)}.\nIgnoring the model labels as a result.",
)
label_to_id = {v: i for i, v in enumerate(label_list)}
elif not is_regression:
@@ -411,7 +411,7 @@ def main():
dataset_options.experimental_distribute.auto_shard_policy = tf.data.experimental.AutoShardPolicy.OFF
num_replicas = training_args.strategy.num_replicas_in_sync
tf_data = dict()
tf_data = {}
max_samples = {
"train": data_args.max_train_samples,
"validation": data_args.max_val_samples,