Apply ruff flake8-comprehensions (#21694)
This commit is contained in:
@@ -83,7 +83,7 @@ def can_convert_to_float(string):
|
||||
class Plot:
|
||||
def __init__(self, args):
|
||||
self.args = args
|
||||
self.result_dict = defaultdict(lambda: dict(bsz=[], seq_len=[], result={}))
|
||||
self.result_dict = defaultdict(lambda: {"bsz": [], "seq_len": [], "result": {}})
|
||||
|
||||
with open(self.args.csv_file, newline="") as csv_file:
|
||||
reader = csv.DictReader(csv_file)
|
||||
@@ -116,8 +116,8 @@ class Plot:
|
||||
axis.set_major_formatter(ScalarFormatter())
|
||||
|
||||
for model_name_idx, model_name in enumerate(self.result_dict.keys()):
|
||||
batch_sizes = sorted(list(set(self.result_dict[model_name]["bsz"])))
|
||||
sequence_lengths = sorted(list(set(self.result_dict[model_name]["seq_len"])))
|
||||
batch_sizes = sorted(set(self.result_dict[model_name]["bsz"]))
|
||||
sequence_lengths = sorted(set(self.result_dict[model_name]["seq_len"]))
|
||||
results = self.result_dict[model_name]["result"]
|
||||
|
||||
(x_axis_array, inner_loop_array) = (
|
||||
|
||||
@@ -300,7 +300,7 @@ def main():
|
||||
# Prepare label mappings.
|
||||
# We'll include these in the model's config to get human readable labels in the Inference API.
|
||||
labels = dataset["train"].features["labels"].names
|
||||
label2id, id2label = dict(), dict()
|
||||
label2id, id2label = {}, {}
|
||||
for i, label in enumerate(labels):
|
||||
label2id[label] = str(i)
|
||||
id2label[str(i)] = label
|
||||
|
||||
@@ -600,7 +600,7 @@ def main():
|
||||
|
||||
if training_args.output_dir is not None:
|
||||
output_eval_file = os.path.join(training_args.output_dir, "all_results.json")
|
||||
results_dict = dict()
|
||||
results_dict = {}
|
||||
results_dict["train_loss"] = train_loss
|
||||
results_dict["train_perplexity"] = train_perplexity
|
||||
results_dict["eval_loss"] = validation_loss
|
||||
|
||||
@@ -623,7 +623,7 @@ def main():
|
||||
|
||||
if training_args.output_dir is not None:
|
||||
output_eval_file = os.path.join(training_args.output_dir, "all_results.json")
|
||||
results_dict = dict()
|
||||
results_dict = {}
|
||||
results_dict["train_loss"] = train_loss
|
||||
results_dict["train_perplexity"] = train_perplexity
|
||||
results_dict["eval_loss"] = validation_loss
|
||||
|
||||
@@ -464,7 +464,7 @@ def main():
|
||||
|
||||
return tokenized_examples
|
||||
|
||||
processed_datasets = dict()
|
||||
processed_datasets = {}
|
||||
if training_args.do_train:
|
||||
if "train" not in datasets:
|
||||
raise ValueError("--do_train requires a train dataset")
|
||||
|
||||
@@ -310,12 +310,12 @@ def main():
|
||||
if config.label2id != PretrainedConfig(num_labels=num_labels).label2id and not is_regression:
|
||||
# Some have all caps in their config, some don't.
|
||||
label_name_to_id = {k.lower(): v for k, v in config.label2id.items()}
|
||||
if list(sorted(label_name_to_id.keys())) == list(sorted(label_list)):
|
||||
if sorted(label_name_to_id.keys()) == sorted(label_list):
|
||||
label_to_id = {i: int(label_name_to_id[label_list[i]]) for i in range(num_labels)}
|
||||
else:
|
||||
logger.warning(
|
||||
"Your model seems to have been trained with labels, but they don't match the dataset: ",
|
||||
f"model labels: {list(sorted(label_name_to_id.keys()))}, dataset labels: {list(sorted(label_list))}."
|
||||
f"model labels: {sorted(label_name_to_id.keys())}, dataset labels: {sorted(label_list)}."
|
||||
"\nIgnoring the model labels as a result.",
|
||||
)
|
||||
label_to_id = {label: i for i, label in enumerate(label_list)}
|
||||
@@ -383,7 +383,7 @@ def main():
|
||||
dataset_options.experimental_distribute.auto_shard_policy = tf.data.experimental.AutoShardPolicy.OFF
|
||||
num_replicas = training_args.strategy.num_replicas_in_sync
|
||||
|
||||
tf_data = dict()
|
||||
tf_data = {}
|
||||
max_samples = {
|
||||
"train": data_args.max_train_samples,
|
||||
"validation": data_args.max_eval_samples,
|
||||
|
||||
@@ -343,13 +343,13 @@ def main():
|
||||
if "train" in datasets:
|
||||
if not is_regression and config.label2id != PretrainedConfig(num_labels=num_labels).label2id:
|
||||
label_name_to_id = config.label2id
|
||||
if list(sorted(label_name_to_id.keys())) == list(sorted(label_list)):
|
||||
if sorted(label_name_to_id.keys()) == sorted(label_list):
|
||||
label_to_id = label_name_to_id # Use the model's labels
|
||||
else:
|
||||
logger.warning(
|
||||
"Your model seems to have been trained with labels, but they don't match the dataset: ",
|
||||
f"model labels: {list(sorted(label_name_to_id.keys()))}, dataset labels:"
|
||||
f" {list(sorted(label_list))}.\nIgnoring the model labels as a result.",
|
||||
f"model labels: {sorted(label_name_to_id.keys())}, dataset labels:"
|
||||
f" {sorted(label_list)}.\nIgnoring the model labels as a result.",
|
||||
)
|
||||
label_to_id = {v: i for i, v in enumerate(label_list)}
|
||||
elif not is_regression:
|
||||
@@ -411,7 +411,7 @@ def main():
|
||||
dataset_options.experimental_distribute.auto_shard_policy = tf.data.experimental.AutoShardPolicy.OFF
|
||||
num_replicas = training_args.strategy.num_replicas_in_sync
|
||||
|
||||
tf_data = dict()
|
||||
tf_data = {}
|
||||
max_samples = {
|
||||
"train": data_args.max_train_samples,
|
||||
"validation": data_args.max_val_samples,
|
||||
|
||||
Reference in New Issue
Block a user