Apply ruff flake8-comprehensions (#21694)
This commit is contained in:
@@ -293,7 +293,7 @@ def main():
|
||||
audio["array"], max_length=data_args.max_length_seconds, sample_rate=feature_extractor.sampling_rate
|
||||
)
|
||||
output_batch["input_values"].append(wav)
|
||||
output_batch["labels"] = [label for label in batch[data_args.label_column_name]]
|
||||
output_batch["labels"] = list(batch[data_args.label_column_name])
|
||||
|
||||
return output_batch
|
||||
|
||||
@@ -303,14 +303,14 @@ def main():
|
||||
for audio in batch[data_args.audio_column_name]:
|
||||
wav = audio["array"]
|
||||
output_batch["input_values"].append(wav)
|
||||
output_batch["labels"] = [label for label in batch[data_args.label_column_name]]
|
||||
output_batch["labels"] = list(batch[data_args.label_column_name])
|
||||
|
||||
return output_batch
|
||||
|
||||
# Prepare label mappings.
|
||||
# We'll include these in the model's config to get human readable labels in the Inference API.
|
||||
labels = raw_datasets["train"].features[data_args.label_column_name].names
|
||||
label2id, id2label = dict(), dict()
|
||||
label2id, id2label = {}, {}
|
||||
for i, label in enumerate(labels):
|
||||
label2id[label] = str(i)
|
||||
id2label[str(i)] = label
|
||||
|
||||
@@ -83,7 +83,7 @@ def can_convert_to_float(string):
|
||||
class Plot:
|
||||
def __init__(self, args):
|
||||
self.args = args
|
||||
self.result_dict = defaultdict(lambda: dict(bsz=[], seq_len=[], result={}))
|
||||
self.result_dict = defaultdict(lambda: {"bsz": [], "seq_len": [], "result": {}})
|
||||
|
||||
with open(self.args.csv_file, newline="") as csv_file:
|
||||
reader = csv.DictReader(csv_file)
|
||||
@@ -116,8 +116,8 @@ class Plot:
|
||||
axis.set_major_formatter(ScalarFormatter())
|
||||
|
||||
for model_name_idx, model_name in enumerate(self.result_dict.keys()):
|
||||
batch_sizes = sorted(list(set(self.result_dict[model_name]["bsz"])))
|
||||
sequence_lengths = sorted(list(set(self.result_dict[model_name]["seq_len"])))
|
||||
batch_sizes = sorted(set(self.result_dict[model_name]["bsz"]))
|
||||
sequence_lengths = sorted(set(self.result_dict[model_name]["seq_len"]))
|
||||
results = self.result_dict[model_name]["result"]
|
||||
|
||||
(x_axis_array, inner_loop_array) = (
|
||||
|
||||
@@ -397,7 +397,7 @@ def main():
|
||||
# Preprocessing the datasets.
|
||||
# We need to tokenize input captions and transform the images.
|
||||
def tokenize_captions(examples):
|
||||
captions = [caption for caption in examples[caption_column]]
|
||||
captions = list(examples[caption_column])
|
||||
text_inputs = tokenizer(captions, max_length=data_args.max_seq_length, padding="max_length", truncation=True)
|
||||
examples["input_ids"] = text_inputs.input_ids
|
||||
examples["attention_mask"] = text_inputs.attention_mask
|
||||
|
||||
@@ -250,7 +250,7 @@ def main():
|
||||
# Prepare label mappings.
|
||||
# We'll include these in the model's config to get human readable labels in the Inference API.
|
||||
labels = dataset["train"].features["labels"].names
|
||||
label2id, id2label = dict(), dict()
|
||||
label2id, id2label = {}, {}
|
||||
for i, label in enumerate(labels):
|
||||
label2id[label] = str(i)
|
||||
id2label[str(i)] = label
|
||||
|
||||
@@ -91,7 +91,7 @@ class DataTrainingArguments:
|
||||
)
|
||||
|
||||
def __post_init__(self):
|
||||
data_files = dict()
|
||||
data_files = {}
|
||||
if self.train_dir is not None:
|
||||
data_files["train"] = self.train_dir
|
||||
if self.validation_dir is not None:
|
||||
|
||||
@@ -104,7 +104,7 @@ class DataTrainingArguments:
|
||||
)
|
||||
|
||||
def __post_init__(self):
|
||||
data_files = dict()
|
||||
data_files = {}
|
||||
if self.train_dir is not None:
|
||||
data_files["train"] = self.train_dir
|
||||
if self.validation_dir is not None:
|
||||
|
||||
@@ -407,7 +407,7 @@ def main():
|
||||
)
|
||||
else:
|
||||
model = AutoModelForCausalLM.from_config(config)
|
||||
n_params = sum(dict((p.data_ptr(), p.numel()) for p in model.parameters()).values())
|
||||
n_params = sum({p.data_ptr(): p.numel() for p in model.parameters()}.values())
|
||||
logger.info(f"Training new model from scratch - Total size={n_params/2**20:.2f}M params")
|
||||
|
||||
# We resize the embeddings only when necessary to avoid index errors. If you are creating a model from scratch
|
||||
|
||||
@@ -457,14 +457,14 @@ def main():
|
||||
trainer.log_metrics("eval", metrics)
|
||||
trainer.save_metrics("eval", metrics)
|
||||
|
||||
kwargs = dict(
|
||||
finetuned_from=model_args.model_name_or_path,
|
||||
tasks="multiple-choice",
|
||||
dataset_tags="swag",
|
||||
dataset_args="regular",
|
||||
dataset="SWAG",
|
||||
language="en",
|
||||
)
|
||||
kwargs = {
|
||||
"finetuned_from": model_args.model_name_or_path,
|
||||
"tasks": "multiple-choice",
|
||||
"dataset_tags": "swag",
|
||||
"dataset_args": "regular",
|
||||
"dataset": "SWAG",
|
||||
"language": "en",
|
||||
}
|
||||
|
||||
if training_args.push_to_hub:
|
||||
trainer.push_to_hub(**kwargs)
|
||||
|
||||
@@ -430,7 +430,7 @@ def main():
|
||||
pixel_values.append(image)
|
||||
labels.append(target)
|
||||
|
||||
encoding = dict()
|
||||
encoding = {}
|
||||
encoding["pixel_values"] = torch.stack(pixel_values)
|
||||
encoding["labels"] = torch.stack(labels)
|
||||
|
||||
@@ -444,7 +444,7 @@ def main():
|
||||
pixel_values.append(image)
|
||||
labels.append(target)
|
||||
|
||||
encoding = dict()
|
||||
encoding = {}
|
||||
encoding["pixel_values"] = torch.stack(pixel_values)
|
||||
encoding["labels"] = torch.stack(labels)
|
||||
|
||||
|
||||
@@ -441,7 +441,7 @@ def main():
|
||||
pixel_values.append(image)
|
||||
labels.append(target)
|
||||
|
||||
encoding = dict()
|
||||
encoding = {}
|
||||
encoding["pixel_values"] = torch.stack(pixel_values)
|
||||
encoding["labels"] = torch.stack(labels)
|
||||
|
||||
@@ -455,7 +455,7 @@ def main():
|
||||
pixel_values.append(image)
|
||||
labels.append(target)
|
||||
|
||||
encoding = dict()
|
||||
encoding = {}
|
||||
encoding["pixel_values"] = torch.stack(pixel_values)
|
||||
encoding["labels"] = torch.stack(labels)
|
||||
|
||||
|
||||
@@ -349,7 +349,7 @@ def create_vocabulary_from_data(
|
||||
lambda vocab_1, vocab_2: set(vocab_1["vocab"][0]) | set(vocab_2["vocab"][0]), vocabs.values()
|
||||
)
|
||||
|
||||
vocab_dict = {v: k for k, v in enumerate(sorted(list(vocab_set)))}
|
||||
vocab_dict = {v: k for k, v in enumerate(sorted(vocab_set))}
|
||||
|
||||
# replace white space with delimiter token
|
||||
if word_delimiter_token is not None:
|
||||
|
||||
@@ -406,12 +406,12 @@ def main():
|
||||
):
|
||||
# Some have all caps in their config, some don't.
|
||||
label_name_to_id = {k.lower(): v for k, v in model.config.label2id.items()}
|
||||
if list(sorted(label_name_to_id.keys())) == list(sorted(label_list)):
|
||||
if sorted(label_name_to_id.keys()) == sorted(label_list):
|
||||
label_to_id = {i: int(label_name_to_id[label_list[i]]) for i in range(num_labels)}
|
||||
else:
|
||||
logger.warning(
|
||||
"Your model seems to have been trained with labels, but they don't match the dataset: ",
|
||||
f"model labels: {list(sorted(label_name_to_id.keys()))}, dataset labels: {list(sorted(label_list))}."
|
||||
f"model labels: {sorted(label_name_to_id.keys())}, dataset labels: {sorted(label_list)}."
|
||||
"\nIgnoring the model labels as a result.",
|
||||
)
|
||||
elif data_args.task_name is None and not is_regression:
|
||||
|
||||
@@ -339,7 +339,7 @@ def main():
|
||||
):
|
||||
# Some have all caps in their config, some don't.
|
||||
label_name_to_id = {k.lower(): v for k, v in model.config.label2id.items()}
|
||||
if list(sorted(label_name_to_id.keys())) == list(sorted(label_list)):
|
||||
if sorted(label_name_to_id.keys()) == sorted(label_list):
|
||||
logger.info(
|
||||
f"The configuration of the model provided the following label correspondence: {label_name_to_id}. "
|
||||
"Using it!"
|
||||
@@ -348,7 +348,7 @@ def main():
|
||||
else:
|
||||
logger.warning(
|
||||
"Your model seems to have been trained with labels, but they don't match the dataset: ",
|
||||
f"model labels: {list(sorted(label_name_to_id.keys()))}, dataset labels: {list(sorted(label_list))}."
|
||||
f"model labels: {sorted(label_name_to_id.keys())}, dataset labels: {sorted(label_list)}."
|
||||
"\nIgnoring the model labels as a result.",
|
||||
)
|
||||
elif args.task_name is None and not is_regression:
|
||||
|
||||
@@ -386,7 +386,7 @@ def main():
|
||||
|
||||
# Model has labels -> use them.
|
||||
if model.config.label2id != PretrainedConfig(num_labels=num_labels).label2id:
|
||||
if list(sorted(model.config.label2id.keys())) == list(sorted(label_list)):
|
||||
if sorted(model.config.label2id.keys()) == sorted(label_list):
|
||||
# Reorganize `label_list` to match the ordering of the model.
|
||||
if labels_are_int:
|
||||
label_to_id = {i: int(model.config.label2id[l]) for i, l in enumerate(label_list)}
|
||||
@@ -397,8 +397,8 @@ def main():
|
||||
else:
|
||||
logger.warning(
|
||||
"Your model seems to have been trained with labels, but they don't match the dataset: ",
|
||||
f"model labels: {list(sorted(model.config.label2id.keys()))}, dataset labels:"
|
||||
f" {list(sorted(label_list))}.\nIgnoring the model labels as a result.",
|
||||
f"model labels: {sorted(model.config.label2id.keys())}, dataset labels:"
|
||||
f" {sorted(label_list)}.\nIgnoring the model labels as a result.",
|
||||
)
|
||||
|
||||
# Set the correspondences label/ID inside the model config
|
||||
|
||||
@@ -425,7 +425,7 @@ def main():
|
||||
|
||||
# Model has labels -> use them.
|
||||
if model.config.label2id != PretrainedConfig(num_labels=num_labels).label2id:
|
||||
if list(sorted(model.config.label2id.keys())) == list(sorted(label_list)):
|
||||
if sorted(model.config.label2id.keys()) == sorted(label_list):
|
||||
# Reorganize `label_list` to match the ordering of the model.
|
||||
if labels_are_int:
|
||||
label_to_id = {i: int(model.config.label2id[l]) for i, l in enumerate(label_list)}
|
||||
@@ -436,8 +436,8 @@ def main():
|
||||
else:
|
||||
logger.warning(
|
||||
"Your model seems to have been trained with labels, but they don't match the dataset: ",
|
||||
f"model labels: {list(sorted(model.config.label2id.keys()))}, dataset labels:"
|
||||
f" {list(sorted(label_list))}.\nIgnoring the model labels as a result.",
|
||||
f"model labels: {sorted(model.config.label2id.keys())}, dataset labels:"
|
||||
f" {sorted(label_list)}.\nIgnoring the model labels as a result.",
|
||||
)
|
||||
|
||||
# Set the correspondences label/ID inside the model config
|
||||
|
||||
Reference in New Issue
Block a user