Fix quality due to ruff release
This commit is contained in:
@@ -319,15 +319,13 @@ class FlaxDataCollatorForBartDenoisingLM:
|
||||
sentence_ends = np.argwhere(end_sentence_mask)
|
||||
sentence_ends[:, 1] += 1
|
||||
example_has_multiple_sentences, num_sentences = np.unique(sentence_ends[:, 0], return_counts=True)
|
||||
num_sentences_map = {sent_idx: count for sent_idx, count in zip(example_has_multiple_sentences, num_sentences)}
|
||||
num_sentences_map = dict(zip(example_has_multiple_sentences, num_sentences))
|
||||
|
||||
num_to_permute = np.ceil(num_sentences * self.permute_sentence_ratio).astype(int)
|
||||
num_to_permute_map = {
|
||||
sent_idx: count for sent_idx, count in zip(example_has_multiple_sentences, num_to_permute)
|
||||
}
|
||||
num_to_permute_map = dict(zip(example_has_multiple_sentences, num_to_permute))
|
||||
|
||||
sentence_ends = np.split(sentence_ends[:, 1], np.unique(sentence_ends[:, 0], return_index=True)[1][1:])
|
||||
sentence_ends_map = {sent_idx: count for sent_idx, count in zip(example_has_multiple_sentences, sentence_ends)}
|
||||
sentence_ends_map = dict(zip(example_has_multiple_sentences, sentence_ends))
|
||||
|
||||
for i in range(input_ids.shape[0]):
|
||||
if i not in example_has_multiple_sentences:
|
||||
|
||||
@@ -124,7 +124,7 @@ class GLUETransformer(BaseTransformer):
|
||||
|
||||
results = {**{"val_loss": val_loss_mean}, **compute_metrics(self.hparams.task, preds, out_label_ids)}
|
||||
|
||||
ret = {k: v for k, v in results.items()}
|
||||
ret = dict(results.items())
|
||||
ret["log"] = results
|
||||
return ret, preds_list, out_label_list
|
||||
|
||||
|
||||
@@ -122,7 +122,7 @@ class NERTransformer(BaseTransformer):
|
||||
preds = np.argmax(preds, axis=2)
|
||||
out_label_ids = np.concatenate([x["target"] for x in outputs], axis=0)
|
||||
|
||||
label_map = {i: label for i, label in enumerate(self.labels)}
|
||||
label_map = dict(enumerate(self.labels))
|
||||
out_label_list = [[] for _ in range(out_label_ids.shape[0])]
|
||||
preds_list = [[] for _ in range(out_label_ids.shape[0])]
|
||||
|
||||
@@ -140,7 +140,7 @@ class NERTransformer(BaseTransformer):
|
||||
"f1": f1_score(out_label_list, preds_list),
|
||||
}
|
||||
|
||||
ret = {k: v for k, v in results.items()}
|
||||
ret = dict(results.items())
|
||||
ret["log"] = results
|
||||
return ret, preds_list, out_label_list
|
||||
|
||||
|
||||
@@ -34,7 +34,7 @@ task_score_names = {
|
||||
|
||||
def parse_search_arg(search):
|
||||
groups = search.split()
|
||||
entries = {k: vs for k, vs in (g.split("=") for g in groups)}
|
||||
entries = dict((g.split("=") for g in groups))
|
||||
entry_names = list(entries.keys())
|
||||
sets = [[f"--{k} {v}" for v in vs.split(":")] for k, vs in entries.items()]
|
||||
matrix = [list(x) for x in itertools.product(*sets)]
|
||||
@@ -105,7 +105,7 @@ def run_search():
|
||||
col_widths = {col: len(str(col)) for col in col_names}
|
||||
results = []
|
||||
for r in matrix:
|
||||
hparams = {k: v for k, v in (x.replace("--", "").split() for x in r)}
|
||||
hparams = dict((x.replace("--", "").split() for x in r))
|
||||
args_exp = " ".join(r).split()
|
||||
args_exp.extend(["--bs", str(args.bs)]) # in case we need to reduce its size due to CUDA OOM
|
||||
sys.argv = args_normal + args_exp
|
||||
|
||||
@@ -158,7 +158,7 @@ def main():
|
||||
|
||||
# Prepare CONLL-2003 task
|
||||
labels = token_classification_task.get_labels(data_args.labels)
|
||||
label_map: Dict[int, str] = {i: label for i, label in enumerate(labels)}
|
||||
label_map: Dict[int, str] = dict(enumerate(labels))
|
||||
num_labels = len(labels)
|
||||
|
||||
# Load pretrained model and tokenizer
|
||||
|
||||
@@ -144,7 +144,7 @@ def main():
|
||||
|
||||
# Prepare Token Classification task
|
||||
labels = token_classification_task.get_labels(data_args.labels)
|
||||
label_map: Dict[int, str] = {i: label for i, label in enumerate(labels)}
|
||||
label_map: Dict[int, str] = dict(enumerate(labels))
|
||||
num_labels = len(labels)
|
||||
|
||||
# Load pretrained model and tokenizer
|
||||
|
||||
@@ -407,7 +407,7 @@ def main():
|
||||
|
||||
# Set the correspondences label/ID inside the model config
|
||||
model.config.label2id = {l: i for i, l in enumerate(label_list)}
|
||||
model.config.id2label = {i: l for i, l in enumerate(label_list)}
|
||||
model.config.id2label = dict(enumerate(label_list))
|
||||
|
||||
# Map that sends B-Xxx label to its I-Xxx counterpart
|
||||
b_to_i_label = []
|
||||
|
||||
@@ -442,7 +442,7 @@ def main():
|
||||
|
||||
# Set the correspondences label/ID inside the model config
|
||||
model.config.label2id = {l: i for i, l in enumerate(label_list)}
|
||||
model.config.id2label = {i: l for i, l in enumerate(label_list)}
|
||||
model.config.id2label = dict(enumerate(label_list))
|
||||
|
||||
# Map that sends B-Xxx label to its I-Xxx counterpart
|
||||
b_to_i_label = []
|
||||
|
||||
@@ -294,11 +294,11 @@ def main():
|
||||
if isinstance(features[label_column_name].feature, ClassLabel):
|
||||
label_list = features[label_column_name].feature.names
|
||||
# No need to convert the labels since they are already ints.
|
||||
id2label = {k: v for k, v in enumerate(label_list)}
|
||||
id2label = dict(enumerate(label_list))
|
||||
label2id = {v: k for k, v in enumerate(label_list)}
|
||||
else:
|
||||
label_list = get_label_list(datasets["train"][label_column_name])
|
||||
id2label = {k: v for k, v in enumerate(label_list)}
|
||||
id2label = dict(enumerate(label_list))
|
||||
label2id = {v: k for k, v in enumerate(label_list)}
|
||||
num_labels = len(label_list)
|
||||
|
||||
|
||||
@@ -360,7 +360,7 @@ class GenerativeQAModule(BaseTransformer):
|
||||
|
||||
loss_tensors = self._step(batch)
|
||||
|
||||
logs = {name: loss for name, loss in zip(self.loss_names, loss_tensors)}
|
||||
logs = dict(zip(self.loss_names, loss_tensors))
|
||||
# tokens per batch
|
||||
tgt_pad_token_id = (
|
||||
self.tokenizer.generator.pad_token_id
|
||||
@@ -434,7 +434,7 @@ class GenerativeQAModule(BaseTransformer):
|
||||
target: List[str] = self.ids_to_clean_text(batch["decoder_input_ids"])
|
||||
# print(preds,target)
|
||||
loss_tensors = self._step(batch)
|
||||
base_metrics = {name: loss for name, loss in zip(self.loss_names, loss_tensors)}
|
||||
base_metrics = dict(zip(self.loss_names, loss_tensors))
|
||||
gen_metrics: Dict = self.calc_generative_metrics(preds, target)
|
||||
|
||||
summ_len = np.mean(lmap(len, generated_ids))
|
||||
|
||||
@@ -321,7 +321,7 @@ class GenerativeQAModule(BaseTransformer):
|
||||
preds: List[str] = self.ids_to_clean_text(generated_ids)
|
||||
target: List[str] = self.ids_to_clean_text(batch["decoder_input_ids"])
|
||||
loss_tensors = self._step(batch)
|
||||
base_metrics = {name: loss for name, loss in zip(self.loss_names, loss_tensors)}
|
||||
base_metrics = dict(zip(self.loss_names, loss_tensors))
|
||||
gen_metrics: Dict = self.calc_generative_metrics(preds, target)
|
||||
|
||||
summ_len = np.mean(lmap(len, generated_ids))
|
||||
|
||||
@@ -170,7 +170,7 @@ class SummarizationModule(BaseTransformer):
|
||||
def training_step(self, batch, batch_idx) -> Dict:
|
||||
loss_tensors = self._step(batch)
|
||||
|
||||
logs = {name: loss for name, loss in zip(self.loss_names, loss_tensors)}
|
||||
logs = dict(zip(self.loss_names, loss_tensors))
|
||||
# tokens per batch
|
||||
logs["tpb"] = batch["input_ids"].ne(self.pad).sum() + batch["labels"].ne(self.pad).sum()
|
||||
logs["bs"] = batch["input_ids"].shape[0]
|
||||
@@ -225,7 +225,7 @@ class SummarizationModule(BaseTransformer):
|
||||
preds: List[str] = self.ids_to_clean_text(generated_ids)
|
||||
target: List[str] = self.ids_to_clean_text(batch["labels"])
|
||||
loss_tensors = self._step(batch)
|
||||
base_metrics = {name: loss for name, loss in zip(self.loss_names, loss_tensors)}
|
||||
base_metrics = dict(zip(self.loss_names, loss_tensors))
|
||||
rouge: Dict = self.calc_generative_metrics(preds, target)
|
||||
summ_len = np.mean(lmap(len, generated_ids))
|
||||
base_metrics.update(gen_time=gen_time, gen_len=summ_len, preds=preds, target=target, **rouge)
|
||||
|
||||
@@ -303,7 +303,7 @@ def main():
|
||||
student_args.student_name_or_path, num_labels=len(class_names)
|
||||
)
|
||||
tokenizer = AutoTokenizer.from_pretrained(student_args.student_name_or_path, use_fast=data_args.use_fast_tokenizer)
|
||||
model.config.id2label = {i: label for i, label in enumerate(class_names)}
|
||||
model.config.id2label = dict(enumerate(class_names))
|
||||
model.config.label2id = {label: i for i, label in enumerate(class_names)}
|
||||
|
||||
# 4. train student on teacher predictions
|
||||
|
||||
Reference in New Issue
Block a user