Fix quality due to ruff release

This commit is contained in:
Sylvain
2023-03-22 20:45:08 -04:00
parent 73fdc8c5b4
commit ef28df0572
28 changed files with 40 additions and 58 deletions

View File

@@ -294,11 +294,11 @@ def main():
if isinstance(features[label_column_name].feature, ClassLabel):
label_list = features[label_column_name].feature.names
# No need to convert the labels since they are already ints.
id2label = {k: v for k, v in enumerate(label_list)}
id2label = dict(enumerate(label_list))
label2id = {v: k for k, v in enumerate(label_list)}
else:
label_list = get_label_list(datasets["train"][label_column_name])
id2label = {k: v for k, v in enumerate(label_list)}
id2label = dict(enumerate(label_list))
label2id = {v: k for k, v in enumerate(label_list)}
num_labels = len(label_list)

View File

@@ -360,7 +360,7 @@ class GenerativeQAModule(BaseTransformer):
loss_tensors = self._step(batch)
logs = {name: loss for name, loss in zip(self.loss_names, loss_tensors)}
logs = dict(zip(self.loss_names, loss_tensors))
# tokens per batch
tgt_pad_token_id = (
self.tokenizer.generator.pad_token_id
@@ -434,7 +434,7 @@ class GenerativeQAModule(BaseTransformer):
target: List[str] = self.ids_to_clean_text(batch["decoder_input_ids"])
# print(preds,target)
loss_tensors = self._step(batch)
base_metrics = {name: loss for name, loss in zip(self.loss_names, loss_tensors)}
base_metrics = dict(zip(self.loss_names, loss_tensors))
gen_metrics: Dict = self.calc_generative_metrics(preds, target)
summ_len = np.mean(lmap(len, generated_ids))

View File

@@ -321,7 +321,7 @@ class GenerativeQAModule(BaseTransformer):
preds: List[str] = self.ids_to_clean_text(generated_ids)
target: List[str] = self.ids_to_clean_text(batch["decoder_input_ids"])
loss_tensors = self._step(batch)
base_metrics = {name: loss for name, loss in zip(self.loss_names, loss_tensors)}
base_metrics = dict(zip(self.loss_names, loss_tensors))
gen_metrics: Dict = self.calc_generative_metrics(preds, target)
summ_len = np.mean(lmap(len, generated_ids))

View File

@@ -170,7 +170,7 @@ class SummarizationModule(BaseTransformer):
def training_step(self, batch, batch_idx) -> Dict:
loss_tensors = self._step(batch)
logs = {name: loss for name, loss in zip(self.loss_names, loss_tensors)}
logs = dict(zip(self.loss_names, loss_tensors))
# tokens per batch
logs["tpb"] = batch["input_ids"].ne(self.pad).sum() + batch["labels"].ne(self.pad).sum()
logs["bs"] = batch["input_ids"].shape[0]
@@ -225,7 +225,7 @@ class SummarizationModule(BaseTransformer):
preds: List[str] = self.ids_to_clean_text(generated_ids)
target: List[str] = self.ids_to_clean_text(batch["labels"])
loss_tensors = self._step(batch)
base_metrics = {name: loss for name, loss in zip(self.loss_names, loss_tensors)}
base_metrics = dict(zip(self.loss_names, loss_tensors))
rouge: Dict = self.calc_generative_metrics(preds, target)
summ_len = np.mean(lmap(len, generated_ids))
base_metrics.update(gen_time=gen_time, gen_len=summ_len, preds=preds, target=target, **rouge)

View File

@@ -303,7 +303,7 @@ def main():
student_args.student_name_or_path, num_labels=len(class_names)
)
tokenizer = AutoTokenizer.from_pretrained(student_args.student_name_or_path, use_fast=data_args.use_fast_tokenizer)
model.config.id2label = {i: label for i, label in enumerate(class_names)}
model.config.id2label = dict(enumerate(class_names))
model.config.label2id = {label: i for i, label in enumerate(class_names)}
# 4. train student on teacher predictions