Fix quality due to ruff release
This commit is contained in:
@@ -294,11 +294,11 @@ def main():
|
||||
if isinstance(features[label_column_name].feature, ClassLabel):
|
||||
label_list = features[label_column_name].feature.names
|
||||
# No need to convert the labels since they are already ints.
|
||||
-id2label = {k: v for k, v in enumerate(label_list)}
|
||||
+id2label = dict(enumerate(label_list))
|
||||
label2id = {v: k for k, v in enumerate(label_list)}
|
||||
else:
|
||||
label_list = get_label_list(datasets["train"][label_column_name])
|
||||
-id2label = {k: v for k, v in enumerate(label_list)}
|
||||
+id2label = dict(enumerate(label_list))
|
||||
label2id = {v: k for k, v in enumerate(label_list)}
|
||||
num_labels = len(label_list)
|
||||
|
||||
|
||||
@@ -360,7 +360,7 @@ class GenerativeQAModule(BaseTransformer):
|
||||
|
||||
loss_tensors = self._step(batch)
|
||||
|
||||
-logs = {name: loss for name, loss in zip(self.loss_names, loss_tensors)}
|
||||
+logs = dict(zip(self.loss_names, loss_tensors))
|
||||
# tokens per batch
|
||||
tgt_pad_token_id = (
|
||||
self.tokenizer.generator.pad_token_id
|
||||
@@ -434,7 +434,7 @@ class GenerativeQAModule(BaseTransformer):
|
||||
target: List[str] = self.ids_to_clean_text(batch["decoder_input_ids"])
|
||||
# print(preds,target)
|
||||
loss_tensors = self._step(batch)
|
||||
-base_metrics = {name: loss for name, loss in zip(self.loss_names, loss_tensors)}
|
||||
+base_metrics = dict(zip(self.loss_names, loss_tensors))
|
||||
gen_metrics: Dict = self.calc_generative_metrics(preds, target)
|
||||
|
||||
summ_len = np.mean(lmap(len, generated_ids))
|
||||
|
||||
@@ -321,7 +321,7 @@ class GenerativeQAModule(BaseTransformer):
|
||||
preds: List[str] = self.ids_to_clean_text(generated_ids)
|
||||
target: List[str] = self.ids_to_clean_text(batch["decoder_input_ids"])
|
||||
loss_tensors = self._step(batch)
|
||||
-base_metrics = {name: loss for name, loss in zip(self.loss_names, loss_tensors)}
|
||||
+base_metrics = dict(zip(self.loss_names, loss_tensors))
|
||||
gen_metrics: Dict = self.calc_generative_metrics(preds, target)
|
||||
|
||||
summ_len = np.mean(lmap(len, generated_ids))
|
||||
|
||||
@@ -170,7 +170,7 @@ class SummarizationModule(BaseTransformer):
|
||||
def training_step(self, batch, batch_idx) -> Dict:
|
||||
loss_tensors = self._step(batch)
|
||||
|
||||
-logs = {name: loss for name, loss in zip(self.loss_names, loss_tensors)}
|
||||
+logs = dict(zip(self.loss_names, loss_tensors))
|
||||
# tokens per batch
|
||||
logs["tpb"] = batch["input_ids"].ne(self.pad).sum() + batch["labels"].ne(self.pad).sum()
|
||||
logs["bs"] = batch["input_ids"].shape[0]
|
||||
@@ -225,7 +225,7 @@ class SummarizationModule(BaseTransformer):
|
||||
preds: List[str] = self.ids_to_clean_text(generated_ids)
|
||||
target: List[str] = self.ids_to_clean_text(batch["labels"])
|
||||
loss_tensors = self._step(batch)
|
||||
-base_metrics = {name: loss for name, loss in zip(self.loss_names, loss_tensors)}
|
||||
+base_metrics = dict(zip(self.loss_names, loss_tensors))
|
||||
rouge: Dict = self.calc_generative_metrics(preds, target)
|
||||
summ_len = np.mean(lmap(len, generated_ids))
|
||||
base_metrics.update(gen_time=gen_time, gen_len=summ_len, preds=preds, target=target, **rouge)
|
||||
|
||||
@@ -303,7 +303,7 @@ def main():
|
||||
student_args.student_name_or_path, num_labels=len(class_names)
|
||||
)
|
||||
tokenizer = AutoTokenizer.from_pretrained(student_args.student_name_or_path, use_fast=data_args.use_fast_tokenizer)
|
||||
-model.config.id2label = {i: label for i, label in enumerate(class_names)}
|
||||
+model.config.id2label = dict(enumerate(class_names))
|
||||
model.config.label2id = {label: i for i, label in enumerate(class_names)}
|
||||
|
||||
# 4. train student on teacher predictions
|
||||
|
||||
Reference in New Issue
Block a user