Update no trainer examples for QA and Semantic Segmentation (#18474)
* swag_no_trainer updated with gather_for_metrics * Removed unused variable samples_seen * Updated examples with gather_for_metrics
This commit is contained in:
committed by
GitHub
parent
d2704c4143
commit
0bf1e1aca4
@@ -698,7 +698,7 @@ def main():
|
|||||||
step = 0
|
step = 0
|
||||||
# create a numpy array and fill it with -100.
|
# create a numpy array and fill it with -100.
|
||||||
logits_concat = np.full((len(dataset), max_len), -100, dtype=np.float32)
|
logits_concat = np.full((len(dataset), max_len), -100, dtype=np.float32)
|
||||||
# Now since we have create an array now we will populate it with the outputs gathered using accelerator.gather
|
# Now that we have created the array, we populate it with the outputs gathered using accelerator.gather_for_metrics
|
||||||
for i, output_logit in enumerate(start_or_end_logits): # populate columns
|
for i, output_logit in enumerate(start_or_end_logits): # populate columns
|
||||||
# We have to fill it such that we have to take the whole tensor and replace it on the newly created array
|
# We have to fill it such that we have to take the whole tensor and replace it on the newly created array
|
||||||
# And after every iteration we have to change the step
|
# And after every iteration we have to change the step
|
||||||
@@ -876,11 +876,11 @@ def main():
|
|||||||
end_top_index = accelerator.pad_across_processes(end_top_index, dim=1, pad_index=-100)
|
end_top_index = accelerator.pad_across_processes(end_top_index, dim=1, pad_index=-100)
|
||||||
cls_logits = accelerator.pad_across_processes(cls_logits, dim=1, pad_index=-100)
|
cls_logits = accelerator.pad_across_processes(cls_logits, dim=1, pad_index=-100)
|
||||||
|
|
||||||
all_start_top_log_probs.append(accelerator.gather(start_top_log_probs).cpu().numpy())
|
all_start_top_log_probs.append(accelerator.gather_for_metrics(start_top_log_probs).cpu().numpy())
|
||||||
all_start_top_index.append(accelerator.gather(start_top_index).cpu().numpy())
|
all_start_top_index.append(accelerator.gather_for_metrics(start_top_index).cpu().numpy())
|
||||||
all_end_top_log_probs.append(accelerator.gather(end_top_log_probs).cpu().numpy())
|
all_end_top_log_probs.append(accelerator.gather_for_metrics(end_top_log_probs).cpu().numpy())
|
||||||
all_end_top_index.append(accelerator.gather(end_top_index).cpu().numpy())
|
all_end_top_index.append(accelerator.gather_for_metrics(end_top_index).cpu().numpy())
|
||||||
all_cls_logits.append(accelerator.gather(cls_logits).cpu().numpy())
|
all_cls_logits.append(accelerator.gather_for_metrics(cls_logits).cpu().numpy())
|
||||||
|
|
||||||
max_len = max([x.shape[1] for x in all_end_top_log_probs]) # Get the max_length of the tensor
|
max_len = max([x.shape[1] for x in all_end_top_log_probs]) # Get the max_length of the tensor
|
||||||
|
|
||||||
@@ -936,11 +936,11 @@ def main():
|
|||||||
end_top_index = accelerator.pad_across_processes(end_top_index, dim=1, pad_index=-100)
|
end_top_index = accelerator.pad_across_processes(end_top_index, dim=1, pad_index=-100)
|
||||||
cls_logits = accelerator.pad_across_processes(cls_logits, dim=1, pad_index=-100)
|
cls_logits = accelerator.pad_across_processes(cls_logits, dim=1, pad_index=-100)
|
||||||
|
|
||||||
all_start_top_log_probs.append(accelerator.gather(start_top_log_probs).cpu().numpy())
|
all_start_top_log_probs.append(accelerator.gather_for_metrics(start_top_log_probs).cpu().numpy())
|
||||||
all_start_top_index.append(accelerator.gather(start_top_index).cpu().numpy())
|
all_start_top_index.append(accelerator.gather_for_metrics(start_top_index).cpu().numpy())
|
||||||
all_end_top_log_probs.append(accelerator.gather(end_top_log_probs).cpu().numpy())
|
all_end_top_log_probs.append(accelerator.gather_for_metrics(end_top_log_probs).cpu().numpy())
|
||||||
all_end_top_index.append(accelerator.gather(end_top_index).cpu().numpy())
|
all_end_top_index.append(accelerator.gather_for_metrics(end_top_index).cpu().numpy())
|
||||||
all_cls_logits.append(accelerator.gather(cls_logits).cpu().numpy())
|
all_cls_logits.append(accelerator.gather_for_metrics(cls_logits).cpu().numpy())
|
||||||
|
|
||||||
max_len = max([x.shape[1] for x in all_end_top_log_probs]) # Get the max_length of the tensor
|
max_len = max([x.shape[1] for x in all_end_top_log_probs]) # Get the max_length of the tensor
|
||||||
|
|
||||||
|
|||||||
@@ -715,7 +715,7 @@ def main():
|
|||||||
step = 0
|
step = 0
|
||||||
# create a numpy array and fill it with -100.
|
# create a numpy array and fill it with -100.
|
||||||
logits_concat = np.full((len(dataset), max_len), -100, dtype=np.float64)
|
logits_concat = np.full((len(dataset), max_len), -100, dtype=np.float64)
|
||||||
# Now since we have create an array now we will populate it with the outputs gathered using accelerator.gather
|
# Now that we have created the array, we populate it with the outputs gathered using accelerator.gather_for_metrics
|
||||||
for i, output_logit in enumerate(start_or_end_logits): # populate columns
|
for i, output_logit in enumerate(start_or_end_logits): # populate columns
|
||||||
# We have to fill it such that we have to take the whole tensor and replace it on the newly created array
|
# We have to fill it such that we have to take the whole tensor and replace it on the newly created array
|
||||||
# And after every iteration we have to change the step
|
# And after every iteration we have to change the step
|
||||||
@@ -901,8 +901,8 @@ def main():
|
|||||||
start_logits = accelerator.pad_across_processes(start_logits, dim=1, pad_index=-100)
|
start_logits = accelerator.pad_across_processes(start_logits, dim=1, pad_index=-100)
|
||||||
end_logits = accelerator.pad_across_processes(end_logits, dim=1, pad_index=-100)
|
end_logits = accelerator.pad_across_processes(end_logits, dim=1, pad_index=-100)
|
||||||
|
|
||||||
all_start_logits.append(accelerator.gather(start_logits).cpu().numpy())
|
all_start_logits.append(accelerator.gather_for_metrics(start_logits).cpu().numpy())
|
||||||
all_end_logits.append(accelerator.gather(end_logits).cpu().numpy())
|
all_end_logits.append(accelerator.gather_for_metrics(end_logits).cpu().numpy())
|
||||||
|
|
||||||
max_len = max([x.shape[1] for x in all_start_logits]) # Get the max_length of the tensor
|
max_len = max([x.shape[1] for x in all_start_logits]) # Get the max_length of the tensor
|
||||||
|
|
||||||
@@ -940,8 +940,8 @@ def main():
|
|||||||
start_logits = accelerator.pad_across_processes(start_logits, dim=1, pad_index=-100)
|
start_logits = accelerator.pad_across_processes(start_logits, dim=1, pad_index=-100)
|
||||||
end_logits = accelerator.pad_across_processes(end_logits, dim=1, pad_index=-100)
|
end_logits = accelerator.pad_across_processes(end_logits, dim=1, pad_index=-100)
|
||||||
|
|
||||||
all_start_logits.append(accelerator.gather(start_logits).cpu().numpy())
|
all_start_logits.append(accelerator.gather_for_metrics(start_logits).cpu().numpy())
|
||||||
all_end_logits.append(accelerator.gather(end_logits).cpu().numpy())
|
all_end_logits.append(accelerator.gather_for_metrics(end_logits).cpu().numpy())
|
||||||
|
|
||||||
max_len = max([x.shape[1] for x in all_start_logits]) # Get the max_length of the tensor
|
max_len = max([x.shape[1] for x in all_start_logits]) # Get the max_length of the tensor
|
||||||
# concatenate the numpy array
|
# concatenate the numpy array
|
||||||
|
|||||||
@@ -605,7 +605,6 @@ def main():
|
|||||||
|
|
||||||
logger.info("***** Running evaluation *****")
|
logger.info("***** Running evaluation *****")
|
||||||
model.eval()
|
model.eval()
|
||||||
samples_seen = 0
|
|
||||||
for step, batch in enumerate(tqdm(eval_dataloader, disable=not accelerator.is_local_main_process)):
|
for step, batch in enumerate(tqdm(eval_dataloader, disable=not accelerator.is_local_main_process)):
|
||||||
with torch.no_grad():
|
with torch.no_grad():
|
||||||
outputs = model(**batch)
|
outputs = model(**batch)
|
||||||
@@ -615,15 +614,7 @@ def main():
|
|||||||
)
|
)
|
||||||
predictions = upsampled_logits.argmax(dim=1)
|
predictions = upsampled_logits.argmax(dim=1)
|
||||||
|
|
||||||
predictions, references = accelerator.gather((predictions, batch["labels"]))
|
predictions, references = accelerator.gather_for_metrics((predictions, batch["labels"]))
|
||||||
|
|
||||||
# If we are in a multiprocess environment, the last batch has duplicates
|
|
||||||
if accelerator.num_processes > 1:
|
|
||||||
if step == len(eval_dataloader) - 1:
|
|
||||||
predictions = predictions[: len(eval_dataloader.dataset) - samples_seen]
|
|
||||||
references = references[: len(eval_dataloader.dataset) - samples_seen]
|
|
||||||
else:
|
|
||||||
samples_seen += references.shape[0]
|
|
||||||
|
|
||||||
metric.add_batch(
|
metric.add_batch(
|
||||||
predictions=predictions,
|
predictions=predictions,
|
||||||
|
|||||||
Reference in New Issue
Block a user