From 964a1b6b7de8a83414917b5344f85d79bb0be808 Mon Sep 17 00:00:00 2001 From: jeffhataws Date: Tue, 22 Apr 2025 03:13:25 -0700 Subject: [PATCH] Fix ValueError when eval_do_concat_batches=False with examples (#37621) https://github.com/huggingface/transformers/issues/37593 Co-authored-by: Marc Sun <57196510+SunMarc@users.noreply.github.com> --- examples/pytorch/text-classification/run_glue.py | 6 +++++- examples/pytorch/token-classification/run_ner.py | 3 +++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/examples/pytorch/text-classification/run_glue.py b/examples/pytorch/text-classification/run_glue.py index c293038085..e152cd9911 100755 --- a/examples/pytorch/text-classification/run_glue.py +++ b/examples/pytorch/text-classification/run_glue.py @@ -508,8 +508,12 @@ def main(): # predictions and label_ids field) and has to return a dictionary string to float. def compute_metrics(p: EvalPrediction): preds = p.predictions[0] if isinstance(p.predictions, tuple) else p.predictions + labels = p.label_ids + if not training_args.eval_do_concat_batches: + preds = np.concatenate(preds, axis=0) + labels = np.concatenate(p.label_ids, axis=0) preds = np.squeeze(preds) if is_regression else np.argmax(preds, axis=1) - result = metric.compute(predictions=preds, references=p.label_ids) + result = metric.compute(predictions=preds, references=labels) if len(result) > 1: result["combined_score"] = np.mean(list(result.values())).item() return result diff --git a/examples/pytorch/token-classification/run_ner.py b/examples/pytorch/token-classification/run_ner.py index bbe85cff2e..28c344de27 100755 --- a/examples/pytorch/token-classification/run_ner.py +++ b/examples/pytorch/token-classification/run_ner.py @@ -529,6 +529,9 @@ def main(): def compute_metrics(p): predictions, labels = p + if not training_args.eval_do_concat_batches: + predictions = np.hstack(predictions) + labels = np.hstack(labels) predictions = np.argmax(predictions, axis=2) # Remove ignored index (special tokens)