Trainer - add cache clearing and the option for batched eval metrics computation (#28769)

* Added cache clearing for GPU efficiency.

* Added cache clearing for GPU efficiency.

* Added batch_eval_metrics capability

* Ran make fixup

* Fixed bug

* Fixed whitespace issue

* Fixed outdated condition

* Updated docstrings with instructions for batch_eval_metrics. Updated end of dataloader logic

* Added first version of batch_eval_metrics Trainer test

* Fixed batch_eval_metrics Trainer tests for both eval and predict

* Fixed batch_eval_metrics behavior for new Trainer variables

* Fixed batch_eval_metrics Trainer tests

* Ran fixup
This commit is contained in:
Nate Cibik
2024-05-06 05:23:40 -07:00
committed by GitHub
parent e076953079
commit df475bf8e6
3 changed files with 205 additions and 11 deletions

View File

@@ -230,6 +230,27 @@ class AlmostAccuracy:
return {"accuracy": true.astype(np.float32).mean().item()}
class AlmostAccuracyBatched:
    """Batch-wise accuracy metric: a prediction counts as correct when it is within `thresh` of its label.

    The object is called once per batch. Each call stores the batch accuracy
    once per sample, so the final mean is weighted by batch size. When
    `compute_result` is true the aggregated accuracy is returned and the
    internal state is cleared.
    """

    def __init__(self, thresh=0.25):
        # Maximum absolute difference still counted as a correct prediction.
        self.thresh = thresh
        # One entry per sample seen since the last reset; each entry holds the
        # accuracy of the batch that sample belonged to.
        self.batch_acc = []

    def __call__(self, eval_pred, compute_result):
        """Accumulate the accuracy of one batch.

        Args:
            eval_pred: A `(predictions, labels)` pair. If either element is a
                tuple, only its first item is used.
            compute_result: When true, return the aggregated result and reset
                the accumulated state.

        Returns:
            `{"accuracy": <float>}` when `compute_result` is true, otherwise `None`.
        """
        # Unwrap multi-output / multi-label tuples down to their first entry.
        preds, targets = (item[0] if isinstance(item, tuple) else item for item in eval_pred)

        n_samples = len(preds)
        within_thresh = torch.abs(preds - targets) <= self.thresh
        batch_accuracy = within_thresh.type(torch.FloatTensor).mean().item()

        # Repeat the batch accuracy once per sample so that a plain mean over
        # the list weights every batch by its size.
        self.batch_acc.extend(batch_accuracy for _ in range(n_samples))

        if not compute_result:
            return None

        summary = {"accuracy": np.mean(self.batch_acc).item()}
        self.batch_acc = []
        return summary
class RegressionModelConfig(PretrainedConfig):
def __init__(self, a=0, b=0, double_output=False, random_torch=True, **kwargs):
super().__init__(**kwargs)
@@ -1524,6 +1545,49 @@ class TrainerIntegrationTest(TestCasePlus, TrainerIntegrationCommon):
expected_acc = AlmostAccuracy()((pred + 1, y))["accuracy"]
self.assertAlmostEqual(results["eval_accuracy"], expected_acc)
def test_evaluate_with_batch_eval_metrics(self):
    """Check `trainer.evaluate()` loss and accuracy when `batch_eval_metrics=True`.

    Three trainer configurations are exercised. For each one the reported
    `eval_loss` is compared to the mean squared error of `1.5 * x + 2.5`
    against the labels, and `eval_accuracy` is compared to what the
    non-batched `AlmostAccuracy` metric yields on the same data.
    """
    # Each scenario pairs the extra trainer kwargs with a function mapping the
    # raw predictions to what the metric is expected to receive.
    scenarios = (
        # Default evaluation set
        ({}, lambda p: p),
        # With a number of elements not a round multiple of the batch size
        ({"eval_len": 66}, lambda p: p),
        # With logits preprocess
        ({"preprocess_logits_for_metrics": lambda logits, labels: logits + 1}, lambda p: p + 1),
    )

    for extra_kwargs, as_seen_by_metric in scenarios:
        trainer = get_regression_trainer(
            a=1.5,
            b=2.5,
            compute_metrics=AlmostAccuracyBatched(),
            batch_eval_metrics=True,
            **extra_kwargs,
        )
        metrics = trainer.evaluate()

        inputs, targets = trainer.eval_dataset.x, trainer.eval_dataset.ys[0]
        model_out = 1.5 * inputs + 2.5

        reference_loss = ((model_out - targets) ** 2).mean()
        self.assertAlmostEqual(metrics["eval_loss"], reference_loss)

        reference_acc = AlmostAccuracy()((as_seen_by_metric(model_out), targets))["accuracy"]
        self.assertAlmostEqual(metrics["eval_accuracy"], reference_acc)
def test_evaluate_with_jit(self):
trainer = get_regression_trainer(a=1.5, b=2.5, compute_metrics=AlmostAccuracy(), jit_mode_eval=True)
results = trainer.evaluate()
@@ -1651,6 +1715,58 @@ class TrainerIntegrationTest(TestCasePlus, TrainerIntegrationCommon):
self.assertTrue(np.array_equal(labels[0], trainer.eval_dataset.ys[0]))
self.assertTrue(np.array_equal(labels[1], trainer.eval_dataset.ys[1]))
def test_predict_with_batch_eval_metrics(self):
    """Check `trainer.predict()` predictions, labels and metrics when `batch_eval_metrics=True`.

    Covers the default evaluation set, an evaluation set of 66 elements, a
    model with two outputs, and a model with two outputs and two label columns.
    """

    def build_trainer(**overrides):
        # Every scenario shares the same regression parameters and gets its
        # own fresh batched metric instance.
        return get_regression_trainer(
            a=1.5, b=2.5, compute_metrics=AlmostAccuracyBatched(), batch_eval_metrics=True, **overrides
        )

    # Single-output scenarios: the default set, then one with a number of
    # elements not a round multiple of the batch size.
    for overrides in ({}, {"eval_len": 66}):
        trainer = build_trainer(**overrides)
        output = trainer.predict(trainer.eval_dataset)
        preds = output.predictions
        x, y = trainer.eval_dataset.x, trainer.eval_dataset.ys[0]
        self.assertTrue(np.allclose(preds, 1.5 * x + 2.5))
        reference_acc = AlmostAccuracy()((preds, y))["accuracy"]
        self.assertAlmostEqual(output.metrics["test_accuracy"], reference_acc)

    # With more than one output of the model
    trainer = build_trainer(double_output=True)
    preds = trainer.predict(trainer.eval_dataset).predictions
    x = trainer.eval_dataset.x
    self.assertEqual(len(preds), 2)
    for head in range(2):
        self.assertTrue(np.allclose(preds[head], 1.5 * x + 2.5))

    # With more than one output/label of the model
    trainer = build_trainer(double_output=True, label_names=["labels", "labels_2"])
    output = trainer.predict(trainer.eval_dataset)
    preds, labels = output.predictions, output.label_ids
    x = trainer.eval_dataset.x
    self.assertEqual(len(preds), 2)
    for head in range(2):
        self.assertTrue(np.allclose(preds[head], 1.5 * x + 2.5))
    for column in range(2):
        self.assertTrue(np.array_equal(labels[column], trainer.eval_dataset.ys[column]))
def test_predict_with_jit(self):
trainer = get_regression_trainer(a=1.5, b=2.5, jit_mode_eval=True)
preds = trainer.predict(trainer.eval_dataset).predictions