Trainer - add cache clearing and the option for batched eval metrics computation (#28769)
* Added cache clearing for GPU efficiency.
* Added cache clearing for GPU efficiency.
* Added batch_eval_metrics capability
* Ran make fixup
* Fixed bug
* Fixed whitespace issue
* Fixed outdated condition
* Updated docstrings with instructions for batch_eval_metrics. Updated end of dataloader logic
* Added first version of batch_eval_metrics Trainer test
* Fixed batch_eval_metrics Trainer tests for both eval and predict
* Fixed batch_eval_metrics behavior for new Trainer variables
* Fixed batch_eval_metrics Trainer tests
* Ran fixup
This commit is contained in:
@@ -230,6 +230,27 @@ class AlmostAccuracy:
|
||||
return {"accuracy": true.astype(np.float32).mean().item()}
|
||||
|
||||
|
||||
class AlmostAccuracyBatched:
    """Batch-by-batch "almost accuracy" metric.

    A prediction counts as correct when its absolute difference to the label
    is at most ``thresh``. The object is called once per batch; it stores the
    accuracy of every batch in ``self.batch_acc`` and only reports the
    aggregated value when asked to.

    Args:
        thresh: Largest absolute difference for which a prediction is still
            counted as correct.
    """

    def __init__(self, thresh=0.25):
        self.thresh = thresh
        # One entry per sample seen so far. Each sample carries the accuracy of
        # the batch it belonged to, so the plain mean taken in `__call__`
        # weights every batch by its size.
        self.batch_acc = []

    def __call__(self, eval_pred, compute_result):
        """Accumulate one batch and optionally return the aggregated accuracy.

        Args:
            eval_pred: A ``(predictions, labels)`` pair. Either element may be
                a tuple, in which case only its first item is used. The values
                must be tensors that ``torch.abs`` accepts.
            compute_result: When truthy, return the aggregated metric and
                reset the accumulated state.

        Returns:
            ``{"accuracy": float}`` when ``compute_result`` is truthy,
            otherwise ``None``.
        """
        predictions, labels = eval_pred
        if isinstance(predictions, tuple):
            predictions = predictions[0]
        if isinstance(labels, tuple):
            labels = labels[0]

        batch_size = len(predictions)
        within_thresh = torch.abs(predictions - labels) <= self.thresh
        # `.float()` casts on whatever device the tensor already lives on; the
        # legacy `type(torch.FloatTensor)` cast targets the CPU tensor class
        # and would copy the whole mask to the host before reducing it.
        acc = within_thresh.float().mean().item()
        self.batch_acc.extend([acc] * batch_size)

        if not compute_result:
            return None

        result = {"accuracy": np.mean(self.batch_acc).item()}
        # Start from a clean slate for the next evaluation run.
        self.batch_acc = []
        return result
|
||||
|
||||
|
||||
class RegressionModelConfig(PretrainedConfig):
|
||||
def __init__(self, a=0, b=0, double_output=False, random_torch=True, **kwargs):
|
||||
super().__init__(**kwargs)
|
||||
@@ -1524,6 +1545,49 @@ class TrainerIntegrationTest(TestCasePlus, TrainerIntegrationCommon):
|
||||
expected_acc = AlmostAccuracy()((pred + 1, y))["accuracy"]
|
||||
self.assertAlmostEqual(results["eval_accuracy"], expected_acc)
|
||||
|
||||
def test_evaluate_with_batch_eval_metrics(self):
    """Evaluate with `batch_eval_metrics=True` and compare against full-dataset values.

    Each scenario builds a regression trainer with `a=1.5`, `b=2.5` and an
    `AlmostAccuracyBatched` metric, runs `evaluate()`, and checks that the
    reported loss and accuracy match the values computed directly on the
    whole evaluation set with `AlmostAccuracy`.
    """

    def run_scenario(logit_shift=None, **extra_trainer_kwargs):
        # A fresh metric object per trainer so no state leaks between scenarios.
        regression_trainer = get_regression_trainer(
            a=1.5,
            b=2.5,
            compute_metrics=AlmostAccuracyBatched(),
            batch_eval_metrics=True,
            **extra_trainer_kwargs,
        )
        metrics = regression_trainer.evaluate()

        inputs = regression_trainer.eval_dataset.x
        targets = regression_trainer.eval_dataset.ys[0]
        model_out = 1.5 * inputs + 2.5

        reference_loss = ((model_out - targets) ** 2).mean()
        self.assertAlmostEqual(metrics["eval_loss"], reference_loss)

        # The loss is computed on the raw outputs; only the metric sees the shifted ones.
        metric_input = model_out if logit_shift is None else model_out + logit_shift
        reference_acc = AlmostAccuracy()((metric_input, targets))["accuracy"]
        self.assertAlmostEqual(metrics["eval_accuracy"], reference_acc)

    run_scenario()

    # With a number of elements not a round multiple of the batch size
    run_scenario(eval_len=66)

    # With logits preprocess
    run_scenario(
        logit_shift=1,
        preprocess_logits_for_metrics=lambda logits, labels: logits + 1,
    )
|
||||
|
||||
def test_evaluate_with_jit(self):
|
||||
trainer = get_regression_trainer(a=1.5, b=2.5, compute_metrics=AlmostAccuracy(), jit_mode_eval=True)
|
||||
results = trainer.evaluate()
|
||||
@@ -1651,6 +1715,58 @@ class TrainerIntegrationTest(TestCasePlus, TrainerIntegrationCommon):
|
||||
self.assertTrue(np.array_equal(labels[0], trainer.eval_dataset.ys[0]))
|
||||
self.assertTrue(np.array_equal(labels[1], trainer.eval_dataset.ys[1]))
|
||||
|
||||
def test_predict_with_batch_eval_metrics(self):
    """Predict with `batch_eval_metrics=True` and verify predictions, labels and metrics.

    Covers the default evaluation set, a 66-element set, a model with two
    outputs, and a model with two outputs plus two label columns.
    """
    # Default set first, then a number of elements not a round multiple of the batch size
    for extra_trainer_kwargs in ({}, {"eval_len": 66}):
        regression_trainer = get_regression_trainer(
            a=1.5,
            b=2.5,
            compute_metrics=AlmostAccuracyBatched(),
            batch_eval_metrics=True,
            **extra_trainer_kwargs,
        )
        output = regression_trainer.predict(regression_trainer.eval_dataset)
        predicted = output.predictions
        inputs = regression_trainer.eval_dataset.x
        targets = regression_trainer.eval_dataset.ys[0]

        self.assertTrue(np.allclose(predicted, 1.5 * inputs + 2.5))
        reference_acc = AlmostAccuracy()((predicted, targets))["accuracy"]
        self.assertAlmostEqual(output.metrics["test_accuracy"], reference_acc)

    # With more than one output of the model
    regression_trainer = get_regression_trainer(
        a=1.5,
        b=2.5,
        double_output=True,
        compute_metrics=AlmostAccuracyBatched(),
        batch_eval_metrics=True,
    )
    predicted = regression_trainer.predict(regression_trainer.eval_dataset).predictions
    inputs = regression_trainer.eval_dataset.x
    self.assertEqual(len(predicted), 2)
    self.assertTrue(np.allclose(predicted[0], 1.5 * inputs + 2.5))
    self.assertTrue(np.allclose(predicted[1], 1.5 * inputs + 2.5))

    # With more than one output/label of the model
    regression_trainer = get_regression_trainer(
        a=1.5,
        b=2.5,
        double_output=True,
        label_names=["labels", "labels_2"],
        compute_metrics=AlmostAccuracyBatched(),
        batch_eval_metrics=True,
    )
    output = regression_trainer.predict(regression_trainer.eval_dataset)
    predicted = output.predictions
    label_ids = output.label_ids
    inputs = regression_trainer.eval_dataset.x
    self.assertEqual(len(predicted), 2)
    self.assertTrue(np.allclose(predicted[0], 1.5 * inputs + 2.5))
    self.assertTrue(np.allclose(predicted[1], 1.5 * inputs + 2.5))
    self.assertTrue(np.array_equal(label_ids[0], regression_trainer.eval_dataset.ys[0]))
    self.assertTrue(np.array_equal(label_ids[1], regression_trainer.eval_dataset.ys[1]))
|
||||
|
||||
def test_predict_with_jit(self):
|
||||
trainer = get_regression_trainer(a=1.5, b=2.5, jit_mode_eval=True)
|
||||
preds = trainer.predict(trainer.eval_dataset).predictions
|
||||
|
||||
Reference in New Issue
Block a user