[Trainer] memory tracker metrics (#10225)
* memory tracker metrics
* go back to eval for some consistency
* handle no-gpu case
* deal with stackable eval calls
* restore callback order
* style
* simplify the API
* add test
* docs
* consistently use eval_ prefix
* improve docs
* Update src/transformers/trainer_utils.py

Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com>

* rename method
* style

Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com>
This commit is contained in:
@@ -884,6 +884,34 @@ class TrainerIntegrationTest(unittest.TestCase):
|
||||
trainer.train()
|
||||
self.assertTrue(isinstance(trainer.state.total_flos, float))
|
||||
|
||||
def check_mem_metrics(self, trainer, check_func):
    """Run train / evaluate / predict and apply ``check_func`` to each memory-metric key.

    Args:
        trainer: object exposing ``train()``, ``evaluate()`` and ``predict()``.
            ``train()`` and ``predict()`` results are read through ``.metrics``;
            the ``evaluate()`` result is used as the metrics mapping directly.
        check_func: callable invoked as ``check_func(key, metrics)`` for every
            expected key (the caller in this file passes ``self.assertIn`` or
            ``self.assertNotIn``).
    """
    # One entry per stage: how to obtain the metrics, the CPU keys to check,
    # and the GPU keys to check. The stage is run lazily so that each
    # trainer call happens only after the previous stage's checks passed.
    stages = (
        (
            lambda: trainer.train().metrics,
            ("init_mem_cpu_alloc_delta", "train_mem_cpu_alloc_delta"),
            ("init_mem_gpu_alloc_delta", "train_mem_gpu_alloc_delta"),
        ),
        (
            lambda: trainer.evaluate(),
            ("eval_mem_cpu_alloc_delta",),
            ("eval_mem_gpu_alloc_delta",),
        ),
        (
            lambda: trainer.predict(RegressionDataset()).metrics,
            ("test_mem_cpu_alloc_delta",),
            ("test_mem_gpu_alloc_delta",),
        ),
    )

    for run_stage, cpu_keys, gpu_keys in stages:
        stage_metrics = run_stage()

        for key in cpu_keys:
            check_func(key, stage_metrics)

        # GPU keys are only checked when at least one CUDA device is visible.
        if torch.cuda.device_count() > 0:
            for key in gpu_keys:
                check_func(key, stage_metrics)
|
||||
|
||||
def test_mem_metrics(self):
    """Check memory metrics are reported by default and absent when skipped."""
    scenarios = (
        # with mem metrics enabled
        ({}, self.assertIn),
        # with mem metrics disabled
        ({"skip_memory_metrics": True}, self.assertNotIn),
    )

    for trainer_kwargs, expectation in scenarios:
        regression_trainer = get_regression_trainer(**trainer_kwargs)
        self.check_mem_metrics(regression_trainer, expectation)
|
||||
|
||||
|
||||
@require_torch
|
||||
@require_optuna
|
||||
|
||||
Reference in New Issue
Block a user