From 44127ec66787234dcd76afe31dab4cd79cccf8b6 Mon Sep 17 00:00:00 2001
From: Zach Mueller <muellerzr@gmail.com>
Date: Mon, 11 Dec 2023 09:57:41 -0500
Subject: [PATCH] Fix test for auto_find_batch_size on multi-GPU (#27947)

* Fix test for multi-GPU

* With CPU handle
---
 tests/trainer/test_trainer.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/trainer/test_trainer.py b/tests/trainer/test_trainer.py
index 22c43071aa..05f84bc00f 100644
--- a/tests/trainer/test_trainer.py
+++ b/tests/trainer/test_trainer.py
@@ -1558,7 +1558,7 @@ class TrainerIntegrationTest(TestCasePlus, TrainerIntegrationCommon):
         class MockCudaOOMCallback(TrainerCallback):
             def on_step_end(self, args, state, control, **kwargs):
                 # simulate OOM on the first step
-                if state.train_batch_size == 16:
+                if state.train_batch_size >= 16:
                     raise RuntimeError("CUDA out of memory.")
 
         args = RegressionTrainingArguments(
@@ -1577,7 +1577,7 @@ class TrainerIntegrationTest(TestCasePlus, TrainerIntegrationCommon):
         # We can then make a new Trainer
         trainer = Trainer(model, args, train_dataset=train_dataset)
         # Check we are at 16 to start
-        self.assertEqual(trainer._train_batch_size, 16)
+        self.assertEqual(trainer._train_batch_size, 16 * max(trainer.args.n_gpu, 1))
         trainer.train(resume_from_checkpoint=True)
         # We should be back to 8 again, picking up based upon the last ran Trainer
         self.assertEqual(trainer._train_batch_size, 8)