Use a higher absolute tolerance (atol) to avoid flaky trainer test failures (#17979)
Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
This commit is contained in:
@@ -1252,8 +1252,8 @@ class TrainerIntegrationTest(TestCasePlus, TrainerIntegrationCommon):
|
|||||||
trainer.train(resume_from_checkpoint=os.path.join(tmp_dir, "checkpoint-15"))
|
trainer.train(resume_from_checkpoint=os.path.join(tmp_dir, "checkpoint-15"))
|
||||||
(a1, b1) = trainer.model.a.item(), trainer.model.b.item()
|
(a1, b1) = trainer.model.a.item(), trainer.model.b.item()
|
||||||
|
|
||||||
self.assertAlmostEqual(a, a1, delta=1e-8)
|
self.assertAlmostEqual(a, a1, delta=1e-5)
|
||||||
self.assertAlmostEqual(b, b1, delta=1e-8)
|
self.assertAlmostEqual(b, b1, delta=1e-5)
|
||||||
|
|
||||||
with self.subTest("Test every epoch"):
|
with self.subTest("Test every epoch"):
|
||||||
config = RegressionModelConfig(a=0, b=2, random_torch=random_torch)
|
config = RegressionModelConfig(a=0, b=2, random_torch=random_torch)
|
||||||
@@ -1277,8 +1277,8 @@ class TrainerIntegrationTest(TestCasePlus, TrainerIntegrationCommon):
|
|||||||
trainer.train(resume_from_checkpoint=os.path.join(tmp_dir, checkpoint_dir))
|
trainer.train(resume_from_checkpoint=os.path.join(tmp_dir, checkpoint_dir))
|
||||||
(a1, b1) = trainer.model.a.item(), trainer.model.b.item()
|
(a1, b1) = trainer.model.a.item(), trainer.model.b.item()
|
||||||
|
|
||||||
self.assertAlmostEqual(a, a1, delta=1e-8)
|
self.assertAlmostEqual(a, a1, delta=1e-5)
|
||||||
self.assertAlmostEqual(b, b1, delta=1e-8)
|
self.assertAlmostEqual(b, b1, delta=1e-5)
|
||||||
|
|
||||||
@slow
|
@slow
|
||||||
@require_torch_non_multi_gpu
|
@require_torch_non_multi_gpu
|
||||||
|
|||||||
Reference in New Issue
Block a user