Fix condition when GA loss bug fix is not performed (#35651)

* fix the test condition for the case where the gradient accumulation (GA) loss bug fix is not applied

* max loss diff is 2.29, so lower the "broken" threshold from 3 to 2

* fix typo

* add an extra validation that the summed loss does not vary too much (relative difference below 0.1)
This commit is contained in:
kang sheng
2025-01-16 20:59:53 +08:00
committed by GitHub
parent fd4f14c968
commit 2cbcc5877d
2 changed files with 10 additions and 10 deletions

View File

@@ -855,7 +855,14 @@ class TrainerIntegrationPrerunTest(TestCasePlus, TrainerIntegrationCommon):
self.assertLess(max(diff_truth), 0.01, f"Difference {max(diff_truth)} is not within 0.01")
# max diff broken should be very off
self.assertGreater(max(diff_broken), 3, f"Difference {max(diff_broken)} is not greater than 3")
self.assertGreater(max(diff_broken), 2, f"Difference {max(diff_broken)} is not greater than 2")
loss_base = sum(base_loss_callback.losses)
loss_broken = sum(broken_loss_callback.losses)
# mean/sum loss should not vary too much.
relative_diff = abs(loss_base - loss_broken) / max(loss_base, loss_broken)
self.assertLess(relative_diff, 0.1, f"Relative difference {relative_diff} is not within 0.1")
@slow
def test_gradient_accumulation_loss_alignment_with_loss_func(self):