Fix arbitrary threshold value in gradient accumulation test (#36691)

2025-03-14 22:03:01 +01:00
parent 2c2495cc7b
commit 6f3e0b68e0
2 changed files with 3 additions and 4 deletions
--- a/src/transformers/utils/import_utils.py
+++ b/src/transformers/utils/import_utils.py
@@ -813,7 +813,7 @@ def is_torch_hpu_available():

        def patched_masked_fill_(self, mask, value):
            if self.dtype == torch.int64:
-                logger.warning(
+                logger.warning_once(
                    "In-place tensor.masked_fill_(mask, value) is not supported for int64 tensors on Gaudi1. "
                    "This operation will be performed out-of-place using tensor[mask] = value."
                )
--- a/tests/trainer/test_trainer.py
+++ b/tests/trainer/test_trainer.py
@@ -896,9 +896,8 @@ class TrainerIntegrationPrerunTest(TestCasePlus, TrainerIntegrationCommon):

        # all diff truth should be quite close
        self.assertLess(max(diff_truth), 0.01, f"Difference {max(diff_truth)} is not within 0.01")
-
-        # max diff broken should be very off
-        self.assertGreater(max(diff_broken), 1.3, f"Difference {max(diff_broken)} is not greater than 1.3")
+        # max diff broken should be very off ("very off" is arbitrary, but as long as it's bigger than 0.1, it's fine)
+        self.assertGreater(max(diff_broken), 0.7, f"Difference {max(diff_broken)} is not greater than 0.7")

        loss_base = sum(base_loss_callback.losses)
        loss_broken = sum(broken_loss_callback.losses)