From 88e18b3c6317388eaad34111e309afdfc827f670 Mon Sep 17 00:00:00 2001
From: "Sean (Seok-Won) Yi" <seantuokc@gmail.com>
Date: Thu, 9 Jan 2025 00:32:35 +0900
Subject: [PATCH] Update doc for `metric_for_best_model` when
 `save_strategy="best"`. (#35389)

* Updated docstring for _determine_best_metric.

* Updated docstring for metric_for_best_model.

* Added test case for save strategy.

* Updated incorrect test case.

* Changed eval_strategy to match save_strategy.

* Separated test cases for metric.

* Allow load_best_model when save_strategy == "best".

* Updated docstring for metric_for_best_model.
---
 src/transformers/trainer.py       |  1 -
 src/transformers/training_args.py | 12 +++++++-----
 tests/trainer/test_trainer.py     | 19 +++++++++++++++++--
 3 files changed, 24 insertions(+), 8 deletions(-)

diff --git a/src/transformers/trainer.py b/src/transformers/trainer.py
index 2cdf9c72f7..4cd9a6b4df 100755
--- a/src/transformers/trainer.py
+++ b/src/transformers/trainer.py
@@ -3156,7 +3156,6 @@ class Trainer:
     def _determine_best_metric(self, metrics, trial):
         """
         Determine if the model should be saved based on the evaluation metrics.
-        If args.metric_for_best_model is not set, the loss is used.
 
         Returns:
             bool: True if a new best metric was found, else False
diff --git a/src/transformers/training_args.py b/src/transformers/training_args.py
index a1b5b511a9..a7b2ba0db3 100644
--- a/src/transformers/training_args.py
+++ b/src/transformers/training_args.py
@@ -476,11 +476,13 @@ class TrainingArguments:
 
         metric_for_best_model (`str`, *optional*):
             Use in conjunction with `load_best_model_at_end` to specify the metric to use to compare two different
-            models. Must be the name of a metric returned by the evaluation with or without the prefix `"eval_"`. Will
-            default to `"loss"` if unspecified and `load_best_model_at_end=True` (to use the evaluation loss).
+            models. Must be the name of a metric returned by the evaluation with or without the prefix `"eval_"`.
 
-            If you set this value, `greater_is_better` will default to `True`. Don't forget to set it to `False` if
-            your metric is better when lower.
+            If not specified, this will default to `"loss"` when either `load_best_model_at_end == True`
+            or `lr_scheduler_type == SchedulerType.REDUCE_ON_PLATEAU` (to use the evaluation loss).
+
+            If you set this value, `greater_is_better` will default to `True` unless the name ends with "loss".
+            Don't forget to set it to `False` if your metric is better when lower.
         greater_is_better (`bool`, *optional*):
             Use in conjunction with `load_best_model_at_end` and `metric_for_best_model` to specify if better models
             should have a greater metric or not. Will default to:
@@ -1636,7 +1638,7 @@ class TrainingArguments:
             self.save_steps = int(self.save_steps)
 
         # Sanity checks for load_best_model_at_end: we require save and eval strategies to be compatible.
-        if self.load_best_model_at_end:
+        if self.load_best_model_at_end and self.save_strategy != SaveStrategy.BEST:
             if self.eval_strategy != self.save_strategy:
                 raise ValueError(
                     "--load_best_model_at_end requires the save and eval strategy to match, but found\n- Evaluation "
diff --git a/tests/trainer/test_trainer.py b/tests/trainer/test_trainer.py
index d33be27897..f39adf13e0 100644
--- a/tests/trainer/test_trainer.py
+++ b/tests/trainer/test_trainer.py
@@ -4220,7 +4220,9 @@ class TrainerIntegrationTest(TestCasePlus, TrainerIntegrationCommon):
                     total=total,
                 )
 
-        # Case 3: Metric name not provided; throw error.
+    def test_metric_for_best_model_behavior(self):
+        # Case 1: Metric name not provided when `save_strategy == "best"`.
+        # Should raise ValueError.
         with tempfile.TemporaryDirectory() as tmpdir:
             with self.assertRaises(ValueError) as context:
                 trainer = get_regression_trainer(
@@ -4232,9 +4234,22 @@ class TrainerIntegrationTest(TestCasePlus, TrainerIntegrationCommon):
                     save_strategy="best",
                     compute_metrics=AlmostAccuracy(),
                 )
-
             self.assertIn("`args.metric_for_best_model` must be provided", str(context.exception))
 
+        # Case 2: Metric name not provided when `load_best_model_at_end == True`.
+        # `metric_for_best_model` should be set to `"loss"` by default.
+        with tempfile.TemporaryDirectory() as tmpdir:
+            trainer = get_regression_trainer(
+                a=1.5,
+                b=2.5,
+                output_dir=tmpdir,
+                learning_rate=0.1,
+                eval_strategy="steps",
+                save_strategy="steps",
+                load_best_model_at_end=True,
+            )
+            self.assertTrue(trainer.args.metric_for_best_model == "loss")
+
 
 @require_torch
 @is_staging_test