[DOC] Clarify relationship between load_best_model_at_end and save_total_limit (#24614)
* Update training_args.py Clarify the relationship between `load_best_model_at_end` and `save_total_limit`. * fix: faulty quotes * make quality * Update src/transformers/training_args.py Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com> * DOCS: add explicit `True` * DOCS: make style/quality --------- Co-authored-by: Bram Vanroy <Bram.Vanroy@UGent.be> Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com>
This commit is contained in:
@@ -283,7 +283,11 @@ class TrainingArguments:
|
|||||||
float in range `[0,1)`. If smaller than 1, will be interpreted as ratio of total training steps.
|
float in range `[0,1)`. If smaller than 1, will be interpreted as ratio of total training steps.
|
||||||
save_total_limit (`int`, *optional*):
|
save_total_limit (`int`, *optional*):
|
||||||
If a value is passed, will limit the total amount of checkpoints. Deletes the older checkpoints in
|
If a value is passed, will limit the total amount of checkpoints. Deletes the older checkpoints in
|
||||||
`output_dir`.
|
`output_dir`. When `load_best_model_at_end` is enabled, the "best" checkpoint according to
|
||||||
|
`metric_for_best_model` will always be retained in addition to the most recent ones. For example, for
|
||||||
|
`save_total_limit=5` and `load_best_model_at_end`, the four last checkpoints will always be retained
|
||||||
|
alongside the best model. When `save_total_limit=1` and `load_best_model_at_end`, it is possible that two
|
||||||
|
checkpoints are saved: the last one and the best one (if they are different).
|
||||||
save_safetensors (`bool`, *optional*, defaults to `False`):
|
save_safetensors (`bool`, *optional*, defaults to `False`):
|
||||||
Use [safetensors](https://huggingface.co/docs/safetensors) saving and loading for state dicts instead of
|
Use [safetensors](https://huggingface.co/docs/safetensors) saving and loading for state dicts instead of
|
||||||
default `torch.load` and `torch.save`.
|
default `torch.load` and `torch.save`.
|
||||||
@@ -371,7 +375,10 @@ class TrainingArguments:
|
|||||||
except if the model used is one of the `XxxForQuestionAnswering` in which case it will also include the
|
except if the model used is one of the `XxxForQuestionAnswering` in which case it will also include the
|
||||||
`["start_positions", "end_positions"]` keys.
|
`["start_positions", "end_positions"]` keys.
|
||||||
load_best_model_at_end (`bool`, *optional*, defaults to `False`):
|
load_best_model_at_end (`bool`, *optional*, defaults to `False`):
|
||||||
Whether or not to load the best model found during training at the end of training.
|
Whether or not to load the best model found during training at the end of training. When this option is
|
||||||
|
enabled, the best checkpoint will always be saved. See
|
||||||
|
[`save_total_limit`](https://huggingface.co/docs/transformers/main_classes/trainer#transformers.TrainingArguments.save_total_limit)
|
||||||
|
for more.
|
||||||
|
|
||||||
<Tip>
|
<Tip>
|
||||||
|
|
||||||
@@ -761,8 +768,13 @@ class TrainingArguments:
|
|||||||
default=None,
|
default=None,
|
||||||
metadata={
|
metadata={
|
||||||
"help": (
|
"help": (
|
||||||
"Limit the total amount of checkpoints. "
|
"If a value is passed, will limit the total amount of checkpoints. Deletes the older checkpoints in"
|
||||||
"Deletes the older checkpoints in the output_dir. Default is unlimited checkpoints"
|
" `output_dir`. When `load_best_model_at_end` is enabled, the 'best' checkpoint according to"
|
||||||
|
" `metric_for_best_model` will always be retained in addition to the most recent ones. For example,"
|
||||||
|
" for `save_total_limit=5` and `load_best_model_at_end=True`, the four last checkpoints will always be"
|
||||||
|
" retained alongside the best model. When `save_total_limit=1` and `load_best_model_at_end=True`,"
|
||||||
|
" it is possible that two checkpoints are saved: the last one and the best one (if they are different)."
|
||||||
|
" Default is unlimited checkpoints"
|
||||||
)
|
)
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
@@ -924,10 +936,14 @@ class TrainingArguments:
|
|||||||
label_names: Optional[List[str]] = field(
|
label_names: Optional[List[str]] = field(
|
||||||
default=None, metadata={"help": "The list of keys in your dictionary of inputs that correspond to the labels."}
|
default=None, metadata={"help": "The list of keys in your dictionary of inputs that correspond to the labels."}
|
||||||
)
|
)
|
||||||
|
|
||||||
load_best_model_at_end: Optional[bool] = field(
|
load_best_model_at_end: Optional[bool] = field(
|
||||||
default=False,
|
default=False,
|
||||||
metadata={"help": "Whether or not to load the best model found during training at the end of training."},
|
metadata={
|
||||||
|
"help": (
|
||||||
|
"Whether or not to load the best model found during training at the end of training. When this option"
|
||||||
|
" is enabled, the best checkpoint will always be saved. See `save_total_limit` for more."
|
||||||
|
)
|
||||||
|
},
|
||||||
)
|
)
|
||||||
metric_for_best_model: Optional[str] = field(
|
metric_for_best_model: Optional[str] = field(
|
||||||
default=None, metadata={"help": "The metric to use to compare two different models."}
|
default=None, metadata={"help": "The metric to use to compare two different models."}
|
||||||
|
|||||||
Reference in New Issue
Block a user