Fix Trainer and Args to mention AdamW, not Adam. (#9685)
* Fix Trainer and Args to mention AdamW, not Adam.
* Update the docs for Training Arguments.
* Change arguments adamw_* to adam_*
* Fixed links to AdamW in TrainingArguments docs
* Fix line length in Training Args docs.
This commit is contained in:
@@ -104,15 +104,16 @@ class TrainingArguments:
|
|||||||
left unset, the whole predictions are accumulated on GPU/TPU before being moved to the CPU (faster but
|
left unset, the whole predictions are accumulated on GPU/TPU before being moved to the CPU (faster but
|
||||||
requires more memory).
|
requires more memory).
|
||||||
learning_rate (:obj:`float`, `optional`, defaults to 5e-5):
|
learning_rate (:obj:`float`, `optional`, defaults to 5e-5):
|
||||||
The initial learning rate for Adam.
|
The initial learning rate for :class:`~transformers.AdamW` optimizer.
|
||||||
weight_decay (:obj:`float`, `optional`, defaults to 0):
|
weight_decay (:obj:`float`, `optional`, defaults to 0):
|
||||||
The weight decay to apply (if not zero).
|
The weight decay to apply (if not zero) to all layers except all bias and LayerNorm weights in
|
||||||
|
:class:`~transformers.AdamW` optimizer.
|
||||||
adam_beta1 (:obj:`float`, `optional`, defaults to 0.9):
|
adam_beta1 (:obj:`float`, `optional`, defaults to 0.9):
|
||||||
The beta1 hyperparameter for the Adam optimizer.
|
The beta1 hyperparameter for the :class:`~transformers.AdamW` optimizer.
|
||||||
adam_beta2 (:obj:`float`, `optional`, defaults to 0.999):
|
adam_beta2 (:obj:`float`, `optional`, defaults to 0.999):
|
||||||
The beta2 hyperparameter for the Adam optimizer.
|
The beta2 hyperparameter for the :class:`~transformers.AdamW` optimizer.
|
||||||
adam_epsilon (:obj:`float`, `optional`, defaults to 1e-8):
|
adam_epsilon (:obj:`float`, `optional`, defaults to 1e-8):
|
||||||
The epsilon hyperparameter for the Adam optimizer.
|
The epsilon hyperparameter for the :class:`~transformers.AdamW` optimizer.
|
||||||
max_grad_norm (:obj:`float`, `optional`, defaults to 1.0):
|
max_grad_norm (:obj:`float`, `optional`, defaults to 1.0):
|
||||||
Maximum gradient norm (for gradient clipping).
|
Maximum gradient norm (for gradient clipping).
|
||||||
num_train_epochs(:obj:`float`, `optional`, defaults to 3.0):
|
num_train_epochs(:obj:`float`, `optional`, defaults to 3.0):
|
||||||
@@ -288,11 +289,11 @@ class TrainingArguments:
|
|||||||
metadata={"help": "Number of predictions steps to accumulate before moving the tensors to the CPU."},
|
metadata={"help": "Number of predictions steps to accumulate before moving the tensors to the CPU."},
|
||||||
)
|
)
|
||||||
|
|
||||||
learning_rate: float = field(default=5e-5, metadata={"help": "The initial learning rate for Adam."})
|
learning_rate: float = field(default=5e-5, metadata={"help": "The initial learning rate for AdamW."})
|
||||||
weight_decay: float = field(default=0.0, metadata={"help": "Weight decay if we apply some."})
|
weight_decay: float = field(default=0.0, metadata={"help": "Weight decay for AdamW if we apply some."})
|
||||||
adam_beta1: float = field(default=0.9, metadata={"help": "Beta1 for Adam optimizer"})
|
adam_beta1: float = field(default=0.9, metadata={"help": "Beta1 for AdamW optimizer"})
|
||||||
adam_beta2: float = field(default=0.999, metadata={"help": "Beta2 for Adam optimizer"})
|
adam_beta2: float = field(default=0.999, metadata={"help": "Beta2 for AdamW optimizer"})
|
||||||
adam_epsilon: float = field(default=1e-8, metadata={"help": "Epsilon for Adam optimizer."})
|
adam_epsilon: float = field(default=1e-8, metadata={"help": "Epsilon for AdamW optimizer."})
|
||||||
max_grad_norm: float = field(default=1.0, metadata={"help": "Max gradient norm."})
|
max_grad_norm: float = field(default=1.0, metadata={"help": "Max gradient norm."})
|
||||||
|
|
||||||
num_train_epochs: float = field(default=3.0, metadata={"help": "Total number of training epochs to perform."})
|
num_train_epochs: float = field(default=3.0, metadata={"help": "Total number of training epochs to perform."})
|
||||||
@@ -407,7 +408,7 @@ class TrainingArguments:
|
|||||||
label_smoothing_factor: float = field(
|
label_smoothing_factor: float = field(
|
||||||
default=0.0, metadata={"help": "The label smoothing epsilon to apply (zero means no label smoothing)."}
|
default=0.0, metadata={"help": "The label smoothing epsilon to apply (zero means no label smoothing)."}
|
||||||
)
|
)
|
||||||
adafactor: bool = field(default=False, metadata={"help": "Whether or not to replace Adam by Adafactor."})
|
adafactor: bool = field(default=False, metadata={"help": "Whether or not to replace AdamW by Adafactor."})
|
||||||
group_by_length: bool = field(
|
group_by_length: bool = field(
|
||||||
default=False,
|
default=False,
|
||||||
metadata={"help": "Whether or not to group samples of roughly the same length together when batching."},
|
metadata={"help": "Whether or not to group samples of roughly the same length together when batching."},
|
||||||
|
|||||||
Reference in New Issue
Block a user