Add predict step accumulation (#7767)
* Add eval_accumulation_step and clean distributed eval
* Add TPU test
* Add TPU stuff
* Fix arg name
* Fix Seq2SeqTrainer
* Fix total_size
* Update src/transformers/trainer_pt_utils.py
  Co-authored-by: Lysandre Debut <lysandre@huggingface.co>
* Doc and add test to TPU
* Add unit test
* Adapt name

Co-authored-by: Lysandre Debut <lysandre@huggingface.co>
This commit is contained in:
@@ -67,7 +67,7 @@ class TrainingArguments:
|
||||
The batch size per GPU/TPU core/CPU for training.
|
||||
per_device_eval_batch_size (:obj:`int`, `optional`, defaults to 8):
|
||||
The batch size per GPU/TPU core/CPU for evaluation.
|
||||
gradient_accumulation_steps: (:obj:`int`, `optional`, defaults to 1):
|
||||
gradient_accumulation_steps (:obj:`int`, `optional`, defaults to 1):
|
||||
Number of update steps to accumulate the gradients for, before performing a backward/update pass.
|
||||
|
||||
.. warning::
|
||||
@@ -75,6 +75,10 @@ class TrainingArguments:
|
||||
When using gradient accumulation, one step is counted as one step with backward pass. Therefore,
|
||||
logging, evaluation, save will be conducted every ``gradient_accumulation_steps * xxx_step`` training
|
||||
examples.
|
||||
eval_accumulation_steps (:obj:`int`, `optional`):
|
||||
Number of prediction steps to accumulate the output tensors for, before moving the results to the CPU. If
|
||||
left unset, all predictions are accumulated on GPU/TPU before being moved to the CPU (faster but
|
||||
requires more memory).
|
||||
learning_rate (:obj:`float`, `optional`, defaults to 5e-5):
|
||||
The initial learning rate for Adam.
|
||||
weight_decay (:obj:`float`, `optional`, defaults to 0):
|
||||
@@ -225,6 +229,10 @@ class TrainingArguments:
|
||||
default=1,
|
||||
metadata={"help": "Number of updates steps to accumulate before performing a backward/update pass."},
|
||||
)
|
||||
eval_accumulation_steps: Optional[int] = field(
|
||||
default=None,
|
||||
metadata={"help": "Number of predictions steps to accumulate before moving the tensors to the CPU."},
|
||||
)
|
||||
|
||||
learning_rate: float = field(default=5e-5, metadata={"help": "The initial learning rate for Adam."})
|
||||
weight_decay: float = field(default=0.0, metadata={"help": "Weight decay if we apply some."})
|
||||
|
||||
Reference in New Issue
Block a user