From 25fcb5c171ba68525e01863fd8619f4598f6b9f1 Mon Sep 17 00:00:00 2001 From: abhishek thakur Date: Thu, 28 Jan 2021 08:50:46 +0100 Subject: [PATCH] Pin memory in Trainer by default (#9857) Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com> Co-authored-by: Stas Bekman --- src/transformers/trainer.py | 5 ++++- src/transformers/training_args.py | 5 ++++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/src/transformers/trainer.py b/src/transformers/trainer.py index 7472712c74..b76a360191 100755 --- a/src/transformers/trainer.py +++ b/src/transformers/trainer.py @@ -485,6 +485,7 @@ class Trainer: collate_fn=self.data_collator, drop_last=self.args.dataloader_drop_last, num_workers=self.args.dataloader_num_workers, + pin_memory=self.args.pin_memory, ) def _get_eval_sampler(self, eval_dataset: Dataset) -> Optional[torch.utils.data.sampler.Sampler]: @@ -522,6 +523,7 @@ class Trainer: collate_fn=self.data_collator, drop_last=self.args.dataloader_drop_last, num_workers=self.args.dataloader_num_workers, + pin_memory=self.args.pin_memory, ) def get_test_dataloader(self, test_dataset: Dataset) -> DataLoader: @@ -548,6 +550,7 @@ class Trainer: batch_size=self.args.eval_batch_size, collate_fn=self.data_collator, drop_last=self.args.dataloader_drop_last, + pin_memory=self.args.pin_memory, ) def create_optimizer_and_scheduler(self, num_training_steps: int): @@ -1140,7 +1143,7 @@ class Trainer: direction: str = "minimize", backend: Optional[Union["str", HPSearchBackend]] = None, hp_name: Optional[Callable[["optuna.Trial"], str]] = None, - **kwargs + **kwargs, ) -> BestRun: """ Launch an hyperparameter search using ``optuna`` or ``Ray Tune``. 
The optimized quantity is determined by diff --git a/src/transformers/training_args.py b/src/transformers/training_args.py index 6811c61154..e4e8aad96d 100644 --- a/src/transformers/training_args.py +++ b/src/transformers/training_args.py @@ -242,8 +242,10 @@ class TrainingArguments: :obj:`"comet_ml"`, :obj:`"mlflow"`, :obj:`"tensorboard"` and :obj:`"wandb"`. ddp_find_unused_parameters (:obj:`bool`, `optional`): When using distributed training, the value of the flag :obj:`find_unused_parameters` passed to - :obj:`DistributedDataParallel`. Will defaut to :obj:`False` if gradient checkpointing is used, :obj:`True` + :obj:`DistributedDataParallel`. Will default to :obj:`False` if gradient checkpointing is used, :obj:`True` otherwise. + pin_memory (:obj:`bool`, `optional`, defaults to :obj:`True`): + Whether you want to pin memory in data loaders or not. Will default to :obj:`True`. """ output_dir: str = field( @@ -436,6 +438,7 @@ class TrainingArguments: "`DistributedDataParallel`." }, ) + pin_memory: bool = field(default=True, metadata={"help": "Whether or not to pin memory for data loaders."}) _n_gpu: int = field(init=False, repr=False, default=-1) def __post_init__(self):