Don't reset the dataset type + plug for rm unused columns (#6683)

* Don't reset the type of the dataset
* Formatting
* Update trainer.py

Co-authored-by: Teven <teven.lescao@gmail.com>
2020-08-24 09:22:03 -04:00
parent 1a779ad7ec
commit b30879fe0c
2 changed files with 15 additions and 1 deletions
--- a/src/transformers/training_args.py
+++ b/src/transformers/training_args.py
@@ -114,6 +114,11 @@ class TrainingArguments:
            at the next training step under the keyword argument ``mems``.
        run_name (:obj:`str`, `optional`):
            A descriptor for the run. Notably used for wandb logging.
+        remove_unused_columns (:obj:`bool`, `optional`, defaults to :obj:`True`):
+            If using :obj:`nlp.Dataset` datasets, whether or not to automatically remove the columns unused by the
+            model forward method.
+
+            (Note: this behavior is not implemented for :class:`~transformers.TFTrainer` yet.)
    """

    output_dir: str = field(
@@ -234,6 +239,10 @@ class TrainingArguments:
        default=None, metadata={"help": "An optional descriptor for the run. Notably used for wandb logging."}
    )

+    remove_unused_columns: Optional[bool] = field(
+        default=True, metadata={"help": "Remove columns not required by the model when using an nlp.Dataset."}
+    )
+
    @property
    def train_batch_size(self) -> int:
        """