From abc0202194674ae5e241e547f3af34b4226bdc72 Mon Sep 17 00:00:00 2001
From: Sylvain Gugger <35901082+sgugger@users.noreply.github.com>
Date: Tue, 25 Aug 2020 07:07:36 -0400
Subject: [PATCH] More tests to Trainer (#6699)

* More tests to Trainer

* Add warning in the doc
---
 .circleci/config.yml        |  3 ++
 src/transformers/trainer.py | 43 +++++++++++++++------
 tests/test_trainer.py       | 75 +++++++++++++++++++++++++++++++++++--
 3 files changed, 106 insertions(+), 15 deletions(-)

diff --git a/.circleci/config.yml b/.circleci/config.yml
index c55b2f5832..ef1da2791b 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -77,6 +77,7 @@ jobs:
                       - v0.3-torch_and_tf-{{ checksum "setup.py" }}
                       - v0.3-{{ checksum "setup.py" }}
             - run: pip install --upgrade pip
+            - run: pip install git+https://github.com/huggingface/nlp
             - run: pip install .[sklearn,tf-cpu,torch,testing]
             - run: pip install codecov pytest-cov
             - save_cache:
@@ -103,6 +104,7 @@ jobs:
                       - v0.3-torch-{{ checksum "setup.py" }}
                       - v0.3-{{ checksum "setup.py" }}
             - run: pip install --upgrade pip
+            - run: pip install git+https://github.com/huggingface/nlp
             - run: pip install .[sklearn,torch,testing]
             - save_cache:
                   key: v0.3-torch-{{ checksum "setup.py" }}
@@ -127,6 +129,7 @@ jobs:
                       - v0.3-tf-{{ checksum "setup.py" }}
                       - v0.3-{{ checksum "setup.py" }}
             - run: pip install --upgrade pip
+            - run: pip install git+https://github.com/huggingface/nlp
             - run: pip install .[sklearn,tf-cpu,testing]
             - save_cache:
                   key: v0.3-tf-{{ checksum "setup.py" }}
diff --git a/src/transformers/trainer.py b/src/transformers/trainer.py
index 5a8d0a9709..0765a4307d 100755
--- a/src/transformers/trainer.py
+++ b/src/transformers/trainer.py
@@ -206,22 +206,29 @@ class Trainer:
         optimizers: Tuple[torch.optim.Optimizer, torch.optim.lr_scheduler.LambdaLR] = (None, None),
         **kwargs,
     ):
+        if args is None:
+            logger.info("No `TrainingArguments` passed, using the current path as `output_dir`.")
+            args = TrainingArguments("tmp_trainer")
+        self.args = args
+        # Seed must be set before instantiating the model when using model
+        set_seed(self.args.seed)
         assert (
             model is not None or model_init is not None
         ), "You must provide a model to use `Trainer`, either by using the `model` argument or the `model_init` argument."
         if model is None and model_init is not None:
             model = model_init()
         self.model = model.to(args.device) if model is not None else None
-        if args is None:
-            logger.info("No `TrainingArguments` passed, using the current path as `output_dir`.")
-            args = TrainingArguments("tmp_trainer")
-        self.args = args
         self.data_collator = data_collator if data_collator is not None else default_data_collator
         self.train_dataset = train_dataset
         self.eval_dataset = eval_dataset
         self.model_init = model_init
         self.compute_metrics = compute_metrics
         self.optimizer, self.lr_scheduler = optimizers
+        if model_init is not None and (self.optimizer is not None or self.lr_scheduler is not None):
+            raise RuntimeError(
+                "Passing a `model_init` is incompatible with providing the `optimizers` argument."
+                "You should subclass `Trainer` and override the `create_optimizer_and_scheduler` method."
+            )
         self.tb_writer = tb_writer
         if "prediction_loss_only" in kwargs:
             warnings.warn(
@@ -251,7 +258,6 @@ class Trainer:
                 "To use comet_ml logging, run `pip/conda install comet_ml` "
                 "see https://www.comet.ml/docs/python-sdk/huggingface/"
             )
-        set_seed(self.args.seed)
         # Create output directory if needed
         if self.is_world_process_zero():
             os.makedirs(self.args.output_dir, exist_ok=True)
@@ -542,12 +548,18 @@ class Trainer:
             trial (:obj:`optuna.Trial` or :obj:`Dict[str, Any]`, `optional`):
                 The trial run or the hyperparameter dictionary for hyperparameter search.
         """
+        # This might change the seed so needs to run first.
+        self._hp_search_setup(trial)
+
         # Model re-init
         if self.model_init is not None:
+            # Seed must be set before instantiating the model when using model_init.
+            set_seed(self.args.seed)
             model = self.model_init()
             self.model = model.to(self.args.device)
 
-        self._hp_search_setup(trial)
+            # Reinitializes optimizer and scheduler
+            self.optimizer, self.lr_scheduler = None, None
 
         # Data loader and number of training steps
         train_dataloader = self.get_train_dataloader()
@@ -788,6 +800,13 @@ class Trainer:
         :obj:`compute_objectie`, which defaults to a function returning the evaluation loss when no metric is provided,
         the sum of all metrics otherwise.
 
+        .. warning::
+
+            To use this method, you need to have provided a ``model_init`` when initializing your
+            :class:`~transformers.Trainer`: we need to reinitialize the model at each new run. This is incompatible
+            with the ``optimizers`` argument, so you need to subclass :class:`~transformers.Trainer` and override the
+            method :meth:`~transformers.Trainer.create_optimizer_and_scheduler` for custom optimizer/scheduler.
+
         Args:
             hp_space (:obj:`Callable[["optuna.Trial"], Dict[str, float]]`, `optional`):
                 A function that defines the hyperparameter search space. Will default to
@@ -825,20 +844,22 @@ class Trainer:
                 )
         backend = HPSearchBackend(backend)
         if backend == HPSearchBackend.OPTUNA and not is_optuna_available():
-            raise RuntimeError(" You picked the optuna backend, but it is not installed. Use `pip install optuna`.")
+            raise RuntimeError("You picked the optuna backend, but it is not installed. Use `pip install optuna`.")
         if backend == HPSearchBackend.RAY and not is_ray_available():
             raise RuntimeError(
-                " You picked the Ray Tune backend, but it is not installed. Use `pip install 'ray[tune]'`."
+                "You picked the Ray Tune backend, but it is not installed. Use `pip install 'ray[tune]'`."
             )
         self.hp_search_backend = backend
 
+        if self.model_init is None:
+            raise RuntimeError(
+                "To use hyperparameter search, you need to pass your model through a model_init function."
+            )
+
         self.hp_space = default_hp_space[backend] if hp_space is None else hp_space
         self.compute_objective = default_compute_objective if compute_objective is None else compute_objective
 
         def _objective(trial):
-            # To make sure optimizer and lr_scheduler are reset with the new choices of HPs
-            self.optimizer = None
-            self.lr_scheduler = None
             self.objective = None
             self.train(trial=trial)
             # If there hasn't been any evaluation during the training loop.
diff --git a/tests/test_trainer.py b/tests/test_trainer.py
index 072ae78302..4f0a56ec78 100755
--- a/tests/test_trainer.py
+++ b/tests/test_trainer.py
@@ -1,5 +1,6 @@
 import unittest
 
+import nlp
 import numpy as np
 
 from transformers import AutoTokenizer, TrainingArguments, is_torch_available
@@ -93,6 +94,17 @@ if is_torch_available():
 
 @require_torch
 class TrainerIntegrationTest(unittest.TestCase):
+    def check_trained_model(self, model, alternate_seed=False):
+        # Checks a training seeded with learning_rate = 0.1
+        if alternate_seed:
+            # With args.seed = 314
+            self.assertTrue(torch.abs(model.a - 1.0171) < 1e-4)
+            self.assertTrue(torch.abs(model.b - 1.2494) < 1e-4)
+        else:
+            # With default args.seed
+            self.assertTrue(torch.abs(model.a - 0.6975) < 1e-4)
+            self.assertTrue(torch.abs(model.b - 1.2415) < 1e-4)
+
     def setUp(self):
         # Get the default values (in case they change):
         args = TrainingArguments(".")
@@ -103,14 +115,12 @@ class TrainerIntegrationTest(unittest.TestCase):
         # Checks that training worked, model trained and seed made a reproducible training.
         trainer = get_regression_trainer(learning_rate=0.1)
         trainer.train()
-        self.assertTrue(torch.abs(trainer.model.a - 0.6975) < 1e-4)
-        self.assertTrue(torch.abs(trainer.model.b - 1.2415) < 1e-4)
+        self.check_trained_model(trainer.model)
 
         # Checks that a different seed gets different (reproducible) results.
         trainer = get_regression_trainer(learning_rate=0.1, seed=314)
         trainer.train()
-        self.assertTrue(torch.abs(trainer.model.a - 1.0171) < 1e-4)
-        self.assertTrue(torch.abs(trainer.model.b - 1.2494) < 1e-4)
+        self.check_trained_model(trainer.model, alternate_seed=True)
 
     def test_number_of_steps_in_training(self):
         # Regular training has n_epochs * len(train_dl) steps
@@ -190,6 +200,63 @@ class TrainerIntegrationTest(unittest.TestCase):
         x = trainer.eval_dataset.x
         self.assertTrue(np.allclose(preds, 1.5 * x + 2.5))
 
+    def test_trainer_with_nlp(self):
+        np.random.seed(42)
+        x = np.random.normal(size=(64,)).astype(np.float32)
+        y = 2.0 * x + 3.0 + np.random.normal(scale=0.1, size=(64,))
+        train_dataset = nlp.Dataset.from_dict({"input_x": x, "label": y})
+
+        # Base training. Should have the same results as test_reproducible_training
+        model = RegressionModel()
+        args = TrainingArguments("./regression", learning_rate=0.1)
+        trainer = Trainer(model, args, train_dataset=train_dataset)
+        trainer.train()
+        self.check_trained_model(trainer.model)
+
+        # Can return tensors.
+        train_dataset.set_format(type="torch")
+        model = RegressionModel()
+        trainer = Trainer(model, args, train_dataset=train_dataset)
+        trainer.train()
+        self.check_trained_model(trainer.model)
+
+        # Adding one column not used by the model should have no impact
+        z = np.random.normal(size=(64,)).astype(np.float32)
+        train_dataset = nlp.Dataset.from_dict({"input_x": x, "label": y, "extra": z})
+        model = RegressionModel()
+        trainer = Trainer(model, args, train_dataset=train_dataset)
+        trainer.train()
+        self.check_trained_model(trainer.model)
+
+    def test_custom_optimizer(self):
+        train_dataset = RegressionDataset()
+        args = TrainingArguments("./regression")
+        model = RegressionModel()
+        optimizer = torch.optim.SGD(model.parameters(), lr=1.0)
+        lr_scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer, lr_lambda=lambda x: 1.0)
+        trainer = Trainer(model, args, train_dataset=train_dataset, optimizers=(optimizer, lr_scheduler))
+        trainer.train()
+
+        self.assertTrue(torch.abs(trainer.model.a - 1.8950) < 1e-4)
+        self.assertTrue(torch.abs(trainer.model.b - 2.5656) < 1e-4)
+        self.assertEqual(trainer.optimizer.state_dict()["param_groups"][0]["lr"], 1.0)
+
+    def test_model_init(self):
+        train_dataset = RegressionDataset()
+        args = TrainingArguments("./regression", learning_rate=0.1)
+        trainer = Trainer(args=args, train_dataset=train_dataset, model_init=lambda: RegressionModel())
+        trainer.train()
+        self.check_trained_model(trainer.model)
+
+        # Re-training should restart from scratch, thus lead the same results.
+        trainer.train()
+        self.check_trained_model(trainer.model)
+
+        # Re-training should restart from scratch, thus lead the same results and new seed should be used.
+        trainer.args.seed = 314
+        trainer.train()
+        self.check_trained_model(trainer.model, alternate_seed=True)
+
     def test_trainer_eval_mrpc(self):
         MODEL_ID = "bert-base-cased-finetuned-mrpc"
         tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)