From f882966004742ab6af9f03539a749d23792eeaae Mon Sep 17 00:00:00 2001
From: Stas Bekman <stas00@users.noreply.github.com>
Date: Mon, 8 Mar 2021 07:15:55 -0800
Subject: [PATCH] fix double wrapping + test (#10583)

---
 src/transformers/trainer.py |  4 ++++
 tests/test_trainer.py       | 13 +++++++++++++
 2 files changed, 17 insertions(+)

diff --git a/src/transformers/trainer.py b/src/transformers/trainer.py
index f7fe0a2391..0fa496dcc7 100755
--- a/src/transformers/trainer.py
+++ b/src/transformers/trainer.py
@@ -738,6 +738,10 @@ class Trainer:
         if self.deepspeed:
             return self.deepspeed
 
+        # train/eval could be run multiple times - if the model is already wrapped, don't wrap it again
+        if unwrap_model(model) is not model:
+            return model
+
         # Mixed precision training with apex (torch < 1.6)
         if self.use_apex and training:
             model, self.optimizer = amp.initialize(model, self.optimizer, opt_level=self.args.fp16_opt_level)
diff --git a/tests/test_trainer.py b/tests/test_trainer.py
index 105cedd4de..09801dd6aa 100644
--- a/tests/test_trainer.py
+++ b/tests/test_trainer.py
@@ -574,6 +574,19 @@ class TrainerIntegrationTest(unittest.TestCase):
         trainer.train()
         self.check_trained_model(trainer.model)
 
+    @require_torch_multi_gpu
+    def test_run_seq2seq_double_train_wrap_once(self):
+        # test that a second train() call does not wrap the model again
+        # wrapping mainly happens on multi-gpu setups, so multiple gpus are needed to catch,
+        # for example, DataParallel(DataParallel(model))
+
+        trainer = get_regression_trainer()
+        trainer.train()
+        model_wrapped_before = trainer.model_wrapped
+        trainer.train()
+        model_wrapped_after = trainer.model_wrapped
+        self.assertIs(model_wrapped_before, model_wrapped_after, "should be not wrapped twice")
+
     def test_can_resume_training(self):
         if torch.cuda.device_count() > 2:
             # This test will fail for more than 2 GPUs since the batch size will get bigger and with the number of