From f882966004742ab6af9f03539a749d23792eeaae Mon Sep 17 00:00:00 2001
From: Stas Bekman
Date: Mon, 8 Mar 2021 07:15:55 -0800
Subject: [PATCH] fix double wrapping + test (#10583)

---
 src/transformers/trainer.py |  4 ++++
 tests/test_trainer.py       | 13 +++++++++++++
 2 files changed, 17 insertions(+)

diff --git a/src/transformers/trainer.py b/src/transformers/trainer.py
index f7fe0a2391..0fa496dcc7 100755
--- a/src/transformers/trainer.py
+++ b/src/transformers/trainer.py
@@ -738,6 +738,10 @@ class Trainer:
         if self.deepspeed:
             return self.deepspeed
 
+        # train/eval could be run multiple-times - if already wrapped, don't re-wrap it again
+        if unwrap_model(model) is not model:
+            return model
+
         # Mixed precision training with apex (torch < 1.6)
         if self.use_apex and training:
             model, self.optimizer = amp.initialize(model, self.optimizer, opt_level=self.args.fp16_opt_level)
diff --git a/tests/test_trainer.py b/tests/test_trainer.py
index 105cedd4de..09801dd6aa 100644
--- a/tests/test_trainer.py
+++ b/tests/test_trainer.py
@@ -574,6 +574,19 @@ class TrainerIntegrationTest(unittest.TestCase):
         trainer.train()
         self.check_trained_model(trainer.model)
 
+    @require_torch_multi_gpu
+    def test_run_seq2seq_double_train_wrap_once(self):
+        # test that we don't wrap the model more than once
+        # since wrapping primarily happens on multi-gpu setup we want multiple gpus to test for
+        # example DataParallel(DataParallel(model))
+
+        trainer = get_regression_trainer()
+        trainer.train()
+        model_wrapped_before = trainer.model_wrapped
+        trainer.train()
+        model_wrapped_after = trainer.model_wrapped
+        self.assertIs(model_wrapped_before, model_wrapped_after, "should be not wrapped twice")
+
     def test_can_resume_training(self):
         if torch.cuda.device_count() > 2:
             # This test will fail for more than 2 GPUs since the batch size will get bigger and with the number of