diff --git a/examples/language-modeling/run_clm.py b/examples/language-modeling/run_clm.py
index bc3da2d586..94821a24f0 100644
--- a/examples/language-modeling/run_clm.py
+++ b/examples/language-modeling/run_clm.py
@@ -362,12 +362,12 @@ def main():
     # Training
     if training_args.do_train:
         if last_checkpoint is not None:
-            model_path = last_checkpoint
+            checkpoint = last_checkpoint
         elif model_args.model_name_or_path is not None and os.path.isdir(model_args.model_name_or_path):
-            model_path = model_args.model_name_or_path
+            checkpoint = model_args.model_name_or_path
         else:
-            model_path = None
-        train_result = trainer.train(model_path=model_path)
+            checkpoint = None
+        train_result = trainer.train(resume_from_checkpoint=checkpoint)
         trainer.save_model()  # Saves the tokenizer too for easy upload
 
         output_train_file = os.path.join(training_args.output_dir, "train_results.txt")
diff --git a/examples/language-modeling/run_mlm.py b/examples/language-modeling/run_mlm.py
index d9e4302d95..dd4c57c59d 100644
--- a/examples/language-modeling/run_mlm.py
+++ b/examples/language-modeling/run_mlm.py
@@ -403,12 +403,12 @@ def main():
     # Training
     if training_args.do_train:
         if last_checkpoint is not None:
-            model_path = last_checkpoint
+            checkpoint = last_checkpoint
         elif model_args.model_name_or_path is not None and os.path.isdir(model_args.model_name_or_path):
-            model_path = model_args.model_name_or_path
+            checkpoint = model_args.model_name_or_path
         else:
-            model_path = None
-        train_result = trainer.train(model_path=model_path)
+            checkpoint = None
+        train_result = trainer.train(resume_from_checkpoint=checkpoint)
         trainer.save_model()  # Saves the tokenizer too for easy upload
 
         output_train_file = os.path.join(training_args.output_dir, "train_results.txt")
diff --git a/examples/language-modeling/run_mlm_wwm.py b/examples/language-modeling/run_mlm_wwm.py
index 94f571ea64..e08c5e979d 100644
--- a/examples/language-modeling/run_mlm_wwm.py
+++ b/examples/language-modeling/run_mlm_wwm.py
@@ -355,12 +355,12 @@ def main():
     # Training
     if training_args.do_train:
         if last_checkpoint is not None:
-            model_path = last_checkpoint
+            checkpoint = last_checkpoint
         elif model_args.model_name_or_path is not None and os.path.isdir(model_args.model_name_or_path):
-            model_path = model_args.model_name_or_path
+            checkpoint = model_args.model_name_or_path
         else:
-            model_path = None
-        train_result = trainer.train(model_path=model_path)
+            checkpoint = None
+        train_result = trainer.train(resume_from_checkpoint=checkpoint)
         trainer.save_model()  # Saves the tokenizer too for easy upload
 
         output_train_file = os.path.join(training_args.output_dir, "train_results.txt")
diff --git a/examples/language-modeling/run_plm.py b/examples/language-modeling/run_plm.py
index f27303ca9f..74023817a2 100644
--- a/examples/language-modeling/run_plm.py
+++ b/examples/language-modeling/run_plm.py
@@ -384,12 +384,12 @@ def main():
     # Training
     if training_args.do_train:
         if last_checkpoint is not None:
-            model_path = last_checkpoint
+            checkpoint = last_checkpoint
         elif model_args.model_name_or_path is not None and os.path.isdir(model_args.model_name_or_path):
-            model_path = model_args.model_name_or_path
+            checkpoint = model_args.model_name_or_path
         else:
-            model_path = None
-        train_result = trainer.train(model_path=model_path)
+            checkpoint = None
+        train_result = trainer.train(resume_from_checkpoint=checkpoint)
         trainer.save_model()  # Saves the tokenizer too for easy upload
 
         output_train_file = os.path.join(training_args.output_dir, "train_results.txt")
diff --git a/examples/multiple-choice/run_swag.py b/examples/multiple-choice/run_swag.py
index 504f5f3c94..60b25ae6aa 100644
--- a/examples/multiple-choice/run_swag.py
+++ b/examples/multiple-choice/run_swag.py
@@ -342,12 +342,12 @@ def main():
     # Training
     if training_args.do_train:
         if last_checkpoint is not None:
-            model_path = last_checkpoint
+            checkpoint = last_checkpoint
         elif os.path.isdir(model_args.model_name_or_path):
-            model_path = model_args.model_name_or_path
+            checkpoint = model_args.model_name_or_path
         else:
-            model_path = None
-        train_result = trainer.train(model_path=model_path)
+            checkpoint = None
+        train_result = trainer.train(resume_from_checkpoint=checkpoint)
         trainer.save_model()  # Saves the tokenizer too for easy upload
 
         output_train_file = os.path.join(training_args.output_dir, "train_results.txt")
diff --git a/examples/question-answering/run_qa.py b/examples/question-answering/run_qa.py
index 55f113de9a..11e49d5ec4 100644
--- a/examples/question-answering/run_qa.py
+++ b/examples/question-answering/run_qa.py
@@ -463,12 +463,12 @@ def main():
     # Training
     if training_args.do_train:
         if last_checkpoint is not None:
-            model_path = last_checkpoint
+            checkpoint = last_checkpoint
         elif os.path.isdir(model_args.model_name_or_path):
-            model_path = model_args.model_name_or_path
+            checkpoint = model_args.model_name_or_path
         else:
-            model_path = None
-        train_result = trainer.train(model_path=model_path)
+            checkpoint = None
+        train_result = trainer.train(resume_from_checkpoint=checkpoint)
         trainer.save_model()  # Saves the tokenizer too for easy upload
 
         output_train_file = os.path.join(training_args.output_dir, "train_results.txt")
diff --git a/examples/question-answering/run_qa_beam_search.py b/examples/question-answering/run_qa_beam_search.py
index 1bf8f9aa08..5c61a0fe0b 100644
--- a/examples/question-answering/run_qa_beam_search.py
+++ b/examples/question-answering/run_qa_beam_search.py
@@ -502,12 +502,12 @@ def main():
     # Training
     if training_args.do_train:
         if last_checkpoint is not None:
-            model_path = last_checkpoint
+            checkpoint = last_checkpoint
         elif os.path.isdir(model_args.model_name_or_path):
-            model_path = model_args.model_name_or_path
+            checkpoint = model_args.model_name_or_path
         else:
-            model_path = None
-        train_result = trainer.train(model_path=model_path)
+            checkpoint = None
+        train_result = trainer.train(resume_from_checkpoint=checkpoint)
         trainer.save_model()  # Saves the tokenizer too for easy upload
 
         output_train_file = os.path.join(training_args.output_dir, "train_results.txt")
diff --git a/examples/seq2seq/run_seq2seq.py b/examples/seq2seq/run_seq2seq.py
index c32e7c34db..0bbc0de2ec 100644
--- a/examples/seq2seq/run_seq2seq.py
+++ b/examples/seq2seq/run_seq2seq.py
@@ -491,12 +491,12 @@ def main():
     # Training
     if training_args.do_train:
         if last_checkpoint is not None:
-            model_path = last_checkpoint
+            checkpoint = last_checkpoint
         elif os.path.isdir(model_args.model_name_or_path):
-            model_path = model_args.model_name_or_path
+            checkpoint = model_args.model_name_or_path
         else:
-            model_path = None
-        train_result = trainer.train(model_path=model_path)
+            checkpoint = None
+        train_result = trainer.train(resume_from_checkpoint=checkpoint)
         trainer.save_model()  # Saves the tokenizer too for easy upload
 
         output_train_file = os.path.join(training_args.output_dir, "train_results.txt")
diff --git a/examples/text-classification/run_glue.py b/examples/text-classification/run_glue.py
index d48d3cc103..19a700bf2a 100644
--- a/examples/text-classification/run_glue.py
+++ b/examples/text-classification/run_glue.py
@@ -399,12 +399,12 @@ def main():
     # Training
     if training_args.do_train:
         if last_checkpoint is not None:
-            model_path = last_checkpoint
+            checkpoint = last_checkpoint
         elif os.path.isdir(model_args.model_name_or_path):
-            model_path = model_args.model_name_or_path
+            checkpoint = model_args.model_name_or_path
         else:
-            model_path = None
-        train_result = trainer.train(model_path=model_path)
+            checkpoint = None
+        train_result = trainer.train(resume_from_checkpoint=checkpoint)
         metrics = train_result.metrics
 
         trainer.save_model()  # Saves the tokenizer too for easy upload
diff --git a/examples/token-classification/run_ner.py b/examples/token-classification/run_ner.py
index 8c2d4d897d..bdfa3710b3 100644
--- a/examples/token-classification/run_ner.py
+++ b/examples/token-classification/run_ner.py
@@ -380,12 +380,12 @@ def main():
     # Training
     if training_args.do_train:
         if last_checkpoint is not None:
-            model_path = last_checkpoint
+            checkpoint = last_checkpoint
         elif os.path.isdir(model_args.model_name_or_path):
-            model_path = model_args.model_name_or_path
+            checkpoint = model_args.model_name_or_path
         else:
-            model_path = None
-        train_result = trainer.train(model_path=model_path)
+            checkpoint = None
+        train_result = trainer.train(resume_from_checkpoint=checkpoint)
         trainer.save_model()  # Saves the tokenizer too for easy upload
 
         output_train_file = os.path.join(training_args.output_dir, "train_results.txt")
diff --git a/src/transformers/integrations.py b/src/transformers/integrations.py
index 3682b8bd04..f66989381d 100644
--- a/src/transformers/integrations.py
+++ b/src/transformers/integrations.py
@@ -125,13 +125,13 @@ def run_hp_search_optuna(trainer, n_trials: int, direction: str, **kwargs) -> Be
     import optuna
 
     def _objective(trial, checkpoint_dir=None):
-        model_path = None
+        checkpoint = None
         if checkpoint_dir:
             for subdir in os.listdir(checkpoint_dir):
                 if subdir.startswith(PREFIX_CHECKPOINT_DIR):
-                    model_path = os.path.join(checkpoint_dir, subdir)
+                    checkpoint = os.path.join(checkpoint_dir, subdir)
         trainer.objective = None
-        trainer.train(model_path=model_path, trial=trial)
+        trainer.train(resume_from_checkpoint=checkpoint, trial=trial)
         # If there hasn't been any evaluation during the training loop.
         if getattr(trainer, "objective", None) is None:
             metrics = trainer.evaluate()
@@ -150,13 +150,13 @@ def run_hp_search_ray(trainer, n_trials: int, direction: str, **kwargs) -> BestR
     import ray
 
     def _objective(trial, local_trainer, checkpoint_dir=None):
-        model_path = None
+        checkpoint = None
         if checkpoint_dir:
             for subdir in os.listdir(checkpoint_dir):
                 if subdir.startswith(PREFIX_CHECKPOINT_DIR):
-                    model_path = os.path.join(checkpoint_dir, subdir)
+                    checkpoint = os.path.join(checkpoint_dir, subdir)
         local_trainer.objective = None
-        local_trainer.train(model_path=model_path, trial=trial)
+        local_trainer.train(resume_from_checkpoint=checkpoint, trial=trial)
         # If there hasn't been any evaluation during the training loop.
         if getattr(local_trainer, "objective", None) is None:
             metrics = local_trainer.evaluate()
diff --git a/src/transformers/trainer.py b/src/transformers/trainer.py
index b76a360191..fca67c7b4a 100755
--- a/src/transformers/trainer.py
+++ b/src/transformers/trainer.py
@@ -676,17 +676,33 @@ class Trainer:
 
         return model
 
-    def train(self, model_path: Optional[str] = None, trial: Union["optuna.Trial", Dict[str, Any]] = None):
+    def train(
+        self,
+        resume_from_checkpoint: Optional[str] = None,
+        trial: Union["optuna.Trial", Dict[str, Any]] = None,
+        **kwargs
+    ):
         """
         Main training entry point.
 
         Args:
-            model_path (:obj:`str`, `optional`):
-                Local path to the model if the model to train has been instantiated from a local path. If present,
-                training will resume from the optimizer/scheduler states loaded here.
+            resume_from_checkpoint (:obj:`str`, `optional`):
+                Local path to a saved checkpoint as saved by a previous instance of :class:`~transformers.Trainer`. If
+                present, training will resume from the model/optimizer/scheduler states loaded here.
             trial (:obj:`optuna.Trial` or :obj:`Dict[str, Any]`, `optional`):
                 The trial run or the hyperparameter dictionary for hyperparameter search.
+            kwargs:
+                Additional keyword arguments used to hide deprecated arguments
         """
+        if "model_path" in kwargs:
+            resume_from_checkpoint = kwargs.pop("model_path")
+            warnings.warn(
+                "`model_path` is deprecated and will be removed in a future version. Use `resume_from_checkpoint` "
+                "instead.",
+                FutureWarning,
+            )
+        if len(kwargs) > 0:
+            raise TypeError(f"train() received got unexpected keyword arguments: {', '.join(list(kwargs.keys()))}.")
         # This might change the seed so needs to run first.
         self._hp_search_setup(trial)
 
@@ -701,13 +717,13 @@ class Trainer:
             self.optimizer, self.lr_scheduler = None, None
 
         # Load potential model checkpoint
-        if model_path is not None and os.path.isfile(os.path.join(model_path, WEIGHTS_NAME)):
-            logger.info(f"Loading model from {model_path}).")
+        if resume_from_checkpoint is not None and os.path.isfile(os.path.join(resume_from_checkpoint, WEIGHTS_NAME)):
+            logger.info(f"Loading model from {resume_from_checkpoint}).")
             if isinstance(self.model, PreTrainedModel):
-                self.model = self.model.from_pretrained(model_path)
+                self.model = self.model.from_pretrained(resume_from_checkpoint)
                 model_reloaded = True
             else:
-                state_dict = torch.load(os.path.join(model_path, WEIGHTS_NAME))
+                state_dict = torch.load(os.path.join(resume_from_checkpoint, WEIGHTS_NAME))
                 self.model.load_state_dict(state_dict)
 
         # If model was re-initialized, put it on the right device and update self.model_wrapped
@@ -757,7 +773,7 @@ class Trainer:
         self.state.is_hyper_param_search = trial is not None
 
         # Check if saved optimizer or scheduler states exist
-        self._load_optimizer_and_scheduler(model_path)
+        self._load_optimizer_and_scheduler(resume_from_checkpoint)
 
         model = self.model_wrapped
 
@@ -827,8 +843,10 @@ class Trainer:
         steps_trained_in_current_epoch = 0
 
         # Check if continuing training from a checkpoint
-        if model_path and os.path.isfile(os.path.join(model_path, "trainer_state.json")):
-            self.state = TrainerState.load_from_json(os.path.join(model_path, "trainer_state.json"))
+        if resume_from_checkpoint is not None and os.path.isfile(
+            os.path.join(resume_from_checkpoint, "trainer_state.json")
+        ):
+            self.state = TrainerState.load_from_json(os.path.join(resume_from_checkpoint, "trainer_state.json"))
             epochs_trained = self.state.global_step // num_update_steps_per_epoch
             if not self.args.ignore_data_skip:
                 steps_trained_in_current_epoch = self.state.global_step % (num_update_steps_per_epoch)
@@ -1102,20 +1120,20 @@ class Trainer:
         if self.is_world_process_zero():
             self._rotate_checkpoints(use_mtime=True)
 
-    def _load_optimizer_and_scheduler(self, model_path):
+    def _load_optimizer_and_scheduler(self, checkpoint):
         """If optimizer and scheduler states exist, load them."""
-        if model_path is None:
+        if checkpoint is None:
             return
 
-        if os.path.isfile(os.path.join(model_path, "optimizer.pt")) and os.path.isfile(
-            os.path.join(model_path, "scheduler.pt")
+        if os.path.isfile(os.path.join(checkpoint, "optimizer.pt")) and os.path.isfile(
+            os.path.join(checkpoint, "scheduler.pt")
         ):
             # Load in optimizer and scheduler states
             if is_torch_tpu_available():
                 # On TPU we have to take some extra precautions to properly load the states on the right device.
-                optimizer_state = torch.load(os.path.join(model_path, "optimizer.pt"), map_location="cpu")
+                optimizer_state = torch.load(os.path.join(checkpoint, "optimizer.pt"), map_location="cpu")
                 with warnings.catch_warnings(record=True) as caught_warnings:
-                    lr_scheduler_state = torch.load(os.path.join(model_path, "scheduler.pt"), map_location="cpu")
+                    lr_scheduler_state = torch.load(os.path.join(checkpoint, "scheduler.pt"), map_location="cpu")
                 reissue_pt_warnings(caught_warnings)
 
                 xm.send_cpu_data_to_device(optimizer_state, self.args.device)
@@ -1125,15 +1143,15 @@ class Trainer:
                 self.lr_scheduler.load_state_dict(lr_scheduler_state)
             else:
                 self.optimizer.load_state_dict(
-                    torch.load(os.path.join(model_path, "optimizer.pt"), map_location=self.args.device)
+                    torch.load(os.path.join(checkpoint, "optimizer.pt"), map_location=self.args.device)
                 )
                 with warnings.catch_warnings(record=True) as caught_warnings:
-                    self.lr_scheduler.load_state_dict(torch.load(os.path.join(model_path, "scheduler.pt")))
+                    self.lr_scheduler.load_state_dict(torch.load(os.path.join(checkpoint, "scheduler.pt")))
                 reissue_pt_warnings(caught_warnings)
 
         if self.deepspeed:
             # Not sure how to check if there is a saved deepspeed checkpoint, but since it just return None if it fails to find a deepspeed checkpoint this is sort of a check-n-load function
-            self.deepspeed.load_checkpoint(model_path, load_optimizer_states=True, load_lr_scheduler_states=True)
+            self.deepspeed.load_checkpoint(checkpoint, load_optimizer_states=True, load_lr_scheduler_states=True)
 
     def hyperparameter_search(
         self,
diff --git a/templates/adding_a_new_example_script/{{cookiecutter.directory_name}}/run_{{cookiecutter.example_shortcut}}.py b/templates/adding_a_new_example_script/{{cookiecutter.directory_name}}/run_{{cookiecutter.example_shortcut}}.py
index 3d9d7c0529..8e2c834b25 100644
--- a/templates/adding_a_new_example_script/{{cookiecutter.directory_name}}/run_{{cookiecutter.example_shortcut}}.py
+++ b/templates/adding_a_new_example_script/{{cookiecutter.directory_name}}/run_{{cookiecutter.example_shortcut}}.py
@@ -341,20 +341,20 @@ def main():
     if training_args.do_train:
 {%- if cookiecutter.can_train_from_scratch == "False" %}
         if last_checkpoint is not None:
-            model_path = last_checkpoint
+            checkpoint = last_checkpoint
         elif os.path.isdir(model_args.model_name_or_path):
-            model_path = model_args.model_name_or_path
+            checkpoint = model_args.model_name_or_path
         else:
-            model_path = None
+            checkpoint = None
 {%- elif cookiecutter.can_train_from_scratch == "True" %}
         if last_checkpoint is not None:
-            model_path = last_checkpoint
+            checkpoint = last_checkpoint
         elif model_args.model_name_or_path is not None and os.path.isdir(model_args.model_name_or_path):
-            model_path = model_args.model_name_or_path
+            checkpoint = model_args.model_name_or_path
         else:
-            model_path = None
+            checkpoint = None
 {% endif %}
-        train_result = trainer.train(model_path=model_path)
+        train_result = trainer.train(resume_from_checkpoint=checkpoint)
         trainer.save_model()  # Saves the tokenizer too for easy upload
 
         output_train_file = os.path.join(training_args.output_dir, "train_results.txt")
diff --git a/tests/test_trainer.py b/tests/test_trainer.py
index 9321e30e8a..48720ccc9f 100644
--- a/tests/test_trainer.py
+++ b/tests/test_trainer.py
@@ -581,7 +581,7 @@ class TrainerIntegrationTest(unittest.TestCase):
             # Reinitialize trainer
             trainer = get_regression_trainer(output_dir=tmpdir, train_len=128, save_steps=5, learning_rate=0.1)
 
-            trainer.train(model_path=checkpoint)
+            trainer.train(resume_from_checkpoint=checkpoint)
             (a1, b1) = trainer.model.a.item(), trainer.model.b.item()
             state1 = dataclasses.asdict(trainer.state)
             self.assertEqual(a, a1)
@@ -594,7 +594,7 @@ class TrainerIntegrationTest(unittest.TestCase):
             # Reinitialize trainer and load model
             trainer = get_regression_trainer(output_dir=tmpdir, train_len=128, save_steps=5, learning_rate=0.1)
 
-            trainer.train(model_path=checkpoint)
+            trainer.train(resume_from_checkpoint=checkpoint)
             (a1, b1) = trainer.model.a.item(), trainer.model.b.item()
             state1 = dataclasses.asdict(trainer.state)
             self.assertEqual(a, a1)
@@ -617,7 +617,7 @@ class TrainerIntegrationTest(unittest.TestCase):
                 output_dir=tmpdir, train_len=128, save_steps=5, learning_rate=0.1, pretrained=False
             )
 
-            trainer.train(model_path=checkpoint)
+            trainer.train(resume_from_checkpoint=checkpoint)
             (a1, b1) = trainer.model.a.item(), trainer.model.b.item()
             state1 = dataclasses.asdict(trainer.state)
             self.assertEqual(a, a1)
@@ -632,7 +632,7 @@ class TrainerIntegrationTest(unittest.TestCase):
                 output_dir=tmpdir, train_len=128, save_steps=5, learning_rate=0.1, pretrained=False
             )
 
-            trainer.train(model_path=checkpoint)
+            trainer.train(resume_from_checkpoint=checkpoint)
             (a1, b1) = trainer.model.a.item(), trainer.model.b.item()
             state1 = dataclasses.asdict(trainer.state)
             self.assertEqual(a, a1)
@@ -670,7 +670,7 @@ class TrainerIntegrationTest(unittest.TestCase):
                 learning_rate=0.1,
             )
 
-            trainer.train(model_path=checkpoint)
+            trainer.train(resume_from_checkpoint=checkpoint)
             (a1, b1) = trainer.model.a.item(), trainer.model.b.item()
             state1 = dataclasses.asdict(trainer.state)
             self.assertEqual(a, a1)