From 57420b103e2a99aea0f5f80e98216029f7349af2 Mon Sep 17 00:00:00 2001
From: Alex Hedges <aphedges@users.noreply.github.com>
Date: Thu, 7 Oct 2021 09:22:11 -0400
Subject: [PATCH] Add missing whitespace to multiline strings (#13916)

---
 src/transformers/benchmark/benchmark_args.py         |  2 +-
 src/transformers/benchmark/benchmark_tf.py           |  8 ++++----
 src/transformers/benchmark/benchmark_utils.py        |  2 +-
 src/transformers/configuration_utils.py              |  2 +-
 .../convert_pytorch_checkpoint_to_tf2.py             |  2 +-
 src/transformers/data/data_collator.py               |  2 +-
 .../feature_extraction_sequence_utils.py             |  2 +-
 src/transformers/generation_beam_search.py           |  4 ++--
 src/transformers/generation_logits_process.py        |  2 +-
 src/transformers/generation_tf_utils.py              |  2 +-
 src/transformers/generation_utils.py                 |  2 +-
 src/transformers/hf_argparser.py                     |  4 ++--
 src/transformers/modeling_flax_pytorch_utils.py      |  2 +-
 src/transformers/modeling_tf_utils.py                |  2 +-
 src/transformers/modeling_utils.py                   |  8 ++++----
 src/transformers/models/bart/configuration_bart.py   |  2 +-
 .../models/beit/feature_extraction_beit.py           |  2 +-
 .../bert_japanese/tokenization_bert_japanese.py      |  2 +-
 .../models/big_bird/modeling_big_bird.py             |  2 +-
 .../bigbird_pegasus/modeling_bigbird_pegasus.py      |  2 +-
 src/transformers/models/canine/modeling_canine.py    |  2 +-
 .../models/clip/feature_extraction_clip.py           |  2 +-
 src/transformers/models/cpm/tokenization_cpm.py      |  2 +-
 src/transformers/models/cpm/tokenization_cpm_fast.py |  2 +-
 .../models/deit/feature_extraction_deit.py           |  2 +-
 .../models/detr/feature_extraction_detr.py           |  2 +-
 src/transformers/models/electra/modeling_electra.py  |  2 +-
 .../models/electra/modeling_tf_electra.py            |  2 +-
 .../encoder_decoder/modeling_encoder_decoder.py      |  2 +-
 .../models/gpt_neo/configuration_gpt_neo.py          | 10 +++++-----
 .../models/hubert/configuration_hubert.py            |  6 +++---
 .../layoutlmv2/feature_extraction_layoutlmv2.py      |  2 +-
 .../models/layoutlmv2/tokenization_layoutlmv2.py     | 12 ++++++------
 .../layoutlmv2/tokenization_layoutlmv2_fast.py       |  2 +-
 src/transformers/models/luke/tokenization_luke.py    |  8 ++++----
 src/transformers/models/rag/retrieval_rag.py         |  2 +-
 .../models/roformer/tokenization_roformer.py         |  2 +-
 .../models/roformer/tokenization_utils.py            |  2 +-
 .../modeling_speech_encoder_decoder.py               |  8 ++++----
 .../speech_to_text/configuration_speech_to_text.py   |  6 +++---
 .../feature_extraction_speech_to_text.py             |  4 ++--
 .../models/squeezebert/modeling_squeezebert.py       |  2 +-
 src/transformers/models/tapas/modeling_tapas.py      |  4 ++--
 src/transformers/models/tapas/tokenization_tapas.py  |  6 +++---
 .../models/transfo_xl/tokenization_transfo_xl.py     |  2 +-
 .../models/visual_bert/modeling_visual_bert.py       |  6 +++---
 .../models/vit/feature_extraction_vit.py             |  2 +-
 .../models/wav2vec2/configuration_wav2vec2.py        |  6 +++---
 .../models/wav2vec2/feature_extraction_wav2vec2.py   |  4 ++--
 .../models/wav2vec2/modeling_wav2vec2.py             |  2 +-
 .../xlm_prophetnet/tokenization_xlm_prophetnet.py    |  4 ++--
 src/transformers/onnx/features.py                    |  2 +-
 src/transformers/tokenization_utils.py               |  6 +++---
 src/transformers/tokenization_utils_base.py          |  4 ++--
 src/transformers/trainer.py                          |  6 +++---
 src/transformers/training_args.py                    |  8 ++++----
 56 files changed, 101 insertions(+), 101 deletions(-)

diff --git a/src/transformers/benchmark/benchmark_args.py b/src/transformers/benchmark/benchmark_args.py
index 28f92eab1a..40bf47da83 100644
--- a/src/transformers/benchmark/benchmark_args.py
+++ b/src/transformers/benchmark/benchmark_args.py
@@ -69,7 +69,7 @@ class PyTorchBenchmarkArguments(BenchmarkArguments):
         default="O1",
         metadata={
             "help": (
-                "For fp16: Apex AMP optimization level selected in ['O0', 'O1', 'O2', and 'O3']."
+                "For fp16: Apex AMP optimization level selected in ['O0', 'O1', 'O2', and 'O3']. "
                 "See details at https://nvidia.github.io/apex/amp.html"
             )
         },
diff --git a/src/transformers/benchmark/benchmark_tf.py b/src/transformers/benchmark/benchmark_tf.py
index 7495d449ed..b5b2c2109e 100644
--- a/src/transformers/benchmark/benchmark_tf.py
+++ b/src/transformers/benchmark/benchmark_tf.py
@@ -231,10 +231,10 @@ class TensorFlowBenchmark(Benchmark):
 
     def _measure_memory(self, func: Callable[[], None]) -> [Memory, MemorySummary]:
         logger.info(
-            "Note that TensorFlow allocates more memory than"
-            "it might need to speed up computation."
-            "The memory reported here corresponds to the memory"
-            "reported by `nvidia-smi`, which can vary depending"
+            "Note that TensorFlow allocates more memory than "
+            "it might need to speed up computation. "
+            "The memory reported here corresponds to the memory "
+            "reported by `nvidia-smi`, which can vary depending "
             "on total available memory on the GPU that is used."
         )
         with self.args.strategy.scope():
diff --git a/src/transformers/benchmark/benchmark_utils.py b/src/transformers/benchmark/benchmark_utils.py
index 87d8ec986e..c8a25ccf21 100644
--- a/src/transformers/benchmark/benchmark_utils.py
+++ b/src/transformers/benchmark/benchmark_utils.py
@@ -801,7 +801,7 @@ class Benchmark(ABC):
                 info["cpu_ram_mb"] = bytes_to_mega_bytes(psutil.virtual_memory().total)
             else:
                 logger.warning(
-                    "Psutil not installed, we won't log available CPU memory."
+                    "Psutil not installed, we won't log available CPU memory. "
                     "Install psutil (pip install psutil) to log available CPU memory."
                 )
                 info["cpu_ram_mb"] = "N/A"
diff --git a/src/transformers/configuration_utils.py b/src/transformers/configuration_utils.py
index 42bce23d1b..65dcc5f1f8 100755
--- a/src/transformers/configuration_utils.py
+++ b/src/transformers/configuration_utils.py
@@ -314,7 +314,7 @@ class PretrainedConfig(PushToHubMixin):
         allowed_problem_types = ("regression", "single_label_classification", "multi_label_classification")
         if self.problem_type is not None and self.problem_type not in allowed_problem_types:
             raise ValueError(
-                f"The config parameter `problem_type` was not understood: received {self.problem_type}"
+                f"The config parameter `problem_type` was not understood: received {self.problem_type} "
                 "but only 'regression', 'single_label_classification' and 'multi_label_classification' are valid."
             )
 
diff --git a/src/transformers/convert_pytorch_checkpoint_to_tf2.py b/src/transformers/convert_pytorch_checkpoint_to_tf2.py
index da92e0fdc4..315afeccd9 100755
--- a/src/transformers/convert_pytorch_checkpoint_to_tf2.py
+++ b/src/transformers/convert_pytorch_checkpoint_to_tf2.py
@@ -444,7 +444,7 @@ if __name__ == "__main__":
         type=str,
         help="The config json file corresponding to the pre-trained model. \n"
         "This specifies the model architecture. If not given and "
-        "--pytorch_checkpoint_path is not given or is a shortcut name"
+        "--pytorch_checkpoint_path is not given or is a shortcut name "
         "use the configuration associated to the shortcut name on the AWS",
     )
     parser.add_argument(
diff --git a/src/transformers/data/data_collator.py b/src/transformers/data/data_collator.py
index f5f155be5c..72d4c151bd 100644
--- a/src/transformers/data/data_collator.py
+++ b/src/transformers/data/data_collator.py
@@ -905,7 +905,7 @@ class DataCollatorForWholeWordMask(DataCollatorForLanguageModeling):
         """
         if not isinstance(self.tokenizer, (BertTokenizer, BertTokenizerFast)):
             warnings.warn(
-                "DataCollatorForWholeWordMask is only suitable for BertTokenizer-like tokenizers."
+                "DataCollatorForWholeWordMask is only suitable for BertTokenizer-like tokenizers. "
                 "Please refer to the documentation for more information."
             )
 
diff --git a/src/transformers/feature_extraction_sequence_utils.py b/src/transformers/feature_extraction_sequence_utils.py
index 644673f24b..2ea6aa2260 100644
--- a/src/transformers/feature_extraction_sequence_utils.py
+++ b/src/transformers/feature_extraction_sequence_utils.py
@@ -137,7 +137,7 @@ class SequenceFeatureExtractor(FeatureExtractionMixin):
         # The model's main input name, usually `input_values`, has be passed for padding
         if self.model_input_names[0] not in processed_features:
             raise ValueError(
-                "You should supply an instance of :class:`~transformers.BatchFeature` or list of :class:`~transformers.BatchFeature` to this method"
+                "You should supply an instance of :class:`~transformers.BatchFeature` or list of :class:`~transformers.BatchFeature` to this method "
                 f"that includes {self.model_input_names[0]}, but you provided {list(processed_features.keys())}"
             )
 
diff --git a/src/transformers/generation_beam_search.py b/src/transformers/generation_beam_search.py
index cebe754af2..83da0fc677 100644
--- a/src/transformers/generation_beam_search.py
+++ b/src/transformers/generation_beam_search.py
@@ -194,9 +194,9 @@ class BeamSearchScorer(BeamScorer):
 
         if "max_length" in kwargs:
             warnings.warn(
-                "Passing `max_length` to BeamSearchScorer is deprecated and has no effect."
+                "Passing `max_length` to BeamSearchScorer is deprecated and has no effect. "
                 "`max_length` should be passed directly to `beam_search(...)`, `beam_sample(...)`"
-                ",or `group_beam_search(...)`."
+                ", or `group_beam_search(...)`."
             )
 
     @property
diff --git a/src/transformers/generation_logits_process.py b/src/transformers/generation_logits_process.py
index 1a1e7db5e0..bf26ded6c9 100644
--- a/src/transformers/generation_logits_process.py
+++ b/src/transformers/generation_logits_process.py
@@ -438,7 +438,7 @@ class NoBadWordsLogitsProcessor(LogitsProcessor):
                     banned_mask_list.append([idx, token])
                 else:
                     logger.error(
-                        f"An invalid bad word ID is defined: {token}. This ID is not contained in the"
+                        f"An invalid bad word ID is defined: {token}. This ID is not contained in the "
                         f"vocabulary, and is therefore ignored."
                     )
         if not banned_mask_list and self.static_bad_words_mask is None:
diff --git a/src/transformers/generation_tf_utils.py b/src/transformers/generation_tf_utils.py
index f9ca928ae9..0652516674 100644
--- a/src/transformers/generation_tf_utils.py
+++ b/src/transformers/generation_tf_utils.py
@@ -533,7 +533,7 @@ class TFGenerationMixin:
         # We cannot generate if the model does not have a LM head
         if self.get_output_embeddings() is None:
             raise AttributeError(
-                "You tried to generate sequences with a model that does not have a LM Head."
+                "You tried to generate sequences with a model that does not have a LM Head. "
                 "Please use another model class (e.g. `TFOpenAIGPTLMHeadModel`, `TFXLNetLMHeadModel`, `TFGPT2LMHeadModel`, `TFCTRLLMHeadModel`, `TFT5ForConditionalGeneration`, `TFTransfoXLLMHeadModel`)"
             )
 
diff --git a/src/transformers/generation_utils.py b/src/transformers/generation_utils.py
index 75cdb488bd..b13efbc947 100644
--- a/src/transformers/generation_utils.py
+++ b/src/transformers/generation_utils.py
@@ -935,7 +935,7 @@ class GenerationMixin:
         if input_ids.shape[-1] >= max_length:
             input_ids_string = "decoder_input_ids" if self.config.is_encoder_decoder else "input_ids"
             logger.warning(
-                f"Input length of {input_ids_string} is {input_ids.shape[-1]}, but ``max_length`` is set to {max_length}."
+                f"Input length of {input_ids_string} is {input_ids.shape[-1]}, but ``max_length`` is set to {max_length}. "
                 "This can lead to unexpected behavior. You should consider increasing ``config.max_length`` or ``max_length``."
             )
 
diff --git a/src/transformers/hf_argparser.py b/src/transformers/hf_argparser.py
index 4cb3d1e8b1..2c67821fd5 100644
--- a/src/transformers/hf_argparser.py
+++ b/src/transformers/hf_argparser.py
@@ -84,8 +84,8 @@ class HfArgumentParser(ArgumentParser):
             # it is provided as a third-party extension mechanism.
             if isinstance(field.type, str):
                 raise ImportError(
-                    "This implementation is not compatible with Postponed Evaluation of Annotations (PEP 563),"
-                    "which can be opted in from Python 3.7 with `from __future__ import annotations`."
+                    "This implementation is not compatible with Postponed Evaluation of Annotations (PEP 563), "
+                    "which can be opted in from Python 3.7 with `from __future__ import annotations`. "
                     "We will add compatibility when Python 3.9 is released."
                 )
             typestring = str(field.type)
diff --git a/src/transformers/modeling_flax_pytorch_utils.py b/src/transformers/modeling_flax_pytorch_utils.py
index 7b1588e95b..e8f93e7526 100644
--- a/src/transformers/modeling_flax_pytorch_utils.py
+++ b/src/transformers/modeling_flax_pytorch_utils.py
@@ -230,7 +230,7 @@ def load_flax_weights_in_pytorch_model(pt_model, flax_state):
         if flax_key in pt_model_dict:
             if flax_tensor.shape != pt_model_dict[flax_key].shape:
                 raise ValueError(
-                    f"Flax checkpoint seems to be incorrect. Weight {flax_key_tuple} was expected"
+                    f"Flax checkpoint seems to be incorrect. Weight {flax_key_tuple} was expected "
                     f"to be of shape {pt_model_dict[flax_key].shape}, but is {flax_tensor.shape}."
                 )
             else:
diff --git a/src/transformers/modeling_tf_utils.py b/src/transformers/modeling_tf_utils.py
index 18d864e39d..9626a01e98 100644
--- a/src/transformers/modeling_tf_utils.py
+++ b/src/transformers/modeling_tf_utils.py
@@ -304,7 +304,7 @@ def booleans_processing(config, **kwargs):
             or ("use_cache" in kwargs and kwargs["use_cache"] not in (None, config.use_cache))
         ):
             tf_logger.warning(
-                "The parameters `output_attentions`, `output_hidden_states` and `use_cache` cannot be updated when calling a model."
+                "The parameters `output_attentions`, `output_hidden_states` and `use_cache` cannot be updated when calling a model. "
                 "They have to be set to True/False in the config object (i.e.: `config=XConfig.from_pretrained('name', output_attentions=True)`)."
             )
 
diff --git a/src/transformers/modeling_utils.py b/src/transformers/modeling_utils.py
index e8d2d0a454..2579102125 100644
--- a/src/transformers/modeling_utils.py
+++ b/src/transformers/modeling_utils.py
@@ -777,7 +777,7 @@ class PreTrainedModel(nn.Module, ModuleUtilsMixin, GenerationMixin, PushToHubMix
 
         if not isinstance(old_embeddings, nn.Embedding):
             raise TypeError(
-                f"Old embeddings are of type {type(old_embeddings)}, which is not an instance of {nn.Embedding}."
+                f"Old embeddings are of type {type(old_embeddings)}, which is not an instance of {nn.Embedding}. "
                 f"You should either use a different resize function or make sure that `old_embeddings` are an instance of {nn.Embedding}."
             )
 
@@ -848,7 +848,7 @@ class PreTrainedModel(nn.Module, ModuleUtilsMixin, GenerationMixin, PushToHubMix
 
         if not isinstance(old_lm_head, nn.Linear):
             raise TypeError(
-                f"Old language model head is of type {type(old_lm_head)}, which is not an instance of {nn.Linear}."
+                f"Old language model head is of type {type(old_lm_head)}, which is not an instance of {nn.Linear}. "
                 f"You should either use a different resize function or make sure that `old_lm_head` are an instance of {nn.Linear}."
             )
 
@@ -1344,8 +1344,8 @@ class PreTrainedModel(nn.Module, ModuleUtilsMixin, GenerationMixin, PushToHubMix
                     except (UnicodeDecodeError, ValueError):
                         raise OSError(
                             f"Unable to load weights from pytorch checkpoint file for '{pretrained_model_name_or_path}' "
-                            f"at '{resolved_archive_file}'"
-                            "If you tried to load a PyTorch model from a TF 2.0 checkpoint, please set from_tf=True. "
+                            f"at '{resolved_archive_file}'. "
+                            "If you tried to load a PyTorch model from a TF 2.0 checkpoint, please set from_tf=True."
                         )
 
             # set dtype to instantiate the model under:
diff --git a/src/transformers/models/bart/configuration_bart.py b/src/transformers/models/bart/configuration_bart.py
index 6efbe4ca51..86ca38a61d 100644
--- a/src/transformers/models/bart/configuration_bart.py
+++ b/src/transformers/models/bart/configuration_bart.py
@@ -175,7 +175,7 @@ class BartConfig(PretrainedConfig):
         if self.forced_bos_token_id is None and kwargs.get("force_bos_token_to_be_generated", False):
             self.forced_bos_token_id = self.bos_token_id
             warnings.warn(
-                f"Please make sure the config includes `forced_bos_token_id={self.bos_token_id}` in future versions."
+                f"Please make sure the config includes `forced_bos_token_id={self.bos_token_id}` in future versions. "
                 "The config can simply be saved and uploaded again to be fixed."
             )
 
diff --git a/src/transformers/models/beit/feature_extraction_beit.py b/src/transformers/models/beit/feature_extraction_beit.py
index 4bca0a14c8..f5f6b87fc0 100644
--- a/src/transformers/models/beit/feature_extraction_beit.py
+++ b/src/transformers/models/beit/feature_extraction_beit.py
@@ -132,7 +132,7 @@ class BeitFeatureExtractor(FeatureExtractionMixin, ImageFeatureExtractionMixin):
 
         if not valid_images:
             raise ValueError(
-                "Images must of type `PIL.Image.Image`, `np.ndarray` or `torch.Tensor` (single example),"
+                "Images must of type `PIL.Image.Image`, `np.ndarray` or `torch.Tensor` (single example), "
                 "`List[PIL.Image.Image]`, `List[np.ndarray]` or `List[torch.Tensor]` (batch of examples)."
             )
 
diff --git a/src/transformers/models/bert_japanese/tokenization_bert_japanese.py b/src/transformers/models/bert_japanese/tokenization_bert_japanese.py
index ecd7df9b03..41c686e41e 100644
--- a/src/transformers/models/bert_japanese/tokenization_bert_japanese.py
+++ b/src/transformers/models/bert_japanese/tokenization_bert_japanese.py
@@ -272,7 +272,7 @@ class MecabTokenizer:
                 dic_dir = unidic.DICDIR
                 if not os.path.isdir(dic_dir):
                     raise RuntimeError(
-                        "The unidic dictionary itself is not found."
+                        "The unidic dictionary itself is not found. "
                         "See https://github.com/polm/unidic-py for installation."
                     )
 
diff --git a/src/transformers/models/big_bird/modeling_big_bird.py b/src/transformers/models/big_bird/modeling_big_bird.py
index 84a428591e..bdd55494f8 100755
--- a/src/transformers/models/big_bird/modeling_big_bird.py
+++ b/src/transformers/models/big_bird/modeling_big_bird.py
@@ -2066,7 +2066,7 @@ class BigBirdModel(BigBirdPreTrainedModel):
                 "+ additional buffer: config.num_random_blocks * config.block_size "
                 f"= {max_tokens_to_attend} with config.block_size "
                 f"= {self.config.block_size}, config.num_random_blocks "
-                f"= {self.config.num_random_blocks}."
+                f"= {self.config.num_random_blocks}. "
                 "Changing attention type to 'original_full'..."
             )
             self.set_attention_type("original_full")
diff --git a/src/transformers/models/bigbird_pegasus/modeling_bigbird_pegasus.py b/src/transformers/models/bigbird_pegasus/modeling_bigbird_pegasus.py
index 736f612602..db21e9684e 100755
--- a/src/transformers/models/bigbird_pegasus/modeling_bigbird_pegasus.py
+++ b/src/transformers/models/bigbird_pegasus/modeling_bigbird_pegasus.py
@@ -1858,7 +1858,7 @@ class BigBirdPegasusEncoder(BigBirdPegasusPreTrainedModel):
                 "+ additional buffer: config.num_random_blocks * config.block_size "
                 f"= {max_tokens_to_attend} with config.block_size "
                 f"= {self.config.block_size}, config.num_random_blocks "
-                f"= {self.config.num_random_blocks}."
+                f"= {self.config.num_random_blocks}. "
                 "Changing attention type to 'original_full'..."
             )
             self.set_attention_type("original_full")
diff --git a/src/transformers/models/canine/modeling_canine.py b/src/transformers/models/canine/modeling_canine.py
index a13505d3a0..7afad51c1d 100644
--- a/src/transformers/models/canine/modeling_canine.py
+++ b/src/transformers/models/canine/modeling_canine.py
@@ -549,7 +549,7 @@ class CanineAttention(nn.Module):
         self.local = local
         if attend_from_chunk_width < attend_from_chunk_stride:
             raise ValueError(
-                "`attend_from_chunk_width` < `attend_from_chunk_stride`"
+                "`attend_from_chunk_width` < `attend_from_chunk_stride` "
                 "would cause sequence positions to get skipped."
             )
         if attend_to_chunk_width < attend_to_chunk_stride:
diff --git a/src/transformers/models/clip/feature_extraction_clip.py b/src/transformers/models/clip/feature_extraction_clip.py
index 74a70918b7..b6256b4686 100644
--- a/src/transformers/models/clip/feature_extraction_clip.py
+++ b/src/transformers/models/clip/feature_extraction_clip.py
@@ -129,7 +129,7 @@ class CLIPFeatureExtractor(FeatureExtractionMixin, ImageFeatureExtractionMixin):
 
         if not valid_images:
             raise ValueError(
-                "Images must of type `PIL.Image.Image`, `np.ndarray` or `torch.Tensor` (single example),"
+                "Images must of type `PIL.Image.Image`, `np.ndarray` or `torch.Tensor` (single example), "
                 "`List[PIL.Image.Image]`, `List[np.ndarray]` or `List[torch.Tensor]` (batch of examples)."
             )
 
diff --git a/src/transformers/models/cpm/tokenization_cpm.py b/src/transformers/models/cpm/tokenization_cpm.py
index 6dfe01d362..7410128a92 100644
--- a/src/transformers/models/cpm/tokenization_cpm.py
+++ b/src/transformers/models/cpm/tokenization_cpm.py
@@ -92,7 +92,7 @@ class CpmTokenizer(XLNetTokenizer):
             import jieba
         except ModuleNotFoundError as error:
             raise error.__class__(
-                "You need to install jieba to use CpmTokenizer or CpmTokenizerFast."
+                "You need to install jieba to use CpmTokenizer or CpmTokenizerFast. "
                 "See https://pypi.org/project/jieba/ for installation."
             )
         self.jieba = jieba
diff --git a/src/transformers/models/cpm/tokenization_cpm_fast.py b/src/transformers/models/cpm/tokenization_cpm_fast.py
index 66fb0ef93e..24a856c73c 100644
--- a/src/transformers/models/cpm/tokenization_cpm_fast.py
+++ b/src/transformers/models/cpm/tokenization_cpm_fast.py
@@ -95,7 +95,7 @@ class CpmTokenizerFast(XLNetTokenizerFast):
             import jieba
         except ModuleNotFoundError as error:
             raise error.__class__(
-                "You need to install jieba to use CpmTokenizer or CpmTokenizerFast."
+                "You need to install jieba to use CpmTokenizer or CpmTokenizerFast. "
                 "See https://pypi.org/project/jieba/ for installation."
             )
         self.jieba = jieba
diff --git a/src/transformers/models/deit/feature_extraction_deit.py b/src/transformers/models/deit/feature_extraction_deit.py
index 591630fff7..f9174be06d 100644
--- a/src/transformers/models/deit/feature_extraction_deit.py
+++ b/src/transformers/models/deit/feature_extraction_deit.py
@@ -132,7 +132,7 @@ class DeiTFeatureExtractor(FeatureExtractionMixin, ImageFeatureExtractionMixin):
 
         if not valid_images:
             raise ValueError(
-                "Images must of type `PIL.Image.Image`, `np.ndarray` or `torch.Tensor` (single example),"
+                "Images must of type `PIL.Image.Image`, `np.ndarray` or `torch.Tensor` (single example), "
                 "`List[PIL.Image.Image]`, `List[np.ndarray]` or `List[torch.Tensor]` (batch of examples)."
             )
 
diff --git a/src/transformers/models/detr/feature_extraction_detr.py b/src/transformers/models/detr/feature_extraction_detr.py
index 238ac6a0d1..09962b5057 100644
--- a/src/transformers/models/detr/feature_extraction_detr.py
+++ b/src/transformers/models/detr/feature_extraction_detr.py
@@ -483,7 +483,7 @@ class DetrFeatureExtractor(FeatureExtractionMixin, ImageFeatureExtractionMixin):
 
         if not valid_images:
             raise ValueError(
-                "Images must of type `PIL.Image.Image`, `np.ndarray` or `torch.Tensor` (single example),"
+                "Images must of type `PIL.Image.Image`, `np.ndarray` or `torch.Tensor` (single example), "
                 "`List[PIL.Image.Image]`, `List[np.ndarray]` or `List[torch.Tensor]` (batch of examples)."
             )
 
diff --git a/src/transformers/models/electra/modeling_electra.py b/src/transformers/models/electra/modeling_electra.py
index 1f44b23522..c81e4fb557 100644
--- a/src/transformers/models/electra/modeling_electra.py
+++ b/src/transformers/models/electra/modeling_electra.py
@@ -799,7 +799,7 @@ ELECTRA_INPUTS_DOCSTRING = r"""
 @add_start_docstrings(
     "The bare Electra Model transformer outputting raw hidden-states without any specific head on top. Identical to "
     "the BERT model except that it uses an additional linear layer between the embedding layer and the encoder if the "
-    "hidden size and embedding size are different."
+    "hidden size and embedding size are different. "
     ""
     "Both the generator and discriminator checkpoints may be loaded into this model.",
     ELECTRA_START_DOCSTRING,
diff --git a/src/transformers/models/electra/modeling_tf_electra.py b/src/transformers/models/electra/modeling_tf_electra.py
index 878395af00..aad2a787d4 100644
--- a/src/transformers/models/electra/modeling_tf_electra.py
+++ b/src/transformers/models/electra/modeling_tf_electra.py
@@ -719,7 +719,7 @@ ELECTRA_INPUTS_DOCSTRING = r"""
 @add_start_docstrings(
     "The bare Electra Model transformer outputting raw hidden-states without any specific head on top. Identical to "
     "the BERT model except that it uses an additional linear layer between the embedding layer and the encoder if the "
-    "hidden size and embedding size are different."
+    "hidden size and embedding size are different. "
     ""
     "Both the generator and discriminator checkpoints may be loaded into this model.",
     ELECTRA_START_DOCSTRING,
diff --git a/src/transformers/models/encoder_decoder/modeling_encoder_decoder.py b/src/transformers/models/encoder_decoder/modeling_encoder_decoder.py
index c5e6404827..f3473a53a8 100644
--- a/src/transformers/models/encoder_decoder/modeling_encoder_decoder.py
+++ b/src/transformers/models/encoder_decoder/modeling_encoder_decoder.py
@@ -482,7 +482,7 @@ class EncoderDecoderModel(PreTrainedModel):
 
     def resize_token_embeddings(self, *args, **kwargs):
         raise NotImplementedError(
-            "Resizing the embedding layers via the EncoderDecoderModel directly is not supported."
+            "Resizing the embedding layers via the EncoderDecoderModel directly is not supported. "
             "Please use the respective methods of the wrapped objects (model.encoder.resize_token_embeddings(...) or model.decoder.resize_token_embeddings(...))"
         )
 
diff --git a/src/transformers/models/gpt_neo/configuration_gpt_neo.py b/src/transformers/models/gpt_neo/configuration_gpt_neo.py
index d5069fb017..959d0bc7de 100644
--- a/src/transformers/models/gpt_neo/configuration_gpt_neo.py
+++ b/src/transformers/models/gpt_neo/configuration_gpt_neo.py
@@ -151,11 +151,11 @@ class GPTNeoConfig(PretrainedConfig):
 
         if len(self.attention_layers) != self.num_layers:
             raise ValueError(
-                "Configuration for convolutional module is incorrect."
-                "It is required that `len(config.attention_layers)` == `config.num_layers`"
-                f"but is `len(config.attention_layers) = {len(self.attention_layers)}`,"
-                f"`config.num_layers = {self.num_layers}`."
-                "`config.attention_layers` is prepared using `config.attention_types`."
+                "Configuration for convolutional module is incorrect. "
+                "It is required that `len(config.attention_layers)` == `config.num_layers` "
+                f"but is `len(config.attention_layers) = {len(self.attention_layers)}`, "
+                f"`config.num_layers = {self.num_layers}`. "
+                "`config.attention_layers` is prepared using `config.attention_types`. "
                 "Please verify the value of `config.attention_types` argument."
             )
 
diff --git a/src/transformers/models/hubert/configuration_hubert.py b/src/transformers/models/hubert/configuration_hubert.py
index 682158bca8..30f47b5344 100644
--- a/src/transformers/models/hubert/configuration_hubert.py
+++ b/src/transformers/models/hubert/configuration_hubert.py
@@ -211,9 +211,9 @@ class HubertConfig(PretrainedConfig):
             or (len(self.conv_dim) != self.num_feat_extract_layers)
         ):
             raise ValueError(
-                "Configuration for convolutional layers is incorrect."
-                "It is required that `len(config.conv_dim)` == `len(config.conv_stride)` == `len(config.conv_kernel)`,"
-                f"but is `len(config.conv_dim) = {len(self.conv_dim)}`, `len(config.conv_stride)"
+                "Configuration for convolutional layers is incorrect. "
+                "It is required that `len(config.conv_dim)` == `len(config.conv_stride)` == `len(config.conv_kernel)`, "
+                f"but is `len(config.conv_dim) = {len(self.conv_dim)}`, `len(config.conv_stride) "
                 f"= {len(self.conv_stride)}`, `len(config.conv_kernel) = {len(self.conv_kernel)}`."
             )
 
diff --git a/src/transformers/models/layoutlmv2/feature_extraction_layoutlmv2.py b/src/transformers/models/layoutlmv2/feature_extraction_layoutlmv2.py
index 42ba7ba448..0fa06b670b 100644
--- a/src/transformers/models/layoutlmv2/feature_extraction_layoutlmv2.py
+++ b/src/transformers/models/layoutlmv2/feature_extraction_layoutlmv2.py
@@ -181,7 +181,7 @@ class LayoutLMv2FeatureExtractor(FeatureExtractionMixin, ImageFeatureExtractionM
 
         if not valid_images:
             raise ValueError(
-                "Images must of type `PIL.Image.Image`, `np.ndarray` or `torch.Tensor` (single example),"
+                "Images must be of type `PIL.Image.Image`, `np.ndarray` or `torch.Tensor` (single example), "
                 "`List[PIL.Image.Image]`, `List[np.ndarray]` or `List[torch.Tensor]` (batch of examples), "
                 f"but is of type {type(images)}."
             )
diff --git a/src/transformers/models/layoutlmv2/tokenization_layoutlmv2.py b/src/transformers/models/layoutlmv2/tokenization_layoutlmv2.py
index 28b7f69449..de9a187115 100644
--- a/src/transformers/models/layoutlmv2/tokenization_layoutlmv2.py
+++ b/src/transformers/models/layoutlmv2/tokenization_layoutlmv2.py
@@ -431,7 +431,7 @@ class LayoutLMv2Tokenizer(PreTrainedTokenizer):
                 raise ValueError("text input must of type `str` (single example) or `List[str]` (batch of examples). ")
             if not isinstance(text_pair, (list, tuple)):
                 raise ValueError(
-                    "words must of type `List[str]` (single pretokenized example),"
+                    "words must be of type `List[str]` (single pretokenized example), "
                     "or `List[List[str]]` (batch of pretokenized examples)."
                 )
         else:
@@ -599,7 +599,7 @@ class LayoutLMv2Tokenizer(PreTrainedTokenizer):
 
         if return_offsets_mapping:
             raise NotImplementedError(
-                "return_offset_mapping is not available when using Python tokenizers."
+                "return_offset_mapping is not available when using Python tokenizers. "
                 "To use this feature, change your tokenizer to one deriving from "
                 "transformers.PreTrainedTokenizerFast."
             )
@@ -838,9 +838,9 @@ class LayoutLMv2Tokenizer(PreTrainedTokenizer):
     ) -> BatchEncoding:
         if return_offsets_mapping:
             raise NotImplementedError(
-                "return_offset_mapping is not available when using Python tokenizers."
+                "return_offset_mapping is not available when using Python tokenizers. "
                 "To use this feature, change your tokenizer to one deriving from "
-                "transformers.PreTrainedTokenizerFast."
+                "transformers.PreTrainedTokenizerFast. "
                 "More information on available tokenizers at "
                 "https://github.com/huggingface/transformers/pull/2674"
             )
@@ -1158,7 +1158,7 @@ class LayoutLMv2Tokenizer(PreTrainedTokenizer):
                 labels = labels[:-num_tokens_to_remove]
             else:
                 logger.error(
-                    f"We need to remove {num_tokens_to_remove} to truncate the input"
+                    f"We need to remove {num_tokens_to_remove} to truncate the input "
                     f"but the first sequence has a length {len(ids)}. "
                     f"Please select another truncation strategy than {truncation_strategy}, "
                     f"for instance 'longest_first' or 'only_second'."
@@ -1172,7 +1172,7 @@ class LayoutLMv2Tokenizer(PreTrainedTokenizer):
                 pair_token_boxes = pair_token_boxes[:-num_tokens_to_remove]
             else:
                 logger.error(
-                    f"We need to remove {num_tokens_to_remove} to truncate the input"
+                    f"We need to remove {num_tokens_to_remove} to truncate the input "
                     f"but the second sequence has a length {len(pair_ids)}. "
                     f"Please select another truncation strategy than {truncation_strategy}, "
                     f"for instance 'longest_first' or 'only_first'."
diff --git a/src/transformers/models/layoutlmv2/tokenization_layoutlmv2_fast.py b/src/transformers/models/layoutlmv2/tokenization_layoutlmv2_fast.py
index b7e6f83a7d..fa9569f2ed 100644
--- a/src/transformers/models/layoutlmv2/tokenization_layoutlmv2_fast.py
+++ b/src/transformers/models/layoutlmv2/tokenization_layoutlmv2_fast.py
@@ -235,7 +235,7 @@ class LayoutLMv2TokenizerFast(PreTrainedTokenizerFast):
                 raise ValueError("text input must of type `str` (single example) or `List[str]` (batch of examples). ")
             if not isinstance(text_pair, (list, tuple)):
                 raise ValueError(
-                    "words must of type `List[str]` (single pretokenized example),"
+                    "words must be of type `List[str]` (single pretokenized example), "
                     "or `List[List[str]]` (batch of pretokenized examples)."
                 )
         else:
diff --git a/src/transformers/models/luke/tokenization_luke.py b/src/transformers/models/luke/tokenization_luke.py
index fb9d7f7acd..648fb2a6ff 100644
--- a/src/transformers/models/luke/tokenization_luke.py
+++ b/src/transformers/models/luke/tokenization_luke.py
@@ -519,9 +519,9 @@ class LukeTokenizer(RobertaTokenizer):
 
         if return_offsets_mapping:
             raise NotImplementedError(
-                "return_offset_mapping is not available when using Python tokenizers."
+                "return_offset_mapping is not available when using Python tokenizers. "
                 "To use this feature, change your tokenizer to one deriving from "
-                "transformers.PreTrainedTokenizerFast."
+                "transformers.PreTrainedTokenizerFast. "
                 "More information on available tokenizers at "
                 "https://github.com/huggingface/transformers/pull/2674"
             )
@@ -683,7 +683,7 @@ class LukeTokenizer(RobertaTokenizer):
     ) -> BatchEncoding:
         if return_offsets_mapping:
             raise NotImplementedError(
-                "return_offset_mapping is not available when using Python tokenizers."
+                "return_offset_mapping is not available when using Python tokenizers. "
                 "To use this feature, change your tokenizer to one deriving from "
                 "transformers.PreTrainedTokenizerFast."
             )
@@ -1308,7 +1308,7 @@ class LukeTokenizer(RobertaTokenizer):
         # The model's main input name, usually `input_ids`, has be passed for padding
         if self.model_input_names[0] not in encoded_inputs:
             raise ValueError(
-                "You should supply an encoding or a list of encodings to this method"
+                "You should supply an encoding or a list of encodings to this method "
                 f"that includes {self.model_input_names[0]}, but you provided {list(encoded_inputs.keys())}"
             )
 
diff --git a/src/transformers/models/rag/retrieval_rag.py b/src/transformers/models/rag/retrieval_rag.py
index 5481909d16..aaf8fcdf77 100644
--- a/src/transformers/models/rag/retrieval_rag.py
+++ b/src/transformers/models/rag/retrieval_rag.py
@@ -122,7 +122,7 @@ class LegacyIndex(Index):
         except EnvironmentError:
             msg = (
                 f"Can't load '{archive_file}'. Make sure that:\n\n"
-                f"- '{index_path}' is a correct remote path to a directory containing a file named {filename}"
+                f"- '{index_path}' is a correct remote path to a directory containing a file named {filename}\n\n"
                 f"- or '{index_path}' is the correct path to a directory containing a file named {filename}.\n\n"
             )
             raise EnvironmentError(msg)
diff --git a/src/transformers/models/roformer/tokenization_roformer.py b/src/transformers/models/roformer/tokenization_roformer.py
index 5eed180067..c2fb637c4c 100644
--- a/src/transformers/models/roformer/tokenization_roformer.py
+++ b/src/transformers/models/roformer/tokenization_roformer.py
@@ -161,7 +161,7 @@ class RoFormerTokenizer(PreTrainedTokenizer):
             import rjieba
         except ImportError:
             raise ImportError(
-                "You need to install rjieba to use RoFormerTokenizer."
+                "You need to install rjieba to use RoFormerTokenizer. "
                 "See https://pypi.org/project/rjieba/ for installation."
             )
         self.jieba = rjieba
diff --git a/src/transformers/models/roformer/tokenization_utils.py b/src/transformers/models/roformer/tokenization_utils.py
index 195e6eff2d..9f5f1546fb 100644
--- a/src/transformers/models/roformer/tokenization_utils.py
+++ b/src/transformers/models/roformer/tokenization_utils.py
@@ -32,7 +32,7 @@ class JiebaPreTokenizer:
             import rjieba
         except ImportError:
             raise ImportError(
-                "You need to install rjieba to use RoFormerTokenizer."
+                "You need to install rjieba to use RoFormerTokenizer. "
                 "See https://pypi.org/project/rjieba/ for installation."
             )
         self.jieba = rjieba
diff --git a/src/transformers/models/speech_encoder_decoder/modeling_speech_encoder_decoder.py b/src/transformers/models/speech_encoder_decoder/modeling_speech_encoder_decoder.py
index 47e1839fea..a576caff58 100644
--- a/src/transformers/models/speech_encoder_decoder/modeling_speech_encoder_decoder.py
+++ b/src/transformers/models/speech_encoder_decoder/modeling_speech_encoder_decoder.py
@@ -352,7 +352,7 @@ class SpeechEncoderDecoderModel(PreTrainedModel):
                 decoder_config = AutoConfig.from_pretrained(decoder_pretrained_model_name_or_path)
                 if decoder_config.is_decoder is False or decoder_config.add_cross_attention is False:
                     logger.info(
-                        f"Initializing {decoder_pretrained_model_name_or_path} as a decoder model."
+                        f"Initializing {decoder_pretrained_model_name_or_path} as a decoder model. "
                         "Cross attention layers are added to {decoder_pretrained_model_name_or_path} "
                         "and randomly initialized if {decoder_pretrained_model_name_or_path}'s architecture allows for cross attention layers."
                     )
@@ -363,9 +363,9 @@ class SpeechEncoderDecoderModel(PreTrainedModel):
 
             if kwargs_decoder["config"].is_decoder is False or kwargs_decoder["config"].add_cross_attention is False:
                 logger.warning(
-                    f"Decoder model {decoder_pretrained_model_name_or_path} is not initialized as a decoder."
+                    f"Decoder model {decoder_pretrained_model_name_or_path} is not initialized as a decoder. "
                     f"In order to initialize {decoder_pretrained_model_name_or_path} as a decoder, "
-                    "make sure that the attributes `is_decoder` and `add_cross_attention` of `decoder_config`"
+                    "make sure that the attributes `is_decoder` and `add_cross_attention` of `decoder_config` "
                     "passed to `.from_encoder_decoder_pretrained(...)` are set to `True` or do not pass a `decoder_config` to `.from_encoder_decoder_pretrained(...)`"
                 )
 
@@ -513,7 +513,7 @@ class SpeechEncoderDecoderModel(PreTrainedModel):
 
     def resize_token_embeddings(self, *args, **kwargs):
         raise NotImplementedError(
-            "Resizing the embedding layers via the SpeechEncoderDecoderModel directly is not supported."
+            "Resizing the embedding layers via the SpeechEncoderDecoderModel directly is not supported. "
             "Please use the respective methods of the wrapped decoder object (model.decoder.resize_token_embeddings(...))"
         )
 
diff --git a/src/transformers/models/speech_to_text/configuration_speech_to_text.py b/src/transformers/models/speech_to_text/configuration_speech_to_text.py
index 821362d2e6..75fbca0f90 100644
--- a/src/transformers/models/speech_to_text/configuration_speech_to_text.py
+++ b/src/transformers/models/speech_to_text/configuration_speech_to_text.py
@@ -175,9 +175,9 @@ class Speech2TextConfig(PretrainedConfig):
 
         if len(self.conv_kernel_sizes) != self.num_conv_layers:
             raise ValueError(
-                "Configuration for convolutional module is incorrect."
-                "It is required that `len(config.conv_kernel_sizes)` == `config.num_conv_layers`"
-                f"but is `len(config.conv_kernel_sizes) = {len(self.conv_kernel_sizes)}`,"
+                "Configuration for convolutional module is incorrect. "
+                "It is required that `len(config.conv_kernel_sizes)` == `config.num_conv_layers` "
+                f"but is `len(config.conv_kernel_sizes) = {len(self.conv_kernel_sizes)}`, "
                 f"`config.num_conv_layers = {self.num_conv_layers}`."
             )
 
diff --git a/src/transformers/models/speech_to_text/feature_extraction_speech_to_text.py b/src/transformers/models/speech_to_text/feature_extraction_speech_to_text.py
index d4d7e30e3e..fd82de9399 100644
--- a/src/transformers/models/speech_to_text/feature_extraction_speech_to_text.py
+++ b/src/transformers/models/speech_to_text/feature_extraction_speech_to_text.py
@@ -189,12 +189,12 @@ class Speech2TextFeatureExtractor(SequenceFeatureExtractor):
         if sampling_rate is not None:
             if sampling_rate != self.sampling_rate:
                 raise ValueError(
-                    f"The model corresponding to this feature extractor: {self} was trained using a sampling rate of {self.sampling_rate}."
+                    f"The model corresponding to this feature extractor: {self} was trained using a sampling rate of {self.sampling_rate}. "
                     f"Please make sure that the provided `raw_speech` input was sampled with {self.sampling_rate} and not {sampling_rate}."
                 )
         else:
             logger.warning(
-                "It is strongly recommended to pass the `sampling_rate` argument to this function."
+                "It is strongly recommended to pass the `sampling_rate` argument to this function. "
                 "Failing to do so can result in silent errors that might be hard to debug."
             )
 
diff --git a/src/transformers/models/squeezebert/modeling_squeezebert.py b/src/transformers/models/squeezebert/modeling_squeezebert.py
index 32691d9be3..577b07b2fc 100644
--- a/src/transformers/models/squeezebert/modeling_squeezebert.py
+++ b/src/transformers/models/squeezebert/modeling_squeezebert.py
@@ -300,7 +300,7 @@ class SqueezeBertEncoder(nn.Module):
         super().__init__()
 
         assert config.embedding_size == config.hidden_size, (
-            "If you want embedding_size != intermediate hidden_size,"
+            "If you want embedding_size != intermediate hidden_size, "
             "please insert a Conv1d layer to adjust the number of channels "
             "before the first SqueezeBertModule."
         )
diff --git a/src/transformers/models/tapas/modeling_tapas.py b/src/transformers/models/tapas/modeling_tapas.py
index 9506216522..2f1a21fac3 100644
--- a/src/transformers/models/tapas/modeling_tapas.py
+++ b/src/transformers/models/tapas/modeling_tapas.py
@@ -54,8 +54,8 @@ if is_scatter_available():
         from torch_scatter import scatter
     except OSError:
         logger.error(
-            "TAPAS models are not usable since `torch_scatter` can't be loaded."
-            "It seems you have `torch_scatter` installed with the wrong CUDA version."
+            "TAPAS models are not usable since `torch_scatter` can't be loaded. "
+            "It seems you have `torch_scatter` installed with the wrong CUDA version. "
             "Please try to reinstall it following the instructions here: https://github.com/rusty1s/pytorch_scatter."
         )
 
diff --git a/src/transformers/models/tapas/tokenization_tapas.py b/src/transformers/models/tapas/tokenization_tapas.py
index ab433c8be6..11fbe1cf20 100644
--- a/src/transformers/models/tapas/tokenization_tapas.py
+++ b/src/transformers/models/tapas/tokenization_tapas.py
@@ -712,7 +712,7 @@ class TapasTokenizer(PreTrainedTokenizer):
 
         if return_offsets_mapping:
             raise NotImplementedError(
-                "return_offset_mapping is not available when using Python tokenizers."
+                "return_offset_mapping is not available when using Python tokenizers. "
                 "To use this feature, change your tokenizer to one deriving from "
                 "transformers.PreTrainedTokenizerFast."
             )
@@ -981,7 +981,7 @@ class TapasTokenizer(PreTrainedTokenizer):
 
         if return_offsets_mapping:
             raise NotImplementedError(
-                "return_offset_mapping is not available when using Python tokenizers."
+                "return_offset_mapping is not available when using Python tokenizers. "
                 "To use this feature, change your tokenizer to one deriving from "
                 "transformers.PreTrainedTokenizerFast."
             )
@@ -1159,7 +1159,7 @@ class TapasTokenizer(PreTrainedTokenizer):
 
         if max_length is not None and len(input_ids) > max_length:
             raise ValueError(
-                "Could not encode the query and table header given the maximum length. Encoding the query and table"
+                "Could not encode the query and table header given the maximum length. Encoding the query and table "
                 f"header results in a length of {len(input_ids)} which is higher than the max_length of {max_length}"
             )
 
diff --git a/src/transformers/models/transfo_xl/tokenization_transfo_xl.py b/src/transformers/models/transfo_xl/tokenization_transfo_xl.py
index e380197a5f..eb0f2f5a18 100644
--- a/src/transformers/models/transfo_xl/tokenization_transfo_xl.py
+++ b/src/transformers/models/transfo_xl/tokenization_transfo_xl.py
@@ -236,7 +236,7 @@ class TransfoXLTokenizer(PreTrainedTokenizer):
         except Exception as e:
             raise ValueError(
                 f"Unable to parse file {pretrained_vocab_file}. Unknown format. "
-                "If you tried to load a model saved through TransfoXLTokenizerFast,"
+                "If you tried to load a model saved through TransfoXLTokenizerFast, "
                 "please note they are not compatible."
             ) from e
 
diff --git a/src/transformers/models/visual_bert/modeling_visual_bert.py b/src/transformers/models/visual_bert/modeling_visual_bert.py
index 85b3c75781..6d8d51b4ab 100755
--- a/src/transformers/models/visual_bert/modeling_visual_bert.py
+++ b/src/transformers/models/visual_bert/modeling_visual_bert.py
@@ -174,7 +174,7 @@ class VisualBertEmbeddings(nn.Module):
                 if visual_position_embeddings.size(1) != visual_embeds.size(1):
                     if visual_position_embeddings.size(1) < visual_embeds.size(1):
                         raise ValueError(
-                            f"Visual position embeddings length: {visual_position_embeddings.size(1)}"
+                            f"Visual position embeddings length: {visual_position_embeddings.size(1)} "
                             f"should be the same as `visual_embeds` length: {visual_embeds.size(1)}"
                         )
                     visual_position_embeddings = visual_position_embeddings[:, : visual_embeds.size(1), :]
@@ -973,7 +973,7 @@ class VisualBertForPreTraining(VisualBertPreTrainedModel):
             total_size = attention_mask.size(-1) + visual_attention_mask.size(-1)
             if labels.size(-1) != total_size:
                 raise ValueError(
-                    f"The labels provided should have same sequence length as total attention mask."
+                    f"The labels provided should have same sequence length as total attention mask. "
                     f"Found labels with sequence length {labels.size(-1)}, expected {total_size}."
                 )
 
@@ -986,7 +986,7 @@ class VisualBertForPreTraining(VisualBertPreTrainedModel):
             total_size = attention_mask.size(-1) + visual_attention_mask.size(-1)
             if labels.size(-1) != total_size:
                 raise ValueError(
-                    f"The labels provided should have same sequence length as total attention mask."
+                    f"The labels provided should have same sequence length as total attention mask. "
                     f"Found labels with sequence length {labels.size(-1)}, expected {total_size}."
                 )
 
diff --git a/src/transformers/models/vit/feature_extraction_vit.py b/src/transformers/models/vit/feature_extraction_vit.py
index f700088372..0ac709ea2a 100644
--- a/src/transformers/models/vit/feature_extraction_vit.py
+++ b/src/transformers/models/vit/feature_extraction_vit.py
@@ -122,7 +122,7 @@ class ViTFeatureExtractor(FeatureExtractionMixin, ImageFeatureExtractionMixin):
 
         if not valid_images:
             raise ValueError(
-                "Images must of type `PIL.Image.Image`, `np.ndarray` or `torch.Tensor` (single example),"
+                "Images must be of type `PIL.Image.Image`, `np.ndarray` or `torch.Tensor` (single example), "
                 "`List[PIL.Image.Image]`, `List[np.ndarray]` or `List[torch.Tensor]` (batch of examples)."
             )
 
diff --git a/src/transformers/models/wav2vec2/configuration_wav2vec2.py b/src/transformers/models/wav2vec2/configuration_wav2vec2.py
index 905bcd94a9..44e156f2b2 100644
--- a/src/transformers/models/wav2vec2/configuration_wav2vec2.py
+++ b/src/transformers/models/wav2vec2/configuration_wav2vec2.py
@@ -237,9 +237,9 @@ class Wav2Vec2Config(PretrainedConfig):
             or (len(self.conv_dim) != self.num_feat_extract_layers)
         ):
             raise ValueError(
-                "Configuration for convolutional layers is incorrect."
-                "It is required that `len(config.conv_dim)` == `len(config.conv_stride)` == `len(config.conv_kernel)`,"
-                f"but is `len(config.conv_dim) = {len(self.conv_dim)}`, `len(config.conv_stride)"
+                "Configuration for convolutional layers is incorrect. "
+                "It is required that `len(config.conv_dim)` == `len(config.conv_stride)` == `len(config.conv_kernel)`, "
+                f"but is `len(config.conv_dim) = {len(self.conv_dim)}`, `len(config.conv_stride) "
                 f"= {len(self.conv_stride)}`, `len(config.conv_kernel) = {len(self.conv_kernel)}`."
             )
 
diff --git a/src/transformers/models/wav2vec2/feature_extraction_wav2vec2.py b/src/transformers/models/wav2vec2/feature_extraction_wav2vec2.py
index 6aa60df560..0f64412bbd 100644
--- a/src/transformers/models/wav2vec2/feature_extraction_wav2vec2.py
+++ b/src/transformers/models/wav2vec2/feature_extraction_wav2vec2.py
@@ -170,12 +170,12 @@ class Wav2Vec2FeatureExtractor(SequenceFeatureExtractor):
         if sampling_rate is not None:
             if sampling_rate != self.sampling_rate:
                 raise ValueError(
-                    f"The model corresponding to this feature extractor: {self} was trained using a sampling rate of {self.sampling_rate}."
+                    f"The model corresponding to this feature extractor: {self} was trained using a sampling rate of {self.sampling_rate}. "
                     f"Please make sure that the provided `raw_speech` input was sampled with {self.sampling_rate} and not {sampling_rate}."
                 )
         else:
             logger.warning(
-                "It is strongly recommended to pass the ``sampling_rate`` argument to this function."
+                "It is strongly recommended to pass the ``sampling_rate`` argument to this function. "
                 "Failing to do so can result in silent errors that might be hard to debug."
             )
 
diff --git a/src/transformers/models/wav2vec2/modeling_wav2vec2.py b/src/transformers/models/wav2vec2/modeling_wav2vec2.py
index efef45930f..7ede44928b 100755
--- a/src/transformers/models/wav2vec2/modeling_wav2vec2.py
+++ b/src/transformers/models/wav2vec2/modeling_wav2vec2.py
@@ -1421,7 +1421,7 @@ class Wav2Vec2ForCTC(Wav2Vec2PreTrainedModel):
             raise ValueError(
                 f"You are trying to instantiate {self.__class__} with a configuration that "
                 "does not define the vocabulary size of the language model head. Please "
-                "instantiate the model as follows: `Wav2Vec2ForCTC.from_pretrained(..., vocab_size=vocab_size)`."
+                "instantiate the model as follows: `Wav2Vec2ForCTC.from_pretrained(..., vocab_size=vocab_size)` "
                 "or define `vocab_size` of your model's configuration."
             )
         self.lm_head = nn.Linear(config.hidden_size, config.vocab_size)
diff --git a/src/transformers/models/xlm_prophetnet/tokenization_xlm_prophetnet.py b/src/transformers/models/xlm_prophetnet/tokenization_xlm_prophetnet.py
index 2334a6d03b..bb23b1d7aa 100644
--- a/src/transformers/models/xlm_prophetnet/tokenization_xlm_prophetnet.py
+++ b/src/transformers/models/xlm_prophetnet/tokenization_xlm_prophetnet.py
@@ -152,7 +152,7 @@ class XLMProphetNetTokenizer(PreTrainedTokenizer):
             import sentencepiece as spm
         except ImportError:
             logger.warning(
-                "You need to install SentencePiece to use XLMRobertaTokenizer: https://github.com/google/sentencepiece"
+                "You need to install SentencePiece to use XLMProphetNetTokenizer: https://github.com/google/sentencepiece "
                 "pip install sentencepiece"
             )
             raise
@@ -191,7 +191,7 @@ class XLMProphetNetTokenizer(PreTrainedTokenizer):
             import sentencepiece as spm
         except ImportError:
             logger.warning(
-                "You need to install SentencePiece to use XLMRobertaTokenizer: https://github.com/google/sentencepiece"
+                "You need to install SentencePiece to use XLMProphetNetTokenizer: https://github.com/google/sentencepiece "
                 "pip install sentencepiece"
             )
             raise
diff --git a/src/transformers/onnx/features.py b/src/transformers/onnx/features.py
index 92caafb3c5..937189b28f 100644
--- a/src/transformers/onnx/features.py
+++ b/src/transformers/onnx/features.py
@@ -109,7 +109,7 @@ class FeaturesManager:
         task = FeaturesManager.feature_to_task(feature)
         if task not in FeaturesManager._TASKS_TO_AUTOMODELS:
             raise KeyError(
-                f"Unknown task: {feature}."
+                f"Unknown task: {feature}. "
                 f"Possible values are {list(FeaturesManager._TASKS_TO_AUTOMODELS.values())}"
             )
 
diff --git a/src/transformers/tokenization_utils.py b/src/transformers/tokenization_utils.py
index 5b2170b697..f10267fd4b 100644
--- a/src/transformers/tokenization_utils.py
+++ b/src/transformers/tokenization_utils.py
@@ -596,9 +596,9 @@ class PreTrainedTokenizer(PreTrainedTokenizerBase):
 
         if return_offsets_mapping:
             raise NotImplementedError(
-                "return_offset_mapping is not available when using Python tokenizers."
+                "return_offset_mapping is not available when using Python tokenizers. "
                 "To use this feature, change your tokenizer to one deriving from "
-                "transformers.PreTrainedTokenizerFast."
+                "transformers.PreTrainedTokenizerFast. "
                 "More information on available tokenizers at "
                 "https://github.com/huggingface/transformers/pull/2674"
             )
@@ -673,7 +673,7 @@ class PreTrainedTokenizer(PreTrainedTokenizerBase):
 
         if return_offsets_mapping:
             raise NotImplementedError(
-                "return_offset_mapping is not available when using Python tokenizers."
+                "return_offset_mapping is not available when using Python tokenizers. "
                 "To use this feature, change your tokenizer to one deriving from "
                 "transformers.PreTrainedTokenizerFast."
             )
diff --git a/src/transformers/tokenization_utils_base.py b/src/transformers/tokenization_utils_base.py
index fde52ccb4b..9519c7cfb7 100644
--- a/src/transformers/tokenization_utils_base.py
+++ b/src/transformers/tokenization_utils_base.py
@@ -3059,7 +3059,7 @@ class PreTrainedTokenizerBase(SpecialTokensMixin, PushToHubMixin):
                 pair_ids = pair_ids[:-num_tokens_to_remove]
             else:
                 logger.error(
-                    f"We need to remove {num_tokens_to_remove} to truncate the input"
+                    f"We need to remove {num_tokens_to_remove} to truncate the input "
                     f"but the second sequence has a length {len(pair_ids)}. "
                     f"Please select another truncation strategy than {truncation_strategy}, "
                     f"for instance 'longest_first' or 'only_first'."
@@ -3250,7 +3250,7 @@ class PreTrainedTokenizerBase(SpecialTokensMixin, PushToHubMixin):
         """
         assert already_has_special_tokens and token_ids_1 is None, (
             "You cannot use ``already_has_special_tokens=False`` with this tokenizer. "
-            "Please use a slow (full python) tokenizer to activate this argument."
+            "Please use a slow (full python) tokenizer to activate this argument. "
             "Or set `return_special_tokens_mask=True` when calling the encoding method "
             "to get the special tokens mask in any tokenizer. "
         )
diff --git a/src/transformers/trainer.py b/src/transformers/trainer.py
index cc24f73570..e59b5982f8 100755
--- a/src/transformers/trainer.py
+++ b/src/transformers/trainer.py
@@ -385,7 +385,7 @@ class Trainer:
         self.optimizer, self.lr_scheduler = optimizers
         if model_init is not None and (self.optimizer is not None or self.lr_scheduler is not None):
             raise RuntimeError(
-                "Passing a `model_init` is incompatible with providing the `optimizers` argument."
+                "Passing a `model_init` is incompatible with providing the `optimizers` argument. "
                 "You should subclass `Trainer` and override the `create_optimizer_and_scheduler` method."
             )
         default_callbacks = DEFAULT_CALLBACKS + get_reporting_integration_callbacks(self.args.report_to)
@@ -1737,8 +1737,8 @@ class Trainer:
             if backend is None:
                 raise RuntimeError(
                     "At least one of optuna or ray should be installed. "
-                    "To install optuna run `pip install optuna`."
-                    "To install ray run `pip install ray[tune]`."
+                    "To install optuna run `pip install optuna`. "
+                    "To install ray run `pip install ray[tune]`. "
                     "To install sigopt run `pip install sigopt`."
                 )
         backend = HPSearchBackend(backend)
diff --git a/src/transformers/training_args.py b/src/transformers/training_args.py
index 85abc7f890..90f44a4f36 100644
--- a/src/transformers/training_args.py
+++ b/src/transformers/training_args.py
@@ -385,7 +385,7 @@ class TrainingArguments:
         default=False,
         metadata={
             "help": (
-                "Overwrite the content of the output directory."
+                "Overwrite the content of the output directory. "
                 "Use this to continue training if output_dir points to a checkpoint directory."
             )
         },
@@ -420,7 +420,7 @@ class TrainingArguments:
     per_gpu_eval_batch_size: Optional[int] = field(
         default=None,
         metadata={
-            "help": "Deprecated, the use of `--per_device_eval_batch_size` is preferred."
+            "help": "Deprecated, the use of `--per_device_eval_batch_size` is preferred. "
             "Batch size per GPU/TPU core/CPU for evaluation."
         },
     )
@@ -492,7 +492,7 @@ class TrainingArguments:
         default=None,
         metadata={
             "help": (
-                "Limit the total amount of checkpoints."
+                "Limit the total amount of checkpoints. "
                 "Deletes the older checkpoints in the output_dir. Default is unlimited checkpoints"
             )
         },
@@ -514,7 +514,7 @@ class TrainingArguments:
         default="O1",
         metadata={
             "help": (
-                "For fp16: Apex AMP optimization level selected in ['O0', 'O1', 'O2', and 'O3']."
+                "For fp16: Apex AMP optimization level selected in ['O0', 'O1', 'O2', and 'O3']. "
                 "See details at https://nvidia.github.io/apex/amp.html"
             )
         },