From 5a0dac53bfd6e69ae64fb3119d607445e1a308d8 Mon Sep 17 00:00:00 2001
From: Teven <teven.lescao@gmail.com>
Date: Mon, 3 Aug 2020 10:19:03 +0200
Subject: [PATCH] Empty assert hunt (#6056)

* Fixed empty asserts

* black-reformatted stragglers in templates

* More code quality checks

* Update src/transformers/convert_marian_to_pytorch.py

Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com>

* Update src/transformers/convert_marian_to_pytorch.py

Co-authored-by: Sam Shleifer <sshleifer@gmail.com>

* removed unused line as per @sshleifer

Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com>
Co-authored-by: Sam Shleifer <sshleifer@gmail.com>
---
 src/transformers/commands/train.py            |  1 -
 src/transformers/convert_marian_to_pytorch.py | 27 ++++++++++++-------
 .../data/datasets/language_modeling.py        |  4 +--
 src/transformers/data/metrics/__init__.py     |  8 ++++--
 .../data/metrics/squad_metrics.py             |  8 +++---
 src/transformers/data/processors/utils.py     |  8 ++++--
 src/transformers/data/processors/xnli.py      |  8 ++++--
 src/transformers/modeling_albert.py           |  4 ++-
 src/transformers/modeling_bert.py             |  4 ++-
 src/transformers/modeling_electra.py          |  4 ++-
 src/transformers/modeling_gpt2.py             |  4 ++-
 src/transformers/modeling_mobilebert.py       |  4 ++-
 src/transformers/modeling_openai.py           |  8 ++++--
 src/transformers/modeling_t5.py               |  4 ++-
 src/transformers/modeling_tf_albert.py        |  4 ++-
 src/transformers/modeling_tf_distilbert.py    | 12 +++++----
 src/transformers/modeling_tf_flaubert.py      | 12 ++++++---
 src/transformers/modeling_tf_openai.py        |  4 ++-
 src/transformers/modeling_tf_xlnet.py         |  3 +--
 src/transformers/modeling_transfo_xl.py       |  4 ++-
 src/transformers/modeling_xlnet.py            | 12 ++++++---
 src/transformers/tokenization_transfo_xl.py   |  4 +--
 src/transformers/tokenization_utils_base.py   |  5 ++--
 src/transformers/trainer.py                   | 14 +++++++---
 .../adding_a_new_example_script/utils_xxx.py  | 20 +++++++++-----
 templates/adding_a_new_model/modeling_xxx.py  |  4 ++-
 26 files changed, 131 insertions(+), 63 deletions(-)

diff --git a/src/transformers/commands/train.py b/src/transformers/commands/train.py
index 483da8748b..36d8831806 100644
--- a/src/transformers/commands/train.py
+++ b/src/transformers/commands/train.py
@@ -81,7 +81,6 @@ class TrainCommand(BaseTransformersCLICommand):
         self.framework = "tf" if is_tf_available() else "torch"
 
         os.makedirs(args.output, exist_ok=True)
-        assert os.path.isdir(args.output)
         self.output = args.output
 
         self.column_label = args.column_label
diff --git a/src/transformers/convert_marian_to_pytorch.py b/src/transformers/convert_marian_to_pytorch.py
index bd58534ed3..9498d8c2ef 100644
--- a/src/transformers/convert_marian_to_pytorch.py
+++ b/src/transformers/convert_marian_to_pytorch.py
@@ -166,7 +166,7 @@ def write_model_card(
     extra_markdown = f"### {hf_model_name}\n\n* source languages: {s}\n* target languages: {t}\n*  OPUS readme: [{opus_name}]({readme_url})\n"
     # combine with opus markdown
     opus_readme_path = Path(f"{repo_path}{opus_name}/README.md")
-    assert opus_readme_path.exists(), opus_readme_path
+    assert opus_readme_path.exists(), f"Readme file {opus_readme_path} not found"
     content = opus_readme_path.open().read()
     content = content.split("\n# ")[-1]  # Get the lowest level 1 header in the README -- the most recent model.
     content = "*".join(content.split("*")[1:])
@@ -231,7 +231,9 @@ def fetch_test_set(test_set_url):
     src = lmap(str.strip, lns[::4])
     gold = lmap(str.strip, lns[1::4])
     mar_model = lmap(str.strip, lns[2::4])
-    assert len(gold) == len(mar_model) == len(src)
+    assert (
+        len(gold) == len(mar_model) == len(src)
+    ), f"Gold, marian and source lengths {len(gold)}, {len(mar_model)}, {len(src)} mismatched"
     os.remove(fname)
     return src, mar_model, gold
 
@@ -374,20 +376,21 @@ class OpusState:
         self.state_dict = np.load(npz_path)
         cfg = load_config_from_state_dict(self.state_dict)
         assert cfg["dim-vocabs"][0] == cfg["dim-vocabs"][1]
-        assert "Wpos" not in self.state_dict
+        assert "Wpos" not in self.state_dict, "Wpos key in state dictionary"
         self.state_dict = dict(self.state_dict)
         self.wemb, self.final_bias = add_emb_entries(self.state_dict["Wemb"], self.state_dict[BIAS_KEY], 1)
         self.pad_token_id = self.wemb.shape[0] - 1
         cfg["vocab_size"] = self.pad_token_id + 1
         # self.state_dict['Wemb'].sha
         self.state_keys = list(self.state_dict.keys())
-        if "Wtype" in self.state_dict:
-            raise ValueError("found Wtype key")
+        assert "Wtype" not in self.state_dict, "Wtype key in state dictionary"
         self._check_layer_entries()
         self.source_dir = source_dir
         self.cfg = cfg
         hidden_size, intermediate_shape = self.state_dict["encoder_l1_ffn_W1"].shape
-        assert hidden_size == cfg["dim-emb"] == 512
+        assert (
+            hidden_size == cfg["dim-emb"] == 512
+        ), f"Hidden size {hidden_size} and configured size {cfg['dim-emb']} mismatched or not 512"
 
         # Process decoder.yml
         decoder_yml = cast_marian_config(load_yaml(source_dir / "decoder.yml"))
@@ -448,7 +451,7 @@ class OpusState:
     def load_marian_model(self) -> MarianMTModel:
         state_dict, cfg = self.state_dict, self.hf_config
 
-        assert cfg.static_position_embeddings
+        assert cfg.static_position_embeddings, "config.static_position_embeddings should be True"
         model = MarianMTModel(cfg)
 
         assert "hidden_size" not in cfg.to_dict()
@@ -476,7 +479,9 @@ class OpusState:
             raise NotImplementedError("Need to convert layernorm_embedding")
 
         assert not self.extra_keys, f"Failed to convert {self.extra_keys}"
-        assert model.model.shared.padding_idx == self.pad_token_id
+        assert (
+            model.model.shared.padding_idx == self.pad_token_id
+        ), f"Padding tokens {model.model.shared.padding_idx} and {self.pad_token_id} mismatched"
         return model
 
 
@@ -500,7 +505,9 @@ def convert(source_dir: Path, dest_dir):
     save_tokenizer(tokenizer, dest_dir)
 
     opus_state = OpusState(source_dir)
-    assert opus_state.cfg["vocab_size"] == len(tokenizer.encoder)
+    assert opus_state.cfg["vocab_size"] == len(
+        tokenizer.encoder
+    ), f"Original vocab size {opus_state.cfg['vocab_size']} and new vocab size {len(tokenizer.encoder)} mismatched"
     # save_json(opus_state.cfg, dest_dir / "marian_original_config.json")
     # ^^ Save human readable marian config for debugging
 
@@ -517,7 +524,7 @@ if __name__ == "__main__":
     args = parser.parse_args()
 
     source_dir = Path(args.src)
-    assert source_dir.exists()
+    assert source_dir.exists(), f"Source directory {source_dir} not found"
     dest_dir = f"converted-{source_dir.name}" if args.dest is None else args.dest
     convert(source_dir, dest_dir)
 
diff --git a/src/transformers/data/datasets/language_modeling.py b/src/transformers/data/datasets/language_modeling.py
index 94988a859b..5a9aeb2225 100644
--- a/src/transformers/data/datasets/language_modeling.py
+++ b/src/transformers/data/datasets/language_modeling.py
@@ -22,7 +22,7 @@ class TextDataset(Dataset):
     def __init__(
         self, tokenizer: PreTrainedTokenizer, file_path: str, block_size: int, overwrite_cache=False,
     ):
-        assert os.path.isfile(file_path)
+        assert os.path.isfile(file_path), f"Input file path {file_path} not found"
 
         block_size = block_size - tokenizer.num_special_tokens_to_add(pair=False)
 
@@ -82,7 +82,7 @@ class LineByLineTextDataset(Dataset):
     """
 
     def __init__(self, tokenizer: PreTrainedTokenizer, file_path: str, block_size: int):
-        assert os.path.isfile(file_path)
+        assert os.path.isfile(file_path), f"Input file path {file_path} not found"
         # Here, we do not cache the features, operating under the assumption
         # that we will soon use fast multithreaded tokenizers from the
         # `tokenizers` repo everywhere =)
diff --git a/src/transformers/data/metrics/__init__.py b/src/transformers/data/metrics/__init__.py
index 59ffdc8db1..3bb437602d 100644
--- a/src/transformers/data/metrics/__init__.py
+++ b/src/transformers/data/metrics/__init__.py
@@ -51,7 +51,9 @@ if _has_sklearn:
         }
 
     def glue_compute_metrics(task_name, preds, labels):
-        assert len(preds) == len(labels)
+        assert len(preds) == len(
+            labels
+        ), f"Predictions and labels have mismatched lengths {len(preds)} and {len(labels)}"
         if task_name == "cola":
             return {"mcc": matthews_corrcoef(labels, preds)}
         elif task_name == "sst-2":
@@ -78,7 +80,9 @@ if _has_sklearn:
             raise KeyError(task_name)
 
     def xnli_compute_metrics(task_name, preds, labels):
-        assert len(preds) == len(labels)
+        assert len(preds) == len(
+            labels
+        ), f"Predictions and labels have mismatched lengths {len(preds)} and {len(labels)}"
         if task_name == "xnli":
             return {"acc": simple_accuracy(preds, labels)}
         else:
diff --git a/src/transformers/data/metrics/squad_metrics.py b/src/transformers/data/metrics/squad_metrics.py
index c467fee71b..d01c34bf08 100644
--- a/src/transformers/data/metrics/squad_metrics.py
+++ b/src/transformers/data/metrics/squad_metrics.py
@@ -523,7 +523,7 @@ def compute_predictions_logits(
         if not nbest:
             nbest.append(_NbestPrediction(text="empty", start_logit=0.0, end_logit=0.0))
 
-        assert len(nbest) >= 1
+        assert len(nbest) >= 1, "No valid predictions"
 
         total_scores = []
         best_non_null_entry = None
@@ -544,7 +544,7 @@ def compute_predictions_logits(
             output["end_logit"] = entry.end_logit
             nbest_json.append(output)
 
-        assert len(nbest_json) >= 1
+        assert len(nbest_json) >= 1, "No valid predictions"
 
         if not version_2_with_negative:
             all_predictions[example.qas_id] = nbest_json[0]["text"]
@@ -739,8 +739,8 @@ def compute_predictions_log_probs(
             output["end_log_prob"] = entry.end_log_prob
             nbest_json.append(output)
 
-        assert len(nbest_json) >= 1
-        assert best_non_null_entry is not None
+        assert len(nbest_json) >= 1, "No valid predictions"
+        assert best_non_null_entry is not None, "No valid predictions"
 
         score_diff = score_null
         scores_diff_json[example.qas_id] = score_diff
diff --git a/src/transformers/data/processors/utils.py b/src/transformers/data/processors/utils.py
index 4550e5756b..7df0471608 100644
--- a/src/transformers/data/processors/utils.py
+++ b/src/transformers/data/processors/utils.py
@@ -194,8 +194,12 @@ class SingleSentenceClassificationProcessor(DataProcessor):
     def add_examples(
         self, texts_or_text_and_labels, labels=None, ids=None, overwrite_labels=False, overwrite_examples=False
     ):
-        assert labels is None or len(texts_or_text_and_labels) == len(labels)
-        assert ids is None or len(texts_or_text_and_labels) == len(ids)
+        assert labels is None or len(texts_or_text_and_labels) == len(
+            labels
+        ), f"Text and labels have mismatched lengths {len(texts_or_text_and_labels)} and {len(labels)}"
+        assert ids is None or len(texts_or_text_and_labels) == len(
+            ids
+        ), f"Text and ids have mismatched lengths {len(texts_or_text_and_labels)} and {len(ids)}"
         if ids is None:
             ids = [None] * len(texts_or_text_and_labels)
         if labels is None:
diff --git a/src/transformers/data/processors/xnli.py b/src/transformers/data/processors/xnli.py
index 6a744c6280..8564634e0d 100644
--- a/src/transformers/data/processors/xnli.py
+++ b/src/transformers/data/processors/xnli.py
@@ -45,7 +45,9 @@ class XnliProcessor(DataProcessor):
             text_a = line[0]
             text_b = line[1]
             label = "contradiction" if line[2] == "contradictory" else line[2]
-            assert isinstance(text_a, str) and isinstance(text_b, str) and isinstance(label, str)
+            assert isinstance(text_a, str), f"Training input {text_a} is not a string"
+            assert isinstance(text_b, str), f"Training input {text_b} is not a string"
+            assert isinstance(label, str), f"Training label {label} is not a string"
             examples.append(InputExample(guid=guid, text_a=text_a, text_b=text_b, label=label))
         return examples
 
@@ -63,7 +65,9 @@ class XnliProcessor(DataProcessor):
             text_a = line[6]
             text_b = line[7]
             label = line[1]
-            assert isinstance(text_a, str) and isinstance(text_b, str) and isinstance(label, str)
+            assert isinstance(text_a, str), f"Training input {text_a} is not a string"
+            assert isinstance(text_b, str), f"Training input {text_b} is not a string"
+            assert isinstance(label, str), f"Training label {label} is not a string"
             examples.append(InputExample(guid=guid, text_a=text_a, text_b=text_b, label=label))
         return examples
 
diff --git a/src/transformers/modeling_albert.py b/src/transformers/modeling_albert.py
index ef96228b5b..cbd94ce47f 100644
--- a/src/transformers/modeling_albert.py
+++ b/src/transformers/modeling_albert.py
@@ -179,7 +179,9 @@ def load_tf_weights_in_albert(model, config, tf_checkpoint_path):
         elif m_name == "kernel":
             array = np.transpose(array)
         try:
-            assert pointer.shape == array.shape
+            assert (
+                pointer.shape == array.shape
+            ), f"Pointer shape {pointer.shape} and array shape {array.shape} mismatched"
         except AssertionError as e:
             e.args += (pointer.shape, array.shape)
             raise
diff --git a/src/transformers/modeling_bert.py b/src/transformers/modeling_bert.py
index 11dd8f8b36..74c5acafbe 100644
--- a/src/transformers/modeling_bert.py
+++ b/src/transformers/modeling_bert.py
@@ -146,7 +146,9 @@ def load_tf_weights_in_bert(model, config, tf_checkpoint_path):
         elif m_name == "kernel":
             array = np.transpose(array)
         try:
-            assert pointer.shape == array.shape
+            assert (
+                pointer.shape == array.shape
+            ), f"Pointer shape {pointer.shape} and array shape {array.shape} mismatched"
         except AssertionError as e:
             e.args += (pointer.shape, array.shape)
             raise
diff --git a/src/transformers/modeling_electra.py b/src/transformers/modeling_electra.py
index 1f2cb118c0..5e4e1286d2 100644
--- a/src/transformers/modeling_electra.py
+++ b/src/transformers/modeling_electra.py
@@ -114,7 +114,9 @@ def load_tf_weights_in_electra(model, config, tf_checkpoint_path, discriminator_
             elif m_name == "kernel":
                 array = np.transpose(array)
             try:
-                assert pointer.shape == array.shape, original_name
+                assert (
+                    pointer.shape == array.shape
+                ), f"Pointer shape {pointer.shape} and array shape {array.shape} mismatched"
             except AssertionError as e:
                 e.args += (pointer.shape, array.shape)
                 raise
diff --git a/src/transformers/modeling_gpt2.py b/src/transformers/modeling_gpt2.py
index a2168726cc..3a8d104d89 100644
--- a/src/transformers/modeling_gpt2.py
+++ b/src/transformers/modeling_gpt2.py
@@ -106,7 +106,9 @@ def load_tf_weights_in_gpt2(model, config, gpt2_checkpoint_path):
                 num = int(scope_names[1])
                 pointer = pointer[num]
         try:
-            assert pointer.shape == array.shape
+            assert (
+                pointer.shape == array.shape
+            ), f"Pointer shape {pointer.shape} and array shape {array.shape} mismatched"
         except AssertionError as e:
             e.args += (pointer.shape, array.shape)
             raise
diff --git a/src/transformers/modeling_mobilebert.py b/src/transformers/modeling_mobilebert.py
index d3a4cd8e32..f0b01cfa61 100644
--- a/src/transformers/modeling_mobilebert.py
+++ b/src/transformers/modeling_mobilebert.py
@@ -130,7 +130,9 @@ def load_tf_weights_in_mobilebert(model, config, tf_checkpoint_path):
         elif m_name == "kernel":
             array = np.transpose(array)
         try:
-            assert pointer.shape == array.shape
+            assert (
+                pointer.shape == array.shape
+            ), f"Pointer shape {pointer.shape} and array shape {array.shape} mismatched"
         except AssertionError as e:
             e.args += (pointer.shape, array.shape)
             raise
diff --git a/src/transformers/modeling_openai.py b/src/transformers/modeling_openai.py
index e3406bc291..071b86662f 100644
--- a/src/transformers/modeling_openai.py
+++ b/src/transformers/modeling_openai.py
@@ -121,12 +121,16 @@ def load_tf_weights_in_openai_gpt(model, config, openai_checkpoint_folder_path):
                 num = int(scope_names[1])
                 pointer = pointer[num]
         try:
-            assert pointer.shape == array.shape
+            assert (
+                pointer.shape == array.shape
+            ), f"Pointer shape {pointer.shape} and array shape {array.shape} mismatched"
         except AssertionError as e:
             e.args += (pointer.shape, array.shape)
             raise
         try:
-            assert pointer.shape == array.shape
+            assert (
+                pointer.shape == array.shape
+            ), f"Pointer shape {pointer.shape} and array shape {array.shape} mismatched"
         except AssertionError as e:
             e.args += (pointer.shape, array.shape)
             raise
diff --git a/src/transformers/modeling_t5.py b/src/transformers/modeling_t5.py
index d7665ba201..03a0827e1d 100644
--- a/src/transformers/modeling_t5.py
+++ b/src/transformers/modeling_t5.py
@@ -131,7 +131,9 @@ def load_tf_weights_in_t5(model, config, tf_checkpoint_path):
             logger.info("Transposing numpy weight of shape {} for {}".format(array.shape, name))
             array = np.transpose(array)
         try:
-            assert pointer.shape == array.shape
+            assert (
+                pointer.shape == array.shape
+            ), f"Pointer shape {pointer.shape} and array shape {array.shape} mismatched"
         except AssertionError as e:
             e.args += (pointer.shape, array.shape)
             raise
diff --git a/src/transformers/modeling_tf_albert.py b/src/transformers/modeling_tf_albert.py
index 0d9b699ddd..6facc33c22 100644
--- a/src/transformers/modeling_tf_albert.py
+++ b/src/transformers/modeling_tf_albert.py
@@ -170,7 +170,9 @@ class TFAlbertSelfAttention(tf.keras.layers.Layer):
             )
 
         self.num_attention_heads = config.num_attention_heads
-        assert config.hidden_size % config.num_attention_heads == 0
+        assert (
+            config.hidden_size % config.num_attention_heads == 0
+        ), f"Hidden size {config.hidden_size} not divisible by number of heads {config.num_attention_heads}"
         self.attention_head_size = int(config.hidden_size / config.num_attention_heads)
         self.all_head_size = self.num_attention_heads * self.attention_head_size
 
diff --git a/src/transformers/modeling_tf_distilbert.py b/src/transformers/modeling_tf_distilbert.py
index a22328d743..577be0b6fc 100644
--- a/src/transformers/modeling_tf_distilbert.py
+++ b/src/transformers/modeling_tf_distilbert.py
@@ -195,7 +195,7 @@ class TFMultiHeadSelfAttention(tf.keras.layers.Layer):
         self.dim = config.dim
         self.dropout = tf.keras.layers.Dropout(config.attention_dropout)
 
-        assert self.dim % self.n_heads == 0
+        assert self.dim % self.n_heads == 0, f"Hidden size {self.dim} not divisible by number of heads {self.n_heads}"
 
         self.q_lin = tf.keras.layers.Dense(
             config.dim, kernel_initializer=get_initializer(config.initializer_range), name="q_lin"
@@ -311,7 +311,9 @@ class TFTransformerBlock(tf.keras.layers.Layer):
         self.dropout = tf.keras.layers.Dropout(config.dropout)
         self.activation = config.activation
 
-        assert config.dim % config.n_heads == 0
+        assert (
+            config.dim % config.n_heads == 0
+        ), f"Hidden size {config.dim} not divisible by number of heads {config.n_heads}"
 
         self.attention = TFMultiHeadSelfAttention(config, name="attention")
         self.sa_layer_norm = tf.keras.layers.LayerNormalization(epsilon=1e-12, name="sa_layer_norm")
@@ -395,11 +397,11 @@ class TFTransformer(tf.keras.layers.Layer):
             hidden_state = layer_outputs[-1]
 
             if cast_bool_to_primitive(output_attentions) is True:
-                assert len(layer_outputs) == 2
+                assert len(layer_outputs) == 2, f"Incorrect number of outputs {len(layer_outputs)} instead of 2"
                 attentions = layer_outputs[0]
                 all_attentions = all_attentions + (attentions,)
             else:
-                assert len(layer_outputs) == 1
+                assert len(layer_outputs) == 1, f"Incorrect number of outputs {len(layer_outputs)} instead of 1"
 
         # Add last layer
         if cast_bool_to_primitive(output_hidden_states) is True:
@@ -1024,7 +1026,7 @@ class TFDistilBertForQuestionAnswering(TFDistilBertPreTrainedModel, TFQuestionAn
         self.qa_outputs = tf.keras.layers.Dense(
             config.num_labels, kernel_initializer=get_initializer(config.initializer_range), name="qa_outputs"
         )
-        assert config.num_labels == 2
+        assert config.num_labels == 2, f"Incorrect number of labels {config.num_labels} instead of 2"
         self.dropout = tf.keras.layers.Dropout(config.qa_dropout)
 
     @add_start_docstrings_to_callable(DISTILBERT_INPUTS_DOCSTRING)
diff --git a/src/transformers/modeling_tf_flaubert.py b/src/transformers/modeling_tf_flaubert.py
index cf721be25c..9885004586 100644
--- a/src/transformers/modeling_tf_flaubert.py
+++ b/src/transformers/modeling_tf_flaubert.py
@@ -193,7 +193,9 @@ class TFFlaubertMainLayer(TFXLMMainLayer):
 
         # check inputs
         # assert shape_list(lengths)[0] == bs
-        tf.debugging.assert_equal(shape_list(lengths)[0], bs)
+        tf.debugging.assert_equal(
+            shape_list(lengths)[0], bs, message=f"Expected batch size {shape_list(lengths)[0]} and received batch size {bs} mismatched"
+        )
         # assert lengths.max().item() <= slen
         # input_ids = input_ids.transpose(0, 1)  # batch size as dimension 0
         # assert (src_enc is None) == (src_len is None)
@@ -211,13 +213,17 @@ class TFFlaubertMainLayer(TFXLMMainLayer):
             position_ids = tf.expand_dims(tf.range(slen), axis=0)
         else:
             # assert shape_list(position_ids) == [bs, slen]  # (slen, bs)
-            tf.debugging.assert_equal(shape_list(position_ids), [bs, slen])
+            tf.debugging.assert_equal(
+                shape_list(position_ids), [bs, slen], message=f"Position id shape {shape_list(position_ids)} and input shape {[bs, slen]} mismatched"
+            )
             # position_ids = position_ids.transpose(0, 1)
 
         # langs
         if langs is not None:
             # assert shape_list(langs) == [bs, slen]  # (slen, bs)
-            tf.debugging.assert_equal(shape_list(langs), [bs, slen])
+            tf.debugging.assert_equal(
+                shape_list(langs), [bs, slen], message=f"Lang shape {shape_list(langs)} and input shape {[bs, slen]} mismatched"
+            )
             # langs = langs.transpose(0, 1)
 
         # Prepare head mask if needed
diff --git a/src/transformers/modeling_tf_openai.py b/src/transformers/modeling_tf_openai.py
index ef6805abcc..7d7adb1407 100644
--- a/src/transformers/modeling_tf_openai.py
+++ b/src/transformers/modeling_tf_openai.py
@@ -77,7 +77,9 @@ class TFAttention(tf.keras.layers.Layer):
 
         n_state = nx  # in Attention: n_state=768 (nx=n_embd)
         # [switch nx => n_state from Block to Attention to keep identical to TF implem]
-        assert n_state % config.n_head == 0
+        assert (
+            n_state % config.n_head == 0
+        ), f"Hidden dimension {n_state} not divisible by number of heads {config.n_head}"
         self.n_ctx = n_ctx
         self.n_head = config.n_head
         self.split_size = n_state
diff --git a/src/transformers/modeling_tf_xlnet.py b/src/transformers/modeling_tf_xlnet.py
index 769a997c49..1c8cba4e98 100644
--- a/src/transformers/modeling_tf_xlnet.py
+++ b/src/transformers/modeling_tf_xlnet.py
@@ -493,8 +493,7 @@ class TFXLNetMainLayer(tf.keras.layers.Layer):
                 bwd_pos_seq = tf.clip_by_value(bwd_pos_seq, -self.clamp_len, self.clamp_len)
 
             if bsz is not None:
-                # With bi_data, the batch size should be divisible by 2.
-                assert bsz % 2 == 0
+                assert bsz % 2 == 0, f"With bi_data, the batch size {bsz} should be divisible by 2"
                 fwd_pos_emb = self.positional_embedding(fwd_pos_seq, inv_freq, bsz // 2)
                 bwd_pos_emb = self.positional_embedding(bwd_pos_seq, inv_freq, bsz // 2)
             else:
diff --git a/src/transformers/modeling_transfo_xl.py b/src/transformers/modeling_transfo_xl.py
index bdad2f406d..f17855ce1d 100644
--- a/src/transformers/modeling_transfo_xl.py
+++ b/src/transformers/modeling_transfo_xl.py
@@ -155,7 +155,9 @@ def load_tf_weights_in_transfo_xl(model, config, tf_path):
                 p_i.data = torch.from_numpy(arr_i)
         else:
             try:
-                assert pointer.shape == array.shape
+                assert (
+                    pointer.shape == array.shape
+                ), f"Pointer shape {pointer.shape} and array shape {array.shape} mismatched"
             except AssertionError as e:
                 e.args += (pointer.shape, array.shape)
                 raise
diff --git a/src/transformers/modeling_xlnet.py b/src/transformers/modeling_xlnet.py
index e0892661ff..9746fb008f 100644
--- a/src/transformers/modeling_xlnet.py
+++ b/src/transformers/modeling_xlnet.py
@@ -169,11 +169,15 @@ def load_tf_weights_in_xlnet(model, config, tf_path):
             array = np.transpose(array)
         if isinstance(pointer, list):
             # Here we will split the TF weights
-            assert len(pointer) == array.shape[0]
+            assert (
+                len(pointer) == array.shape[0]
+            ), f"Pointer length {len(pointer)} and array length {array.shape[0]} mismatched"
             for i, p_i in enumerate(pointer):
                 arr_i = array[i, ...]
                 try:
-                    assert p_i.shape == arr_i.shape
+                    assert (
+                        p_i.shape == arr_i.shape
+                    ), f"Pointer shape {p_i.shape} and array shape {arr_i.shape} mismatched"
                 except AssertionError as e:
                     e.args += (p_i.shape, arr_i.shape)
                     raise
@@ -181,7 +185,9 @@ def load_tf_weights_in_xlnet(model, config, tf_path):
                 p_i.data = torch.from_numpy(arr_i)
         else:
             try:
-                assert pointer.shape == array.shape
+                assert (
+                    pointer.shape == array.shape
+                ), f"Pointer shape {pointer.shape} and array shape {array.shape} mismatched"
             except AssertionError as e:
                 e.args += (pointer.shape, array.shape)
                 raise
diff --git a/src/transformers/tokenization_transfo_xl.py b/src/transformers/tokenization_transfo_xl.py
index cd1635841a..9a28272177 100644
--- a/src/transformers/tokenization_transfo_xl.py
+++ b/src/transformers/tokenization_transfo_xl.py
@@ -147,7 +147,7 @@ class TransfoXLTokenizer(PreTrainedTokenizer):
     def count_file(self, path, verbose=False, add_eos=False):
         if verbose:
             logger.info("counting file {} ...".format(path))
-        assert os.path.exists(path)
+        assert os.path.exists(path), f"Input file {path} not found"
 
         sents = []
         with open(path, "r", encoding="utf-8") as f:
@@ -233,7 +233,7 @@ class TransfoXLTokenizer(PreTrainedTokenizer):
     def encode_file(self, path, ordered=False, verbose=False, add_eos=True, add_double_eos=False):
         if verbose:
             logger.info("encoding file {} ...".format(path))
-        assert os.path.exists(path)
+        assert os.path.exists(path), f"Input file {path} not found"
         encoded = []
         with open(path, "r", encoding="utf-8") as f:
             for idx, line in enumerate(f):
diff --git a/src/transformers/tokenization_utils_base.py b/src/transformers/tokenization_utils_base.py
index d63c4bb554..f4cab85c71 100644
--- a/src/transformers/tokenization_utils_base.py
+++ b/src/transformers/tokenization_utils_base.py
@@ -683,7 +683,8 @@ class SpecialTokensMixin:
         for key, value in kwargs.items():
             if key in self.SPECIAL_TOKENS_ATTRIBUTES:
                 if key == "additional_special_tokens":
-                    assert isinstance(value, (list, tuple)) and all(isinstance(t, str) for t in value)
+                    assert isinstance(value, (list, tuple)), f"Value {value} is not a list or tuple"
+                    assert all(isinstance(t, str) for t in value), "One of the tokens is not a string"
                     setattr(self, key, value)
                 elif isinstance(value, (str, AddedToken)):
                     setattr(self, key, value)
@@ -752,7 +753,7 @@ class SpecialTokensMixin:
 
         added_tokens = 0
         for key, value in special_tokens_dict.items():
-            assert key in self.SPECIAL_TOKENS_ATTRIBUTES
+            assert key in self.SPECIAL_TOKENS_ATTRIBUTES, f"Key {key} is not a special token"
 
             if self.verbose:
                 logger.info("Assigning %s to the %s key of the tokenizer", value, key)
diff --git a/src/transformers/trainer.py b/src/transformers/trainer.py
index f449dd138b..8a3209355a 100755
--- a/src/transformers/trainer.py
+++ b/src/transformers/trainer.py
@@ -124,11 +124,15 @@ class SequentialDistributedSampler(Sampler):
 
         # add extra samples to make it evenly divisible
         indices += indices[: (self.total_size - len(indices))]
-        assert len(indices) == self.total_size
+        assert (
+            len(indices) == self.total_size
+        ), f"Indices length {len(indices)} and total size {self.total_size} mismatched"
 
         # subsample
         indices = indices[self.rank * self.num_samples : (self.rank + 1) * self.num_samples]
-        assert len(indices) == self.num_samples
+        assert (
+            len(indices) == self.num_samples
+        ), f"Indices length {len(indices)} and sample number {self.num_samples} mismatched"
 
         return iter(indices)
 
@@ -566,9 +570,11 @@ class Trainer:
                         # In all cases (even distributed/parallel), self.model is always a reference
                         # to the model we want to save.
                         if hasattr(model, "module"):
-                            assert model.module is self.model
+                            assert (
+                                model.module is self.model
+                            ), f"Module {model.module} should be a reference to self.model"
                         else:
-                            assert model is self.model
+                            assert model is self.model, f"Model {model} should be a reference to self.model"
                         # Save model checkpoint
                         output_dir = os.path.join(self.args.output_dir, f"{PREFIX_CHECKPOINT_DIR}-{self.global_step}")
 
diff --git a/templates/adding_a_new_example_script/utils_xxx.py b/templates/adding_a_new_example_script/utils_xxx.py
index b8f8cdf2b9..bc2a219f9d 100644
--- a/templates/adding_a_new_example_script/utils_xxx.py
+++ b/templates/adding_a_new_example_script/utils_xxx.py
@@ -327,9 +327,15 @@ def convert_examples_to_features(
                 segment_ids.append(pad_token_segment_id)
                 p_mask.append(1)
 
-            assert len(input_ids) == max_seq_length
-            assert len(input_mask) == max_seq_length
-            assert len(segment_ids) == max_seq_length
+            assert (
+                len(input_ids) == max_seq_length
+            ), f"Input ids and sequence have mismatched lengths {len(input_ids)} and {max_seq_length}"
+            assert (
+                len(input_mask) == max_seq_length
+            ), f"Input mask and sequence have mismatched lengths {len(input_mask)} and {max_seq_length}"
+            assert (
+                len(segment_ids) == max_seq_length
+            ), f"Segment ids and sequence have mismatched lengths {len(segment_ids)} and {max_seq_length}"
 
             span_is_impossible = example.is_impossible
             start_position = None
@@ -626,7 +632,7 @@ def write_predictions(
         if not nbest:
             nbest.append(_NbestPrediction(text="empty", start_logit=0.0, end_logit=0.0))
 
-        assert len(nbest) >= 1
+        assert len(nbest) >= 1, "No valid predictions"
 
         total_scores = []
         best_non_null_entry = None
@@ -647,7 +653,7 @@ def write_predictions(
             output["end_logit"] = entry.end_logit
             nbest_json.append(output)
 
-        assert len(nbest_json) >= 1
+        assert len(nbest_json) >= 1, "No valid predictions"
 
         if not version_2_with_negative:
             all_predictions[example.qas_id] = nbest_json[0]["text"]
@@ -843,8 +849,8 @@ def write_predictions_extended(
             output["end_log_prob"] = entry.end_log_prob
             nbest_json.append(output)
 
-        assert len(nbest_json) >= 1
-        assert best_non_null_entry is not None
+        assert len(nbest_json) >= 1, "No valid predictions"
+        assert best_non_null_entry is not None, "No valid predictions"
 
         score_diff = score_null
         scores_diff_json[example.qas_id] = score_diff
diff --git a/templates/adding_a_new_model/modeling_xxx.py b/templates/adding_a_new_model/modeling_xxx.py
index 73676ed249..fb54dd7b45 100644
--- a/templates/adding_a_new_model/modeling_xxx.py
+++ b/templates/adding_a_new_model/modeling_xxx.py
@@ -121,7 +121,9 @@ def load_tf_weights_in_xxx(model, config, tf_checkpoint_path):
         elif m_name == "kernel":
             array = np.transpose(array)
         try:
-            assert pointer.shape == array.shape
+            assert (
+                pointer.shape == array.shape
+            ), f"Pointer and array have mismatched shapes {pointer.shape} and {array.shape}"
         except AssertionError as e:
             e.args += (pointer.shape, array.shape)
             raise