Empty assert hunt (#6056)

* Fixed empty asserts

* black-reformatted stragglers in templates

* More code quality checks

* Update src/transformers/convert_marian_to_pytorch.py

Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com>

* Update src/transformers/convert_marian_to_pytorch.py

Co-authored-by: Sam Shleifer <sshleifer@gmail.com>

* removed unused line as per @sshleifer

Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com>
Co-authored-by: Sam Shleifer <sshleifer@gmail.com>
2020-08-03 10:19:03 +02:00
parent 16c2240164
commit 5a0dac53bf
26 changed files with 131 additions and 63 deletions
--- a/src/transformers/commands/train.py
+++ b/src/transformers/commands/train.py
@@ -81,7 +81,6 @@ class TrainCommand(BaseTransformersCLICommand):
        self.framework = "tf" if is_tf_available() else "torch"
        os.makedirs(args.output, exist_ok=True)
        assert os.path.isdir(args.output)
        self.output = args.output
        self.column_label = args.column_label
--- a/src/transformers/convert_marian_to_pytorch.py
+++ b/src/transformers/convert_marian_to_pytorch.py
@@ -166,7 +166,7 @@ def write_model_card(
    extra_markdown = f"### {hf_model_name}\n\n* source languages: {s}\n* target languages: {t}\n*  OPUS readme: [{opus_name}]({readme_url})\n"
    # combine with opus markdown
    opus_readme_path = Path(f"{repo_path}{opus_name}/README.md")
-    assert opus_readme_path.exists(), opus_readme_path
+    assert opus_readme_path.exists(), f"Readme file {opus_readme_path} not found"
    content = opus_readme_path.open().read()
    content = content.split("\n# ")[-1]  # Get the lowest level 1 header in the README -- the most recent model.
    content = "*".join(content.split("*")[1:])
@@ -231,7 +231,9 @@ def fetch_test_set(test_set_url):
    src = lmap(str.strip, lns[::4])
    gold = lmap(str.strip, lns[1::4])
    mar_model = lmap(str.strip, lns[2::4])
-    assert len(gold) == len(mar_model) == len(src)
+    assert (
        len(gold) == len(mar_model) == len(src)
    ), f"Gold, marian and source lengths {len(gold)}, {len(mar_model)}, {len(src)} mismatched"
    os.remove(fname)
    return src, mar_model, gold
@@ -374,20 +376,21 @@ class OpusState:
        self.state_dict = np.load(npz_path)
        cfg = load_config_from_state_dict(self.state_dict)
        assert cfg["dim-vocabs"][0] == cfg["dim-vocabs"][1]
-        assert "Wpos" not in self.state_dict
+        assert "Wpos" not in self.state_dict, "Wpos key in state dictionary"
        self.state_dict = dict(self.state_dict)
        self.wemb, self.final_bias = add_emb_entries(self.state_dict["Wemb"], self.state_dict[BIAS_KEY], 1)
        self.pad_token_id = self.wemb.shape[0] - 1
        cfg["vocab_size"] = self.pad_token_id + 1
        # self.state_dict['Wemb'].sha
        self.state_keys = list(self.state_dict.keys())
-        if "Wtype" in self.state_dict:
+        assert "Wtype" not in self.state_dict, "Wtype key in state dictionary"
            raise ValueError("found Wtype key")
        self._check_layer_entries()
        self.source_dir = source_dir
        self.cfg = cfg
        hidden_size, intermediate_shape = self.state_dict["encoder_l1_ffn_W1"].shape
-        assert hidden_size == cfg["dim-emb"] == 512
+        assert (
            hidden_size == cfg["dim-emb"] == 512
        ), f"Hidden size {hidden_size} and configured size {cfg['dim_emb']} mismatched or not 512"
        # Process decoder.yml
        decoder_yml = cast_marian_config(load_yaml(source_dir / "decoder.yml"))
@@ -448,7 +451,7 @@ class OpusState:
    def load_marian_model(self) -> MarianMTModel:
        state_dict, cfg = self.state_dict, self.hf_config
-        assert cfg.static_position_embeddings
+        assert cfg.static_position_embeddings, "config.static_position_embeddings should be True"
        model = MarianMTModel(cfg)
        assert "hidden_size" not in cfg.to_dict()
@@ -476,7 +479,9 @@ class OpusState:
            raise NotImplementedError("Need to convert layernorm_embedding")
        assert not self.extra_keys, f"Failed to convert {self.extra_keys}"
-        assert model.model.shared.padding_idx == self.pad_token_id
+        assert (
            model.model.shared.padding_idx == self.pad_token_id
        ), f"Padding tokens {model.model.shared.padding_idx} and {self.pad_token_id} mismatched"
        return model
@@ -500,7 +505,9 @@ def convert(source_dir: Path, dest_dir):
    save_tokenizer(tokenizer, dest_dir)
    opus_state = OpusState(source_dir)
-    assert opus_state.cfg["vocab_size"] == len(tokenizer.encoder)
+    assert opus_state.cfg["vocab_size"] == len(
        tokenizer.encoder
    ), f"Original vocab size {opus_state.cfg['vocab_size']} and new vocab size {len(tokenizer.encoder)} mismatched"
    # save_json(opus_state.cfg, dest_dir / "marian_original_config.json")
    # ^^ Save human readable marian config for debugging
@@ -517,7 +524,7 @@ if __name__ == "__main__":
    args = parser.parse_args()
    source_dir = Path(args.src)
-    assert source_dir.exists()
+    assert source_dir.exists(), f"Source directory {source_dir} not found"
    dest_dir = f"converted-{source_dir.name}" if args.dest is None else args.dest
    convert(source_dir, dest_dir)
--- a/src/transformers/data/datasets/language_modeling.py
+++ b/src/transformers/data/datasets/language_modeling.py
@@ -22,7 +22,7 @@ class TextDataset(Dataset):
    def __init__(
        self, tokenizer: PreTrainedTokenizer, file_path: str, block_size: int, overwrite_cache=False,
    ):
-        assert os.path.isfile(file_path)
+        assert os.path.isfile(file_path), f"Input file path {file_path} not found"
        block_size = block_size - tokenizer.num_special_tokens_to_add(pair=False)
@@ -82,7 +82,7 @@ class LineByLineTextDataset(Dataset):
    """
    def __init__(self, tokenizer: PreTrainedTokenizer, file_path: str, block_size: int):
-        assert os.path.isfile(file_path)
+        assert os.path.isfile(file_path), f"Input file path {file_path} not found"
        # Here, we do not cache the features, operating under the assumption
        # that we will soon use fast multithreaded tokenizers from the
        # `tokenizers` repo everywhere =)
--- a/src/transformers/data/metrics/init.py
+++ b/src/transformers/data/metrics/init.py
@@ -51,7 +51,9 @@ if _has_sklearn:
        }
    def glue_compute_metrics(task_name, preds, labels):
-        assert len(preds) == len(labels)
+        assert len(preds) == len(
            labels
        ), f"Predictions and labels have mismatched lengths {len(preds)} and {len(labels)}"
        if task_name == "cola":
            return {"mcc": matthews_corrcoef(labels, preds)}
        elif task_name == "sst-2":
@@ -78,7 +80,9 @@ if _has_sklearn:
            raise KeyError(task_name)
    def xnli_compute_metrics(task_name, preds, labels):
-        assert len(preds) == len(labels)
+        assert len(preds) == len(
            labels
        ), f"Predictions and labels have mismatched lengths {len(preds)} and {len(labels)}"
        if task_name == "xnli":
            return {"acc": simple_accuracy(preds, labels)}
        else:
--- a/src/transformers/data/metrics/squad_metrics.py
+++ b/src/transformers/data/metrics/squad_metrics.py
@@ -523,7 +523,7 @@ def compute_predictions_logits(
        if not nbest:
            nbest.append(_NbestPrediction(text="empty", start_logit=0.0, end_logit=0.0))
-        assert len(nbest) >= 1
+        assert len(nbest) >= 1, "No valid predictions"
        total_scores = []
        best_non_null_entry = None
@@ -544,7 +544,7 @@ def compute_predictions_logits(
            output["end_logit"] = entry.end_logit
            nbest_json.append(output)
-        assert len(nbest_json) >= 1
+        assert len(nbest_json) >= 1, "No valid predictions"
        if not version_2_with_negative:
            all_predictions[example.qas_id] = nbest_json[0]["text"]
@@ -739,8 +739,8 @@ def compute_predictions_log_probs(
            output["end_log_prob"] = entry.end_log_prob
            nbest_json.append(output)
-        assert len(nbest_json) >= 1
+        assert len(nbest_json) >= 1, "No valid predictions"
-        assert best_non_null_entry is not None
+        assert best_non_null_entry is not None, "No valid predictions"
        score_diff = score_null
        scores_diff_json[example.qas_id] = score_diff
--- a/src/transformers/data/processors/utils.py
+++ b/src/transformers/data/processors/utils.py
@@ -194,8 +194,12 @@ class SingleSentenceClassificationProcessor(DataProcessor):
    def add_examples(
        self, texts_or_text_and_labels, labels=None, ids=None, overwrite_labels=False, overwrite_examples=False
    ):
-        assert labels is None or len(texts_or_text_and_labels) == len(labels)
+        assert labels is None or len(texts_or_text_and_labels) == len(
-        assert ids is None or len(texts_or_text_and_labels) == len(ids)
+            labels
        ), f"Text and labels have mismatched lengths {len(texts_or_text_and_labels)} and {len(labels)}"
        assert ids is None or len(texts_or_text_and_labels) == len(
            ids
        ), f"Text and ids have mismatched lengths {len(texts_or_text_and_labels)} and {len(ids)}"
        if ids is None:
            ids = [None] * len(texts_or_text_and_labels)
        if labels is None:
--- a/src/transformers/data/processors/xnli.py
+++ b/src/transformers/data/processors/xnli.py
@@ -45,7 +45,9 @@ class XnliProcessor(DataProcessor):
            text_a = line[0]
            text_b = line[1]
            label = "contradiction" if line[2] == "contradictory" else line[2]
-            assert isinstance(text_a, str) and isinstance(text_b, str) and isinstance(label, str)
+            assert isinstance(text_a, str), f"Training input {text_a} is not a string"
            assert isinstance(text_b, str), f"Training input {text_b} is not a string"
            assert isinstance(label, str), f"Training label {label} is not a string"
            examples.append(InputExample(guid=guid, text_a=text_a, text_b=text_b, label=label))
        return examples
@@ -63,7 +65,9 @@ class XnliProcessor(DataProcessor):
            text_a = line[6]
            text_b = line[7]
            label = line[1]
-            assert isinstance(text_a, str) and isinstance(text_b, str) and isinstance(label, str)
+            assert isinstance(text_a, str), f"Training input {text_a} is not a string"
            assert isinstance(text_b, str), f"Training input {text_b} is not a string"
            assert isinstance(label, str), f"Training label {label} is not a string"
            examples.append(InputExample(guid=guid, text_a=text_a, text_b=text_b, label=label))
        return examples
--- a/src/transformers/modeling_albert.py
+++ b/src/transformers/modeling_albert.py
@@ -179,7 +179,9 @@ def load_tf_weights_in_albert(model, config, tf_checkpoint_path):
        elif m_name == "kernel":
            array = np.transpose(array)
        try:
-            assert pointer.shape == array.shape
+            assert (
                pointer.shape == array.shape
            ), f"Pointer shape {pointer.shape} and array shape {array.shape} mismatched"
        except AssertionError as e:
            e.args += (pointer.shape, array.shape)
            raise
--- a/src/transformers/modeling_bert.py
+++ b/src/transformers/modeling_bert.py
@@ -146,7 +146,9 @@ def load_tf_weights_in_bert(model, config, tf_checkpoint_path):
        elif m_name == "kernel":
            array = np.transpose(array)
        try:
-            assert pointer.shape == array.shape
+            assert (
                pointer.shape == array.shape
            ), f"Pointer shape {pointer.shape} and array shape {array.shape} mismatched"
        except AssertionError as e:
            e.args += (pointer.shape, array.shape)
            raise
--- a/src/transformers/modeling_electra.py
+++ b/src/transformers/modeling_electra.py
@@ -114,7 +114,9 @@ def load_tf_weights_in_electra(model, config, tf_checkpoint_path, discriminator_
            elif m_name == "kernel":
                array = np.transpose(array)
            try:
-                assert pointer.shape == array.shape, original_name
+                assert (
                    pointer.shape == array.shape
                ), f"Pointer shape {pointer.shape} and array shape {array.shape} mismatched"
            except AssertionError as e:
                e.args += (pointer.shape, array.shape)
                raise
--- a/src/transformers/modeling_gpt2.py
+++ b/src/transformers/modeling_gpt2.py
@@ -106,7 +106,9 @@ def load_tf_weights_in_gpt2(model, config, gpt2_checkpoint_path):
                num = int(scope_names[1])
                pointer = pointer[num]
        try:
-            assert pointer.shape == array.shape
+            assert (
                pointer.shape == array.shape
            ), f"Pointer shape {pointer.shape} and array shape {array.shape} mismatched"
        except AssertionError as e:
            e.args += (pointer.shape, array.shape)
            raise
--- a/src/transformers/modeling_mobilebert.py
+++ b/src/transformers/modeling_mobilebert.py
@@ -130,7 +130,9 @@ def load_tf_weights_in_mobilebert(model, config, tf_checkpoint_path):
        elif m_name == "kernel":
            array = np.transpose(array)
        try:
-            assert pointer.shape == array.shape
+            assert (
                pointer.shape == array.shape
            ), f"Pointer shape {pointer.shape} and array shape {array.shape} mismatched"
        except AssertionError as e:
            e.args += (pointer.shape, array.shape)
            raise
--- a/src/transformers/modeling_openai.py
+++ b/src/transformers/modeling_openai.py
@@ -121,12 +121,16 @@ def load_tf_weights_in_openai_gpt(model, config, openai_checkpoint_folder_path):
                num = int(scope_names[1])
                pointer = pointer[num]
        try:
-            assert pointer.shape == array.shape
+            assert (
                pointer.shape == array.shape
            ), f"Pointer shape {pointer.shape} and array shape {array.shape} mismatched"
        except AssertionError as e:
            e.args += (pointer.shape, array.shape)
            raise
        try:
-            assert pointer.shape == array.shape
+            assert (
                pointer.shape == array.shape
            ), f"Pointer shape {pointer.shape} and array shape {array.shape} mismatched"
        except AssertionError as e:
            e.args += (pointer.shape, array.shape)
            raise
--- a/src/transformers/modeling_t5.py
+++ b/src/transformers/modeling_t5.py
@@ -131,7 +131,9 @@ def load_tf_weights_in_t5(model, config, tf_checkpoint_path):
            logger.info("Transposing numpy weight of shape {} for {}".format(array.shape, name))
            array = np.transpose(array)
        try:
-            assert pointer.shape == array.shape
+            assert (
                pointer.shape == array.shape
            ), f"Pointer shape {pointer.shape} and array shape {array.shape} mismatched"
        except AssertionError as e:
            e.args += (pointer.shape, array.shape)
            raise
--- a/src/transformers/modeling_tf_albert.py
+++ b/src/transformers/modeling_tf_albert.py
@@ -170,7 +170,9 @@ class TFAlbertSelfAttention(tf.keras.layers.Layer):
            )
        self.num_attention_heads = config.num_attention_heads
-        assert config.hidden_size % config.num_attention_heads == 0
+        assert (
            config.hidden_size % config.num_attention_heads == 0
        ), f"Hidden size {config.hidden_size} not dividable by number of heads {config.num_attention_heads}"
        self.attention_head_size = int(config.hidden_size / config.num_attention_heads)
        self.all_head_size = self.num_attention_heads * self.attention_head_size
--- a/src/transformers/modeling_tf_distilbert.py
+++ b/src/transformers/modeling_tf_distilbert.py
@@ -195,7 +195,7 @@ class TFMultiHeadSelfAttention(tf.keras.layers.Layer):
        self.dim = config.dim
        self.dropout = tf.keras.layers.Dropout(config.attention_dropout)
-        assert self.dim % self.n_heads == 0
+        assert self.dim % self.n_heads == 0, f"Hidden size {self.dim} not dividable by number of heads {self.n_heads}"
        self.q_lin = tf.keras.layers.Dense(
            config.dim, kernel_initializer=get_initializer(config.initializer_range), name="q_lin"
@@ -311,7 +311,9 @@ class TFTransformerBlock(tf.keras.layers.Layer):
        self.dropout = tf.keras.layers.Dropout(config.dropout)
        self.activation = config.activation
-        assert config.dim % config.n_heads == 0
+        assert (
            config.dim % config.n_heads == 0
        ), f"Hidden size {config.dim} not dividable by number of heads {config.n_heads}"
        self.attention = TFMultiHeadSelfAttention(config, name="attention")
        self.sa_layer_norm = tf.keras.layers.LayerNormalization(epsilon=1e-12, name="sa_layer_norm")
@@ -395,11 +397,11 @@ class TFTransformer(tf.keras.layers.Layer):
            hidden_state = layer_outputs[-1]
            if cast_bool_to_primitive(output_attentions) is True:
-                assert len(layer_outputs) == 2
+                assert len(layer_outputs) == 2, f"Incorrect number of outputs {len(layer_outputs)} instead of 2"
                attentions = layer_outputs[0]
                all_attentions = all_attentions + (attentions,)
            else:
-                assert len(layer_outputs) == 1
+                assert len(layer_outputs) == 1, f"Incorrect number of outputs {len(layer_outputs)} instead of 1"
        # Add last layer
        if cast_bool_to_primitive(output_hidden_states) is True:
@@ -1024,7 +1026,7 @@ class TFDistilBertForQuestionAnswering(TFDistilBertPreTrainedModel, TFQuestionAn
        self.qa_outputs = tf.keras.layers.Dense(
            config.num_labels, kernel_initializer=get_initializer(config.initializer_range), name="qa_outputs"
        )
-        assert config.num_labels == 2
+        assert config.num_labels == 2, f"Incorrect number of labels {config.num_labels} instead of 2"
        self.dropout = tf.keras.layers.Dropout(config.qa_dropout)
    @add_start_docstrings_to_callable(DISTILBERT_INPUTS_DOCSTRING)
--- a/src/transformers/modeling_tf_flaubert.py
+++ b/src/transformers/modeling_tf_flaubert.py
@@ -193,7 +193,9 @@ class TFFlaubertMainLayer(TFXLMMainLayer):
        # check inputs
        # assert shape_list(lengths)[0] == bs
-        tf.debugging.assert_equal(shape_list(lengths)[0], bs)
+        tf.debugging.assert_equal(
            shape_list(lengths)[0], bs
        ), f"Expected batch size {shape_list(lengths)[0]} and received batch size {bs} mismatched"
        # assert lengths.max().item() <= slen
        # input_ids = input_ids.transpose(0, 1)  # batch size as dimension 0
        # assert (src_enc is None) == (src_len is None)
@@ -211,13 +213,17 @@ class TFFlaubertMainLayer(TFXLMMainLayer):
            position_ids = tf.expand_dims(tf.range(slen), axis=0)
        else:
            # assert shape_list(position_ids) == [bs, slen]  # (slen, bs)
-            tf.debugging.assert_equal(shape_list(position_ids), [bs, slen])
+            tf.debugging.assert_equal(
                shape_list(position_ids), [bs, slen]
            ), f"Position id shape {shape_list(position_ids)} and input shape {[bs, slen]} mismatched"
            # position_ids = position_ids.transpose(0, 1)
        # langs
        if langs is not None:
            # assert shape_list(langs) == [bs, slen]  # (slen, bs)
-            tf.debugging.assert_equal(shape_list(langs), [bs, slen])
+            tf.debugging.assert_equal(
                shape_list(langs), [bs, slen]
            ), f"Lang shape {shape_list(langs)} and input shape {[bs, slen]} mismatched"
            # langs = langs.transpose(0, 1)
        # Prepare head mask if needed
--- a/src/transformers/modeling_tf_openai.py
+++ b/src/transformers/modeling_tf_openai.py
@@ -77,7 +77,9 @@ class TFAttention(tf.keras.layers.Layer):
        n_state = nx  # in Attention: n_state=768 (nx=n_embd)
        # [switch nx => n_state from Block to Attention to keep identical to TF implem]
-        assert n_state % config.n_head == 0
+        assert (
            n_state % config.n_head == 0
        ), f"Hidden dimension {n_state} not dividable by number of heads {config.n_head}"
        self.n_ctx = n_ctx
        self.n_head = config.n_head
        self.split_size = n_state
--- a/src/transformers/modeling_tf_xlnet.py
+++ b/src/transformers/modeling_tf_xlnet.py
@@ -493,8 +493,7 @@ class TFXLNetMainLayer(tf.keras.layers.Layer):
                bwd_pos_seq = tf.clip_by_value(bwd_pos_seq, -self.clamp_len, self.clamp_len)
            if bsz is not None:
-                # With bi_data, the batch size should be divisible by 2.
+                assert bsz % 2 == 0, f"With bi_data, the batch size {bsz} should be divisible by 2"
                assert bsz % 2 == 0
                fwd_pos_emb = self.positional_embedding(fwd_pos_seq, inv_freq, bsz // 2)
                bwd_pos_emb = self.positional_embedding(bwd_pos_seq, inv_freq, bsz // 2)
            else:
--- a/src/transformers/modeling_transfo_xl.py
+++ b/src/transformers/modeling_transfo_xl.py
@@ -155,7 +155,9 @@ def load_tf_weights_in_transfo_xl(model, config, tf_path):
                p_i.data = torch.from_numpy(arr_i)
        else:
            try:
-                assert pointer.shape == array.shape
+                assert (
                    pointer.shape == array.shape
                ), f"Pointer shape {pointer.shape} and array shape {array.shape} mismatched"
            except AssertionError as e:
                e.args += (pointer.shape, array.shape)
                raise
--- a/src/transformers/modeling_xlnet.py
+++ b/src/transformers/modeling_xlnet.py
@@ -169,11 +169,15 @@ def load_tf_weights_in_xlnet(model, config, tf_path):
            array = np.transpose(array)
        if isinstance(pointer, list):
            # Here we will split the TF weights
-            assert len(pointer) == array.shape[0]
+            assert (
                len(pointer) == array.shape[0]
            ), f"Pointer length {len(pointer)} and array length {array.shape[0]} mismatched"
            for i, p_i in enumerate(pointer):
                arr_i = array[i, ...]
                try:
-                    assert p_i.shape == arr_i.shape
+                    assert (
                        p_i.shape == arr_i.shape
                    ), f"Pointer shape {p_i.shape} and array shape {arr_i.shape} mismatched"
                except AssertionError as e:
                    e.args += (p_i.shape, arr_i.shape)
                    raise
@@ -181,7 +185,9 @@ def load_tf_weights_in_xlnet(model, config, tf_path):
                p_i.data = torch.from_numpy(arr_i)
        else:
            try:
-                assert pointer.shape == array.shape
+                assert (
                    pointer.shape == array.shape
                ), f"Pointer shape {pointer.shape} and array shape {array.shape} mismatched"
            except AssertionError as e:
                e.args += (pointer.shape, array.shape)
                raise
--- a/src/transformers/tokenization_transfo_xl.py
+++ b/src/transformers/tokenization_transfo_xl.py
@@ -147,7 +147,7 @@ class TransfoXLTokenizer(PreTrainedTokenizer):
    def count_file(self, path, verbose=False, add_eos=False):
        if verbose:
            logger.info("counting file {} ...".format(path))
-        assert os.path.exists(path)
+        assert os.path.exists(path), f"Input file {path} not found"
        sents = []
        with open(path, "r", encoding="utf-8") as f:
@@ -233,7 +233,7 @@ class TransfoXLTokenizer(PreTrainedTokenizer):
    def encode_file(self, path, ordered=False, verbose=False, add_eos=True, add_double_eos=False):
        if verbose:
            logger.info("encoding file {} ...".format(path))
-        assert os.path.exists(path)
+        assert os.path.exists(path), f"Output file {path} not found"
        encoded = []
        with open(path, "r", encoding="utf-8") as f:
            for idx, line in enumerate(f):
--- a/src/transformers/tokenization_utils_base.py
+++ b/src/transformers/tokenization_utils_base.py
@@ -683,7 +683,8 @@ class SpecialTokensMixin:
        for key, value in kwargs.items():
            if key in self.SPECIAL_TOKENS_ATTRIBUTES:
                if key == "additional_special_tokens":
-                    assert isinstance(value, (list, tuple)) and all(isinstance(t, str) for t in value)
+                    assert isinstance(value, (list, tuple)), f"Value {value} is not a list or tuple"
                    assert all(isinstance(t, str) for t in value), "One of the tokens is not a string"
                    setattr(self, key, value)
                elif isinstance(value, (str, AddedToken)):
                    setattr(self, key, value)
@@ -752,7 +753,7 @@ class SpecialTokensMixin:
        added_tokens = 0
        for key, value in special_tokens_dict.items():
-            assert key in self.SPECIAL_TOKENS_ATTRIBUTES
+            assert key in self.SPECIAL_TOKENS_ATTRIBUTES, f"Key {key} is not a special token"
            if self.verbose:
                logger.info("Assigning %s to the %s key of the tokenizer", value, key)
--- a/src/transformers/trainer.py
+++ b/src/transformers/trainer.py
@@ -124,11 +124,15 @@ class SequentialDistributedSampler(Sampler):
        # add extra samples to make it evenly divisible
        indices += indices[: (self.total_size - len(indices))]
-        assert len(indices) == self.total_size
+        assert (
            len(indices) == self.total_size
        ), f"Indices length {len(indices)} and total size {self.total_size} mismatched"
        # subsample
        indices = indices[self.rank * self.num_samples : (self.rank + 1) * self.num_samples]
-        assert len(indices) == self.num_samples
+        assert (
            len(indices) == self.num_samples
        ), f"Indices length {len(indices)} and and sample number {self.num_samples} mismatched"
        return iter(indices)
@@ -566,9 +570,11 @@ class Trainer:
                        # In all cases (even distributed/parallel), self.model is always a reference
                        # to the model we want to save.
                        if hasattr(model, "module"):
-                            assert model.module is self.model
+                            assert (
                                model.module is self.model
                            ), f"Module {model.module} should be a reference to self.model"
                        else:
-                            assert model is self.model
+                            assert model is self.model, f"Model {model} should be a reference to self.model"
                        # Save model checkpoint
                        output_dir = os.path.join(self.args.output_dir, f"{PREFIX_CHECKPOINT_DIR}-{self.global_step}")
--- a/templates/adding_a_new_example_script/utils_xxx.py
+++ b/templates/adding_a_new_example_script/utils_xxx.py
@@ -327,9 +327,15 @@ def convert_examples_to_features(
                segment_ids.append(pad_token_segment_id)
                p_mask.append(1)
-            assert len(input_ids) == max_seq_length
+            assert (
-            assert len(input_mask) == max_seq_length
+                len(input_ids) == max_seq_length
-            assert len(segment_ids) == max_seq_length
+            ), f"Input ids and sequence have mismatched lengths {len(input_ids)} and {max_seq_length}"
            assert (
                len(input_mask) == max_seq_length
            ), f"Input mask and sequence have mismatched lengths {len(input_mask)} and {max_seq_length}"
            assert (
                len(segment_ids) == max_seq_length
            ), f"Segment ids and sequence have mismatched lengths {len(segment_ids)} and {max_seq_length}"
            span_is_impossible = example.is_impossible
            start_position = None
@@ -626,7 +632,7 @@ def write_predictions(
        if not nbest:
            nbest.append(_NbestPrediction(text="empty", start_logit=0.0, end_logit=0.0))
-        assert len(nbest) >= 1
+        assert len(nbest) >= 1, "No valid predictions"
        total_scores = []
        best_non_null_entry = None
@@ -647,7 +653,7 @@ def write_predictions(
            output["end_logit"] = entry.end_logit
            nbest_json.append(output)
-        assert len(nbest_json) >= 1
+        assert len(nbest_json) >= 1, "No valid predictions"
        if not version_2_with_negative:
            all_predictions[example.qas_id] = nbest_json[0]["text"]
@@ -843,8 +849,8 @@ def write_predictions_extended(
            output["end_log_prob"] = entry.end_log_prob
            nbest_json.append(output)
-        assert len(nbest_json) >= 1
+        assert len(nbest_json) >= 1, "No valid predictions"
-        assert best_non_null_entry is not None
+        assert best_non_null_entry is not None, "No valid predictions"
        score_diff = score_null
        scores_diff_json[example.qas_id] = score_diff
--- a/templates/adding_a_new_model/modeling_xxx.py
+++ b/templates/adding_a_new_model/modeling_xxx.py
@@ -121,7 +121,9 @@ def load_tf_weights_in_xxx(model, config, tf_checkpoint_path):
        elif m_name == "kernel":
            array = np.transpose(array)
        try:
-            assert pointer.shape == array.shape
+            assert (
                pointer.shape == array.shape
            ), f"Pointer and array have mismatched shapes {pointer.shape} and {array.shape}"
        except AssertionError as e:
            e.args += (pointer.shape, array.shape)
            raise