Empty assert hunt (#6056)

* Fixed empty asserts * black-reformatted stragglers in templates * More code quality checks * Update src/transformers/convert_marian_to_pytorch.py Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com> * Update src/transformers/convert_marian_to_pytorch.py Co-authored-by: Sam Shleifer <sshleifer@gmail.com> * removed unused line as per @sshleifer Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com> Co-authored-by: Sam Shleifer <sshleifer@gmail.com>
2020-08-03 10:19:03 +02:00
parent 16c2240164
commit 5a0dac53bf
26 changed files with 131 additions and 63 deletions
--- a/src/transformers/data/datasets/language_modeling.py
+++ b/src/transformers/data/datasets/language_modeling.py
@@ -22,7 +22,7 @@ class TextDataset(Dataset):
    def __init__(
        self, tokenizer: PreTrainedTokenizer, file_path: str, block_size: int, overwrite_cache=False,
    ):
-        assert os.path.isfile(file_path)
+        assert os.path.isfile(file_path), f"Input file path {file_path} not found"

        block_size = block_size - tokenizer.num_special_tokens_to_add(pair=False)

@@ -82,7 +82,7 @@ class LineByLineTextDataset(Dataset):
    """

    def __init__(self, tokenizer: PreTrainedTokenizer, file_path: str, block_size: int):
-        assert os.path.isfile(file_path)
+        assert os.path.isfile(file_path), f"Input file path {file_path} not found"
        # Here, we do not cache the features, operating under the assumption
        # that we will soon use fast multithreaded tokenizers from the
        # `tokenizers` repo everywhere =)
--- a/src/transformers/data/metrics/init.py
+++ b/src/transformers/data/metrics/init.py
@@ -51,7 +51,9 @@ if _has_sklearn:
        }

    def glue_compute_metrics(task_name, preds, labels):
-        assert len(preds) == len(labels)
+        assert len(preds) == len(
+            labels
+        ), f"Predictions and labels have mismatched lengths {len(preds)} and {len(labels)}"
        if task_name == "cola":
            return {"mcc": matthews_corrcoef(labels, preds)}
        elif task_name == "sst-2":
@@ -78,7 +80,9 @@ if _has_sklearn:
            raise KeyError(task_name)

    def xnli_compute_metrics(task_name, preds, labels):
-        assert len(preds) == len(labels)
+        assert len(preds) == len(
+            labels
+        ), f"Predictions and labels have mismatched lengths {len(preds)} and {len(labels)}"
        if task_name == "xnli":
            return {"acc": simple_accuracy(preds, labels)}
        else:
--- a/src/transformers/data/metrics/squad_metrics.py
+++ b/src/transformers/data/metrics/squad_metrics.py
@@ -523,7 +523,7 @@ def compute_predictions_logits(
        if not nbest:
            nbest.append(_NbestPrediction(text="empty", start_logit=0.0, end_logit=0.0))

-        assert len(nbest) >= 1
+        assert len(nbest) >= 1, "No valid predictions"

        total_scores = []
        best_non_null_entry = None
@@ -544,7 +544,7 @@ def compute_predictions_logits(
            output["end_logit"] = entry.end_logit
            nbest_json.append(output)

-        assert len(nbest_json) >= 1
+        assert len(nbest_json) >= 1, "No valid predictions"

        if not version_2_with_negative:
            all_predictions[example.qas_id] = nbest_json[0]["text"]
@@ -739,8 +739,8 @@ def compute_predictions_log_probs(
            output["end_log_prob"] = entry.end_log_prob
            nbest_json.append(output)

-        assert len(nbest_json) >= 1
-        assert best_non_null_entry is not None
+        assert len(nbest_json) >= 1, "No valid predictions"
+        assert best_non_null_entry is not None, "No valid predictions"

        score_diff = score_null
        scores_diff_json[example.qas_id] = score_diff
--- a/src/transformers/data/processors/utils.py
+++ b/src/transformers/data/processors/utils.py
@@ -194,8 +194,12 @@ class SingleSentenceClassificationProcessor(DataProcessor):
    def add_examples(
        self, texts_or_text_and_labels, labels=None, ids=None, overwrite_labels=False, overwrite_examples=False
    ):
-        assert labels is None or len(texts_or_text_and_labels) == len(labels)
-        assert ids is None or len(texts_or_text_and_labels) == len(ids)
+        assert labels is None or len(texts_or_text_and_labels) == len(
+            labels
+        ), f"Text and labels have mismatched lengths {len(texts_or_text_and_labels)} and {len(labels)}"
+        assert ids is None or len(texts_or_text_and_labels) == len(
+            ids
+        ), f"Text and ids have mismatched lengths {len(texts_or_text_and_labels)} and {len(ids)}"
        if ids is None:
            ids = [None] * len(texts_or_text_and_labels)
        if labels is None:
--- a/src/transformers/data/processors/xnli.py
+++ b/src/transformers/data/processors/xnli.py
@@ -45,7 +45,9 @@ class XnliProcessor(DataProcessor):
            text_a = line[0]
            text_b = line[1]
            label = "contradiction" if line[2] == "contradictory" else line[2]
-            assert isinstance(text_a, str) and isinstance(text_b, str) and isinstance(label, str)
+            assert isinstance(text_a, str), f"Training input {text_a} is not a string"
+            assert isinstance(text_b, str), f"Training input {text_b} is not a string"
+            assert isinstance(label, str), f"Training label {label} is not a string"
            examples.append(InputExample(guid=guid, text_a=text_a, text_b=text_b, label=label))
        return examples

@@ -63,7 +65,9 @@ class XnliProcessor(DataProcessor):
            text_a = line[6]
            text_b = line[7]
            label = line[1]
-            assert isinstance(text_a, str) and isinstance(text_b, str) and isinstance(label, str)
+            assert isinstance(text_a, str), f"Training input {text_a} is not a string"
+            assert isinstance(text_b, str), f"Training input {text_b} is not a string"
+            assert isinstance(label, str), f"Training label {label} is not a string"
            examples.append(InputExample(guid=guid, text_a=text_a, text_b=text_b, label=label))
        return examples