Empty assert hunt (#6056)

* Fixed empty asserts

* Reformatted stragglers in templates with black

* More code quality checks

* Update src/transformers/convert_marian_to_pytorch.py

Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com>

* Update src/transformers/convert_marian_to_pytorch.py

Co-authored-by: Sam Shleifer <sshleifer@gmail.com>

* Removed unused line as per @sshleifer

Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com>
Co-authored-by: Sam Shleifer <sshleifer@gmail.com>
This commit is contained in:
Teven
2020-08-03 10:19:03 +02:00
committed by GitHub
parent 16c2240164
commit 5a0dac53bf
26 changed files with 131 additions and 63 deletions

View File

@@ -22,7 +22,7 @@ class TextDataset(Dataset):
def __init__(
self, tokenizer: PreTrainedTokenizer, file_path: str, block_size: int, overwrite_cache=False,
):
assert os.path.isfile(file_path)
assert os.path.isfile(file_path), f"Input file path {file_path} not found"
block_size = block_size - tokenizer.num_special_tokens_to_add(pair=False)
@@ -82,7 +82,7 @@ class LineByLineTextDataset(Dataset):
"""
def __init__(self, tokenizer: PreTrainedTokenizer, file_path: str, block_size: int):
assert os.path.isfile(file_path)
assert os.path.isfile(file_path), f"Input file path {file_path} not found"
# Here, we do not cache the features, operating under the assumption
# that we will soon use fast multithreaded tokenizers from the
# `tokenizers` repo everywhere =)

View File

@@ -51,7 +51,9 @@ if _has_sklearn:
}
def glue_compute_metrics(task_name, preds, labels):
assert len(preds) == len(labels)
assert len(preds) == len(
labels
), f"Predictions and labels have mismatched lengths {len(preds)} and {len(labels)}"
if task_name == "cola":
return {"mcc": matthews_corrcoef(labels, preds)}
elif task_name == "sst-2":
@@ -78,7 +80,9 @@ if _has_sklearn:
raise KeyError(task_name)
def xnli_compute_metrics(task_name, preds, labels):
assert len(preds) == len(labels)
assert len(preds) == len(
labels
), f"Predictions and labels have mismatched lengths {len(preds)} and {len(labels)}"
if task_name == "xnli":
return {"acc": simple_accuracy(preds, labels)}
else:

View File

@@ -523,7 +523,7 @@ def compute_predictions_logits(
if not nbest:
nbest.append(_NbestPrediction(text="empty", start_logit=0.0, end_logit=0.0))
assert len(nbest) >= 1
assert len(nbest) >= 1, "No valid predictions"
total_scores = []
best_non_null_entry = None
@@ -544,7 +544,7 @@ def compute_predictions_logits(
output["end_logit"] = entry.end_logit
nbest_json.append(output)
assert len(nbest_json) >= 1
assert len(nbest_json) >= 1, "No valid predictions"
if not version_2_with_negative:
all_predictions[example.qas_id] = nbest_json[0]["text"]
@@ -739,8 +739,8 @@ def compute_predictions_log_probs(
output["end_log_prob"] = entry.end_log_prob
nbest_json.append(output)
assert len(nbest_json) >= 1
assert best_non_null_entry is not None
assert len(nbest_json) >= 1, "No valid predictions"
assert best_non_null_entry is not None, "No valid predictions"
score_diff = score_null
scores_diff_json[example.qas_id] = score_diff

View File

@@ -194,8 +194,12 @@ class SingleSentenceClassificationProcessor(DataProcessor):
def add_examples(
self, texts_or_text_and_labels, labels=None, ids=None, overwrite_labels=False, overwrite_examples=False
):
assert labels is None or len(texts_or_text_and_labels) == len(labels)
assert ids is None or len(texts_or_text_and_labels) == len(ids)
assert labels is None or len(texts_or_text_and_labels) == len(
labels
), f"Text and labels have mismatched lengths {len(texts_or_text_and_labels)} and {len(labels)}"
assert ids is None or len(texts_or_text_and_labels) == len(
ids
), f"Text and ids have mismatched lengths {len(texts_or_text_and_labels)} and {len(ids)}"
if ids is None:
ids = [None] * len(texts_or_text_and_labels)
if labels is None:

View File

@@ -45,7 +45,9 @@ class XnliProcessor(DataProcessor):
text_a = line[0]
text_b = line[1]
label = "contradiction" if line[2] == "contradictory" else line[2]
assert isinstance(text_a, str) and isinstance(text_b, str) and isinstance(label, str)
assert isinstance(text_a, str), f"Training input {text_a} is not a string"
assert isinstance(text_b, str), f"Training input {text_b} is not a string"
assert isinstance(label, str), f"Training label {label} is not a string"
examples.append(InputExample(guid=guid, text_a=text_a, text_b=text_b, label=label))
return examples
@@ -63,7 +65,9 @@ class XnliProcessor(DataProcessor):
text_a = line[6]
text_b = line[7]
label = line[1]
assert isinstance(text_a, str) and isinstance(text_b, str) and isinstance(label, str)
assert isinstance(text_a, str), f"Training input {text_a} is not a string"
assert isinstance(text_b, str), f"Training input {text_b} is not a string"
assert isinstance(label, str), f"Training label {label} is not a string"
examples.append(InputExample(guid=guid, text_a=text_a, text_b=text_b, label=label))
return examples