From 0513f8d275022d4055b710a33cd520b2000982bf Mon Sep 17 00:00:00 2001
From: Victor SANH
Date: Mon, 3 Aug 2020 15:02:51 -0400
Subject: [PATCH] correct label extraction + add note on discrepancies on trained MNLI model and HANS (#6221)

---
 examples/adversarial/utils_hans.py | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/examples/adversarial/utils_hans.py b/examples/adversarial/utils_hans.py
index 8f230fad98..1860c98dcc 100644
--- a/examples/adversarial/utils_hans.py
+++ b/examples/adversarial/utils_hans.py
@@ -255,7 +255,11 @@ class HansProcessor(DataProcessor):
         return self._create_examples(self._read_tsv(os.path.join(data_dir, "heuristics_evaluation_set.txt")), "dev")
 
     def get_labels(self):
-        """See base class."""
+        """See base class.
+        Note that we follow the standard three labels for MNLI
+        (see :class:`~transformers.data.processors.utils.MnliProcessor`)
+        but the HANS evaluation groups `contradiction` and `neutral` into `non-entailment` (label 0) while
+        `entailment` is label 1."""
         return ["contradiction", "entailment", "neutral"]
 
     def _create_examples(self, lines, set_type):
@@ -268,7 +272,7 @@ class HansProcessor(DataProcessor):
             text_a = line[5]
             text_b = line[6]
             pairID = line[7][2:] if line[7].startswith("ex") else line[7]
-            label = line[-1]
+            label = line[0]
             examples.append(InputExample(guid=guid, text_a=text_a, text_b=text_b, label=label, pairID=pairID))
         return examples
 