Add cache_dir to save features in GLUE + Differentiate match/mismatch for MNLI metrics (#4621)
* Glue task cleanup
* Enable writing cache to cache_dir in case dataset lives in a read-only filesystem.
* Differentiate match vs mismatch for MNLI metrics.
* Style
* Fix pytype
* Fix type
* Use cache_dir in mnli mismatch eval dataset
* Small Tweaks

Co-authored-by: Julien Chaumond <chaumond@gmail.com>
This commit is contained in:
@@ -70,6 +70,7 @@ class GlueDataset(Dataset):
|
||||
tokenizer: PreTrainedTokenizer,
|
||||
limit_length: Optional[int] = None,
|
||||
mode: Union[str, Split] = Split.train,
|
||||
cache_dir: Optional[str] = None,
|
||||
):
|
||||
self.args = args
|
||||
self.processor = glue_processors[args.task_name]()
|
||||
@@ -81,7 +82,7 @@ class GlueDataset(Dataset):
|
||||
raise KeyError("mode is not a valid split name")
|
||||
# Load data features from cache or dataset file
|
||||
cached_features_file = os.path.join(
|
||||
args.data_dir,
|
||||
cache_dir if cache_dir is not None else args.data_dir,
|
||||
"cached_{}_{}_{}_{}".format(
|
||||
mode.value, tokenizer.__class__.__name__, str(args.max_seq_length), args.task_name,
|
||||
),
|
||||
|
||||
@@ -63,9 +63,9 @@ if _has_sklearn:
|
||||
elif task_name == "qqp":
|
||||
return acc_and_f1(preds, labels)
|
||||
elif task_name == "mnli":
|
||||
return {"acc": simple_accuracy(preds, labels)}
|
||||
return {"mnli/acc": simple_accuracy(preds, labels)}
|
||||
elif task_name == "mnli-mm":
|
||||
return {"acc": simple_accuracy(preds, labels)}
|
||||
return {"mnli-mm/acc": simple_accuracy(preds, labels)}
|
||||
elif task_name == "qnli":
|
||||
return {"acc": simple_accuracy(preds, labels)}
|
||||
elif task_name == "rte":
|
||||
|
||||
Reference in New Issue
Block a user