From b231a413f5d58592bb4d98304c3d3b668c5d4a42 Mon Sep 17 00:00:00 2001 From: Jin Young Sohn Date: Tue, 2 Jun 2020 10:40:14 -0700 Subject: [PATCH] Add cache_dir to save features in GLUE + Differentiate match/mismatch for MNLI metrics (#4621) * Glue task cleanup * Enable writing cache to cache_dir in case dataset lives in read-only filesystem. * Differentiate match vs mismatch for MNLI metrics. * Style * Fix pytype * Fix type * Use cache_dir in mnli mismatch eval dataset * Small Tweaks Co-authored-by: Julien Chaumond --- examples/text-classification/run_glue.py | 44 ++++++++++++++++------- src/transformers/data/datasets/glue.py | 3 +- src/transformers/data/metrics/__init__.py | 4 +-- src/transformers/trainer.py | 1 + 4 files changed, 36 insertions(+), 16 deletions(-) diff --git a/examples/text-classification/run_glue.py b/examples/text-classification/run_glue.py index f7392a2857..cf9b765a82 100644 --- a/examples/text-classification/run_glue.py +++ b/examples/text-classification/run_glue.py @@ -21,7 +21,7 @@ import logging import os import sys from dataclasses import dataclass, field -from typing import Dict, Optional +from typing import Callable, Dict, Optional import numpy as np @@ -134,16 +134,29 @@ def main(): ) # Get datasets - train_dataset = GlueDataset(data_args, tokenizer=tokenizer) if training_args.do_train else None - eval_dataset = GlueDataset(data_args, tokenizer=tokenizer, mode="dev") if training_args.do_eval else None - test_dataset = GlueDataset(data_args, tokenizer=tokenizer, mode="test") if training_args.do_predict else None + train_dataset = ( + GlueDataset(data_args, tokenizer=tokenizer, cache_dir=model_args.cache_dir) if training_args.do_train else None + ) + eval_dataset = ( + GlueDataset(data_args, tokenizer=tokenizer, mode="dev", cache_dir=model_args.cache_dir) + if training_args.do_eval + else None + ) + test_dataset = ( + GlueDataset(data_args, tokenizer=tokenizer, mode="test", cache_dir=model_args.cache_dir) + if training_args.do_predict + 
else None + ) - def compute_metrics(p: EvalPrediction) -> Dict: - if output_mode == "classification": - preds = np.argmax(p.predictions, axis=1) - elif output_mode == "regression": - preds = np.squeeze(p.predictions) - return glue_compute_metrics(data_args.task_name, preds, p.label_ids) + def build_compute_metrics_fn(task_name: str) -> Callable[[EvalPrediction], Dict]: + def compute_metrics_fn(p: EvalPrediction): + if output_mode == "classification": + preds = np.argmax(p.predictions, axis=1) + elif output_mode == "regression": + preds = np.squeeze(p.predictions) + return glue_compute_metrics(task_name, preds, p.label_ids) + + return compute_metrics_fn # Initialize our Trainer trainer = Trainer( @@ -151,7 +164,7 @@ def main(): args=training_args, train_dataset=train_dataset, eval_dataset=eval_dataset, - compute_metrics=compute_metrics, + compute_metrics=build_compute_metrics_fn(data_args.task_name), ) # Training @@ -174,9 +187,12 @@ def main(): eval_datasets = [eval_dataset] if data_args.task_name == "mnli": mnli_mm_data_args = dataclasses.replace(data_args, task_name="mnli-mm") - eval_datasets.append(GlueDataset(mnli_mm_data_args, tokenizer=tokenizer, mode="dev")) + eval_datasets.append( + GlueDataset(mnli_mm_data_args, tokenizer=tokenizer, mode="dev", cache_dir=model_args.cache_dir) + ) for eval_dataset in eval_datasets: + trainer.compute_metrics = build_compute_metrics_fn(eval_dataset.args.task_name) eval_result = trainer.evaluate(eval_dataset=eval_dataset) output_eval_file = os.path.join( @@ -196,7 +212,9 @@ def main(): test_datasets = [test_dataset] if data_args.task_name == "mnli": mnli_mm_data_args = dataclasses.replace(data_args, task_name="mnli-mm") - test_datasets.append(GlueDataset(mnli_mm_data_args, tokenizer=tokenizer, mode="test")) + test_datasets.append( + GlueDataset(mnli_mm_data_args, tokenizer=tokenizer, mode="test", cache_dir=model_args.cache_dir) + ) for test_dataset in test_datasets: predictions = 
trainer.predict(test_dataset=test_dataset).predictions diff --git a/src/transformers/data/datasets/glue.py b/src/transformers/data/datasets/glue.py index 2ee260ea9e..1775b93f92 100644 --- a/src/transformers/data/datasets/glue.py +++ b/src/transformers/data/datasets/glue.py @@ -70,6 +70,7 @@ class GlueDataset(Dataset): tokenizer: PreTrainedTokenizer, limit_length: Optional[int] = None, mode: Union[str, Split] = Split.train, + cache_dir: Optional[str] = None, ): self.args = args self.processor = glue_processors[args.task_name]() @@ -81,7 +82,7 @@ class GlueDataset(Dataset): raise KeyError("mode is not a valid split name") # Load data features from cache or dataset file cached_features_file = os.path.join( - args.data_dir, + cache_dir if cache_dir is not None else args.data_dir, "cached_{}_{}_{}_{}".format( mode.value, tokenizer.__class__.__name__, str(args.max_seq_length), args.task_name, ), diff --git a/src/transformers/data/metrics/__init__.py b/src/transformers/data/metrics/__init__.py index 6c29c2313d..59ffdc8db1 100644 --- a/src/transformers/data/metrics/__init__.py +++ b/src/transformers/data/metrics/__init__.py @@ -63,9 +63,9 @@ if _has_sklearn: elif task_name == "qqp": return acc_and_f1(preds, labels) elif task_name == "mnli": - return {"acc": simple_accuracy(preds, labels)} + return {"mnli/acc": simple_accuracy(preds, labels)} elif task_name == "mnli-mm": - return {"acc": simple_accuracy(preds, labels)} + return {"mnli-mm/acc": simple_accuracy(preds, labels)} elif task_name == "qnli": return {"acc": simple_accuracy(preds, labels)} elif task_name == "rte": diff --git a/src/transformers/trainer.py b/src/transformers/trainer.py index 51b03d4607..d0914aa7a5 100644 --- a/src/transformers/trainer.py +++ b/src/transformers/trainer.py @@ -553,6 +553,7 @@ class Trainer: if self.tb_writer: for k, v in logs.items(): self.tb_writer.add_scalar(k, v, self.global_step) + self.tb_writer.flush() if is_wandb_available(): wandb.log(logs, step=self.global_step) output = 
json.dumps({**logs, **{"step": self.global_step}})