From b923871bb78f538e3c2e4bf36776986c800da1ae Mon Sep 17 00:00:00 2001 From: Doug Blank Date: Thu, 6 Aug 2020 08:31:30 -0700 Subject: [PATCH] Adds comet_ml to the list of auto-experiment loggers (#6176) * Support for Comet.ml * Need to import comet first * Log this model, not the one in the backprop step * Log args as hyperparameters; use framework to allow fine control * Log hyperparameters with context * Apply black formatting * isort fix integrations * isort fix __init__ * Update src/transformers/trainer.py Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com> * Update src/transformers/trainer.py Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com> * Update src/transformers/trainer_tf.py Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com> * Address review comments * Style + Quality, remove Tensorboard import test Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com> Co-authored-by: Lysandre --- examples/README.md | 24 +++++++++-- src/transformers/__init__.py | 3 ++ src/transformers/integrations.py | 48 +++++++++++++++++++++ src/transformers/trainer.py | 72 +++++++++++++++++++++---------- src/transformers/trainer_tf.py | 50 ++++++++++++++++++++- src/transformers/trainer_utils.py | 18 -------- 6 files changed, 171 insertions(+), 44 deletions(-) create mode 100644 src/transformers/integrations.py diff --git a/examples/README.md b/examples/README.md index a298ea4ea3..11b83792be 100644 --- a/examples/README.md +++ b/examples/README.md @@ -81,7 +81,13 @@ Feedback and more use cases and benchmarks involving TPUs are welcome, please sh ## Logging & Experiment tracking -You can easily log and monitor your runs code. [TensorBoard](https://www.tensorflow.org/tensorboard) and [Weights & Biases](https://docs.wandb.com/library/integrations/huggingface) are currently supported. +You can easily log and monitor your runs code. 
The following are currently supported: + +* [TensorBoard](https://www.tensorflow.org/tensorboard) +* [Weights & Biases](https://docs.wandb.com/library/integrations/huggingface) +* [Comet ML](https://www.comet.ml/docs/python-sdk/huggingface/) + +### Weights & Biases To use Weights & Biases, install the wandb package with: @@ -104,6 +110,18 @@ wandb.login() Whenever you use `Trainer` or `TFTrainer` classes, your losses, evaluation metrics, model topology and gradients (for `Trainer` only) will automatically be logged. -For advanced configuration and examples, refer to the [W&B documentation](https://docs.wandb.com/library/integrations/huggingface). - When using 🤗 Transformers with PyTorch Lightning, runs can be tracked through `WandbLogger`. Refer to related [documentation & examples](https://docs.wandb.com/library/frameworks/pytorch/lightning). + +### Comet.ml + +To use `comet_ml`, install the Python package with: + +```bash +pip install comet_ml +``` + +or if in a Conda environment: + +```bash +conda install -c comet_ml -c anaconda -c conda-forge comet_ml +``` diff --git a/src/transformers/__init__.py b/src/transformers/__init__.py index cf86a2d028..c43e19604f 100755 --- a/src/transformers/__init__.py +++ b/src/transformers/__init__.py @@ -88,6 +88,9 @@ from .file_utils import ( ) from .hf_argparser import HfArgumentParser +# Integrations +from .integrations import is_comet_available, is_tensorboard_available, is_wandb_available + # Model Cards from .modelcard import ModelCard diff --git a/src/transformers/integrations.py b/src/transformers/integrations.py new file mode 100644 index 0000000000..c200d4e1df --- /dev/null +++ b/src/transformers/integrations.py @@ -0,0 +1,48 @@ +# Integrations with other Python libraries + +import os + + +try: + import comet_ml # noqa: F401 + + _has_comet = True +except (ImportError): + _has_comet = False + + +try: + import wandb + + wandb.ensure_configured() + if wandb.api.api_key is None: + _has_wandb = False + wandb.termwarn("W&B 
installed but not logged in. Run `wandb login` or set the WANDB_API_KEY env variable.") + else: + _has_wandb = False if os.getenv("WANDB_DISABLED") else True +except (ImportError, AttributeError): + _has_wandb = False + +try: + from torch.utils.tensorboard import SummaryWriter # noqa: F401 + + _has_tensorboard = True +except ImportError: + try: + from tensorboardX import SummaryWriter # noqa: F401 + + _has_tensorboard = True + except ImportError: + _has_tensorboard = False + + +def is_wandb_available(): + return _has_wandb + + +def is_comet_available(): + return _has_comet + + +def is_tensorboard_available(): + return _has_tensorboard diff --git a/src/transformers/trainer.py b/src/transformers/trainer.py index 10674c0620..d8aeddb853 100755 --- a/src/transformers/trainer.py +++ b/src/transformers/trainer.py @@ -20,16 +20,10 @@ from tqdm.auto import tqdm, trange from .data.data_collator import DataCollator, default_data_collator from .file_utils import is_torch_tpu_available +from .integrations import is_comet_available, is_tensorboard_available, is_wandb_available from .modeling_utils import PreTrainedModel from .optimization import AdamW, get_linear_schedule_with_warmup -from .trainer_utils import ( - PREFIX_CHECKPOINT_DIR, - EvalPrediction, - PredictionOutput, - TrainOutput, - is_wandb_available, - set_seed, -) +from .trainer_utils import PREFIX_CHECKPOINT_DIR, EvalPrediction, PredictionOutput, TrainOutput, set_seed from .training_args import TrainingArguments @@ -53,26 +47,17 @@ if is_torch_tpu_available(): import torch_xla.debug.metrics as met import torch_xla.distributed.parallel_loader as pl -try: - from torch.utils.tensorboard import SummaryWriter - - _has_tensorboard = True -except ImportError: +if is_tensorboard_available(): try: - from tensorboardX import SummaryWriter - - _has_tensorboard = True + from torch.utils.tensorboard import SummaryWriter except ImportError: - _has_tensorboard = False - - -def is_tensorboard_available(): - return _has_tensorboard 
- + from tensorboardX import SummaryWriter if is_wandb_available(): import wandb +if is_comet_available(): + import comet_ml logger = logging.getLogger(__name__) @@ -210,6 +195,13 @@ class Trainer: "You are instantiating a Trainer but W&B is not installed. To use wandb logging, " "run `pip install wandb; wandb login` see https://docs.wandb.com/huggingface." ) + if is_comet_available(): + self.setup_comet() + elif os.environ.get("COMET_MODE") != "DISABLED": + logger.info( + "To use comet_ml logging, run `pip/conda install comet_ml` " + "see https://www.comet.ml/docs/python-sdk/huggingface/" + ) set_seed(self.args.seed) # Create output directory if needed if self.is_world_process_zero(): @@ -393,6 +385,37 @@ class Trainer: self.model, log=os.getenv("WANDB_WATCH", "gradients"), log_freq=max(100, self.args.logging_steps) ) + def setup_comet(self): + """ + Setup the optional Comet.ml integration. + + Environment: + COMET_MODE: + (Optional): str - "OFFLINE", "ONLINE", or "DISABLED" + COMET_PROJECT_NAME: + (Optional): str - Comet.ml project name for experiments + COMET_OFFLINE_DIRECTORY: + (Optional): str - folder to use for saving offline experiments when `COMET_MODE` is "OFFLINE" + + For a number of configurable items in the environment, + see `here <https://www.comet.ml/docs/python-sdk/advanced/#comet-configuration-variables>`__ + """ + if self.is_world_master(): + comet_mode = os.getenv("COMET_MODE", "ONLINE").upper() + args = {"project_name": os.getenv("COMET_PROJECT_NAME", "huggingface")} + experiment = None + if comet_mode == "ONLINE": + experiment = comet_ml.Experiment(**args) + logger.info("Automatic Comet.ml online logging enabled") + elif comet_mode == "OFFLINE": + args["offline_directory"] = os.getenv("COMET_OFFLINE_DIRECTORY", "./") + experiment = comet_ml.OfflineExperiment(**args) + logger.info("Automatic Comet.ml offline logging enabled; use `comet upload` when finished") + if experiment is not None: + experiment._set_model_graph(self.model, framework="transformers") + experiment._log_parameters(self.args, prefix="args/", 
framework="transformers") + experiment._log_parameters(self.model.config, prefix="config/", framework="transformers") + def num_examples(self, dataloader: DataLoader) -> int: """ Helper to get number of samples in a :class:`~torch.utils.data.DataLoader` by accessing its dataset. @@ -655,6 +678,11 @@ class Trainer: if is_wandb_available(): if self.is_world_process_zero(): wandb.log(logs, step=self.global_step) + if is_comet_available(): + if self.is_world_process_zero(): + experiment = comet_ml.config.get_global_experiment() + if experiment is not None: + experiment._log_metrics(logs, step=self.global_step, epoch=self.epoch, framework="transformers") output = {**logs, **{"step": self.global_step}} if iterator is not None: iterator.write(output) diff --git a/src/transformers/trainer_tf.py b/src/transformers/trainer_tf.py index d388017437..7cb387164e 100644 --- a/src/transformers/trainer_tf.py +++ b/src/transformers/trainer_tf.py @@ -11,15 +11,18 @@ import numpy as np import tensorflow as tf from packaging.version import parse +from .integrations import is_comet_available, is_wandb_available from .modeling_tf_utils import TFPreTrainedModel from .optimization_tf import GradientAccumulator, create_optimizer -from .trainer_utils import PREFIX_CHECKPOINT_DIR, EvalPrediction, PredictionOutput, is_wandb_available, set_seed +from .trainer_utils import PREFIX_CHECKPOINT_DIR, EvalPrediction, PredictionOutput, set_seed from .training_args_tf import TFTrainingArguments if is_wandb_available(): import wandb +if is_comet_available(): + import comet_ml logger = logging.getLogger(__name__) @@ -96,6 +99,14 @@ class TFTrainer: "run `pip install wandb; wandb login` see https://docs.wandb.com/huggingface." 
) + + if is_comet_available(): + self.setup_comet() + elif os.environ.get("COMET_MODE") != "DISABLED": + logger.info( + "To use comet_ml logging, run `pip/conda install comet_ml` " + "see https://www.comet.ml/docs/python-sdk/huggingface/" + ) + set_seed(self.args.seed) def get_train_tfdataset(self) -> tf.data.Dataset: @@ -218,6 +229,36 @@ class TFTrainer: combined_dict = {**self.model.config.to_dict(), **self.args.to_sanitized_dict()} wandb.init(project=os.getenv("WANDB_PROJECT", "huggingface"), config=combined_dict, name=self.args.run_name) + def setup_comet(self): + """ + Setup the optional Comet.ml integration. + + Environment: + COMET_MODE: + (Optional): str - "OFFLINE", "ONLINE", or "DISABLED" + COMET_PROJECT_NAME: + (Optional): str - Comet.ml project name for experiments + COMET_OFFLINE_DIRECTORY: + (Optional): str - folder to use for saving offline experiments when `COMET_MODE` is "OFFLINE" + + For a number of configurable items in the environment, + see `here <https://www.comet.ml/docs/python-sdk/advanced/#comet-configuration-variables>`__ + """ + comet_mode = os.getenv("COMET_MODE", "ONLINE").upper() + args = {"project_name": os.getenv("COMET_PROJECT_NAME", "huggingface")} + experiment = None + if comet_mode == "ONLINE": + experiment = comet_ml.Experiment(**args) + logger.info("Automatic Comet.ml online logging enabled") + elif comet_mode == "OFFLINE": + args["offline_directory"] = os.getenv("COMET_OFFLINE_DIRECTORY", "./") + experiment = comet_ml.OfflineExperiment(**args) + logger.info("Automatic Comet.ml offline logging enabled; use `comet upload` when finished") + if experiment is not None: + experiment._set_model_graph(self.model, framework="transformers") + experiment._log_parameters(self.args, prefix="args/", framework="transformers") + experiment._log_parameters(self.model.config, prefix="config/", framework="transformers") + def prediction_loop( self, dataset: tf.data.Dataset, @@ -336,6 +377,13 @@ class TFTrainer: if is_wandb_available(): wandb.log(logs, step=self.global_step) + if is_comet_available(): + experiment = 
comet_ml.config.get_global_experiment() + if experiment is not None: + experiment._log_metrics( + logs, step=self.global_step, epoch=self.epoch_logging, framework="transformers" + ) + output = {**logs, **{"step": self.global_step}} logger.info(output) diff --git a/src/transformers/trainer_utils.py b/src/transformers/trainer_utils.py index 1a4e995072..5bfdddb071 100644 --- a/src/transformers/trainer_utils.py +++ b/src/transformers/trainer_utils.py @@ -1,4 +1,3 @@ -import os import random from typing import Dict, NamedTuple, Optional @@ -7,23 +6,6 @@ import numpy as np from .file_utils import is_tf_available, is_torch_available -try: - import wandb - - wandb.ensure_configured() - if wandb.api.api_key is None: - _has_wandb = False - wandb.termwarn("W&B installed but not logged in. Run `wandb login` or set the WANDB_API_KEY env variable.") - else: - _has_wandb = False if os.getenv("WANDB_DISABLED") else True -except (ImportError, AttributeError): - _has_wandb = False - - -def is_wandb_available(): - return _has_wandb - - def set_seed(seed: int): """ Helper function for reproducible behavior to set the seed in ``random``, ``numpy``, ``torch`` and/or ``tf``