Adds comet_ml to the list of auto-experiment loggers (#6176)
* Support for Comet.ml * Need to import comet first * Log this model, not the one in the backprop step * Log args as hyperparameters; use framework to allow fine control * Log hyperparameters with context * Apply black formatting * isort fix integrations * isort fix __init__ * Update src/transformers/trainer.py Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com> * Update src/transformers/trainer.py Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com> * Update src/transformers/trainer_tf.py Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com> * Address review comments * Style + Quality, remove Tensorboard import test Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com> Co-authored-by: Lysandre <lysandre.debut@reseau.eseo.fr>
This commit is contained in:
@@ -81,7 +81,13 @@ Feedback and more use cases and benchmarks involving TPUs are welcome, please sh
|
||||
|
||||
## Logging & Experiment tracking
|
||||
|
||||
You can easily log and monitor your runs code. [TensorBoard](https://www.tensorflow.org/tensorboard) and [Weights & Biases](https://docs.wandb.com/library/integrations/huggingface) are currently supported.
|
||||
You can easily log and monitor your training runs. The following integrations are currently supported:
|
||||
|
||||
* [TensorBoard](https://www.tensorflow.org/tensorboard)
|
||||
* [Weights & Biases](https://docs.wandb.com/library/integrations/huggingface)
|
||||
* [Comet ML](https://www.comet.ml/docs/python-sdk/huggingface/)
|
||||
|
||||
### Weights & Biases
|
||||
|
||||
To use Weights & Biases, install the wandb package with:
|
||||
|
||||
@@ -104,6 +110,18 @@ wandb.login()
|
||||
|
||||
Whenever you use `Trainer` or `TFTrainer` classes, your losses, evaluation metrics, model topology and gradients (for `Trainer` only) will automatically be logged.
|
||||
|
||||
For advanced configuration and examples, refer to the [W&B documentation](https://docs.wandb.com/library/integrations/huggingface).
|
||||
|
||||
When using 🤗 Transformers with PyTorch Lightning, runs can be tracked through `WandbLogger`. Refer to related [documentation & examples](https://docs.wandb.com/library/frameworks/pytorch/lightning).
|
||||
|
||||
### Comet.ml
|
||||
|
||||
To use `comet_ml`, install the Python package with:
|
||||
|
||||
```bash
|
||||
pip install comet_ml
|
||||
```
|
||||
|
||||
or if in a Conda environment:
|
||||
|
||||
```bash
|
||||
conda install -c comet_ml -c anaconda -c conda-forge comet_ml
|
||||
```
|
||||
|
||||
@@ -88,6 +88,9 @@ from .file_utils import (
|
||||
)
|
||||
from .hf_argparser import HfArgumentParser
|
||||
|
||||
# Integrations
|
||||
from .integrations import is_comet_available, is_tensorboard_available, is_wandb_available
|
||||
|
||||
# Model Cards
|
||||
from .modelcard import ModelCard
|
||||
|
||||
|
||||
48
src/transformers/integrations.py
Normal file
48
src/transformers/integrations.py
Normal file
@@ -0,0 +1,48 @@
|
||||
# Integrations with other Python libraries
|
||||
|
||||
import os
|
||||
|
||||
|
||||
try:
|
||||
import comet_ml # noqa: F401
|
||||
|
||||
_has_comet = True
|
||||
except (ImportError):
|
||||
_has_comet = False
|
||||
|
||||
|
||||
try:
|
||||
import wandb
|
||||
|
||||
wandb.ensure_configured()
|
||||
if wandb.api.api_key is None:
|
||||
_has_wandb = False
|
||||
wandb.termwarn("W&B installed but not logged in. Run `wandb login` or set the WANDB_API_KEY env variable.")
|
||||
else:
|
||||
_has_wandb = False if os.getenv("WANDB_DISABLED") else True
|
||||
except (ImportError, AttributeError):
|
||||
_has_wandb = False
|
||||
|
||||
try:
|
||||
from torch.utils.tensorboard import SummaryWriter # noqa: F401
|
||||
|
||||
_has_tensorboard = True
|
||||
except ImportError:
|
||||
try:
|
||||
from tensorboardX import SummaryWriter # noqa: F401
|
||||
|
||||
_has_tensorboard = True
|
||||
except ImportError:
|
||||
_has_tensorboard = False
|
||||
|
||||
|
||||
def is_wandb_available():
|
||||
return _has_wandb
|
||||
|
||||
|
||||
def is_comet_available():
|
||||
return _has_comet
|
||||
|
||||
|
||||
def is_tensorboard_available():
|
||||
return _has_tensorboard
|
||||
@@ -20,16 +20,10 @@ from tqdm.auto import tqdm, trange
|
||||
|
||||
from .data.data_collator import DataCollator, default_data_collator
|
||||
from .file_utils import is_torch_tpu_available
|
||||
from .integrations import is_comet_available, is_tensorboard_available, is_wandb_available
|
||||
from .modeling_utils import PreTrainedModel
|
||||
from .optimization import AdamW, get_linear_schedule_with_warmup
|
||||
from .trainer_utils import (
|
||||
PREFIX_CHECKPOINT_DIR,
|
||||
EvalPrediction,
|
||||
PredictionOutput,
|
||||
TrainOutput,
|
||||
is_wandb_available,
|
||||
set_seed,
|
||||
)
|
||||
from .trainer_utils import PREFIX_CHECKPOINT_DIR, EvalPrediction, PredictionOutput, TrainOutput, set_seed
|
||||
from .training_args import TrainingArguments
|
||||
|
||||
|
||||
@@ -53,26 +47,17 @@ if is_torch_tpu_available():
|
||||
import torch_xla.debug.metrics as met
|
||||
import torch_xla.distributed.parallel_loader as pl
|
||||
|
||||
if is_tensorboard_available():
|
||||
try:
|
||||
from torch.utils.tensorboard import SummaryWriter
|
||||
|
||||
_has_tensorboard = True
|
||||
except ImportError:
|
||||
try:
|
||||
from tensorboardX import SummaryWriter
|
||||
|
||||
_has_tensorboard = True
|
||||
except ImportError:
|
||||
_has_tensorboard = False
|
||||
|
||||
|
||||
def is_tensorboard_available():
|
||||
return _has_tensorboard
|
||||
|
||||
|
||||
if is_wandb_available():
|
||||
import wandb
|
||||
|
||||
if is_comet_available():
|
||||
import comet_ml
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -210,6 +195,13 @@ class Trainer:
|
||||
"You are instantiating a Trainer but W&B is not installed. To use wandb logging, "
|
||||
"run `pip install wandb; wandb login` see https://docs.wandb.com/huggingface."
|
||||
)
|
||||
if is_comet_available():
|
||||
self.setup_comet()
|
||||
elif os.environ.get("COMET_MODE") != "DISABLED":
|
||||
logger.info(
|
||||
"To use comet_ml logging, run `pip/conda install comet_ml` "
|
||||
"see https://www.comet.ml/docs/python-sdk/huggingface/"
|
||||
)
|
||||
set_seed(self.args.seed)
|
||||
# Create output directory if needed
|
||||
if self.is_world_process_zero():
|
||||
@@ -393,6 +385,37 @@ class Trainer:
|
||||
self.model, log=os.getenv("WANDB_WATCH", "gradients"), log_freq=max(100, self.args.logging_steps)
|
||||
)
|
||||
|
||||
def setup_comet(self):
    """
    Setup the optional Comet.ml integration.

    An experiment is only created on the world process zero; every other process returns
    immediately. When ``COMET_MODE`` is neither "ONLINE" nor "OFFLINE" (for instance "DISABLED"),
    no experiment is created and nothing is logged.

    Environment:
        COMET_MODE:
            (Optional): str - "OFFLINE", "ONLINE", or "DISABLED"
        COMET_PROJECT_NAME:
            (Optional): str - Comet.ml project name for experiments
        COMET_OFFLINE_DIRECTORY:
            (Optional): str - folder to use for saving offline experiments when `COMET_MODE` is "OFFLINE"

    For a number of configurable items in the environment,
    see `here <https://www.comet.ml/docs/python-sdk/advanced/#comet-configuration-variables>`__
    """
    # Use the same rank check as the other logging paths of this class
    # (`is_world_process_zero`) so that setup and metric logging agree on which process reports.
    if not self.is_world_process_zero():
        return

    comet_mode = os.getenv("COMET_MODE", "ONLINE").upper()
    args = {"project_name": os.getenv("COMET_PROJECT_NAME", "huggingface")}
    experiment = None
    if comet_mode == "ONLINE":
        experiment = comet_ml.Experiment(**args)
        logger.info("Automatic Comet.ml online logging enabled")
    elif comet_mode == "OFFLINE":
        args["offline_directory"] = os.getenv("COMET_OFFLINE_DIRECTORY", "./")
        experiment = comet_ml.OfflineExperiment(**args)
        logger.info("Automatic Comet.ml offline logging enabled; use `comet upload` when finished")

    if experiment is not None:
        # Record the model and both the training arguments and the model config as
        # hyperparameters, namespaced by prefix so the two sets do not collide.
        experiment._set_model_graph(self.model, framework="transformers")
        experiment._log_parameters(self.args, prefix="args/", framework="transformers")
        experiment._log_parameters(self.model.config, prefix="config/", framework="transformers")
|
||||
|
||||
def num_examples(self, dataloader: DataLoader) -> int:
|
||||
"""
|
||||
Helper to get number of samples in a :class:`~torch.utils.data.DataLoader` by accessing its dataset.
|
||||
@@ -655,6 +678,11 @@ class Trainer:
|
||||
if is_wandb_available():
|
||||
if self.is_world_process_zero():
|
||||
wandb.log(logs, step=self.global_step)
|
||||
if is_comet_available():
|
||||
if self.is_world_process_zero():
|
||||
experiment = comet_ml.config.get_global_experiment()
|
||||
if experiment is not None:
|
||||
experiment._log_metrics(logs, step=self.global_step, epoch=self.epoch, framework="transformers")
|
||||
output = {**logs, **{"step": self.global_step}}
|
||||
if iterator is not None:
|
||||
iterator.write(output)
|
||||
|
||||
@@ -11,15 +11,18 @@ import numpy as np
|
||||
import tensorflow as tf
|
||||
from packaging.version import parse
|
||||
|
||||
from .integrations import is_comet_available, is_wandb_available
|
||||
from .modeling_tf_utils import TFPreTrainedModel
|
||||
from .optimization_tf import GradientAccumulator, create_optimizer
|
||||
from .trainer_utils import PREFIX_CHECKPOINT_DIR, EvalPrediction, PredictionOutput, is_wandb_available, set_seed
|
||||
from .trainer_utils import PREFIX_CHECKPOINT_DIR, EvalPrediction, PredictionOutput, set_seed
|
||||
from .training_args_tf import TFTrainingArguments
|
||||
|
||||
|
||||
if is_wandb_available():
|
||||
import wandb
|
||||
|
||||
if is_comet_available():
|
||||
import comet_ml
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -96,6 +99,14 @@ class TFTrainer:
|
||||
"run `pip install wandb; wandb login` see https://docs.wandb.com/huggingface."
|
||||
)
|
||||
|
||||
if is_comet_available():
|
||||
self.setup_comet()
|
||||
elif os.environ.get("COMET_MODE") != "DISABLED":
|
||||
logger.info(
|
||||
"To use comet_ml logging, run `pip/conda install comet_ml` "
|
||||
"see https://www.comet.ml/docs/python-sdk/huggingface/"
|
||||
)
|
||||
|
||||
set_seed(self.args.seed)
|
||||
|
||||
def get_train_tfdataset(self) -> tf.data.Dataset:
|
||||
@@ -218,6 +229,36 @@ class TFTrainer:
|
||||
combined_dict = {**self.model.config.to_dict(), **self.args.to_sanitized_dict()}
|
||||
wandb.init(project=os.getenv("WANDB_PROJECT", "huggingface"), config=combined_dict, name=self.args.run_name)
|
||||
|
||||
def setup_comet(self):
    """
    Setup the optional Comet.ml integration.

    Creates an online or offline Comet.ml experiment depending on ``COMET_MODE`` and records the
    model plus the training arguments and model configuration on it. Any other mode value
    (for instance "DISABLED") creates no experiment.

    Environment:
        COMET_MODE:
            (Optional): str - "OFFLINE", "ONLINE", or "DISABLED"
        COMET_PROJECT_NAME:
            (Optional): str - Comet.ml project name for experiments
        COMET_OFFLINE_DIRECTORY:
            (Optional): str - folder to use for saving offline experiments when `COMET_MODE` is "OFFLINE"

    For a number of configurable items in the environment,
    see `here <https://www.comet.ml/docs/python-sdk/advanced/#comet-configuration-variables>`__
    """
    mode = os.getenv("COMET_MODE", "ONLINE").upper()
    experiment_kwargs = {"project_name": os.getenv("COMET_PROJECT_NAME", "huggingface")}

    if mode == "ONLINE":
        run = comet_ml.Experiment(**experiment_kwargs)
        logger.info("Automatic Comet.ml online logging enabled")
    elif mode == "OFFLINE":
        experiment_kwargs["offline_directory"] = os.getenv("COMET_OFFLINE_DIRECTORY", "./")
        run = comet_ml.OfflineExperiment(**experiment_kwargs)
        logger.info("Automatic Comet.ml offline logging enabled; use `comet upload` when finished")
    else:
        # Unrecognized or disabled mode: no experiment, nothing to log.
        return

    if run is None:
        return

    # Model first, then the two hyperparameter sets under distinct prefixes.
    run._set_model_graph(self.model, framework="transformers")
    run._log_parameters(self.args, prefix="args/", framework="transformers")
    run._log_parameters(self.model.config, prefix="config/", framework="transformers")
|
||||
|
||||
def prediction_loop(
|
||||
self,
|
||||
dataset: tf.data.Dataset,
|
||||
@@ -336,6 +377,13 @@ class TFTrainer:
|
||||
if is_wandb_available():
|
||||
wandb.log(logs, step=self.global_step)
|
||||
|
||||
if is_comet_available():
|
||||
experiment = comet_ml.config.get_global_experiment()
|
||||
if experiment is not None:
|
||||
experiment._log_metrics(
|
||||
logs, step=self.global_step, epoch=self.epoch_logging, framework="transformers"
|
||||
)
|
||||
|
||||
output = {**logs, **{"step": self.global_step}}
|
||||
|
||||
logger.info(output)
|
||||
|
||||
@@ -1,4 +1,3 @@
|
||||
import os
|
||||
import random
|
||||
from typing import Dict, NamedTuple, Optional
|
||||
|
||||
@@ -7,23 +6,6 @@ import numpy as np
|
||||
from .file_utils import is_tf_available, is_torch_available
|
||||
|
||||
|
||||
try:
|
||||
import wandb
|
||||
|
||||
wandb.ensure_configured()
|
||||
if wandb.api.api_key is None:
|
||||
_has_wandb = False
|
||||
wandb.termwarn("W&B installed but not logged in. Run `wandb login` or set the WANDB_API_KEY env variable.")
|
||||
else:
|
||||
_has_wandb = False if os.getenv("WANDB_DISABLED") else True
|
||||
except (ImportError, AttributeError):
|
||||
_has_wandb = False
|
||||
|
||||
|
||||
def is_wandb_available():
|
||||
return _has_wandb
|
||||
|
||||
|
||||
def set_seed(seed: int):
|
||||
"""
|
||||
Helper function for reproducible behavior to set the seed in ``random``, ``numpy``, ``torch`` and/or ``tf``
|
||||
|
||||
Reference in New Issue
Block a user