Adds comet_ml to the list of auto-experiment loggers (#6176)
* Support for Comet.ml * Need to import comet first * Log this model, not the one in the backprop step * Log args as hyperparameters; use framework to allow fine control * Log hyperparameters with context * Apply black formatting * isort fix integrations * isort fix __init__ * Update src/transformers/trainer.py Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com> * Update src/transformers/trainer.py Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com> * Update src/transformers/trainer_tf.py Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com> * Address review comments * Style + Quality, remove Tensorboard import test Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com> Co-authored-by: Lysandre <lysandre.debut@reseau.eseo.fr>
This commit is contained in:
@@ -81,7 +81,13 @@ Feedback and more use cases and benchmarks involving TPUs are welcome, please sh
|
|||||||
|
|
||||||
## Logging & Experiment tracking
|
## Logging & Experiment tracking
|
||||||
|
|
||||||
You can easily log and monitor your runs code. [TensorBoard](https://www.tensorflow.org/tensorboard) and [Weights & Biases](https://docs.wandb.com/library/integrations/huggingface) are currently supported.
|
You can easily log and monitor your training runs. The following integrations are currently supported:
|
||||||
|
|
||||||
|
* [TensorBoard](https://www.tensorflow.org/tensorboard)
|
||||||
|
* [Weights & Biases](https://docs.wandb.com/library/integrations/huggingface)
|
||||||
|
* [Comet ML](https://www.comet.ml/docs/python-sdk/huggingface/)
|
||||||
|
|
||||||
|
### Weights & Biases
|
||||||
|
|
||||||
To use Weights & Biases, install the wandb package with:
|
To use Weights & Biases, install the wandb package with:
|
||||||
|
|
||||||
@@ -104,6 +110,18 @@ wandb.login()
|
|||||||
|
|
||||||
Whenever you use `Trainer` or `TFTrainer` classes, your losses, evaluation metrics, model topology and gradients (for `Trainer` only) will automatically be logged.
|
Whenever you use `Trainer` or `TFTrainer` classes, your losses, evaluation metrics, model topology and gradients (for `Trainer` only) will automatically be logged.
|
||||||
|
|
||||||
For advanced configuration and examples, refer to the [W&B documentation](https://docs.wandb.com/library/integrations/huggingface).
|
|
||||||
|
|
||||||
When using 🤗 Transformers with PyTorch Lightning, runs can be tracked through `WandbLogger`. Refer to related [documentation & examples](https://docs.wandb.com/library/frameworks/pytorch/lightning).
|
When using 🤗 Transformers with PyTorch Lightning, runs can be tracked through `WandbLogger`. Refer to related [documentation & examples](https://docs.wandb.com/library/frameworks/pytorch/lightning).
|
||||||
|
|
||||||
|
### Comet.ml
|
||||||
|
|
||||||
|
To use `comet_ml`, install the Python package with:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
pip install comet_ml
|
||||||
|
```
|
||||||
|
|
||||||
|
or, if you are in a Conda environment:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
conda install -c comet_ml -c anaconda -c conda-forge comet_ml
|
||||||
|
```
|
||||||
|
|||||||
@@ -88,6 +88,9 @@ from .file_utils import (
|
|||||||
)
|
)
|
||||||
from .hf_argparser import HfArgumentParser
|
from .hf_argparser import HfArgumentParser
|
||||||
|
|
||||||
|
# Integrations
|
||||||
|
from .integrations import is_comet_available, is_tensorboard_available, is_wandb_available
|
||||||
|
|
||||||
# Model Cards
|
# Model Cards
|
||||||
from .modelcard import ModelCard
|
from .modelcard import ModelCard
|
||||||
|
|
||||||
|
|||||||
48
src/transformers/integrations.py
Normal file
48
src/transformers/integrations.py
Normal file
@@ -0,0 +1,48 @@
|
|||||||
|
# Integrations with other Python libraries
|
||||||
|
|
||||||
|
import os
|
||||||
|
|
||||||
|
|
||||||
|
# Probe each optional logging backend once, at import time, and remember the
# outcome in a private module-level flag read by the `is_*_available` helpers.

# Comet.ml: available as soon as the package can be imported.
try:
    import comet_ml  # noqa: F401
except ImportError:
    _has_comet = False
else:
    _has_comet = True


# Weights & Biases: the package must import, an API key must be configured,
# and the WANDB_DISABLED environment variable must be unset or empty.
try:
    import wandb

    wandb.ensure_configured()
    if wandb.api.api_key is not None:
        _has_wandb = not os.getenv("WANDB_DISABLED")
    else:
        _has_wandb = False
        wandb.termwarn("W&B installed but not logged in. Run `wandb login` or set the WANDB_API_KEY env variable.")
except (ImportError, AttributeError):
    _has_wandb = False


# TensorBoard: prefer the writer bundled with PyTorch, then fall back to the
# standalone tensorboardX package; unavailable only when both imports fail.
_has_tensorboard = True
try:
    from torch.utils.tensorboard import SummaryWriter  # noqa: F401
except ImportError:
    try:
        from tensorboardX import SummaryWriter  # noqa: F401
    except ImportError:
        _has_tensorboard = False
|
||||||
|
|
||||||
|
|
||||||
|
def is_wandb_available():
    """
    Report whether Weights & Biases logging can be used.

    Returns the module-level ``_has_wandb`` flag, which is False when wandb
    cannot be imported, no API key is configured, or ``WANDB_DISABLED`` is set.
    """
    return _has_wandb
|
||||||
|
|
||||||
|
|
||||||
|
def is_comet_available():
    """
    Report whether Comet.ml logging can be used.

    Returns the module-level ``_has_comet`` flag, which is True when the
    ``comet_ml`` package was importable at module load.
    """
    return _has_comet
|
||||||
|
|
||||||
|
|
||||||
|
def is_tensorboard_available():
    """
    Report whether a TensorBoard ``SummaryWriter`` can be used.

    Returns the module-level ``_has_tensorboard`` flag, which is True when
    either ``torch.utils.tensorboard`` or ``tensorboardX`` was importable.
    """
    return _has_tensorboard
|
||||||
@@ -20,16 +20,10 @@ from tqdm.auto import tqdm, trange
|
|||||||
|
|
||||||
from .data.data_collator import DataCollator, default_data_collator
|
from .data.data_collator import DataCollator, default_data_collator
|
||||||
from .file_utils import is_torch_tpu_available
|
from .file_utils import is_torch_tpu_available
|
||||||
|
from .integrations import is_comet_available, is_tensorboard_available, is_wandb_available
|
||||||
from .modeling_utils import PreTrainedModel
|
from .modeling_utils import PreTrainedModel
|
||||||
from .optimization import AdamW, get_linear_schedule_with_warmup
|
from .optimization import AdamW, get_linear_schedule_with_warmup
|
||||||
from .trainer_utils import (
|
from .trainer_utils import PREFIX_CHECKPOINT_DIR, EvalPrediction, PredictionOutput, TrainOutput, set_seed
|
||||||
PREFIX_CHECKPOINT_DIR,
|
|
||||||
EvalPrediction,
|
|
||||||
PredictionOutput,
|
|
||||||
TrainOutput,
|
|
||||||
is_wandb_available,
|
|
||||||
set_seed,
|
|
||||||
)
|
|
||||||
from .training_args import TrainingArguments
|
from .training_args import TrainingArguments
|
||||||
|
|
||||||
|
|
||||||
@@ -53,26 +47,17 @@ if is_torch_tpu_available():
|
|||||||
import torch_xla.debug.metrics as met
|
import torch_xla.debug.metrics as met
|
||||||
import torch_xla.distributed.parallel_loader as pl
|
import torch_xla.distributed.parallel_loader as pl
|
||||||
|
|
||||||
try:
|
if is_tensorboard_available():
|
||||||
from torch.utils.tensorboard import SummaryWriter
|
|
||||||
|
|
||||||
_has_tensorboard = True
|
|
||||||
except ImportError:
|
|
||||||
try:
|
try:
|
||||||
from tensorboardX import SummaryWriter
|
from torch.utils.tensorboard import SummaryWriter
|
||||||
|
|
||||||
_has_tensorboard = True
|
|
||||||
except ImportError:
|
except ImportError:
|
||||||
_has_tensorboard = False
|
from tensorboardX import SummaryWriter
|
||||||
|
|
||||||
|
|
||||||
def is_tensorboard_available():
|
|
||||||
return _has_tensorboard
|
|
||||||
|
|
||||||
|
|
||||||
if is_wandb_available():
|
if is_wandb_available():
|
||||||
import wandb
|
import wandb
|
||||||
|
|
||||||
|
if is_comet_available():
|
||||||
|
import comet_ml
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
@@ -210,6 +195,13 @@ class Trainer:
|
|||||||
"You are instantiating a Trainer but W&B is not installed. To use wandb logging, "
|
"You are instantiating a Trainer but W&B is not installed. To use wandb logging, "
|
||||||
"run `pip install wandb; wandb login` see https://docs.wandb.com/huggingface."
|
"run `pip install wandb; wandb login` see https://docs.wandb.com/huggingface."
|
||||||
)
|
)
|
||||||
|
if is_comet_available():
|
||||||
|
self.setup_comet()
|
||||||
|
elif os.environ.get("COMET_MODE") != "DISABLED":
|
||||||
|
logger.info(
|
||||||
|
"To use comet_ml logging, run `pip/conda install comet_ml` "
|
||||||
|
"see https://www.comet.ml/docs/python-sdk/huggingface/"
|
||||||
|
)
|
||||||
set_seed(self.args.seed)
|
set_seed(self.args.seed)
|
||||||
# Create output directory if needed
|
# Create output directory if needed
|
||||||
if self.is_world_process_zero():
|
if self.is_world_process_zero():
|
||||||
@@ -393,6 +385,37 @@ class Trainer:
|
|||||||
self.model, log=os.getenv("WANDB_WATCH", "gradients"), log_freq=max(100, self.args.logging_steps)
|
self.model, log=os.getenv("WANDB_WATCH", "gradients"), log_freq=max(100, self.args.logging_steps)
|
||||||
)
|
)
|
||||||
|
|
||||||
|
def setup_comet(self):
    """
    Setup the optional Comet.ml integration.

    An experiment is only created on the process for which
    ``self.is_world_process_zero()`` is true; every other process returns
    without doing anything. When an experiment is created, the model graph,
    the training arguments (prefix ``args/``) and the model configuration
    (prefix ``config/``) are logged to it.

    Environment:
        COMET_MODE:
            (Optional): str - "OFFLINE", "ONLINE", or "DISABLED". Defaults to "ONLINE"; the value is
            upper-cased before comparison, and any value other than "ONLINE"/"OFFLINE" creates no experiment.
        COMET_PROJECT_NAME:
            (Optional): str - Comet.ml project name for experiments. Defaults to "huggingface".
        COMET_OFFLINE_DIRECTORY:
            (Optional): str - folder to use for saving offline experiments when `COMET_MODE` is "OFFLINE".
            Defaults to "./".

    For a number of configurable items in the environment,
    see `here <https://www.comet.ml/docs/python-sdk/advanced/#comet-configuration-variables>`__
    """
    # Use the same main-process check as the rest of the Trainer (output
    # directory creation, metric logging) instead of `is_world_master()`.
    if not self.is_world_process_zero():
        return

    comet_mode = os.getenv("COMET_MODE", "ONLINE").upper()
    args = {"project_name": os.getenv("COMET_PROJECT_NAME", "huggingface")}
    experiment = None
    if comet_mode == "ONLINE":
        experiment = comet_ml.Experiment(**args)
        logger.info("Automatic Comet.ml online logging enabled")
    elif comet_mode == "OFFLINE":
        args["offline_directory"] = os.getenv("COMET_OFFLINE_DIRECTORY", "./")
        experiment = comet_ml.OfflineExperiment(**args)
        logger.info("Automatic Comet.ml offline logging enabled; use `comet upload` when finished")

    # Any other mode (e.g. "DISABLED") leaves `experiment` as None: nothing to log.
    if experiment is None:
        return

    # NOTE(review): these are underscore-prefixed comet_ml methods called with
    # framework="transformers" - confirm they stay supported across comet_ml releases.
    experiment._set_model_graph(self.model, framework="transformers")
    experiment._log_parameters(self.args, prefix="args/", framework="transformers")
    experiment._log_parameters(self.model.config, prefix="config/", framework="transformers")
|
||||||
|
|
||||||
def num_examples(self, dataloader: DataLoader) -> int:
|
def num_examples(self, dataloader: DataLoader) -> int:
|
||||||
"""
|
"""
|
||||||
Helper to get number of samples in a :class:`~torch.utils.data.DataLoader` by accessing its dataset.
|
Helper to get number of samples in a :class:`~torch.utils.data.DataLoader` by accessing its dataset.
|
||||||
@@ -655,6 +678,11 @@ class Trainer:
|
|||||||
if is_wandb_available():
|
if is_wandb_available():
|
||||||
if self.is_world_process_zero():
|
if self.is_world_process_zero():
|
||||||
wandb.log(logs, step=self.global_step)
|
wandb.log(logs, step=self.global_step)
|
||||||
|
if is_comet_available():
|
||||||
|
if self.is_world_process_zero():
|
||||||
|
experiment = comet_ml.config.get_global_experiment()
|
||||||
|
if experiment is not None:
|
||||||
|
experiment._log_metrics(logs, step=self.global_step, epoch=self.epoch, framework="transformers")
|
||||||
output = {**logs, **{"step": self.global_step}}
|
output = {**logs, **{"step": self.global_step}}
|
||||||
if iterator is not None:
|
if iterator is not None:
|
||||||
iterator.write(output)
|
iterator.write(output)
|
||||||
|
|||||||
@@ -11,15 +11,18 @@ import numpy as np
|
|||||||
import tensorflow as tf
|
import tensorflow as tf
|
||||||
from packaging.version import parse
|
from packaging.version import parse
|
||||||
|
|
||||||
|
from .integrations import is_comet_available, is_wandb_available
|
||||||
from .modeling_tf_utils import TFPreTrainedModel
|
from .modeling_tf_utils import TFPreTrainedModel
|
||||||
from .optimization_tf import GradientAccumulator, create_optimizer
|
from .optimization_tf import GradientAccumulator, create_optimizer
|
||||||
from .trainer_utils import PREFIX_CHECKPOINT_DIR, EvalPrediction, PredictionOutput, is_wandb_available, set_seed
|
from .trainer_utils import PREFIX_CHECKPOINT_DIR, EvalPrediction, PredictionOutput, set_seed
|
||||||
from .training_args_tf import TFTrainingArguments
|
from .training_args_tf import TFTrainingArguments
|
||||||
|
|
||||||
|
|
||||||
if is_wandb_available():
|
if is_wandb_available():
|
||||||
import wandb
|
import wandb
|
||||||
|
|
||||||
|
if is_comet_available():
|
||||||
|
import comet_ml
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
@@ -96,6 +99,14 @@ class TFTrainer:
|
|||||||
"run `pip install wandb; wandb login` see https://docs.wandb.com/huggingface."
|
"run `pip install wandb; wandb login` see https://docs.wandb.com/huggingface."
|
||||||
)
|
)
|
||||||
|
|
||||||
|
if is_comet_available():
|
||||||
|
self.setup_comet()
|
||||||
|
elif os.environ.get("COMET_MODE") != "DISABLED":
|
||||||
|
logger.info(
|
||||||
|
"To use comet_ml logging, run `pip/conda install comet_ml` "
|
||||||
|
"see https://www.comet.ml/docs/python-sdk/huggingface/"
|
||||||
|
)
|
||||||
|
|
||||||
set_seed(self.args.seed)
|
set_seed(self.args.seed)
|
||||||
|
|
||||||
def get_train_tfdataset(self) -> tf.data.Dataset:
|
def get_train_tfdataset(self) -> tf.data.Dataset:
|
||||||
@@ -218,6 +229,36 @@ class TFTrainer:
|
|||||||
combined_dict = {**self.model.config.to_dict(), **self.args.to_sanitized_dict()}
|
combined_dict = {**self.model.config.to_dict(), **self.args.to_sanitized_dict()}
|
||||||
wandb.init(project=os.getenv("WANDB_PROJECT", "huggingface"), config=combined_dict, name=self.args.run_name)
|
wandb.init(project=os.getenv("WANDB_PROJECT", "huggingface"), config=combined_dict, name=self.args.run_name)
|
||||||
|
|
||||||
|
def setup_comet(self):
    """
    Configure the optional Comet.ml experiment for this trainer.

    Depending on ``COMET_MODE`` an online or offline experiment is created and
    the model graph, the training arguments (prefix ``args/``) and the model
    configuration (prefix ``config/``) are logged to it. Any other mode creates
    no experiment.

    Environment:
        COMET_MODE:
            (Optional): str - "OFFLINE", "ONLINE", or "DISABLED"
        COMET_PROJECT_NAME:
            (Optional): str - Comet.ml project name for experiments
        COMET_OFFLINE_DIRECTORY:
            (Optional): str - folder to use for saving offline experiments when `COMET_MODE` is "OFFLINE"

    For a number of configurable items in the environment,
    see `here <https://www.comet.ml/docs/python-sdk/advanced/#comet-configuration-variables>`__
    """
    mode = os.getenv("COMET_MODE", "ONLINE").upper()
    project = os.getenv("COMET_PROJECT_NAME", "huggingface")

    if mode == "ONLINE":
        run = comet_ml.Experiment(project_name=project)
        logger.info("Automatic Comet.ml online logging enabled")
    elif mode == "OFFLINE":
        offline_dir = os.getenv("COMET_OFFLINE_DIRECTORY", "./")
        run = comet_ml.OfflineExperiment(project_name=project, offline_directory=offline_dir)
        logger.info("Automatic Comet.ml offline logging enabled; use `comet upload` when finished")
    else:
        # Unrecognised or "DISABLED" mode: no experiment, nothing to log.
        return

    if run is not None:
        run._set_model_graph(self.model, framework="transformers")
        # Log both hyperparameter sources under distinct prefixes.
        for params, prefix in ((self.args, "args/"), (self.model.config, "config/")):
            run._log_parameters(params, prefix=prefix, framework="transformers")
|
||||||
|
|
||||||
def prediction_loop(
|
def prediction_loop(
|
||||||
self,
|
self,
|
||||||
dataset: tf.data.Dataset,
|
dataset: tf.data.Dataset,
|
||||||
@@ -336,6 +377,13 @@ class TFTrainer:
|
|||||||
if is_wandb_available():
|
if is_wandb_available():
|
||||||
wandb.log(logs, step=self.global_step)
|
wandb.log(logs, step=self.global_step)
|
||||||
|
|
||||||
|
if is_comet_available():
|
||||||
|
experiment = comet_ml.config.get_global_experiment()
|
||||||
|
if experiment is not None:
|
||||||
|
experiment._log_metrics(
|
||||||
|
logs, step=self.global_step, epoch=self.epoch_logging, framework="transformers"
|
||||||
|
)
|
||||||
|
|
||||||
output = {**logs, **{"step": self.global_step}}
|
output = {**logs, **{"step": self.global_step}}
|
||||||
|
|
||||||
logger.info(output)
|
logger.info(output)
|
||||||
|
|||||||
@@ -1,4 +1,3 @@
|
|||||||
import os
|
|
||||||
import random
|
import random
|
||||||
from typing import Dict, NamedTuple, Optional
|
from typing import Dict, NamedTuple, Optional
|
||||||
|
|
||||||
@@ -7,23 +6,6 @@ import numpy as np
|
|||||||
from .file_utils import is_tf_available, is_torch_available
|
from .file_utils import is_tf_available, is_torch_available
|
||||||
|
|
||||||
|
|
||||||
try:
|
|
||||||
import wandb
|
|
||||||
|
|
||||||
wandb.ensure_configured()
|
|
||||||
if wandb.api.api_key is None:
|
|
||||||
_has_wandb = False
|
|
||||||
wandb.termwarn("W&B installed but not logged in. Run `wandb login` or set the WANDB_API_KEY env variable.")
|
|
||||||
else:
|
|
||||||
_has_wandb = False if os.getenv("WANDB_DISABLED") else True
|
|
||||||
except (ImportError, AttributeError):
|
|
||||||
_has_wandb = False
|
|
||||||
|
|
||||||
|
|
||||||
def is_wandb_available():
|
|
||||||
return _has_wandb
|
|
||||||
|
|
||||||
|
|
||||||
def set_seed(seed: int):
|
def set_seed(seed: int):
|
||||||
"""
|
"""
|
||||||
Helper function for reproducible behavior to set the seed in ``random``, ``numpy``, ``torch`` and/or ``tf``
|
Helper function for reproducible behavior to set the seed in ``random``, ``numpy``, ``torch`` and/or ``tf``
|
||||||
|
|||||||
Reference in New Issue
Block a user