From dad414d5f9c20627ee6c16f62e8a2056916bf35b Mon Sep 17 00:00:00 2001 From: Stas Bekman Date: Mon, 21 Jun 2021 19:30:50 -0700 Subject: [PATCH] [trainer + examples] set log level from CLI (#12276) * set log level from CLI * add log_level_replica + test + extended docs * cleanup * Apply suggestions from code review Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com> * rename datasets objects to allow datasets module * improve the doc * style * doc improve Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com> --- docs/source/main_classes/trainer.rst | 68 +++++++++++++++++++ .../pytorch/translation/run_translation.py | 34 ++++++---- src/transformers/trainer.py | 4 ++ src/transformers/trainer_pt_utils.py | 4 +- src/transformers/training_args.py | 46 ++++++++++--- src/transformers/utils/logging.py | 4 ++ tests/test_trainer.py | 33 ++++++++- 7 files changed, 167 insertions(+), 26 deletions(-) diff --git a/docs/source/main_classes/trainer.rst b/docs/source/main_classes/trainer.rst index 35dfdcad33..866665eacf 100644 --- a/docs/source/main_classes/trainer.rst +++ b/docs/source/main_classes/trainer.rst @@ -119,6 +119,74 @@ TFTrainingArguments :members: +Logging +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +By default :class:`~transformers.Trainer` will use ``logging.INFO`` for the main process and ``logging.WARNING`` for +the replicas if any. + +These defaults can be overridden to use any of the 5 ``logging`` levels with :class:`~transformers.TrainingArguments`'s +arguments: + +- ``log_level`` - for the main process +- ``log_level_replica`` - for the replicas + +Further, if :class:`~transformers.TrainingArguments`'s ``log_on_each_node`` is set to ``False`` only the main node will +use the log level settings for its main process, all other nodes will use the log level settings for replicas. + +Note that :class:`~transformers.Trainer` is going to set ``transformers``'s log level separately for each node in its +:meth:`~transformers.Trainer.__init__`. So you may want to set this sooner (see the next example) if you tap into other +``transformers`` functionality before creating the :class:`~transformers.Trainer` object. + +Here is an example of how this can be used in an application: + +.. code-block:: python + + [...] + logger = logging.getLogger(__name__) + + # Setup logging + logging.basicConfig( + format="%(asctime)s - %(levelname)s - %(name)s - %(message)s", + datefmt="%m/%d/%Y %H:%M:%S", + handlers=[logging.StreamHandler(sys.stdout)], + ) + + # set the main code and the modules it uses to the same log-level according to the node + log_level = training_args.get_node_log_level() + logger.setLevel(log_level) + datasets.utils.logging.set_verbosity(log_level) + transformers.utils.logging.set_verbosity(log_level) + + trainer = Trainer(...) + +And then if you only want to see warnings on the main node and all other nodes to not print any most likely duplicated +warnings you could run it as: + +.. code-block:: bash + + my_app.py ... --log_level warning --log_level_replica error + +In the multi-node environment if you also don't want the logs to repeat for each node's main process, you will want to +change the above to: + +.. code-block:: bash + + my_app.py ... --log_level warning --log_level_replica error --log_on_each_node 0 + +and then only the main process of the first node will log at the "warning" level, and all other processes on the main +node and all processes on other nodes will log at the "error" level. + +If you need your application to be as quiet as possible you could do: + +.. code-block:: bash + + my_app.py ... --log_level error --log_level_replica error --log_on_each_node 0 + +(add ``--log_on_each_node 0`` if on multi-node environment) + + + Randomness ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/examples/pytorch/translation/run_translation.py b/examples/pytorch/translation/run_translation.py index 8a5a6f531f..4411180044 100755 --- a/examples/pytorch/translation/run_translation.py +++ b/examples/pytorch/translation/run_translation.py @@ -24,6 +24,7 @@ import sys from dataclasses import dataclass, field from typing import Optional +import datasets import numpy as np from datasets import load_dataset, load_metric @@ -243,16 +244,17 @@ def main(): datefmt="%m/%d/%Y %H:%M:%S", handlers=[logging.StreamHandler(sys.stdout)], ) - logger.setLevel(logging.INFO if training_args.should_log else logging.WARN) + + log_level = training_args.get_node_log_level() + logger.setLevel(log_level) + datasets.utils.logging.set_verbosity(log_level) + transformers.utils.logging.set_verbosity(log_level) # Log on each process the small summary: logger.warning( f"Process rank: {training_args.local_rank}, device: {training_args.device}, n_gpu: {training_args.n_gpu}" + f"distributed training: {bool(training_args.local_rank != -1)}, 16-bits training: {training_args.fp16}" ) - # Set the verbosity to info of the Transformers logger (on main process only): - if training_args.should_log: - transformers.utils.logging.set_verbosity_info() logger.info(f"Training/evaluation parameters {training_args}") if data_args.source_prefix is None and model_args.model_name_or_path in [ @@ -296,7 +298,9 @@ def main(): # download the dataset. if data_args.dataset_name is not None: # Downloading and loading a dataset from the hub. - datasets = load_dataset(data_args.dataset_name, data_args.dataset_config_name, cache_dir=model_args.cache_dir) + raw_datasets = load_dataset( + data_args.dataset_name, data_args.dataset_config_name, cache_dir=model_args.cache_dir + ) else: data_files = {} if data_args.train_file is not None: @@ -308,7 +312,7 @@ def main(): if data_args.test_file is not None: data_files["test"] = data_args.test_file extension = data_args.test_file.split(".")[-1] - datasets = load_dataset(extension, data_files=data_files, cache_dir=model_args.cache_dir) + raw_datasets = load_dataset(extension, data_files=data_files, cache_dir=model_args.cache_dir) # See more about loading any type of standard or custom dataset (from files, python dict, pandas DataFrame, etc) at # https://huggingface.co/docs/datasets/loading_datasets.html. @@ -356,11 +360,11 @@ def main(): # Preprocessing the datasets. # We need to tokenize inputs and targets. if training_args.do_train: - column_names = datasets["train"].column_names + column_names = raw_datasets["train"].column_names elif training_args.do_eval: - column_names = datasets["validation"].column_names + column_names = raw_datasets["validation"].column_names elif training_args.do_predict: - column_names = datasets["test"].column_names + column_names = raw_datasets["test"].column_names else: logger.info("There is nothing to do. Please pass `do_train`, `do_eval` and/or `do_predict`.") return @@ -418,9 +422,9 @@ def main(): return model_inputs if training_args.do_train: - if "train" not in datasets: + if "train" not in raw_datasets: raise ValueError("--do_train requires a train dataset") - train_dataset = datasets["train"] + train_dataset = raw_datasets["train"] if data_args.max_train_samples is not None: train_dataset = train_dataset.select(range(data_args.max_train_samples)) train_dataset = train_dataset.map( @@ -434,9 +438,9 @@ def main(): if training_args.do_eval: max_target_length = data_args.val_max_target_length - if "validation" not in datasets: + if "validation" not in raw_datasets: raise ValueError("--do_eval requires a validation dataset") - eval_dataset = datasets["validation"] + eval_dataset = raw_datasets["validation"] if data_args.max_eval_samples is not None: eval_dataset = eval_dataset.select(range(data_args.max_eval_samples)) eval_dataset = eval_dataset.map( @@ -450,9 +454,9 @@ def main(): if training_args.do_predict: max_target_length = data_args.val_max_target_length - if "test" not in datasets: + if "test" not in raw_datasets: raise ValueError("--do_predict requires a test dataset") - predict_dataset = datasets["test"] + predict_dataset = raw_datasets["test"] if data_args.max_predict_samples is not None: predict_dataset = predict_dataset.select(range(data_args.max_predict_samples)) predict_dataset = predict_dataset.map( diff --git a/src/transformers/trainer.py b/src/transformers/trainer.py index 70aeec25ca..60c344be7a 100755 --- a/src/transformers/trainer.py +++ b/src/transformers/trainer.py @@ -290,6 +290,10 @@ class Trainer: self._memory_tracker = TrainerMemoryTracker(self.args.skip_memory_metrics) self._memory_tracker.start() + # set the correct log level depending on the node + log_level = args.get_node_log_level() + logging.set_verbosity(log_level) + # force device and distributed setup init explicitly args._setup_devices diff --git a/src/transformers/trainer_pt_utils.py b/src/transformers/trainer_pt_utils.py index ec055e647e..91845eb2a0 100644 --- a/src/transformers/trainer_pt_utils.py +++ b/src/transformers/trainer_pt_utils.py @@ -905,12 +905,12 @@ def log_metrics(self, split, metrics): if not self.is_world_process_zero(): return - logger.info(f"***** {split} metrics *****") + print(f"***** {split} metrics *****") metrics_formatted = self.metrics_format(metrics) k_width = max(len(str(x)) for x in metrics_formatted.keys()) v_width = max(len(str(x)) for x in metrics_formatted.values()) for key in sorted(metrics_formatted.keys()): - logger.info(f" {key: <{k_width}} = {metrics_formatted[key]:>{v_width}}") + print(f" {key: <{k_width}} = {metrics_formatted[key]:>{v_width}}") def save_metrics(self, split, metrics, combined=True): diff --git a/src/transformers/training_args.py b/src/transformers/training_args.py index af8b269d95..69f1693abb 100644 --- a/src/transformers/training_args.py +++ b/src/transformers/training_args.py @@ -48,6 +48,8 @@ if is_sagemaker_mp_enabled(): logger = logging.get_logger(__name__) +log_levels = logging.get_log_levels_dict().copy() +trainer_log_levels = dict(**log_levels, passive=-1) def default_logdir() -> str: @@ -144,6 +146,15 @@ class TrainingArguments: warmup_steps (:obj:`int`, `optional`, defaults to 0): Number of steps used for a linear warmup from 0 to :obj:`learning_rate`. Overrides any effect of :obj:`warmup_ratio`. + log_level (:obj:`str`, `optional`, defaults to ``passive``): + Logger log level to use on the main process. Possible choices are the log levels as strings: 'debug', + 'info', 'warning', 'error' and 'critical', plus a 'passive' level which doesn't set anything and lets the + application set the level. + log_level_replica (:obj:`str`, `optional`, defaults to ``passive``): + Logger log level to use on replicas. Same choices as ``log_level``" + log_on_each_node (:obj:`bool`, `optional`, defaults to :obj:`True`): + In multinode distributed training, whether to log using :obj:`log_level` once per node, or only on the main + node. logging_dir (:obj:`str`, `optional`): `TensorBoard `__ log directory. Will default to `runs/**CURRENT_DATETIME_HOSTNAME**`. @@ -316,8 +327,6 @@ class TrainingArguments: :class:`~transformers.Trainer`, it's intended to be used by your training/evaluation scripts instead. See the `example scripts `__ for more details. - log_on_each_node (:obj:`bool`, `optional`, defaults to :obj:`True`): - In multinode distributed training, whether to log once per node, or only on the main node. """ output_dir: str = field( @@ -397,6 +406,26 @@ class TrainingArguments: ) warmup_steps: int = field(default=0, metadata={"help": "Linear warmup over warmup_steps."}) + log_level: Optional[str] = field( + default="passive", + metadata={ + "help": "Logger log level to use on the main node. Possible choices are the log levels as strings: 'debug', 'info', 'warning', 'error' and 'critical', plus a 'passive' level which doesn't set anything and lets the application set the level. Defaults to 'passive'.", + "choices": trainer_log_levels.keys(), + }, + ) + log_level_replica: Optional[str] = field( + default="passive", + metadata={ + "help": "Logger log level to use on replica nodes. Same choices and defaults as ``log_level``", + "choices": trainer_log_levels.keys(), + }, + ) + log_on_each_node: bool = field( + default=True, + metadata={ + "help": "When doing a multinode distributed training, whether to log once per node or just once on the main node." + }, + ) logging_dir: Optional[str] = field(default_factory=default_logdir, metadata={"help": "Tensorboard log dir."}) logging_strategy: IntervalStrategy = field( default="steps", @@ -561,12 +590,6 @@ class TrainingArguments: default=None, metadata={"help": "The path to a folder with a valid checkpoint for your model."}, ) - log_on_each_node: bool = field( - default=True, - metadata={ - "help": "When doing a multinode distributed training, whether to log once per node or just once on the main node." - }, - ) _n_gpu: int = field(init=False, repr=False, default=-1) mp_parameters: str = field( default="", @@ -580,6 +603,8 @@ class TrainingArguments: if env_local_rank != -1 and env_local_rank != self.local_rank: self.local_rank = env_local_rank + self.log_level = trainer_log_levels[self.log_level] + # expand paths, if not os.makedirs("~/bar") will make directory # in the current directory instead of the actual home #  see https://github.com/huggingface/transformers/issues/10628 @@ -889,6 +914,11 @@ class TrainingArguments: else: return self.process_index == 0 + def get_node_log_level(self): + log_level_main_node = logging.INFO if self.log_level == -1 else self.log_level + log_level_replica_node = logging.WARNING if self.log_level_replica == -1 else self.log_level_replica + return log_level_main_node if self.should_log else log_level_replica_node + @property def place_model_on_device(self): """ diff --git a/src/transformers/utils/logging.py b/src/transformers/utils/logging.py index 8e7b592cf0..4abb94aa4e 100644 --- a/src/transformers/utils/logging.py +++ b/src/transformers/utils/logging.py @@ -102,6 +102,10 @@ def _reset_library_root_logger() -> None: _default_handler = None +def get_log_levels_dict(): + return log_levels + + def get_logger(name: Optional[str] = None) -> logging.Logger: """ Return a logger with the specified name. diff --git a/tests/test_trainer.py b/tests/test_trainer.py index 1247e0250d..7e3e02e618 100644 --- a/tests/test_trainer.py +++ b/tests/test_trainer.py @@ -27,12 +27,20 @@ import numpy as np from huggingface_hub import HfApi from requests.exceptions import HTTPError -from transformers import AutoTokenizer, IntervalStrategy, PretrainedConfig, TrainingArguments, is_torch_available +from transformers import ( + AutoTokenizer, + IntervalStrategy, + PretrainedConfig, + TrainingArguments, + is_torch_available, + logging, +) from transformers.file_utils import WEIGHTS_NAME from transformers.testing_utils import ( ENDPOINT_STAGING, PASS, USER, + CaptureLogger, TestCasePlus, get_gpu_count, get_tests_dir, @@ -614,6 +622,29 @@ class TrainerIntegrationTest(TestCasePlus, TrainerIntegrationCommon): self.assertFalse(torch.allclose(trainer.model.b, b)) self.assertGreater(trainer.optimizer.state_dict()["param_groups"][0]["lr"], 0) + def test_log_level(self): + # testing only --log_level (--log_level_replica requires multiple nodes) + logger = logging.get_logger() + log_info_string = "Running training" + + # test with the default log level - should be info and thus log + with CaptureLogger(logger) as cl: + trainer = get_regression_trainer() + trainer.train() + self.assertIn(log_info_string, cl.out) + + # test with low log level - lower than info + with CaptureLogger(logger) as cl: + trainer = get_regression_trainer(log_level="debug") + trainer.train() + self.assertIn(log_info_string, cl.out) + + # test with high log level - should be quiet + with CaptureLogger(logger) as cl: + trainer = get_regression_trainer(log_level="error") + trainer.train() + self.assertNotIn(log_info_string, cl.out) + def test_model_init(self): train_dataset = RegressionDataset() args = TrainingArguments("./regression", learning_rate=0.1)