From 241759101e7104192d01a07fc70432fa02ae8cb7 Mon Sep 17 00:00:00 2001 From: Julien Chaumond Date: Tue, 12 May 2020 21:52:01 -0400 Subject: [PATCH] (v2) Improvements to the wandb integration (#4324) * Improvements to the wandb integration * small reorg + no global necessary * feat(trainer): log epoch and final metrics * Simplify logging a bit * Fixup * Fix crash when just running eval Co-authored-by: Chris Van Pelt Co-authored-by: Boris Dayma --- .../run_language_modeling.py | 2 +- examples/test_examples.py | 2 +- .../token-classification/test_ner_examples.py | 4 +- src/transformers/trainer.py | 116 ++++++++++++------ tests/test_trainer.py | 2 +- 5 files changed, 81 insertions(+), 45 deletions(-) diff --git a/examples/language-modeling/run_language_modeling.py b/examples/language-modeling/run_language_modeling.py index 1034f2dc9e..55f8b9a496 100644 --- a/examples/language-modeling/run_language_modeling.py +++ b/examples/language-modeling/run_language_modeling.py @@ -265,7 +265,7 @@ def main(): eval_output = trainer.evaluate() - perplexity = math.exp(eval_output["loss"]) + perplexity = math.exp(eval_output["eval_loss"]) result = {"perplexity": perplexity} output_eval_file = os.path.join(training_args.output_dir, "eval_results_lm.txt") diff --git a/examples/test_examples.py b/examples/test_examples.py index 84dad0a546..cf1d0efd9d 100644 --- a/examples/test_examples.py +++ b/examples/test_examples.py @@ -72,7 +72,7 @@ class ExamplesTests(unittest.TestCase): """.split() with patch.object(sys, "argv", testargs): result = run_glue.main() - del result["loss"] + del result["eval_loss"] for value in result.values(): self.assertGreaterEqual(value, 0.75) diff --git a/examples/token-classification/test_ner_examples.py b/examples/token-classification/test_ner_examples.py index 7d36f0403a..c7ab00fe76 100644 --- a/examples/token-classification/test_ner_examples.py +++ b/examples/token-classification/test_ner_examples.py @@ -6,7 +6,7 @@ from unittest.mock import patch import run_ner -logging.basicConfig(level=logging.DEBUG) +logging.basicConfig(level=logging.INFO) logger = logging.getLogger() @@ -30,4 +30,4 @@ class ExamplesTests(unittest.TestCase): """.split() with patch.object(sys, "argv", ["run.py"] + testargs): result = run_ner.main() - self.assertLess(result["loss"], 1.5) + self.assertLess(result["eval_loss"], 1.5) diff --git a/src/transformers/trainer.py b/src/transformers/trainer.py index d07a7a8adf..00300279c4 100644 --- a/src/transformers/trainer.py +++ b/src/transformers/trainer.py @@ -61,7 +61,12 @@ def is_tensorboard_available(): try: import wandb - _has_wandb = True + wandb.ensure_configured() + if wandb.api.api_key is None: + _has_wandb = False + wandb.termwarn("W&B installed but not logged in. Run `wandb login` or set the WANDB_API_KEY env variable.") + else: + _has_wandb = False if os.getenv("WANDB_DISABLED") else True except ImportError: _has_wandb = False @@ -114,6 +119,8 @@ class Trainer: prediction_loss_only: bool tb_writer: Optional["SummaryWriter"] = None optimizers: Tuple[torch.optim.Optimizer, torch.optim.lr_scheduler.LambdaLR] = None + global_step: Optional[int] = None + epoch: Optional[float] = None def __init__( self, @@ -154,9 +161,12 @@ class Trainer: logger.warning( "You are instantiating a Trainer but Tensorboard is not installed. You should consider installing it." ) - if not is_wandb_available(): + if is_wandb_available(): + self._setup_wandb() + else: logger.info( - "You are instantiating a Trainer but wandb is not installed. Install it to use Weights & Biases logging." + "You are instantiating a Trainer but W&B is not installed. To use wandb logging, " + "run `pip install wandb; wandb login` see https://docs.wandb.com/huggingface." ) set_seed(self.args.seed) # Create output directory if needed @@ -263,11 +273,25 @@ class Trainer: """ Setup the optional Weights & Biases (`wandb`) integration. - One can override this method to customize the setup if needed. + One can override this method to customize the setup if needed. Find more information at https://docs.wandb.com/huggingface + You can also override the following environment variables: + + Environment: + WANDB_WATCH: + (Optional, ["gradients", "all", "false"]) "gradients" by default, set to "false" to disable gradient logging + or "all" to log gradients and parameters + WANDB_PROJECT: + (Optional): str - "huggingface" by default, set this to a custom string to store results in a different project + WANDB_DISABLED: + (Optional): boolean - defaults to false, set to "true" to disable wandb entirely """ - wandb.init(name=self.args.logging_dir, config=vars(self.args)) + logger.info('Automatic Weights & Biases logging enabled, to disable set os.environ["WANDB_DISABLED"] = "true"') + wandb.init(project=os.getenv("WANDB_PROJECT", "huggingface"), config=vars(self.args)) # keep track of model topology and gradients - wandb.watch(self.model) + if os.getenv("WANDB_WATCH") != "false": + wandb.watch( + self.model, log=os.getenv("WANDB_WATCH", "gradients"), log_freq=max(100, self.args.logging_steps) + ) def num_examples(self, dataloader: Union[DataLoader, "pl.PerDeviceLoader"]) -> int: """ @@ -333,8 +357,6 @@ class Trainer: if self.tb_writer is not None: self.tb_writer.add_text("args", self.args.to_json_string()) self.tb_writer.add_hparams(self.args.to_sanitized_dict(), metric_dict={}) - if is_wandb_available(): - self._setup_wandb() # Train! if is_tpu_available(): @@ -353,25 +375,26 @@ class Trainer: logger.info(" Gradient Accumulation steps = %d", self.args.gradient_accumulation_steps) logger.info(" Total optimization steps = %d", t_total) - global_step = 0 + self.global_step = 0 + self.epoch = 0 epochs_trained = 0 steps_trained_in_current_epoch = 0 # Check if continuing training from a checkpoint if model_path is not None: # set global_step to global_step of last saved checkpoint from model path try: - global_step = int(model_path.split("-")[-1].split("/")[0]) - epochs_trained = global_step // (len(train_dataloader) // self.args.gradient_accumulation_steps) - steps_trained_in_current_epoch = global_step % ( + self.global_step = int(model_path.split("-")[-1].split("/")[0]) + epochs_trained = self.global_step // (len(train_dataloader) // self.args.gradient_accumulation_steps) + steps_trained_in_current_epoch = self.global_step % ( len(train_dataloader) // self.args.gradient_accumulation_steps ) logger.info(" Continuing training from checkpoint, will skip to saved global_step") logger.info(" Continuing training from epoch %d", epochs_trained) - logger.info(" Continuing training from global step %d", global_step) + logger.info(" Continuing training from global step %d", self.global_step) logger.info(" Will skip the first %d steps in the first epoch", steps_trained_in_current_epoch) except ValueError: - global_step = 0 + self.global_step = 0 logger.info(" Starting fine-tuning.") tr_loss = 0.0 @@ -408,34 +431,24 @@ class Trainer: scheduler.step() model.zero_grad() - global_step += 1 + self.global_step += 1 + self.epoch = epoch + (step + 1) / len(epoch_iterator) if self.is_local_master(): - if (self.args.logging_steps > 0 and global_step % self.args.logging_steps == 0) or ( - global_step == 1 and self.args.logging_first_step + if (self.args.logging_steps > 0 and self.global_step % self.args.logging_steps == 0) or ( + self.global_step == 1 and self.args.logging_first_step ): - logs = {} - if self.args.evaluate_during_training: - results = self.evaluate() - for key, value in results.items(): - eval_key = "eval_{}".format(key) - logs[eval_key] = value - - loss_scalar = (tr_loss - logging_loss) / self.args.logging_steps - learning_rate_scalar = scheduler.get_last_lr()[0] - logs["learning_rate"] = learning_rate_scalar - logs["loss"] = loss_scalar + logs: Dict[str, float] = {} + logs["loss"] = (tr_loss - logging_loss) / self.args.logging_steps + logs["learning_rate"] = scheduler.get_last_lr()[0] logging_loss = tr_loss - if self.tb_writer: - for k, v in logs.items(): - self.tb_writer.add_scalar(k, v, global_step) - if is_wandb_available(): - wandb.log(logs, step=global_step) + self._log(logs) - epoch_iterator.write(json.dumps({**logs, **{"step": global_step}})) + if self.args.evaluate_during_training: + self.evaluate() - if self.args.save_steps > 0 and global_step % self.args.save_steps == 0: + if self.args.save_steps > 0 and self.global_step % self.args.save_steps == 0: # In all cases (even distributed/parallel), self.model is always a reference # to the model we want to save. if hasattr(model, "module"): @@ -443,7 +456,9 @@ class Trainer: else: assert model is self.model # Save model checkpoint - output_dir = os.path.join(self.args.output_dir, f"{PREFIX_CHECKPOINT_DIR}-{global_step}") + output_dir = os.path.join( + self.args.output_dir, f"{PREFIX_CHECKPOINT_DIR}-{self.global_step}" + ) self.save_model(output_dir) self._rotate_checkpoints() @@ -451,10 +466,10 @@ class Trainer: torch.save(scheduler.state_dict(), os.path.join(output_dir, "scheduler.pt")) logger.info("Saving optimizer and scheduler states to %s", output_dir) - if self.args.max_steps > 0 and global_step > self.args.max_steps: + if self.args.max_steps > 0 and self.global_step > self.args.max_steps: epoch_iterator.close() break - if self.args.max_steps > 0 and global_step > self.args.max_steps: + if self.args.max_steps > 0 and self.global_step > self.args.max_steps: train_iterator.close() break if self.args.tpu_metrics_debug: @@ -465,7 +480,21 @@ class Trainer: self.tb_writer.close() logger.info("\n\nTraining completed. Do not forget to share your model on huggingface.co/models =)\n\n") - return TrainOutput(global_step, tr_loss / global_step) + return TrainOutput(self.global_step, tr_loss / self.global_step) + + def _log(self, logs: Dict[str, float], iterator: Optional[tqdm] = None) -> None: + if self.epoch is not None: + logs["epoch"] = self.epoch + if self.tb_writer: + for k, v in logs.items(): + self.tb_writer.add_scalar(k, v, self.global_step) + if is_wandb_available(): + wandb.log(logs, step=self.global_step) + output = json.dumps({**logs, **{"step": self.global_step}}) + if iterator is not None: + iterator.write(output) + else: + print(output) def _training_step( self, model: nn.Module, inputs: Dict[str, torch.Tensor], optimizer: torch.optim.Optimizer @@ -582,6 +611,8 @@ class Trainer: output = self._prediction_loop(eval_dataloader, description="Evaluation") + self._log(output.metrics) + if self.args.tpu_metrics_debug: # tpu-comment: Logging debug metrics for PyTorch/XLA (compile, execute times, ops, etc.) xm.master_print(met.metrics_report()) @@ -663,6 +694,11 @@ class Trainer: else: metrics = {} if len(eval_losses) > 0: - metrics["loss"] = np.mean(eval_losses) + metrics["eval_loss"] = np.mean(eval_losses) + + # Prefix all keys with eval_ + for key in list(metrics.keys()): + if not key.startswith("eval_"): + metrics[f"eval_{key}"] = metrics.pop(key) return PredictionOutput(predictions=preds, label_ids=label_ids, metrics=metrics) diff --git a/tests/test_trainer.py b/tests/test_trainer.py index f397c71b96..417ebcb5a6 100644 --- a/tests/test_trainer.py +++ b/tests/test_trainer.py @@ -98,7 +98,7 @@ class TrainerIntegrationTest(unittest.TestCase): training_args = TrainingArguments(output_dir="./examples", no_cuda=True) trainer = Trainer(model=model, args=training_args, eval_dataset=eval_dataset) result = trainer.evaluate() - self.assertLess(result["loss"], 0.2) + self.assertLess(result["eval_loss"], 0.2) def test_trainer_eval_lm(self): MODEL_ID = "distilroberta-base"